xref: /freebsd/sys/dev/pci/pci.c (revision eb6d21b4ca6d668cf89afd99eef7baeafa712197)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
72 #ifdef __HAVE_ACPI
73 #include <contrib/dev/acpica/include/acpi.h>
74 #include "acpi_if.h"
75 #else
76 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
77 #endif
78 
79 static pci_addr_t	pci_mapbase(uint64_t mapreg);
80 static const char	*pci_maptype(uint64_t mapreg);
81 static int		pci_mapsize(uint64_t testval);
82 static int		pci_maprange(uint64_t mapreg);
83 static void		pci_fixancient(pcicfgregs *cfg);
84 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85 
86 static int		pci_porten(device_t dev);
87 static int		pci_memen(device_t dev);
88 static void		pci_assign_interrupt(device_t bus, device_t dev,
89 			    int force_route);
90 static int		pci_add_map(device_t bus, device_t dev, int reg,
91 			    struct resource_list *rl, int force, int prefetch);
92 static int		pci_probe(device_t dev);
93 static int		pci_attach(device_t dev);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_enable_msix(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix(device_t dev, u_int index);
115 static void		pci_unmask_msix(device_t dev, u_int index);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 
/*
 * Kernel object method table for the PCI bus driver.  Maps the generic
 * device/bus interfaces plus the PCI-specific interface (pci_if.m) onto
 * the implementations in this file; bus_generic_* entries delegate to
 * the default newbus behavior.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Table terminator -- must remain last. */
	{ 0, 0 }
};
171 
172 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
173 
174 static devclass_t pci_devclass;
175 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
176 MODULE_VERSION(pci, 1);
177 
178 static char	*pci_vendordata;
179 static size_t	pci_vendordata_size;
180 
181 
/*
 * Table of devices needing special handling, keyed by the combined
 * device/vendor ID dword (vendor in the low 16 bits, e.g. 0x8086 Intel).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;		/* quirk-specific (e.g. map register offset) */
	int	arg2;		/* quirk-specific; unused by current entries */
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* table terminator */
};
225 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of probed PCI devices; pci_generation bumps on changes. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set when a PCIe/PCI-X capability is seen during capability scans. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only for x86 platforms. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
287 
288 /* Find a device_t by bus/slot/function in domain 0 */
289 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: look up in the default PCI domain (0). */
	return (pci_find_dbsf(0, bus, slot, func));
}
296 
297 /* Find a device_t by domain/bus/slot/function */
298 
299 device_t
300 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
301 {
302 	struct pci_devinfo *dinfo;
303 
304 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
305 		if ((dinfo->cfg.domain == domain) &&
306 		    (dinfo->cfg.bus == bus) &&
307 		    (dinfo->cfg.slot == slot) &&
308 		    (dinfo->cfg.func == func)) {
309 			return (dinfo->cfg.dev);
310 		}
311 	}
312 
313 	return (NULL);
314 }
315 
316 /* Find a device_t by vendor/device ID */
317 
318 device_t
319 pci_find_device(uint16_t vendor, uint16_t device)
320 {
321 	struct pci_devinfo *dinfo;
322 
323 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
324 		if ((dinfo->cfg.vendor == vendor) &&
325 		    (dinfo->cfg.device == device)) {
326 			return (dinfo->cfg.dev);
327 		}
328 	}
329 
330 	return (NULL);
331 }
332 
333 static int
334 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
335 {
336 	va_list ap;
337 	int retval;
338 
339 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
340 	    cfg->func);
341 	va_start(ap, fmt);
342 	retval += vprintf(fmt, ap);
343 	va_end(ap);
344 	return (retval);
345 }
346 
347 /* return base address of memory or port map */
348 
349 static pci_addr_t
350 pci_mapbase(uint64_t mapreg)
351 {
352 
353 	if (PCI_BAR_MEM(mapreg))
354 		return (mapreg & PCIM_BAR_MEM_BASE);
355 	else
356 		return (mapreg & PCIM_BAR_IO_BASE);
357 }
358 
359 /* return map type of memory or port map */
360 
361 static const char *
362 pci_maptype(uint64_t mapreg)
363 {
364 
365 	if (PCI_BAR_IO(mapreg))
366 		return ("I/O Port");
367 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
368 		return ("Prefetchable Memory");
369 	return ("Memory");
370 }
371 
372 /* return log2 of map size decoded for memory or port map */
373 
374 static int
375 pci_mapsize(uint64_t testval)
376 {
377 	int ln2size;
378 
379 	testval = pci_mapbase(testval);
380 	ln2size = 0;
381 	if (testval != 0) {
382 		while ((testval & 1) == 0)
383 		{
384 			ln2size++;
385 			testval >>= 1;
386 		}
387 	}
388 	return (ln2size);
389 }
390 
391 /* return log2 of address range supported by map register */
392 
393 static int
394 pci_maprange(uint64_t mapreg)
395 {
396 	int ln2range = 0;
397 
398 	if (PCI_BAR_IO(mapreg))
399 		ln2range = 32;
400 	else
401 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
402 		case PCIM_BAR_MEM_32:
403 			ln2range = 32;
404 			break;
405 		case PCIM_BAR_MEM_1MB:
406 			ln2range = 20;
407 			break;
408 		case PCIM_BAR_MEM_64:
409 			ln2range = 64;
410 			break;
411 		}
412 	return (ln2range);
413 }
414 
415 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
416 
417 static void
418 pci_fixancient(pcicfgregs *cfg)
419 {
420 	if (cfg->hdrtype != 0)
421 		return;
422 
423 	/* PCI to PCI bridges use header type 1 */
424 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
425 		cfg->hdrtype = 1;
426 }
427 
428 /* extract header type specific config data */
429 
/*
 * Fill in the header-type specific fields of the config snapshot:
 * subsystem vendor/device IDs (types 0 and 2) and the number of BARs.
 * Type 0 is a plain device, type 1 a PCI-PCI bridge, type 2 CardBus.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		/* Bridges have no subsystem IDs at fixed header offsets. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
451 
/*
 * Read the configuration header of function d:b:s:f into a freshly
 * allocated pci_devinfo and link it onto the global device list.
 * 'size' lets callers allocate a larger structure that embeds
 * pci_devinfo as its first member.  Returns NULL if no function is
 * present at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device dword means no function present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Only walk the capability list if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Duplicate key fields into the entry's conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
526 
/*
 * Walk the device's capability list and record the capabilities this
 * driver cares about (power management, HT MSI mapping, MSI, MSI-X,
 * VPD, subvendor IDs, PCI-X/PCIe presence) into the config snapshot.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Supported message count is encoded as log2. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table/PBA registers hold a BIR plus an offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Remember the location; VPD is read lazily. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
660 
661 /*
662  * PCI Vital Product Data
663  */
664 
665 #define	PCI_VPD_TIMEOUT		1000000
666 
667 static int
668 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
669 {
670 	int count = PCI_VPD_TIMEOUT;
671 
672 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
673 
674 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
675 
676 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
677 		if (--count < 0)
678 			return (ENXIO);
679 		DELAY(1);	/* limit looping */
680 	}
681 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
682 
683 	return (0);
684 }
685 
#if 0
/*
 * Counterpart of pci_read_vpd_reg(): write one 4-byte aligned VPD
 * dword and poll until the device clears bit 15 of the address
 * register to signal completion.  Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
705 
706 #undef PCI_VPD_TIMEOUT
707 
/* Cursor state for sequentially reading a device's VPD a byte at a time. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config-space access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword read, little-endian decoded */
	int		bytesinval;	/* unconsumed bytes remaining in 'val' */
	int		off;		/* next VPD byte address to fetch */
	uint8_t		cksum;		/* running byte sum; 0 at "RV" = valid */
};
716 
717 static int
718 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
719 {
720 	uint32_t reg;
721 	uint8_t byte;
722 
723 	if (vrs->bytesinval == 0) {
724 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
725 			return (ENXIO);
726 		vrs->val = le32toh(reg);
727 		vrs->off += 4;
728 		byte = vrs->val & 0xff;
729 		vrs->bytesinval = 3;
730 	} else {
731 		vrs->val = vrs->val >> 8;
732 		byte = vrs->val & 0xff;
733 		vrs->bytesinval--;
734 	}
735 
736 	vrs->cksum += byte;
737 	*data = byte;
738 	return (0);
739 }
740 
/*
 * Parse the device's Vital Product Data into cfg->vpd.  Implemented as
 * a byte-driven state machine:
 *   state 0: resource item header (small or large form)
 *   state 1: identifier string bytes
 *   state 2/3: VPD-R keyword header / value bytes
 *   state 4: skip bytes of an item
 *   state 5/6: VPD-W keyword header / value bytes
 *   state -1: normal termination; state -2: read (I/O) error
 * On checksum failure the read-only data is discarded; on I/O error
 * everything parsed so far is discarded.  Sets vpd_cached either way.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD cannot exceed the 0x7f*4 byte window. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the keyword array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" holds the checksum byte; sum must be zero. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the entries actually used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the attempt complete so callers do not retry endlessly. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1020 
1021 int
1022 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1023 {
1024 	struct pci_devinfo *dinfo = device_get_ivars(child);
1025 	pcicfgregs *cfg = &dinfo->cfg;
1026 
1027 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1028 		pci_read_vpd(device_get_parent(dev), cfg);
1029 
1030 	*identptr = cfg->vpd.vpd_ident;
1031 
1032 	if (*identptr == NULL)
1033 		return (ENXIO);
1034 
1035 	return (0);
1036 }
1037 
1038 int
1039 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1040 	const char **vptr)
1041 {
1042 	struct pci_devinfo *dinfo = device_get_ivars(child);
1043 	pcicfgregs *cfg = &dinfo->cfg;
1044 	int i;
1045 
1046 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1047 		pci_read_vpd(device_get_parent(dev), cfg);
1048 
1049 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1050 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1051 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1052 			*vptr = cfg->vpd.vpd_ros[i].value;
1053 		}
1054 
1055 	if (i != cfg->vpd.vpd_rocnt)
1056 		return (0);
1057 
1058 	*vptr = NULL;
1059 	return (ENXIO);
1060 }
1061 
1062 /*
1063  * Find the requested extended capability and return the offset in
1064  * configuration space via the pointer provided. The function returns
1065  * 0 on success and error code otherwise.
1066  */
1067 int
1068 pci_find_extcap_method(device_t dev, device_t child, int capability,
1069     int *capreg)
1070 {
1071 	struct pci_devinfo *dinfo = device_get_ivars(child);
1072 	pcicfgregs *cfg = &dinfo->cfg;
1073 	u_int32_t status;
1074 	u_int8_t ptr;
1075 
1076 	/*
1077 	 * Check the CAP_LIST bit of the PCI status register first.
1078 	 */
1079 	status = pci_read_config(child, PCIR_STATUS, 2);
1080 	if (!(status & PCIM_STATUS_CAPPRESENT))
1081 		return (ENXIO);
1082 
1083 	/*
1084 	 * Determine the start pointer of the capabilities list.
1085 	 */
1086 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1087 	case 0:
1088 	case 1:
1089 		ptr = PCIR_CAP_PTR;
1090 		break;
1091 	case 2:
1092 		ptr = PCIR_CAP_PTR_2;
1093 		break;
1094 	default:
1095 		/* XXX: panic? */
1096 		return (ENXIO);		/* no extended capabilities support */
1097 	}
1098 	ptr = pci_read_config(child, ptr, 1);
1099 
1100 	/*
1101 	 * Traverse the capabilities list.
1102 	 */
1103 	while (ptr != 0) {
1104 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1105 			if (capreg != NULL)
1106 				*capreg = ptr;
1107 			return (0);
1108 		}
1109 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1110 	}
1111 
1112 	return (ENOENT);
1113 }
1114 
1115 /*
1116  * Support for MSI-X message interrupts.
1117  */
1118 void
1119 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1120 {
1121 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1122 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1123 	uint32_t offset;
1124 
1125 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1126 	offset = msix->msix_table_offset + index * 16;
1127 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1128 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1129 	bus_write_4(msix->msix_table_res, offset + 8, data);
1130 
1131 	/* Enable MSI -> HT mapping. */
1132 	pci_ht_map_msi(dev, address);
1133 }
1134 
1135 void
1136 pci_mask_msix(device_t dev, u_int index)
1137 {
1138 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1139 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1140 	uint32_t offset, val;
1141 
1142 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1143 	offset = msix->msix_table_offset + index * 16 + 12;
1144 	val = bus_read_4(msix->msix_table_res, offset);
1145 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1146 		val |= PCIM_MSIX_VCTRL_MASK;
1147 		bus_write_4(msix->msix_table_res, offset, val);
1148 	}
1149 }
1150 
1151 void
1152 pci_unmask_msix(device_t dev, u_int index)
1153 {
1154 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1155 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1156 	uint32_t offset, val;
1157 
1158 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1159 	offset = msix->msix_table_offset + index * 16 + 12;
1160 	val = bus_read_4(msix->msix_table_res, offset);
1161 	if (val & PCIM_MSIX_VCTRL_MASK) {
1162 		val &= ~PCIM_MSIX_VCTRL_MASK;
1163 		bus_write_4(msix->msix_table_res, offset, val);
1164 	}
1165 }
1166 
1167 int
1168 pci_pending_msix(device_t dev, u_int index)
1169 {
1170 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1171 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1172 	uint32_t offset, bit;
1173 
1174 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1175 	offset = msix->msix_pba_offset + (index / 32) * 4;
1176 	bit = 1 << index % 32;
1177 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1178 }
1179 
1180 /*
1181  * Restore MSI-X registers and table during resume.  If MSI-X is
1182  * enabled then walk the virtual table to restore the actual MSI-X
1183  * table.
1184  */
1185 static void
1186 pci_resume_msix(device_t dev)
1187 {
1188 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1189 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1190 	struct msix_table_entry *mte;
1191 	struct msix_vector *mv;
1192 	int i;
1193 
1194 	if (msix->msix_alloc > 0) {
1195 		/* First, mask all vectors. */
1196 		for (i = 0; i < msix->msix_msgnum; i++)
1197 			pci_mask_msix(dev, i);
1198 
1199 		/* Second, program any messages with at least one handler. */
1200 		for (i = 0; i < msix->msix_table_len; i++) {
1201 			mte = &msix->msix_table[i];
1202 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1203 				continue;
1204 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1205 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1206 			pci_unmask_msix(dev, i);
1207 		}
1208 	}
1209 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1210 	    msix->msix_ctrl, 2);
1211 }
1212 
1213 /*
1214  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1215  * returned in *count.  After this function returns, each message will be
1216  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1217  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory resources
	 * backing the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * If the PBA shares the table BAR, 'rle' still refers to the
	 * table resource here, so both point at the same resource.
	 */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to min(requested, supported) messages, stop on error. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages we really got. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.  The
	 * virtual table initially maps message i to vector i + 1
	 * (vector numbers are 1-based; see pci_remap_msix_method()).
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1349 
1350 /*
1351  * By default, pci_alloc_msix() will assign the allocated IRQ
1352  * resources consecutively to the first N messages in the MSI-X table.
1353  * However, device drivers may want to use different layouts if they
1354  * either receive fewer messages than they asked for, or they wish to
1355  * populate the MSI-X table sparsely.  This method allows the driver
1356  * to specify what layout it wants.  It must be called after a
1357  * successful pci_alloc_msix() but before any of the associated
1358  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1359  *
1360  * The 'vectors' array contains 'count' message vectors.  The array
1361  * maps directly to the MSI-X table in that index 0 in the array
1362  * specifies the vector for the first message in the MSI-X table, etc.
1363  * The vector value in each array index can either be 0 to indicate
1364  * that no vector should be assigned to a message slot, or it can be a
1365  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1367  * vector (IRQ) to be used for the corresponding message.
1368  *
1369  * On successful return, each message with a non-zero vector will have
1370  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1371  * 1.  Additionally, if any of the IRQs allocated via the previous
1372  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1373  * will be freed back to the system automatically.
1374  *
1375  * For example, suppose a driver has a MSI-X table with 6 messages and
1376  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1377  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1378  * C.  After the call to pci_alloc_msix(), the device will be setup to
1379  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1381  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1382  * be freed back to the system.  This device will also have valid
1383  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1384  *
1385  * In any case, the SYS_RES_IRQ rid X will always map to the message
1386  * at MSI-X table index X - 1 and will only be valid if a vector is
1387  * assigned to that table entry.
1388  */
1389 int
1390 pci_remap_msix_method(device_t dev, device_t child, int count,
1391     const u_int *vectors)
1392 {
1393 	struct pci_devinfo *dinfo = device_get_ivars(child);
1394 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1395 	struct resource_list_entry *rle;
1396 	int i, irq, j, *used;
1397 
1398 	/*
1399 	 * Have to have at least one message in the table but the
1400 	 * table can't be bigger than the actual MSI-X table in the
1401 	 * device.
1402 	 */
1403 	if (count == 0 || count > msix->msix_msgnum)
1404 		return (EINVAL);
1405 
1406 	/* Sanity check the vectors. */
1407 	for (i = 0; i < count; i++)
1408 		if (vectors[i] > msix->msix_alloc)
1409 			return (EINVAL);
1410 
1411 	/*
1412 	 * Make sure there aren't any holes in the vectors to be used.
1413 	 * It's a big pain to support it, and it doesn't really make
1414 	 * sense anyway.  Also, at least one vector must be used.
1415 	 */
1416 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1417 	    M_ZERO);
1418 	for (i = 0; i < count; i++)
1419 		if (vectors[i] != 0)
1420 			used[vectors[i] - 1] = 1;
1421 	for (i = 0; i < msix->msix_alloc - 1; i++)
1422 		if (used[i] == 0 && used[i + 1] == 1) {
1423 			free(used, M_DEVBUF);
1424 			return (EINVAL);
1425 		}
1426 	if (used[0] != 1) {
1427 		free(used, M_DEVBUF);
1428 		return (EINVAL);
1429 	}
1430 
1431 	/* Make sure none of the resources are allocated. */
1432 	for (i = 0; i < msix->msix_table_len; i++) {
1433 		if (msix->msix_table[i].mte_vector == 0)
1434 			continue;
1435 		if (msix->msix_table[i].mte_handlers > 0)
1436 			return (EBUSY);
1437 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1438 		KASSERT(rle != NULL, ("missing resource"));
1439 		if (rle->res != NULL)
1440 			return (EBUSY);
1441 	}
1442 
1443 	/* Free the existing resource list entries. */
1444 	for (i = 0; i < msix->msix_table_len; i++) {
1445 		if (msix->msix_table[i].mte_vector == 0)
1446 			continue;
1447 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1448 	}
1449 
1450 	/*
1451 	 * Build the new virtual table keeping track of which vectors are
1452 	 * used.
1453 	 */
1454 	free(msix->msix_table, M_DEVBUF);
1455 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1456 	    M_DEVBUF, M_WAITOK | M_ZERO);
1457 	for (i = 0; i < count; i++)
1458 		msix->msix_table[i].mte_vector = vectors[i];
1459 	msix->msix_table_len = count;
1460 
1461 	/* Free any unused IRQs and resize the vectors array if necessary. */
1462 	j = msix->msix_alloc - 1;
1463 	if (used[j] == 0) {
1464 		struct msix_vector *vec;
1465 
1466 		while (used[j] == 0) {
1467 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1468 			    msix->msix_vectors[j].mv_irq);
1469 			j--;
1470 		}
1471 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1472 		    M_WAITOK);
1473 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1474 		    (j + 1));
1475 		free(msix->msix_vectors, M_DEVBUF);
1476 		msix->msix_vectors = vec;
1477 		msix->msix_alloc = j + 1;
1478 	}
1479 	free(used, M_DEVBUF);
1480 
1481 	/* Map the IRQs onto the rids. */
1482 	for (i = 0; i < count; i++) {
1483 		if (vectors[i] == 0)
1484 			continue;
1485 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1486 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1487 		    irq, 1);
1488 	}
1489 
1490 	if (bootverbose) {
1491 		device_printf(child, "Remapped MSI-X IRQs as: ");
1492 		for (i = 0; i < count; i++) {
1493 			if (i != 0)
1494 				printf(", ");
1495 			if (vectors[i] == 0)
1496 				printf("---");
1497 			else
1498 				printf("%d",
1499 				    msix->msix_vectors[vectors[i]].mv_irq);
1500 		}
1501 		printf("\n");
1502 	}
1503 
1504 	return (0);
1505 }
1506 
1507 static int
1508 pci_release_msix(device_t dev, device_t child)
1509 {
1510 	struct pci_devinfo *dinfo = device_get_ivars(child);
1511 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1512 	struct resource_list_entry *rle;
1513 	int i;
1514 
1515 	/* Do we have any messages to release? */
1516 	if (msix->msix_alloc == 0)
1517 		return (ENODEV);
1518 
1519 	/* Make sure none of the resources are allocated. */
1520 	for (i = 0; i < msix->msix_table_len; i++) {
1521 		if (msix->msix_table[i].mte_vector == 0)
1522 			continue;
1523 		if (msix->msix_table[i].mte_handlers > 0)
1524 			return (EBUSY);
1525 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1526 		KASSERT(rle != NULL, ("missing resource"));
1527 		if (rle->res != NULL)
1528 			return (EBUSY);
1529 	}
1530 
1531 	/* Update control register to disable MSI-X. */
1532 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1533 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1534 	    msix->msix_ctrl, 2);
1535 
1536 	/* Free the resource list entries. */
1537 	for (i = 0; i < msix->msix_table_len; i++) {
1538 		if (msix->msix_table[i].mte_vector == 0)
1539 			continue;
1540 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1541 	}
1542 	free(msix->msix_table, M_DEVBUF);
1543 	msix->msix_table_len = 0;
1544 
1545 	/* Release the IRQs. */
1546 	for (i = 0; i < msix->msix_alloc; i++)
1547 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1548 		    msix->msix_vectors[i].mv_irq);
1549 	free(msix->msix_vectors, M_DEVBUF);
1550 	msix->msix_alloc = 0;
1551 	return (0);
1552 }
1553 
1554 /*
1555  * Return the max supported MSI-X messages this device supports.
1556  * Basically, assuming the MD code can alloc messages, this function
1557  * should return the maximum value that pci_alloc_msix() can return.
1558  * Thus, it is subject to the tunables, etc.
1559  */
1560 int
1561 pci_msix_count_method(device_t dev, device_t child)
1562 {
1563 	struct pci_devinfo *dinfo = device_get_ivars(child);
1564 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1565 
1566 	if (pci_do_msix && msix->msix_location != 0)
1567 		return (msix->msix_msgnum);
1568 	return (0);
1569 }
1570 
1571 /*
1572  * HyperTransport MSI mapping control
1573  */
1574 void
1575 pci_ht_map_msi(device_t dev, uint64_t addr)
1576 {
1577 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1578 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1579 
1580 	if (!ht->ht_msimap)
1581 		return;
1582 
1583 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1584 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1585 		/* Enable MSI -> HT mapping. */
1586 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1587 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1588 		    ht->ht_msictrl, 2);
1589 	}
1590 
1591 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1592 		/* Disable MSI -> HT mapping. */
1593 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1594 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1595 		    ht->ht_msictrl, 2);
1596 	}
1597 }
1598 
1599 /*
1600  * Support for MSI message signalled interrupts.
1601  */
1602 void
1603 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1604 {
1605 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1606 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1607 
1608 	/* Write data and address values. */
1609 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1610 	    address & 0xffffffff, 4);
1611 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1612 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1613 		    address >> 32, 4);
1614 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1615 		    data, 2);
1616 	} else
1617 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1618 		    2);
1619 
1620 	/* Enable MSI in the control register. */
1621 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1622 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1623 	    2);
1624 
1625 	/* Enable MSI -> HT mapping. */
1626 	pci_ht_map_msi(dev, address);
1627 }
1628 
1629 void
1630 pci_disable_msi(device_t dev)
1631 {
1632 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1633 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1634 
1635 	/* Disable MSI -> HT mapping. */
1636 	pci_ht_map_msi(dev, 0);
1637 
1638 	/* Disable MSI in the control register. */
1639 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1640 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1641 	    2);
1642 }
1643 
1644 /*
1645  * Restore MSI registers during resume.  If MSI is enabled then
1646  * restore the data and address registers in addition to the control
1647  * register.
1648  */
1649 static void
1650 pci_resume_msi(device_t dev)
1651 {
1652 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1653 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1654 	uint64_t address;
1655 	uint16_t data;
1656 
1657 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1658 		address = msi->msi_addr;
1659 		data = msi->msi_data;
1660 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1661 		    address & 0xffffffff, 4);
1662 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1663 			pci_write_config(dev, msi->msi_location +
1664 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1665 			pci_write_config(dev, msi->msi_location +
1666 			    PCIR_MSI_DATA_64BIT, data, 2);
1667 		} else
1668 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1669 			    data, 2);
1670 	}
1671 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1672 	    2);
1673 }
1674 
/*
 * Reprogram the address/data pair of the MSI or MSI-X message that is
 * currently routed to 'irq'.  NOTE(review): presumably called after the
 * interrupt has been rerouted by the platform code — confirm against
 * callers.  Returns 0 on success, ENOENT if the IRQ is not one of ours.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable MSI while updating the registers. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/*
					 * Only update table slots bound to
					 * this vector (mte_vector is the
					 * 1-based vector index) that have
					 * at least one active handler.
					 */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1750 
1751 /*
1752  * Returns true if the specified device is blacklisted because MSI
1753  * doesn't work.
1754  */
1755 int
1756 pci_msi_device_blacklisted(device_t dev)
1757 {
1758 	struct pci_quirk *q;
1759 
1760 	if (!pci_honor_msi_blacklist)
1761 		return (0);
1762 
1763 	for (q = &pci_quirks[0]; q->devid; q++) {
1764 		if (q->devid == pci_get_devid(dev) &&
1765 		    q->type == PCI_QUIRK_DISABLE_MSI)
1766 			return (1);
1767 	}
1768 	return (0);
1769 }
1770 
1771 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1773  * we just check for blacklisted chipsets as represented by the
1774  * host-PCI bridge at device 0:0:0.  In the future, it may become
1775  * necessary to check other system attributes, such as the kenv values
1776  * that give the motherboard manufacturer and model number.
1777  */
1778 static int
1779 pci_msi_blacklisted(void)
1780 {
1781 	device_t dev;
1782 
1783 	if (!pci_honor_msi_blacklist)
1784 		return (0);
1785 
1786 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1787 	if (!(pcie_chipset || pcix_chipset))
1788 		return (1);
1789 
1790 	dev = pci_find_bsf(0, 0, 0);
1791 	if (dev != NULL)
1792 		return (pci_msi_device_blacklisted(dev));
1793 	return (0);
1794 }
1795 
1796 /*
1797  * Attempt to allocate *count MSI messages.  The actual number allocated is
1798  * returned in *count.  After this function returns, each message will be
1799  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1800  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages (MSI's architectural limit). */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Try to allocate the requested number of messages, halving the
	 * request on each failure (staying a power of 2) until a single
	 * message fails, in which case the error is returned.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes log2 of the message count in bits 6:4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1919 
1920 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  Snapshot the
	 * IRQ numbers into irqs[] along the way so they can be handed
	 * back to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1968 
1969 /*
1970  * Return the max supported MSI messages this device supports.
1971  * Basically, assuming the MD code can alloc messages, this function
1972  * should return the maximum value that pci_alloc_msi() can return.
1973  * Thus, it is subject to the tunables, etc.
1974  */
1975 int
1976 pci_msi_count_method(device_t dev, device_t child)
1977 {
1978 	struct pci_devinfo *dinfo = device_get_ivars(child);
1979 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1980 
1981 	if (pci_do_msi && msi->msi_location != 0)
1982 		return (msi->msi_msgnum);
1983 	return (0);
1984 }
1985 
1986 /* free pcicfgregs structure and all depending data structures */
1987 
1988 int
1989 pci_freecfg(struct pci_devinfo *dinfo)
1990 {
1991 	struct devlist *devlist_head;
1992 	int i;
1993 
1994 	devlist_head = &pci_devq;
1995 
1996 	if (dinfo->cfg.vpd.vpd_reg) {
1997 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1998 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1999 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2000 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2001 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2002 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2003 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2004 	}
2005 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2006 	free(dinfo, M_DEVBUF);
2007 
2008 	/* increment the generation count */
2009 	pci_generation++;
2010 
2011 	/* we're losing one device */
2012 	pci_numdevs--;
2013 	return (0);
2014 }
2015 
2016 /*
2017  * PCI power manangement
2018  */
2019 int
2020 pci_set_powerstate_method(device_t dev, device_t child, int state)
2021 {
2022 	struct pci_devinfo *dinfo = device_get_ivars(child);
2023 	pcicfgregs *cfg = &dinfo->cfg;
2024 	uint16_t status;
2025 	int result, oldstate, highest, delay;
2026 
2027 	if (cfg->pp.pp_cap == 0)
2028 		return (EOPNOTSUPP);
2029 
2030 	/*
2031 	 * Optimize a no state change request away.  While it would be OK to
2032 	 * write to the hardware in theory, some devices have shown odd
2033 	 * behavior when going from D3 -> D3.
2034 	 */
2035 	oldstate = pci_get_powerstate(child);
2036 	if (oldstate == state)
2037 		return (0);
2038 
2039 	/*
2040 	 * The PCI power management specification states that after a state
2041 	 * transition between PCI power states, system software must
2042 	 * guarantee a minimal delay before the function accesses the device.
2043 	 * Compute the worst case delay that we need to guarantee before we
2044 	 * access the device.  Many devices will be responsive much more
2045 	 * quickly than this delay, but there are some that don't respond
2046 	 * instantly to state changes.  Transitions to/from D3 state require
2047 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2048 	 * is done below with DELAY rather than a sleeper function because
2049 	 * this function can be called from contexts where we cannot sleep.
2050 	 */
2051 	highest = (oldstate > state) ? oldstate : state;
2052 	if (highest == PCI_POWERSTATE_D3)
2053 	    delay = 10000;
2054 	else if (highest == PCI_POWERSTATE_D2)
2055 	    delay = 200;
2056 	else
2057 	    delay = 0;
2058 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2059 	    & ~PCIM_PSTAT_DMASK;
2060 	result = 0;
2061 	switch (state) {
2062 	case PCI_POWERSTATE_D0:
2063 		status |= PCIM_PSTAT_D0;
2064 		break;
2065 	case PCI_POWERSTATE_D1:
2066 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2067 			return (EOPNOTSUPP);
2068 		status |= PCIM_PSTAT_D1;
2069 		break;
2070 	case PCI_POWERSTATE_D2:
2071 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2072 			return (EOPNOTSUPP);
2073 		status |= PCIM_PSTAT_D2;
2074 		break;
2075 	case PCI_POWERSTATE_D3:
2076 		status |= PCIM_PSTAT_D3;
2077 		break;
2078 	default:
2079 		return (EINVAL);
2080 	}
2081 
2082 	if (bootverbose)
2083 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2084 		    state);
2085 
2086 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2087 	if (delay)
2088 		DELAY(delay);
2089 	return (0);
2090 }
2091 
2092 int
2093 pci_get_powerstate_method(device_t dev, device_t child)
2094 {
2095 	struct pci_devinfo *dinfo = device_get_ivars(child);
2096 	pcicfgregs *cfg = &dinfo->cfg;
2097 	uint16_t status;
2098 	int result;
2099 
2100 	if (cfg->pp.pp_cap != 0) {
2101 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2102 		switch (status & PCIM_PSTAT_DMASK) {
2103 		case PCIM_PSTAT_D0:
2104 			result = PCI_POWERSTATE_D0;
2105 			break;
2106 		case PCIM_PSTAT_D1:
2107 			result = PCI_POWERSTATE_D1;
2108 			break;
2109 		case PCIM_PSTAT_D2:
2110 			result = PCI_POWERSTATE_D2;
2111 			break;
2112 		case PCIM_PSTAT_D3:
2113 			result = PCI_POWERSTATE_D3;
2114 			break;
2115 		default:
2116 			result = PCI_POWERSTATE_UNKNOWN;
2117 			break;
2118 		}
2119 	} else {
2120 		/* No support, device is always at D0 */
2121 		result = PCI_POWERSTATE_D0;
2122 	}
2123 	return (result);
2124 }
2125 
2126 /*
2127  * Some convenience functions for PCI device drivers.
2128  */
2129 
2130 static __inline void
2131 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2132 {
2133 	uint16_t	command;
2134 
2135 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2136 	command |= bit;
2137 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2138 }
2139 
2140 static __inline void
2141 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2142 {
2143 	uint16_t	command;
2144 
2145 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2146 	command &= ~bit;
2147 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2148 }
2149 
2150 int
2151 pci_enable_busmaster_method(device_t dev, device_t child)
2152 {
2153 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2154 	return (0);
2155 }
2156 
2157 int
2158 pci_disable_busmaster_method(device_t dev, device_t child)
2159 {
2160 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2161 	return (0);
2162 }
2163 
2164 int
2165 pci_enable_io_method(device_t dev, device_t child, int space)
2166 {
2167 	uint16_t bit;
2168 
2169 	switch(space) {
2170 	case SYS_RES_IOPORT:
2171 		bit = PCIM_CMD_PORTEN;
2172 		break;
2173 	case SYS_RES_MEMORY:
2174 		bit = PCIM_CMD_MEMEN;
2175 		break;
2176 	default:
2177 		return (EINVAL);
2178 	}
2179 	pci_set_command_bit(dev, child, bit);
2180 	return (0);
2181 }
2182 
2183 int
2184 pci_disable_io_method(device_t dev, device_t child, int space)
2185 {
2186 	uint16_t bit;
2187 
2188 	switch(space) {
2189 	case SYS_RES_IOPORT:
2190 		bit = PCIM_CMD_PORTEN;
2191 		break;
2192 	case SYS_RES_MEMORY:
2193 		bit = PCIM_CMD_MEMEN;
2194 		break;
2195 	default:
2196 		return (EINVAL);
2197 	}
2198 	pci_clear_command_bit(dev, child, bit);
2199 	return (0);
2200 }
2201 
2202 /*
2203  * New style pci driver.  Parent device is either a pci-host-bridge or a
2204  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2205  */
2206 
/*
 * When booting verbose, dump a summary of a newly found device's config
 * header (IDs, location, class, command/status, timing registers) and
 * of its power-management, MSI and MSI-X capabilities if present.
 * No-op unless bootverbose is set.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* lattimer is scaled by 30 ns, mingnt/maxlat by 250 ns. */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Show supported D-states and the current one. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2263 
2264 static int
2265 pci_porten(device_t dev)
2266 {
2267 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2268 }
2269 
2270 static int
2271 pci_memen(device_t dev)
2272 {
2273 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2274 }
2275 
/*
 * Read a BAR's current value and probe its size by writing all 1's.
 * On return *mapp holds the original BAR contents (both dwords for a
 * 64-bit BAR) and *testvalp holds the sizing value read back after the
 * all-1's write.  Decoding is disabled around the probe, and the
 * original BAR contents are restored before returning, so the device
 * is addressable again when this function completes.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* 64-bit BARs span two dwords; merge in the upper half. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2322 
2323 static void
2324 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2325 {
2326 	pci_addr_t map;
2327 	int ln2range;
2328 
2329 	map = pci_read_config(dev, reg, 4);
2330 	ln2range = pci_maprange(map);
2331 	pci_write_config(dev, reg, base, 4);
2332 	if (ln2range == 64)
2333 		pci_write_config(dev, reg + 4, base >> 32, 4);
2334 }
2335 
2336 /*
2337  * Add a resource based on a pci map register. Return 1 if the map
2338  * register is a 32bit map register or 2 if it is a 64bit register.
2339  */
2340 static int
2341 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2342     int force, int prefetch)
2343 {
2344 	pci_addr_t base, map, testval;
2345 	pci_addr_t start, end, count;
2346 	int barlen, basezero, maprange, mapsize, type;
2347 	uint16_t cmd;
2348 	struct resource *res;
2349 
2350 	pci_read_bar(dev, reg, &map, &testval);
2351 	if (PCI_BAR_MEM(map)) {
2352 		type = SYS_RES_MEMORY;
2353 		if (map & PCIM_BAR_MEM_PREFETCH)
2354 			prefetch = 1;
2355 	} else
2356 		type = SYS_RES_IOPORT;
2357 	mapsize = pci_mapsize(testval);
2358 	base = pci_mapbase(map);
2359 #ifdef __PCI_BAR_ZERO_VALID
2360 	basezero = 0;
2361 #else
2362 	basezero = base == 0;
2363 #endif
2364 	maprange = pci_maprange(map);
2365 	barlen = maprange == 64 ? 2 : 1;
2366 
2367 	/*
2368 	 * For I/O registers, if bottom bit is set, and the next bit up
2369 	 * isn't clear, we know we have a BAR that doesn't conform to the
2370 	 * spec, so ignore it.  Also, sanity check the size of the data
2371 	 * areas to the type of memory involved.  Memory must be at least
2372 	 * 16 bytes in size, while I/O ranges must be at least 4.
2373 	 */
2374 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2375 		return (barlen);
2376 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2377 	    (type == SYS_RES_IOPORT && mapsize < 2))
2378 		return (barlen);
2379 
2380 	if (bootverbose) {
2381 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2382 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2383 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2384 			printf(", port disabled\n");
2385 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2386 			printf(", memory disabled\n");
2387 		else
2388 			printf(", enabled\n");
2389 	}
2390 
2391 	/*
2392 	 * If base is 0, then we have problems if this architecture does
2393 	 * not allow that.  It is best to ignore such entries for the
2394 	 * moment.  These will be allocated later if the driver specifically
2395 	 * requests them.  However, some removable busses look better when
2396 	 * all resources are allocated, so allow '0' to be overriden.
2397 	 *
2398 	 * Similarly treat maps whose values is the same as the test value
2399 	 * read back.  These maps have had all f's written to them by the
2400 	 * BIOS in an attempt to disable the resources.
2401 	 */
2402 	if (!force && (basezero || map == testval))
2403 		return (barlen);
2404 	if ((u_long)base != base) {
2405 		device_printf(bus,
2406 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2407 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2408 		    pci_get_function(dev), reg);
2409 		return (barlen);
2410 	}
2411 
2412 	/*
2413 	 * This code theoretically does the right thing, but has
2414 	 * undesirable side effects in some cases where peripherals
2415 	 * respond oddly to having these bits enabled.  Let the user
2416 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2417 	 * default).
2418 	 */
2419 	if (pci_enable_io_modes) {
2420 		/* Turn on resources that have been left off by a lazy BIOS */
2421 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2422 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2423 			cmd |= PCIM_CMD_PORTEN;
2424 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2425 		}
2426 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2427 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2428 			cmd |= PCIM_CMD_MEMEN;
2429 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2430 		}
2431 	} else {
2432 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2433 			return (barlen);
2434 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2435 			return (barlen);
2436 	}
2437 
2438 	count = 1 << mapsize;
2439 	if (basezero || base == pci_mapbase(testval)) {
2440 		start = 0;	/* Let the parent decide. */
2441 		end = ~0ULL;
2442 	} else {
2443 		start = base;
2444 		end = base + (1 << mapsize) - 1;
2445 	}
2446 	resource_list_add(rl, type, reg, start, end, count);
2447 
2448 	/*
2449 	 * Try to allocate the resource for this BAR from our parent
2450 	 * so that this resource range is already reserved.  The
2451 	 * driver for this device will later inherit this resource in
2452 	 * pci_alloc_resource().
2453 	 */
2454 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2455 	    prefetch ? RF_PREFETCHABLE : 0);
2456 	if (res == NULL) {
2457 		/*
2458 		 * If the allocation fails, clear the BAR and delete
2459 		 * the resource list entry to force
2460 		 * pci_alloc_resource() to allocate resources from the
2461 		 * parent.
2462 		 */
2463 		resource_list_delete(rl, type, reg);
2464 		start = 0;
2465 	} else
2466 		start = rman_get_start(res);
2467 	pci_write_bar(dev, reg, start);
2468 	return (barlen);
2469 }
2470 
2471 /*
2472  * For ATA devices we need to decide early what addressing mode to use.
2473  * Legacy demands that the primary and secondary ATA ports sits on the
2474  * same addresses that old ISA hardware did. This dictates that we use
2475  * those addresses and ignore the BAR's if we cannot set PCI native
2476  * addressing mode.
2477  */
2478 static void
2479 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2480     uint32_t prefetchmask)
2481 {
2482 	struct resource *r;
2483 	int rid, type, progif;
2484 #if 0
2485 	/* if this device supports PCI native addressing use it */
2486 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2487 	if ((progif & 0x8a) == 0x8a) {
2488 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2489 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2490 			printf("Trying ATA native PCI addressing mode\n");
2491 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2492 		}
2493 	}
2494 #endif
2495 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2496 	type = SYS_RES_IOPORT;
2497 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2498 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2499 		    prefetchmask & (1 << 0));
2500 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2501 		    prefetchmask & (1 << 1));
2502 	} else {
2503 		rid = PCIR_BAR(0);
2504 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2505 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2506 		    0x1f7, 8, 0);
2507 		rid = PCIR_BAR(1);
2508 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2509 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2510 		    0x3f6, 1, 0);
2511 	}
2512 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2513 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2514 		    prefetchmask & (1 << 2));
2515 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2516 		    prefetchmask & (1 << 3));
2517 	} else {
2518 		rid = PCIR_BAR(2);
2519 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2520 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2521 		    0x177, 8, 0);
2522 		rid = PCIR_BAR(3);
2523 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2524 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2525 		    0x376, 1, 0);
2526 	}
2527 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2528 	    prefetchmask & (1 << 4));
2529 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2530 	    prefetchmask & (1 << 5));
2531 }
2532 
/*
 * Determine the IRQ for a device's INTx pin and record it both in the
 * intline config register and as rid 0 of the device's resource list.
 * Precedence: a user tunable (hw.pci<dom>.<bus>.<slot>.INT<pin>.irq),
 * then either the bus's interrupt routing or the existing intline
 * value; force_route makes bus routing take priority over intline.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (only 1..254 accepted). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2580 
/*
 * Perform early OHCI takeover from SMM.  If the interrupt-routing bit
 * (OHCI_IR) in the control register indicates SMM owns the controller,
 * request an ownership change and poll up to ~100ms for SMM to release
 * it, resetting the controller if it never does.  Interrupts are then
 * disabled so the BIOS cannot generate further events.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's register space via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request ownership change; poll ~100 x 1ms for release. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			/* SMM never released ownership; force a reset. */
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2617 
2618 /* Perform early UHCI takeover from SMM. */
2619 static void
2620 uhci_early_takeover(device_t self)
2621 {
2622 	struct resource *res;
2623 	int rid;
2624 
2625 	/*
2626 	 * Set the PIRQD enable bit and switch off all the others. We don't
2627 	 * want legacy support to interfere with us XXX Does this also mean
2628 	 * that the BIOS won't touch the keyboard anymore if it is connected
2629 	 * to the ports of the root hub?
2630 	 */
2631 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2632 
2633 	/* Disable interrupts */
2634 	rid = PCI_UHCI_BASE_REG;
2635 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2636 	if (res != NULL) {
2637 		bus_write_2(res, UHCI_INTR, 0);
2638 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2639 	}
2640 }
2641 
/*
 * Perform early EHCI takeover from SMM.  Walks the extended capability
 * list (reachable via config space, starting at HCCPARAMS.EECP) looking
 * for the USB Legacy Support capability; if the BIOS semaphore is set,
 * sets the OS semaphore and polls up to ~100ms for the BIOS to release
 * ownership, then disables the controller's interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register space via BAR 0. */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the Legacy Support capability is of interest. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership, then poll ~100 x 1ms for BIOS release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = bus_read_1(res, EHCI_CAPLENGTH);
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2697 
/*
 * Populate a device's resource list: BARs (with special handling for
 * legacy-mode ATA controllers), quirk-mandated extra map registers,
 * and the INTx interrupt as rid 0.  Also performs early USB controller
 * takeover from the BIOS/SMM when pci_usb_takeover is enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 (64-bit BARs use two slots). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2752 
/*
 * Enumerate every slot/function on the given bus and add a child
 * device for each function found.  dinfo_size allows subclasses to
 * allocate a larger pci_devinfo-derived structure per device.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots with an out-of-range header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function flag: scan functions beyond 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2785 
2786 void
2787 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2788 {
2789 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2790 	device_set_ivars(dinfo->cfg.dev, dinfo);
2791 	resource_list_init(&dinfo->resources);
2792 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2793 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2794 	pci_print_verbose(dinfo);
2795 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2796 }
2797 
2798 static int
2799 pci_probe(device_t dev)
2800 {
2801 
2802 	device_set_desc(dev, "PCI bus");
2803 
2804 	/* Allow other subclasses to override this driver. */
2805 	return (BUS_PROBE_GENERIC);
2806 }
2807 
2808 static int
2809 pci_attach(device_t dev)
2810 {
2811 	int busno, domain;
2812 
2813 	/*
2814 	 * Since there can be multiple independantly numbered PCI
2815 	 * busses on systems with multiple PCI domains, we can't use
2816 	 * the unit number to decide which bus we are probing. We ask
2817 	 * the parent pcib what our domain and bus numbers are.
2818 	 */
2819 	domain = pcib_get_domain(dev);
2820 	busno = pcib_get_bus(dev);
2821 	if (bootverbose)
2822 		device_printf(dev, "domain=%d, physical bus=%d\n",
2823 		    domain, busno);
2824 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2825 	return (bus_generic_attach(dev));
2826 }
2827 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (when pci_do_power_resume and ACPI are available)
 * place attached type 0 devices into a low-power state chosen by
 * ACPI, defaulting to D3.  Returns 0 or an errno.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2876 
/*
 * Bus resume method: bring attached type 0 children back to D0 (via
 * ACPI when available), restore every child's saved config space, and
 * resume the children generically.  Returns 0 or an errno.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2912 
2913 static void
2914 pci_load_vendor_data(void)
2915 {
2916 	caddr_t vendordata, info;
2917 
2918 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2919 		info = preload_search_info(vendordata, MODINFO_ADDR);
2920 		pci_vendordata = *(char **)info;
2921 		info = preload_search_info(vendordata, MODINFO_SIZE);
2922 		pci_vendordata_size = *(size_t *)info;
2923 		/* terminate the database */
2924 		pci_vendordata[pci_vendordata_size] = '\n';
2925 	}
2926 }
2927 
2928 void
2929 pci_driver_added(device_t dev, driver_t *driver)
2930 {
2931 	int numdevs;
2932 	device_t *devlist;
2933 	device_t child;
2934 	struct pci_devinfo *dinfo;
2935 	int i;
2936 
2937 	if (bootverbose)
2938 		device_printf(dev, "driver added\n");
2939 	DEVICE_IDENTIFY(driver, dev);
2940 	if (device_get_children(dev, &devlist, &numdevs) != 0)
2941 		return;
2942 	for (i = 0; i < numdevs; i++) {
2943 		child = devlist[i];
2944 		if (device_get_state(child) != DS_NOTPRESENT)
2945 			continue;
2946 		dinfo = device_get_ivars(child);
2947 		pci_print_verbose(dinfo);
2948 		if (bootverbose)
2949 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
2950 		pci_cfg_restore(child, dinfo);
2951 		if (device_probe_and_attach(child) != 0)
2952 			pci_cfg_save(child, dinfo, 1);
2953 	}
2954 	free(devlist, M_TEMP);
2955 }
2956 
/*
 * Wrapper around bus_generic_setup_intr() that, for direct PCI
 * children, also programs the interrupt source: for the legacy
 * interrupt (rid 0) INTx decoding is enabled; for MSI/MSI-X (rid > 0)
 * the message is mapped by the parent bridge, programmed into the
 * device, and INTx is disabled.  On a mapping failure the generic
 * handler set up above is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: all messages share one address/data pair. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable MSI in hardware on the first handler only. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N - 1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on the first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3048 
3049 int
3050 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3051     void *cookie)
3052 {
3053 	struct msix_table_entry *mte;
3054 	struct resource_list_entry *rle;
3055 	struct pci_devinfo *dinfo;
3056 	int error, rid;
3057 
3058 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3059 		return (EINVAL);
3060 
3061 	/* If this isn't a direct child, just bail out */
3062 	if (device_get_parent(child) != dev)
3063 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3064 
3065 	rid = rman_get_rid(irq);
3066 	if (rid == 0) {
3067 		/* Mask INTx */
3068 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3069 	} else {
3070 		/*
3071 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3072 		 * decrement the appropriate handlers count and mask the
3073 		 * MSI-X message, or disable MSI messages if the count
3074 		 * drops to 0.
3075 		 */
3076 		dinfo = device_get_ivars(child);
3077 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3078 		if (rle->res != irq)
3079 			return (EINVAL);
3080 		if (dinfo->cfg.msi.msi_alloc > 0) {
3081 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3082 			    ("MSI-X index too high"));
3083 			if (dinfo->cfg.msi.msi_handlers == 0)
3084 				return (EINVAL);
3085 			dinfo->cfg.msi.msi_handlers--;
3086 			if (dinfo->cfg.msi.msi_handlers == 0)
3087 				pci_disable_msi(child);
3088 		} else {
3089 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3090 			    ("No MSI or MSI-X interrupts allocated"));
3091 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3092 			    ("MSI-X index too high"));
3093 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3094 			if (mte->mte_handlers == 0)
3095 				return (EINVAL);
3096 			mte->mte_handlers--;
3097 			if (mte->mte_handlers == 0)
3098 				pci_mask_msix(child, rid - 1);
3099 		}
3100 	}
3101 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3102 	if (rid > 0)
3103 		KASSERT(error == 0,
3104 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3105 	return (error);
3106 }
3107 
3108 int
3109 pci_print_child(device_t dev, device_t child)
3110 {
3111 	struct pci_devinfo *dinfo;
3112 	struct resource_list *rl;
3113 	int retval = 0;
3114 
3115 	dinfo = device_get_ivars(child);
3116 	rl = &dinfo->resources;
3117 
3118 	retval += bus_print_child_header(dev, child);
3119 
3120 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3121 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3122 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3123 	if (device_get_flags(dev))
3124 		retval += printf(" flags %#x", device_get_flags(dev));
3125 
3126 	retval += printf(" at device %d.%d", pci_get_slot(child),
3127 	    pci_get_function(child));
3128 
3129 	retval += bus_print_child_footer(dev, child);
3130 
3131 	return (retval);
3132 }
3133 
/*
 * Table mapping PCI class/subclass codes to human-readable
 * descriptions, used by pci_probe_nomatch() when no driver attaches
 * and the vendor database has no entry for the device.  A subclass of
 * -1 is the generic fallback for that class; the table is terminated
 * by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3225 
3226 void
3227 pci_probe_nomatch(device_t dev, device_t child)
3228 {
3229 	int	i;
3230 	char	*cp, *scp, *device;
3231 
3232 	/*
3233 	 * Look for a listing for this device in a loaded device database.
3234 	 */
3235 	if ((device = pci_describe_device(child)) != NULL) {
3236 		device_printf(dev, "<%s>", device);
3237 		free(device, M_DEVBUF);
3238 	} else {
3239 		/*
3240 		 * Scan the class/subclass descriptions for a general
3241 		 * description.
3242 		 */
3243 		cp = "unknown";
3244 		scp = NULL;
3245 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3246 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3247 				if (pci_nomatch_tab[i].subclass == -1) {
3248 					cp = pci_nomatch_tab[i].desc;
3249 				} else if (pci_nomatch_tab[i].subclass ==
3250 				    pci_get_subclass(child)) {
3251 					scp = pci_nomatch_tab[i].desc;
3252 				}
3253 			}
3254 		}
3255 		device_printf(dev, "<%s%s%s>",
3256 		    cp ? cp : "",
3257 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3258 		    scp ? scp : "");
3259 	}
3260 	printf(" at device %d.%d (no driver attached)\n",
3261 	    pci_get_slot(child), pci_get_function(child));
3262 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3263 	return;
3264 }
3265 
3266 /*
3267  * Parse the PCI device database, if loaded, and return a pointer to a
3268  * description of the device.
3269  *
3270  * The database is flat text formatted as follows:
3271  *
3272  * Any line not in a valid format is ignored.
3273  * Lines are terminated with newline '\n' characters.
3274  *
3275  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3276  * the vendor name.
3277  *
3278  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3279  * - devices cannot be listed without a corresponding VENDOR line.
3280  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3281  * another TAB, then the device name.
3282  */
3283 
3284 /*
3285  * Assuming (ptr) points to the beginning of a line in the database,
3286  * return the vendor or device and description of the next entry.
3287  * The value of (vendor) or (device) inappropriate for the entry type
3288  * is set to -1.  Returns nonzero at the end of the database.
3289  *
3290  * Note that this is slightly unrobust in the face of corrupt data;
3291  * we attempt to safeguard against this by spamming the end of the
3292  * database with a newline when we initialise.
3293  */
3294 static int
3295 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3296 {
3297 	char	*cp = *ptr;
3298 	int	left;
3299 
3300 	*device = -1;
3301 	*vendor = -1;
3302 	**desc = '\0';
3303 	for (;;) {
3304 		left = pci_vendordata_size - (cp - pci_vendordata);
3305 		if (left <= 0) {
3306 			*ptr = cp;
3307 			return(1);
3308 		}
3309 
3310 		/* vendor entry? */
3311 		if (*cp != '\t' &&
3312 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3313 			break;
3314 		/* device entry? */
3315 		if (*cp == '\t' &&
3316 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3317 			break;
3318 
3319 		/* skip to next line */
3320 		while (*cp != '\n' && left > 0) {
3321 			cp++;
3322 			left--;
3323 		}
3324 		if (*cp == '\n') {
3325 			cp++;
3326 			left--;
3327 		}
3328 	}
3329 	/* skip to next line */
3330 	while (*cp != '\n' && left > 0) {
3331 		cp++;
3332 		left--;
3333 	}
3334 	if (*cp == '\n' && left > 0)
3335 		cp++;
3336 	*ptr = cp;
3337 	return(0);
3338 }
3339 
3340 static char *
3341 pci_describe_device(device_t dev)
3342 {
3343 	int	vendor, device;
3344 	char	*desc, *vp, *dp, *line;
3345 
3346 	desc = vp = dp = NULL;
3347 
3348 	/*
3349 	 * If we have no vendor data, we can't do anything.
3350 	 */
3351 	if (pci_vendordata == NULL)
3352 		goto out;
3353 
3354 	/*
3355 	 * Scan the vendor data looking for this device
3356 	 */
3357 	line = pci_vendordata;
3358 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3359 		goto out;
3360 	for (;;) {
3361 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3362 			goto out;
3363 		if (vendor == pci_get_vendor(dev))
3364 			break;
3365 	}
3366 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3367 		goto out;
3368 	for (;;) {
3369 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3370 			*dp = 0;
3371 			break;
3372 		}
3373 		if (vendor != -1) {
3374 			*dp = 0;
3375 			break;
3376 		}
3377 		if (device == pci_get_device(dev))
3378 			break;
3379 	}
3380 	if (dp[0] == '\0')
3381 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3382 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3383 	    NULL)
3384 		sprintf(desc, "%s, %s", vp, dp);
3385  out:
3386 	if (vp != NULL)
3387 		free(vp, M_DEVBUF);
3388 	if (dp != NULL)
3389 		free(dp, M_DEVBUF);
3390 	return(desc);
3391 }
3392 
/*
 * Read an instance variable of a PCI child device (bus interface
 * method).  Values are returned from the cached copy of the config
 * header in the child's pci_devinfo.  Returns ENOENT for unknown
 * ivars; PCI_IVAR_ETHADDR is unsupported here and returns EINVAL
 * with *result set to NULL.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3475 
/*
 * Write an instance variable of a PCI child device (bus interface
 * method).  Only the interrupt pin may currently be modified; the
 * identity ivars (vendor, device, bus address, ...) are read-only
 * and return EINVAL.  Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3508 
3509 
3510 #include "opt_ddb.h"
3511 #ifdef DDB
3512 #include <ddb/ddb.h>
3513 #include <sys/cons.h>
3514 
/*
 * ddb "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (name/unit or "none", bus
 * address, class, subsystem id, device id, revision, header type).
 */

DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices print as "none<n>" with a running count. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3558 #endif /* DDB */
3559 
/*
 * Lazily reserve the resource backing a BAR on first allocation.
 * Sizes the BAR by probing it (pci_read_bar), validates that the
 * requested type matches what the BAR decodes, allocates a suitably
 * sized and aligned range from the parent, records it on the child's
 * resource list as RLE_RESERVED, and programs the BAR with the
 * assigned base.  Returns the reserved resource or NULL.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the reservation so later allocations find it. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we were assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3649 
3650 
/*
 * Allocate a resource for a child (bus interface method).  Requests
 * for grandchildren are passed straight up.  For direct children,
 * legacy interrupt routing is performed lazily on first request, and
 * BAR-backed port/memory ranges are reserved via pci_reserve_map()
 * if not already on the child's resource list.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* The actual allocation comes from the child's resource list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3701 
3702 int
3703 pci_release_resource(device_t dev, device_t child, int type, int rid,
3704     struct resource *r)
3705 {
3706 
3707 	if (device_get_parent(child) != dev)
3708 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3709 		    type, rid, r));
3710 
3711 	/*
3712 	 * For BARs we don't actually want to release the resource.
3713 	 * Instead, we deactivate the resource if needed and then give
3714 	 * ownership of the BAR back to the bus.  This is handled for us
3715 	 * in resource_list_release() since we use resource_list_reserve()
3716 	 * for BARs.
3717 	 */
3718 	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3719 }
3720 
3721 int
3722 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3723     struct resource *r)
3724 {
3725 	int error;
3726 
3727 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3728 	if (error)
3729 		return (error);
3730 
3731 	/* Enable decoding in the command register when activating BARs. */
3732 	if (device_get_parent(child) == dev) {
3733 		switch (type) {
3734 		case SYS_RES_IOPORT:
3735 		case SYS_RES_MEMORY:
3736 			error = PCI_ENABLE_IO(dev, child, type);
3737 			break;
3738 		}
3739 	}
3740 	return (error);
3741 }
3742 
/*
 * Remove a resource entry from a direct child's resource list (bus
 * interface method).  Refuses (with a console warning) if the child
 * still has the resource active or allocated.  For BARs the register
 * is cleared first so the device stops decoding the range, then the
 * reservation is handed back to the parent.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
3785 
3786 struct resource_list *
3787 pci_get_resource_list (device_t dev, device_t child)
3788 {
3789 	struct pci_devinfo *dinfo = device_get_ivars(child);
3790 
3791 	return (&dinfo->resources);
3792 }
3793 
3794 uint32_t
3795 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3796 {
3797 	struct pci_devinfo *dinfo = device_get_ivars(child);
3798 	pcicfgregs *cfg = &dinfo->cfg;
3799 
3800 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3801 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3802 }
3803 
3804 void
3805 pci_write_config_method(device_t dev, device_t child, int reg,
3806     uint32_t val, int width)
3807 {
3808 	struct pci_devinfo *dinfo = device_get_ivars(child);
3809 	pcicfgregs *cfg = &dinfo->cfg;
3810 
3811 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3812 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3813 }
3814 
3815 int
3816 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3817     size_t buflen)
3818 {
3819 
3820 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3821 	    pci_get_function(child));
3822 	return (0);
3823 }
3824 
3825 int
3826 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3827     size_t buflen)
3828 {
3829 	struct pci_devinfo *dinfo;
3830 	pcicfgregs *cfg;
3831 
3832 	dinfo = device_get_ivars(child);
3833 	cfg = &dinfo->cfg;
3834 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3835 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3836 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3837 	    cfg->progif);
3838 	return (0);
3839 }
3840 
3841 int
3842 pci_assign_interrupt_method(device_t dev, device_t child)
3843 {
3844 	struct pci_devinfo *dinfo = device_get_ivars(child);
3845 	pcicfgregs *cfg = &dinfo->cfg;
3846 
3847 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3848 	    cfg->intpin));
3849 }
3850 
3851 static int
3852 pci_modevent(module_t mod, int what, void *arg)
3853 {
3854 	static struct cdev *pci_cdev;
3855 
3856 	switch (what) {
3857 	case MOD_LOAD:
3858 		STAILQ_INIT(&pci_devq);
3859 		pci_generation = 0;
3860 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3861 		    "pci");
3862 		pci_load_vendor_data();
3863 		break;
3864 
3865 	case MOD_UNLOAD:
3866 		destroy_dev(pci_cdev);
3867 		break;
3868 	}
3869 
3870 	return (0);
3871 }
3872 
/*
 * Restore a device's config space from the copy saved by
 * pci_cfg_save().  Power must be restored first (see below), then the
 * BARs and the writable type 0 header fields, then any MSI/MSI-X
 * state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Re-program the BARs and expansion ROM from the saved copies. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the writable portion of the type 0 header. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3918 
3919 void
3920 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3921 {
3922 	int i;
3923 	uint32_t cls;
3924 	int ps;
3925 
3926 	/*
3927 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3928 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3929 	 * which also require special treatment.  Other types are unknown, and
3930 	 * we err on the side of safety by ignoring them.  Powering down
3931 	 * bridges should not be undertaken lightly.
3932 	 */
3933 	if (dinfo->cfg.hdrtype != 0)
3934 		return;
3935 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3936 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3937 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3938 
3939 	/*
3940 	 * Some drivers apparently write to these registers w/o updating our
3941 	 * cached copy.  No harm happens if we update the copy, so do so here
3942 	 * so we can restore them.  The COMMAND register is modified by the
3943 	 * bus w/o updating the cache.  This should represent the normally
3944 	 * writable portion of the 'defined' part of type 0 headers.  In
3945 	 * theory we also need to save/restore the PCI capability structures
3946 	 * we know about, but apart from power we don't know any that are
3947 	 * writable.
3948 	 */
3949 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3950 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3951 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3952 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3953 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3954 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3955 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3956 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3957 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3958 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3959 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3960 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3961 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3962 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3963 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3964 
3965 	/*
3966 	 * don't set the state for display devices, base peripherals and
3967 	 * memory devices since bad things happen when they are powered down.
3968 	 * We should (a) have drivers that can easily detach and (b) use
3969 	 * generic drivers for these devices so that some device actually
3970 	 * attaches.  We need to make sure that when we implement (a) we don't
3971 	 * power the device down on a reattach.
3972 	 */
3973 	cls = pci_get_class(dev);
3974 	if (!setstate)
3975 		return;
3976 	switch (pci_do_power_nodriver)
3977 	{
3978 		case 0:		/* NO powerdown at all */
3979 			return;
3980 		case 1:		/* Conservative about what to power down */
3981 			if (cls == PCIC_STORAGE)
3982 				return;
3983 			/*FALLTHROUGH*/
3984 		case 2:		/* Agressive about what to power down */
3985 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3986 			    cls == PCIC_BASEPERIPH)
3987 				return;
3988 			/*FALLTHROUGH*/
3989 		case 3:		/* Power down everything */
3990 			break;
3991 	}
3992 	/*
3993 	 * PCI spec says we can only go into D3 state from D0 state.
3994 	 * Transition from D[12] into D0 before going to D3 state.
3995 	 */
3996 	ps = pci_get_powerstate(dev);
3997 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3998 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3999 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4000 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4001 }
4002