xref: /freebsd/sys/dev/pci/pci.c (revision 195ebc7e9e4b129de810833791a19dfb4349d6a9)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include "pcib_if.h"
66 #include "pci_if.h"
67 
68 #ifdef __HAVE_ACPI
69 #include <contrib/dev/acpica/acpi.h>
70 #include "acpi_if.h"
71 #else
72 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
73 #endif
74 
/*
 * Forward declarations of the file-local (static) helpers.
 */

/* BAR ("map register") decoding helpers. */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
/* Vital Product Data (VPD) access. */
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
/* The VPD write helper is compiled out, together with its definition. */
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI / MSI-X helpers. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
115 
/*
 * Method table for the "pci" bus driver.  Each DEVMETHOD() entry binds an
 * interface method (device, bus or PCI interface) to the function that
 * implements it; entries naming bus_generic_* functions use the generic
 * bus implementations rather than PCI-specific ones.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: resource management */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* End-of-table sentinel. */
	{ 0, 0 }
};

/* Define the driver class "pci" (pci_driver) backed by pci_methods. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/*
 * Register the driver as module "pci" under the "pcib" parent, with
 * pci_modevent() as the module event handler.
 */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);
173 
/*
 * Vendor description data buffer and its size in bytes.
 * NOTE(review): presumably filled in by pci_load_vendor_data() and parsed
 * by pci_describe_device(); neither is visible in this part of the file.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
176 
177 
/*
 * One entry of the device quirk table (pci_quirks[]).  A quirk is keyed by
 * the combined vendor/device ID and carries a type plus two type-specific
 * arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* Type-specific; config offset for PCI_QUIRK_MAP_REG */
	int	arg2;	/* Type-specific; 0 in every entry of pci_quirks[] */
};
186 
/*
 * Table of known device quirks.  devid is written as 0xDDDDVVVV (device ID
 * in the upper 16 bits, vendor ID in the lower 16 bits).
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* All-zero entry; presumably the end-of-table marker for lookups. */
	{ 0 }
};
221 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/*
 * Global list of discovered PCI functions.  pci_read_device() appends each
 * new pci_devinfo to pci_devq and bumps both pci_numdevs and
 * pci_generation when it does so.
 */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_extcap() when a PCI-express capability (any device)
 * or a PCI-X capability (on a header type 1 device) is seen.
 */
static int pcie_chipset, pcix_chipset;
231 
/*
 * sysctl vars
 *
 * Each knob below is a static int that can be set at boot through the
 * TUNABLE_INT() of the same name and is exported under the hw.pci sysctl
 * node.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes: default on. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* hw.pci.do_power_nodriver: default 0 (disabled); see description. */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* hw.pci.do_power_resume: default on. */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.enable_msi: default on. */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix: default on. */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/*
 * hw.pci.honor_msi_blacklist: default on.  Unlike the knobs above this
 * sysctl is CTLFLAG_RD, so it can only be changed through the tunable.
 */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
272 
273 /* Find a device_t by bus/slot/function in domain 0 */
274 
275 device_t
276 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
277 {
278 
279 	return (pci_find_dbsf(0, bus, slot, func));
280 }
281 
282 /* Find a device_t by domain/bus/slot/function */
283 
284 device_t
285 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
286 {
287 	struct pci_devinfo *dinfo;
288 
289 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
290 		if ((dinfo->cfg.domain == domain) &&
291 		    (dinfo->cfg.bus == bus) &&
292 		    (dinfo->cfg.slot == slot) &&
293 		    (dinfo->cfg.func == func)) {
294 			return (dinfo->cfg.dev);
295 		}
296 	}
297 
298 	return (NULL);
299 }
300 
301 /* Find a device_t by vendor/device ID */
302 
303 device_t
304 pci_find_device(uint16_t vendor, uint16_t device)
305 {
306 	struct pci_devinfo *dinfo;
307 
308 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
309 		if ((dinfo->cfg.vendor == vendor) &&
310 		    (dinfo->cfg.device == device)) {
311 			return (dinfo->cfg.dev);
312 		}
313 	}
314 
315 	return (NULL);
316 }
317 
318 static int
319 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
320 {
321 	va_list ap;
322 	int retval;
323 
324 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
325 	    cfg->func);
326 	va_start(ap, fmt);
327 	retval += vprintf(fmt, ap);
328 	va_end(ap);
329 	return (retval);
330 }
331 
332 /* return base address of memory or port map */
333 
334 static pci_addr_t
335 pci_mapbase(uint64_t mapreg)
336 {
337 
338 	if (PCI_BAR_MEM(mapreg))
339 		return (mapreg & PCIM_BAR_MEM_BASE);
340 	else
341 		return (mapreg & PCIM_BAR_IO_BASE);
342 }
343 
344 /* return map type of memory or port map */
345 
346 static const char *
347 pci_maptype(uint64_t mapreg)
348 {
349 
350 	if (PCI_BAR_IO(mapreg))
351 		return ("I/O Port");
352 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
353 		return ("Prefetchable Memory");
354 	return ("Memory");
355 }
356 
357 /* return log2 of map size decoded for memory or port map */
358 
359 static int
360 pci_mapsize(uint64_t testval)
361 {
362 	int ln2size;
363 
364 	testval = pci_mapbase(testval);
365 	ln2size = 0;
366 	if (testval != 0) {
367 		while ((testval & 1) == 0)
368 		{
369 			ln2size++;
370 			testval >>= 1;
371 		}
372 	}
373 	return (ln2size);
374 }
375 
376 /* return log2 of address range supported by map register */
377 
378 static int
379 pci_maprange(uint64_t mapreg)
380 {
381 	int ln2range = 0;
382 
383 	if (PCI_BAR_IO(mapreg))
384 		ln2range = 32;
385 	else
386 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
387 		case PCIM_BAR_MEM_32:
388 			ln2range = 32;
389 			break;
390 		case PCIM_BAR_MEM_1MB:
391 			ln2range = 20;
392 			break;
393 		case PCIM_BAR_MEM_64:
394 			ln2range = 64;
395 			break;
396 		}
397 	return (ln2range);
398 }
399 
400 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
401 
402 static void
403 pci_fixancient(pcicfgregs *cfg)
404 {
405 	if (cfg->hdrtype != 0)
406 		return;
407 
408 	/* PCI to PCI bridges use header type 1 */
409 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
410 		cfg->hdrtype = 1;
411 }
412 
413 /* extract header type specific config data */
414 
415 static void
416 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
417 {
418 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
419 	switch (cfg->hdrtype) {
420 	case 0:
421 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
422 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
423 		cfg->nummaps	    = PCI_MAXMAPS_0;
424 		break;
425 	case 1:
426 		cfg->nummaps	    = PCI_MAXMAPS_1;
427 		break;
428 	case 2:
429 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
430 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
431 		cfg->nummaps	    = PCI_MAXMAPS_2;
432 		break;
433 	}
434 #undef REG
435 }
436 
437 /* read configuration header into pcicfgregs structure */
438 struct pci_devinfo *
439 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
440 {
441 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
442 	pcicfgregs *cfg = NULL;
443 	struct pci_devinfo *devlist_entry;
444 	struct devlist *devlist_head;
445 
446 	devlist_head = &pci_devq;
447 
448 	devlist_entry = NULL;
449 
450 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
451 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
452 		if (devlist_entry == NULL)
453 			return (NULL);
454 
455 		cfg = &devlist_entry->cfg;
456 
457 		cfg->domain		= d;
458 		cfg->bus		= b;
459 		cfg->slot		= s;
460 		cfg->func		= f;
461 		cfg->vendor		= REG(PCIR_VENDOR, 2);
462 		cfg->device		= REG(PCIR_DEVICE, 2);
463 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
464 		cfg->statreg		= REG(PCIR_STATUS, 2);
465 		cfg->baseclass		= REG(PCIR_CLASS, 1);
466 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
467 		cfg->progif		= REG(PCIR_PROGIF, 1);
468 		cfg->revid		= REG(PCIR_REVID, 1);
469 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
470 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
471 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
472 		cfg->intpin		= REG(PCIR_INTPIN, 1);
473 		cfg->intline		= REG(PCIR_INTLINE, 1);
474 
475 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
476 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
477 
478 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
479 		cfg->hdrtype		&= ~PCIM_MFDEV;
480 
481 		pci_fixancient(cfg);
482 		pci_hdrtypedata(pcib, b, s, f, cfg);
483 
484 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
485 			pci_read_extcap(pcib, cfg);
486 
487 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
488 
489 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
490 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
491 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
492 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
493 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
494 
495 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
496 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
497 		devlist_entry->conf.pc_vendor = cfg->vendor;
498 		devlist_entry->conf.pc_device = cfg->device;
499 
500 		devlist_entry->conf.pc_class = cfg->baseclass;
501 		devlist_entry->conf.pc_subclass = cfg->subclass;
502 		devlist_entry->conf.pc_progif = cfg->progif;
503 		devlist_entry->conf.pc_revid = cfg->revid;
504 
505 		pci_numdevs++;
506 		pci_generation++;
507 	}
508 	return (devlist_entry);
509 #undef REG
510 }
511 
/*
 * Walk the capability list of the function described by cfg and cache in
 * cfg the information found there: power management register offsets,
 * the HyperTransport MSI mapping window (i386/amd64 only), MSI and MSI-X
 * parameters, the VPD capability offset and, for header type 1 devices,
 * the subsystem vendor/device IDs.  It also sets the file-level
 * pcix_chipset / pcie_chipset flags when the corresponding capabilities
 * are seen.
 *
 * The caller (pci_read_device()) only invokes this when the status
 * register has PCIM_STATUS_CAPPRESENT set.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
/*
 * Config-space accessors for the function described by cfg.  They are
 * deliberately left defined at the end of this function: the VPD helpers
 * that follow keep using them, and pci_read_vpd() finally undefines them.
 * Both expand to code that refers to the identifiers `pcib' and `cfg'.
 */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The register holding the list head depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.  A next pointer of 0 ends the list.
	 */
	while (nextptr != 0) {
		/*
		 * Sanity check.
		 * NOTE(review): nextptr comes from a 1-byte config read, so
		 * this looks unreachable unless PCIB_READ_CONFIG can return a
		 * wider value -- confirm.
		 */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first power management capability is used. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				/*
				 * The data register offset is recorded only
				 * when the next capability starts far enough
				 * after this one.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				/* Remember where the mapping lives and its state. */
				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is 2^(MMC field of the control reg). */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* The table size field holds (message count - 1). */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/*
			 * The table and PBA registers each combine a BAR
			 * index (low bits, PCIM_MSIX_BIR_MASK) with an offset
			 * (remaining bits).
			 */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Only the offset is saved; VPD is read on demand. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			/* Capabilities not listed above are ignored. */
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
645 
646 /*
647  * PCI Vital Product Data
648  */
649 
650 #define	PCI_VPD_TIMEOUT		1000000
651 
652 static int
653 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
654 {
655 	int count = PCI_VPD_TIMEOUT;
656 
657 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
658 
659 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
660 
661 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
662 		if (--count < 0)
663 			return (ENXIO);
664 		DELAY(1);	/* limit looping */
665 	}
666 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
667 
668 	return (0);
669 }
670 
/*
 * VPD write support; compiled out (as is its prototype near the top of the
 * file).
 *
 * Writes the 32-bit `data' to the VPD data register, then writes `reg'
 * with bit 0x8000 set to the VPD address register and polls until that bit
 * reads back clear.  Returns 0 on success or ENXIO when the polling budget
 * (PCI_VPD_TIMEOUT iterations) runs out.
 */
#if 0
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

/* The timeout constant is only meant for the two VPD register helpers. */
#undef PCI_VPD_TIMEOUT
692 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while reading the device in 32-bit words.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current word; low byte is the last byte returned */
	int		bytesinval;	/* bytes of val not yet handed out */
	int		off;		/* VPD offset of the next word to read */
	uint8_t		cksum;		/* running 8-bit sum of all bytes returned */
};
701 
702 static int
703 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
704 {
705 	uint32_t reg;
706 	uint8_t byte;
707 
708 	if (vrs->bytesinval == 0) {
709 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
710 			return (ENXIO);
711 		vrs->val = le32toh(reg);
712 		vrs->off += 4;
713 		byte = vrs->val & 0xff;
714 		vrs->bytesinval = 3;
715 	} else {
716 		vrs->val = vrs->val >> 8;
717 		byte = vrs->val & 0xff;
718 		vrs->bytesinval--;
719 	}
720 
721 	vrs->cksum += byte;
722 	*data = byte;
723 	return (0);
724 }
725 
/*
 * Read and parse the device's Vital Product Data, caching the result in
 * cfg->vpd: the identifier string (vpd_ident), the read-only keyword
 * array (vpd_ros / vpd_rocnt) and the writable keyword array (vpd_w /
 * vpd_wcnt).  cfg->vpd.vpd_cached is set to 1 on every exit path so the
 * read is attempted only once.
 *
 * The data is consumed one byte at a time through vpd_nextbyte() and
 * driven by a small state machine:
 *
 *	 0	expecting a resource item header (name and length)
 *	 1	copying the identifier string
 *	 2	expecting a VPD-R keyword header (2-byte keyword, 1-byte len)
 *	 3	copying a VPD-R keyword value
 *	 4	skipping `remain' bytes (no transition into this state is
 *		visible in this function)
 *	 5	expecting a VPD-W keyword header
 *	 6	copying a VPD-W keyword value
 *	-1	parsing finished (end tag, invalid data or bad checksum)
 *	-2	I/O error from vpd_nextbyte()
 *
 * On a bad checksum or I/O error the read-only data is freed; on an I/O
 * error the identifier string and writable data are freed as well.
 *
 * Also ends the scope of the REG()/WREG() macros defined in
 * pci_read_extcap().
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1: not checked yet, 0: bad, 1: good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		/*
		 * Note: a `break' inside the cases below only leaves the
		 * switch; the loop then ends because state is negative.
		 */
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/*
				 * Large item: 7-bit name followed by a
				 * 16-bit little-endian length.
				 */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * NOTE(review): state is set to -1 here, but
				 * the switch on `name' below still runs and
				 * overwrites it for the names it recognizes,
				 * so parsing (and allocation sized by the
				 * rejected length) continues -- confirm
				 * whether that is intended.
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small item: length in bits 0-2, name in bits 3-6. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				/*
				 * NOTE(review): with remain == 0 state 1
				 * decrements remain past zero and never sees
				 * remain == 0 -- confirm this cannot happen.
				 */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				/* Start with room for 8 keywords; grown in state 2. */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				/* Start with room for 8 keywords; grown in state 5. */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				/* Whole string copied: terminate it. */
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Double the array when it is full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			/* `byte' is the first keyword character; read the rest. */
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Empty value: store just a terminating NUL. */
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* The 3 header bytes count against the item length. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The first value byte of the "RV" keyword must bring
			 * the running sum of all bytes read so far to zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					/* The value printed is the checksum. */
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: record count, trim the array. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip `remain' bytes, then expect a new item. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Double the array when it is full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* VPD offset of the first value byte of this keyword. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			/* The 3 header bytes count against the item length. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: record count, trim the array. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	/*
	 * NOTE(review): both cleanup loops below stop at the first entry
	 * whose .value is NULL, i.e. they rely on a zeroed entry following
	 * the last used one; they also leave vpd_rocnt / vpd_wcnt untouched
	 * -- confirm both are safe for all array sizes.
	 */
	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the VPD as read, whether or not parsing succeeded. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1005 
1006 int
1007 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1008 {
1009 	struct pci_devinfo *dinfo = device_get_ivars(child);
1010 	pcicfgregs *cfg = &dinfo->cfg;
1011 
1012 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1013 		pci_read_vpd(device_get_parent(dev), cfg);
1014 
1015 	*identptr = cfg->vpd.vpd_ident;
1016 
1017 	if (*identptr == NULL)
1018 		return (ENXIO);
1019 
1020 	return (0);
1021 }
1022 
1023 int
1024 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1025 	const char **vptr)
1026 {
1027 	struct pci_devinfo *dinfo = device_get_ivars(child);
1028 	pcicfgregs *cfg = &dinfo->cfg;
1029 	int i;
1030 
1031 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1032 		pci_read_vpd(device_get_parent(dev), cfg);
1033 
1034 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1035 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1036 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1037 			*vptr = cfg->vpd.vpd_ros[i].value;
1038 		}
1039 
1040 	if (i != cfg->vpd.vpd_rocnt)
1041 		return (0);
1042 
1043 	*vptr = NULL;
1044 	return (ENXIO);
1045 }
1046 
1047 /*
1048  * Find the requested extended capability and return the offset in
1049  * configuration space via the pointer provided. The function returns
1050  * 0 on success and error code otherwise.
1051  */
1052 int
1053 pci_find_extcap_method(device_t dev, device_t child, int capability,
1054     int *capreg)
1055 {
1056 	struct pci_devinfo *dinfo = device_get_ivars(child);
1057 	pcicfgregs *cfg = &dinfo->cfg;
1058 	u_int32_t status;
1059 	u_int8_t ptr;
1060 
1061 	/*
1062 	 * Check the CAP_LIST bit of the PCI status register first.
1063 	 */
1064 	status = pci_read_config(child, PCIR_STATUS, 2);
1065 	if (!(status & PCIM_STATUS_CAPPRESENT))
1066 		return (ENXIO);
1067 
1068 	/*
1069 	 * Determine the start pointer of the capabilities list.
1070 	 */
1071 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1072 	case 0:
1073 	case 1:
1074 		ptr = PCIR_CAP_PTR;
1075 		break;
1076 	case 2:
1077 		ptr = PCIR_CAP_PTR_2;
1078 		break;
1079 	default:
1080 		/* XXX: panic? */
1081 		return (ENXIO);		/* no extended capabilities support */
1082 	}
1083 	ptr = pci_read_config(child, ptr, 1);
1084 
1085 	/*
1086 	 * Traverse the capabilities list.
1087 	 */
1088 	while (ptr != 0) {
1089 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1090 			if (capreg != NULL)
1091 				*capreg = ptr;
1092 			return (0);
1093 		}
1094 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1095 	}
1096 
1097 	return (ENOENT);
1098 }
1099 
1100 /*
1101  * Support for MSI-X message interrupts.
1102  */
1103 void
1104 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1105 {
1106 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1107 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1108 	uint32_t offset;
1109 
1110 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1111 	offset = msix->msix_table_offset + index * 16;
1112 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1113 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1114 	bus_write_4(msix->msix_table_res, offset + 8, data);
1115 
1116 	/* Enable MSI -> HT mapping. */
1117 	pci_ht_map_msi(dev, address);
1118 }
1119 
1120 void
1121 pci_mask_msix(device_t dev, u_int index)
1122 {
1123 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125 	uint32_t offset, val;
1126 
1127 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1128 	offset = msix->msix_table_offset + index * 16 + 12;
1129 	val = bus_read_4(msix->msix_table_res, offset);
1130 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1131 		val |= PCIM_MSIX_VCTRL_MASK;
1132 		bus_write_4(msix->msix_table_res, offset, val);
1133 	}
1134 }
1135 
1136 void
1137 pci_unmask_msix(device_t dev, u_int index)
1138 {
1139 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1140 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1141 	uint32_t offset, val;
1142 
1143 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1144 	offset = msix->msix_table_offset + index * 16 + 12;
1145 	val = bus_read_4(msix->msix_table_res, offset);
1146 	if (val & PCIM_MSIX_VCTRL_MASK) {
1147 		val &= ~PCIM_MSIX_VCTRL_MASK;
1148 		bus_write_4(msix->msix_table_res, offset, val);
1149 	}
1150 }
1151 
1152 int
1153 pci_pending_msix(device_t dev, u_int index)
1154 {
1155 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1156 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1157 	uint32_t offset, bit;
1158 
1159 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1160 	offset = msix->msix_pba_offset + (index / 32) * 4;
1161 	bit = 1 << index % 32;
1162 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1163 }
1164 
1165 /*
1166  * Restore MSI-X registers and table during resume.  If MSI-X is
1167  * enabled then walk the virtual table to restore the actual MSI-X
1168  * table.
1169  */
1170 static void
1171 pci_resume_msix(device_t dev)
1172 {
1173 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1174 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1175 	struct msix_table_entry *mte;
1176 	struct msix_vector *mv;
1177 	int i;
1178 
1179 	if (msix->msix_alloc > 0) {
1180 		/* First, mask all vectors. */
1181 		for (i = 0; i < msix->msix_msgnum; i++)
1182 			pci_mask_msix(dev, i);
1183 
1184 		/* Second, program any messages with at least one handler. */
1185 		for (i = 0; i < msix->msix_table_len; i++) {
1186 			mte = &msix->msix_table[i];
1187 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1188 				continue;
1189 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1190 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1191 			pci_unmask_msix(dev, i);
1192 		}
1193 	}
1194 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1195 	    msix->msix_ctrl, 2);
1196 }
1197 
1198 /*
1199  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1200  * returned in *count.  After this function returns, each message will be
1201  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1202  */
1203 int
1204 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1205 {
1206 	struct pci_devinfo *dinfo = device_get_ivars(child);
1207 	pcicfgregs *cfg = &dinfo->cfg;
1208 	struct resource_list_entry *rle;
1209 	int actual, error, i, irq, max;
1210 
1211 	/* Don't let count == 0 get us into trouble. */
1212 	if (*count == 0)
1213 		return (EINVAL);
1214 
1215 	/* If rid 0 is allocated, then fail. */
1216 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1217 	if (rle != NULL && rle->res != NULL)
1218 		return (ENXIO);
1219 
1220 	/* Already have allocated messages? */
1221 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1222 		return (ENXIO);
1223 
1224 	/* If MSI is blacklisted for this system, fail. */
1225 	if (pci_msi_blacklisted())
1226 		return (ENXIO);
1227 
1228 	/* MSI-X capability present? */
1229 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1230 		return (ENODEV);
1231 
1232 	/* Make sure the appropriate BARs are mapped. */
1233 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1234 	    cfg->msix.msix_table_bar);
1235 	if (rle == NULL || rle->res == NULL ||
1236 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1237 		return (ENXIO);
1238 	cfg->msix.msix_table_res = rle->res;
1239 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1240 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1241 		    cfg->msix.msix_pba_bar);
1242 		if (rle == NULL || rle->res == NULL ||
1243 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1244 			return (ENXIO);
1245 	}
1246 	cfg->msix.msix_pba_res = rle->res;
1247 
1248 	if (bootverbose)
1249 		device_printf(child,
1250 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1251 		    *count, cfg->msix.msix_msgnum);
1252 	max = min(*count, cfg->msix.msix_msgnum);
1253 	for (i = 0; i < max; i++) {
1254 		/* Allocate a message. */
1255 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1256 		if (error)
1257 			break;
1258 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1259 		    irq, 1);
1260 	}
1261 	actual = i;
1262 
1263 	if (bootverbose) {
1264 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1265 		if (actual == 1)
1266 			device_printf(child, "using IRQ %lu for MSI-X\n",
1267 			    rle->start);
1268 		else {
1269 			int run;
1270 
1271 			/*
1272 			 * Be fancy and try to print contiguous runs of
1273 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1274 			 * 'run' is true if we are in a range.
1275 			 */
1276 			device_printf(child, "using IRQs %lu", rle->start);
1277 			irq = rle->start;
1278 			run = 0;
1279 			for (i = 1; i < actual; i++) {
1280 				rle = resource_list_find(&dinfo->resources,
1281 				    SYS_RES_IRQ, i + 1);
1282 
1283 				/* Still in a run? */
1284 				if (rle->start == irq + 1) {
1285 					run = 1;
1286 					irq++;
1287 					continue;
1288 				}
1289 
1290 				/* Finish previous range. */
1291 				if (run) {
1292 					printf("-%d", irq);
1293 					run = 0;
1294 				}
1295 
1296 				/* Start new range. */
1297 				printf(",%lu", rle->start);
1298 				irq = rle->start;
1299 			}
1300 
1301 			/* Unfinished range? */
1302 			if (run)
1303 				printf("-%d", irq);
1304 			printf(" for MSI-X\n");
1305 		}
1306 	}
1307 
1308 	/* Mask all vectors. */
1309 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1310 		pci_mask_msix(child, i);
1311 
1312 	/* Allocate and initialize vector data and virtual table. */
1313 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1314 	    M_DEVBUF, M_WAITOK | M_ZERO);
1315 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1316 	    M_DEVBUF, M_WAITOK | M_ZERO);
1317 	for (i = 0; i < actual; i++) {
1318 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1319 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1320 		cfg->msix.msix_table[i].mte_vector = i + 1;
1321 	}
1322 
1323 	/* Update control register to enable MSI-X. */
1324 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1325 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1326 	    cfg->msix.msix_ctrl, 2);
1327 
1328 	/* Update counts of alloc'd messages. */
1329 	cfg->msix.msix_alloc = actual;
1330 	cfg->msix.msix_table_len = actual;
1331 	*count = actual;
1332 	return (0);
1333 }
1334 
1335 /*
1336  * By default, pci_alloc_msix() will assign the allocated IRQ
1337  * resources consecutively to the first N messages in the MSI-X table.
1338  * However, device drivers may want to use different layouts if they
1339  * either receive fewer messages than they asked for, or they wish to
1340  * populate the MSI-X table sparsely.  This method allows the driver
1341  * to specify what layout it wants.  It must be called after a
1342  * successful pci_alloc_msix() but before any of the associated
1343  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1344  *
1345  * The 'vectors' array contains 'count' message vectors.  The array
1346  * maps directly to the MSI-X table in that index 0 in the array
1347  * specifies the vector for the first message in the MSI-X table, etc.
1348  * The vector value in each array index can either be 0 to indicate
1349  * that no vector should be assigned to a message slot, or it can be a
1350  * number from 1 to N (where N is the count returned from a
1351  * succcessful call to pci_alloc_msix()) to indicate which message
1352  * vector (IRQ) to be used for the corresponding message.
1353  *
1354  * On successful return, each message with a non-zero vector will have
1355  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1356  * 1.  Additionally, if any of the IRQs allocated via the previous
1357  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1358  * will be freed back to the system automatically.
1359  *
1360  * For example, suppose a driver has a MSI-X table with 6 messages and
1361  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1362  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1363  * C.  After the call to pci_alloc_msix(), the device will be setup to
1364  * have an MSI-X table of ABC--- (where - means no vector assigned).
1365  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1366  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1367  * be freed back to the system.  This device will also have valid
1368  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1369  *
1370  * In any case, the SYS_RES_IRQ rid X will always map to the message
1371  * at MSI-X table index X - 1 and will only be valid if a vector is
1372  * assigned to that table entry.
1373  */
1374 int
1375 pci_remap_msix_method(device_t dev, device_t child, int count,
1376     const u_int *vectors)
1377 {
1378 	struct pci_devinfo *dinfo = device_get_ivars(child);
1379 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1380 	struct resource_list_entry *rle;
1381 	int i, irq, j, *used;
1382 
1383 	/*
1384 	 * Have to have at least one message in the table but the
1385 	 * table can't be bigger than the actual MSI-X table in the
1386 	 * device.
1387 	 */
1388 	if (count == 0 || count > msix->msix_msgnum)
1389 		return (EINVAL);
1390 
1391 	/* Sanity check the vectors. */
1392 	for (i = 0; i < count; i++)
1393 		if (vectors[i] > msix->msix_alloc)
1394 			return (EINVAL);
1395 
1396 	/*
1397 	 * Make sure there aren't any holes in the vectors to be used.
1398 	 * It's a big pain to support it, and it doesn't really make
1399 	 * sense anyway.  Also, at least one vector must be used.
1400 	 */
1401 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1402 	    M_ZERO);
1403 	for (i = 0; i < count; i++)
1404 		if (vectors[i] != 0)
1405 			used[vectors[i] - 1] = 1;
1406 	for (i = 0; i < msix->msix_alloc - 1; i++)
1407 		if (used[i] == 0 && used[i + 1] == 1) {
1408 			free(used, M_DEVBUF);
1409 			return (EINVAL);
1410 		}
1411 	if (used[0] != 1) {
1412 		free(used, M_DEVBUF);
1413 		return (EINVAL);
1414 	}
1415 
1416 	/* Make sure none of the resources are allocated. */
1417 	for (i = 0; i < msix->msix_table_len; i++) {
1418 		if (msix->msix_table[i].mte_vector == 0)
1419 			continue;
1420 		if (msix->msix_table[i].mte_handlers > 0)
1421 			return (EBUSY);
1422 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1423 		KASSERT(rle != NULL, ("missing resource"));
1424 		if (rle->res != NULL)
1425 			return (EBUSY);
1426 	}
1427 
1428 	/* Free the existing resource list entries. */
1429 	for (i = 0; i < msix->msix_table_len; i++) {
1430 		if (msix->msix_table[i].mte_vector == 0)
1431 			continue;
1432 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1433 	}
1434 
1435 	/*
1436 	 * Build the new virtual table keeping track of which vectors are
1437 	 * used.
1438 	 */
1439 	free(msix->msix_table, M_DEVBUF);
1440 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1441 	    M_DEVBUF, M_WAITOK | M_ZERO);
1442 	for (i = 0; i < count; i++)
1443 		msix->msix_table[i].mte_vector = vectors[i];
1444 	msix->msix_table_len = count;
1445 
1446 	/* Free any unused IRQs and resize the vectors array if necessary. */
1447 	j = msix->msix_alloc - 1;
1448 	if (used[j] == 0) {
1449 		struct msix_vector *vec;
1450 
1451 		while (used[j] == 0) {
1452 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1453 			    msix->msix_vectors[j].mv_irq);
1454 			j--;
1455 		}
1456 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1457 		    M_WAITOK);
1458 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1459 		    (j + 1));
1460 		free(msix->msix_vectors, M_DEVBUF);
1461 		msix->msix_vectors = vec;
1462 		msix->msix_alloc = j + 1;
1463 	}
1464 	free(used, M_DEVBUF);
1465 
1466 	/* Map the IRQs onto the rids. */
1467 	for (i = 0; i < count; i++) {
1468 		if (vectors[i] == 0)
1469 			continue;
1470 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1471 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1472 		    irq, 1);
1473 	}
1474 
1475 	if (bootverbose) {
1476 		device_printf(child, "Remapped MSI-X IRQs as: ");
1477 		for (i = 0; i < count; i++) {
1478 			if (i != 0)
1479 				printf(", ");
1480 			if (vectors[i] == 0)
1481 				printf("---");
1482 			else
1483 				printf("%d",
1484 				    msix->msix_vectors[vectors[i]].mv_irq);
1485 		}
1486 		printf("\n");
1487 	}
1488 
1489 	return (0);
1490 }
1491 
1492 static int
1493 pci_release_msix(device_t dev, device_t child)
1494 {
1495 	struct pci_devinfo *dinfo = device_get_ivars(child);
1496 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1497 	struct resource_list_entry *rle;
1498 	int i;
1499 
1500 	/* Do we have any messages to release? */
1501 	if (msix->msix_alloc == 0)
1502 		return (ENODEV);
1503 
1504 	/* Make sure none of the resources are allocated. */
1505 	for (i = 0; i < msix->msix_table_len; i++) {
1506 		if (msix->msix_table[i].mte_vector == 0)
1507 			continue;
1508 		if (msix->msix_table[i].mte_handlers > 0)
1509 			return (EBUSY);
1510 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1511 		KASSERT(rle != NULL, ("missing resource"));
1512 		if (rle->res != NULL)
1513 			return (EBUSY);
1514 	}
1515 
1516 	/* Update control register to disable MSI-X. */
1517 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1518 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1519 	    msix->msix_ctrl, 2);
1520 
1521 	/* Free the resource list entries. */
1522 	for (i = 0; i < msix->msix_table_len; i++) {
1523 		if (msix->msix_table[i].mte_vector == 0)
1524 			continue;
1525 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1526 	}
1527 	free(msix->msix_table, M_DEVBUF);
1528 	msix->msix_table_len = 0;
1529 
1530 	/* Release the IRQs. */
1531 	for (i = 0; i < msix->msix_alloc; i++)
1532 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1533 		    msix->msix_vectors[i].mv_irq);
1534 	free(msix->msix_vectors, M_DEVBUF);
1535 	msix->msix_alloc = 0;
1536 	return (0);
1537 }
1538 
1539 /*
1540  * Return the max supported MSI-X messages this device supports.
1541  * Basically, assuming the MD code can alloc messages, this function
1542  * should return the maximum value that pci_alloc_msix() can return.
1543  * Thus, it is subject to the tunables, etc.
1544  */
1545 int
1546 pci_msix_count_method(device_t dev, device_t child)
1547 {
1548 	struct pci_devinfo *dinfo = device_get_ivars(child);
1549 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1550 
1551 	if (pci_do_msix && msix->msix_location != 0)
1552 		return (msix->msix_msgnum);
1553 	return (0);
1554 }
1555 
1556 /*
1557  * HyperTransport MSI mapping control
1558  */
1559 void
1560 pci_ht_map_msi(device_t dev, uint64_t addr)
1561 {
1562 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1563 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1564 
1565 	if (!ht->ht_msimap)
1566 		return;
1567 
1568 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1569 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1570 		/* Enable MSI -> HT mapping. */
1571 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1572 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1573 		    ht->ht_msictrl, 2);
1574 	}
1575 
1576 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1577 		/* Disable MSI -> HT mapping. */
1578 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1579 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1580 		    ht->ht_msictrl, 2);
1581 	}
1582 }
1583 
1584 /*
1585  * Support for MSI message signalled interrupts.
1586  */
1587 void
1588 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1589 {
1590 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1591 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1592 
1593 	/* Write data and address values. */
1594 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1595 	    address & 0xffffffff, 4);
1596 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1597 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1598 		    address >> 32, 4);
1599 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1600 		    data, 2);
1601 	} else
1602 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1603 		    2);
1604 
1605 	/* Enable MSI in the control register. */
1606 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1607 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1608 	    2);
1609 
1610 	/* Enable MSI -> HT mapping. */
1611 	pci_ht_map_msi(dev, address);
1612 }
1613 
1614 void
1615 pci_disable_msi(device_t dev)
1616 {
1617 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1618 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1619 
1620 	/* Disable MSI -> HT mapping. */
1621 	pci_ht_map_msi(dev, 0);
1622 
1623 	/* Disable MSI in the control register. */
1624 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1625 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1626 	    2);
1627 }
1628 
1629 /*
1630  * Restore MSI registers during resume.  If MSI is enabled then
1631  * restore the data and address registers in addition to the control
1632  * register.
1633  */
1634 static void
1635 pci_resume_msi(device_t dev)
1636 {
1637 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1638 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1639 	uint64_t address;
1640 	uint16_t data;
1641 
1642 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1643 		address = msi->msi_addr;
1644 		data = msi->msi_data;
1645 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1646 		    address & 0xffffffff, 4);
1647 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1648 			pci_write_config(dev, msi->msi_location +
1649 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1650 			pci_write_config(dev, msi->msi_location +
1651 			    PCIR_MSI_DATA_64BIT, data, 2);
1652 		} else
1653 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1654 			    data, 2);
1655 	}
1656 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1657 	    2);
1658 }
1659 
1660 int
1661 pci_remap_msi_irq(device_t dev, u_int irq)
1662 {
1663 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1664 	pcicfgregs *cfg = &dinfo->cfg;
1665 	struct resource_list_entry *rle;
1666 	struct msix_table_entry *mte;
1667 	struct msix_vector *mv;
1668 	device_t bus;
1669 	uint64_t addr;
1670 	uint32_t data;
1671 	int error, i, j;
1672 
1673 	bus = device_get_parent(dev);
1674 
1675 	/*
1676 	 * Handle MSI first.  We try to find this IRQ among our list
1677 	 * of MSI IRQs.  If we find it, we request updated address and
1678 	 * data registers and apply the results.
1679 	 */
1680 	if (cfg->msi.msi_alloc > 0) {
1681 
1682 		/* If we don't have any active handlers, nothing to do. */
1683 		if (cfg->msi.msi_handlers == 0)
1684 			return (0);
1685 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1686 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1687 			    i + 1);
1688 			if (rle->start == irq) {
1689 				error = PCIB_MAP_MSI(device_get_parent(bus),
1690 				    dev, irq, &addr, &data);
1691 				if (error)
1692 					return (error);
1693 				pci_disable_msi(dev);
1694 				dinfo->cfg.msi.msi_addr = addr;
1695 				dinfo->cfg.msi.msi_data = data;
1696 				pci_enable_msi(dev, addr, data);
1697 				return (0);
1698 			}
1699 		}
1700 		return (ENOENT);
1701 	}
1702 
1703 	/*
1704 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1705 	 * we request the updated mapping info.  If that works, we go
1706 	 * through all the slots that use this IRQ and update them.
1707 	 */
1708 	if (cfg->msix.msix_alloc > 0) {
1709 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1710 			mv = &cfg->msix.msix_vectors[i];
1711 			if (mv->mv_irq == irq) {
1712 				error = PCIB_MAP_MSI(device_get_parent(bus),
1713 				    dev, irq, &addr, &data);
1714 				if (error)
1715 					return (error);
1716 				mv->mv_address = addr;
1717 				mv->mv_data = data;
1718 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1719 					mte = &cfg->msix.msix_table[j];
1720 					if (mte->mte_vector != i + 1)
1721 						continue;
1722 					if (mte->mte_handlers == 0)
1723 						continue;
1724 					pci_mask_msix(dev, j);
1725 					pci_enable_msix(dev, j, addr, data);
1726 					pci_unmask_msix(dev, j);
1727 				}
1728 			}
1729 		}
1730 		return (ENOENT);
1731 	}
1732 
1733 	return (ENOENT);
1734 }
1735 
1736 /*
1737  * Returns true if the specified device is blacklisted because MSI
1738  * doesn't work.
1739  */
1740 int
1741 pci_msi_device_blacklisted(device_t dev)
1742 {
1743 	struct pci_quirk *q;
1744 
1745 	if (!pci_honor_msi_blacklist)
1746 		return (0);
1747 
1748 	for (q = &pci_quirks[0]; q->devid; q++) {
1749 		if (q->devid == pci_get_devid(dev) &&
1750 		    q->type == PCI_QUIRK_DISABLE_MSI)
1751 			return (1);
1752 	}
1753 	return (0);
1754 }
1755 
1756 /*
1757  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1758  * we just check for blacklisted chipsets as represented by the
1759  * host-PCI bridge at device 0:0:0.  In the future, it may become
1760  * necessary to check other system attributes, such as the kenv values
1761  * that give the motherboard manufacturer and model number.
1762  */
1763 static int
1764 pci_msi_blacklisted(void)
1765 {
1766 	device_t dev;
1767 
1768 	if (!pci_honor_msi_blacklist)
1769 		return (0);
1770 
1771 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1772 	if (!(pcie_chipset || pcix_chipset))
1773 		return (1);
1774 
1775 	dev = pci_find_bsf(0, 0, 0);
1776 	if (dev != NULL)
1777 		return (pci_msi_device_blacklisted(dev));
1778 	return (0);
1779 }
1780 
1781 /*
1782  * Attempt to allocate *count MSI messages.  The actual number allocated is
1783  * returned in *count.  After this function returns, each message will be
1784  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1785  */
1786 int
1787 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1788 {
1789 	struct pci_devinfo *dinfo = device_get_ivars(child);
1790 	pcicfgregs *cfg = &dinfo->cfg;
1791 	struct resource_list_entry *rle;
1792 	int actual, error, i, irqs[32];
1793 	uint16_t ctrl;
1794 
1795 	/* Don't let count == 0 get us into trouble. */
1796 	if (*count == 0)
1797 		return (EINVAL);
1798 
1799 	/* If rid 0 is allocated, then fail. */
1800 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1801 	if (rle != NULL && rle->res != NULL)
1802 		return (ENXIO);
1803 
1804 	/* Already have allocated messages? */
1805 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1806 		return (ENXIO);
1807 
1808 	/* If MSI is blacklisted for this system, fail. */
1809 	if (pci_msi_blacklisted())
1810 		return (ENXIO);
1811 
1812 	/* MSI capability present? */
1813 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1814 		return (ENODEV);
1815 
1816 	if (bootverbose)
1817 		device_printf(child,
1818 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1819 		    *count, cfg->msi.msi_msgnum);
1820 
1821 	/* Don't ask for more than the device supports. */
1822 	actual = min(*count, cfg->msi.msi_msgnum);
1823 
1824 	/* Don't ask for more than 32 messages. */
1825 	actual = min(actual, 32);
1826 
1827 	/* MSI requires power of 2 number of messages. */
1828 	if (!powerof2(actual))
1829 		return (EINVAL);
1830 
1831 	for (;;) {
1832 		/* Try to allocate N messages. */
1833 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1834 		    cfg->msi.msi_msgnum, irqs);
1835 		if (error == 0)
1836 			break;
1837 		if (actual == 1)
1838 			return (error);
1839 
1840 		/* Try N / 2. */
1841 		actual >>= 1;
1842 	}
1843 
1844 	/*
1845 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1846 	 * resources in the irqs[] array, so add new resources
1847 	 * starting at rid 1.
1848 	 */
1849 	for (i = 0; i < actual; i++)
1850 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1851 		    irqs[i], irqs[i], 1);
1852 
1853 	if (bootverbose) {
1854 		if (actual == 1)
1855 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1856 		else {
1857 			int run;
1858 
1859 			/*
1860 			 * Be fancy and try to print contiguous runs
1861 			 * of IRQ values as ranges.  'run' is true if
1862 			 * we are in a range.
1863 			 */
1864 			device_printf(child, "using IRQs %d", irqs[0]);
1865 			run = 0;
1866 			for (i = 1; i < actual; i++) {
1867 
1868 				/* Still in a run? */
1869 				if (irqs[i] == irqs[i - 1] + 1) {
1870 					run = 1;
1871 					continue;
1872 				}
1873 
1874 				/* Finish previous range. */
1875 				if (run) {
1876 					printf("-%d", irqs[i - 1]);
1877 					run = 0;
1878 				}
1879 
1880 				/* Start new range. */
1881 				printf(",%d", irqs[i]);
1882 			}
1883 
1884 			/* Unfinished range? */
1885 			if (run)
1886 				printf("-%d", irqs[actual - 1]);
1887 			printf(" for MSI\n");
1888 		}
1889 	}
1890 
1891 	/* Update control register with actual count. */
1892 	ctrl = cfg->msi.msi_ctrl;
1893 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1894 	ctrl |= (ffs(actual) - 1) << 4;
1895 	cfg->msi.msi_ctrl = ctrl;
1896 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1897 
1898 	/* Update counts of alloc'd messages. */
1899 	cfg->msi.msi_alloc = actual;
1900 	cfg->msi.msi_handlers = 0;
1901 	*count = actual;
1902 	return (0);
1903 }
1904 
1905 /* Release the MSI messages associated with this device. */
1906 int
1907 pci_release_msi_method(device_t dev, device_t child)
1908 {
1909 	struct pci_devinfo *dinfo = device_get_ivars(child);
1910 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1911 	struct resource_list_entry *rle;
1912 	int error, i, irqs[32];
1913 
1914 	/* Try MSI-X first. */
1915 	error = pci_release_msix(dev, child);
1916 	if (error != ENODEV)
1917 		return (error);
1918 
1919 	/* Do we have any messages to release? */
1920 	if (msi->msi_alloc == 0)
1921 		return (ENODEV);
1922 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1923 
1924 	/* Make sure none of the resources are allocated. */
1925 	if (msi->msi_handlers > 0)
1926 		return (EBUSY);
1927 	for (i = 0; i < msi->msi_alloc; i++) {
1928 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1929 		KASSERT(rle != NULL, ("missing MSI resource"));
1930 		if (rle->res != NULL)
1931 			return (EBUSY);
1932 		irqs[i] = rle->start;
1933 	}
1934 
1935 	/* Update control register with 0 count. */
1936 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1937 	    ("%s: MSI still enabled", __func__));
1938 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1939 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1940 	    msi->msi_ctrl, 2);
1941 
1942 	/* Release the messages. */
1943 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1944 	for (i = 0; i < msi->msi_alloc; i++)
1945 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1946 
1947 	/* Update alloc count. */
1948 	msi->msi_alloc = 0;
1949 	msi->msi_addr = 0;
1950 	msi->msi_data = 0;
1951 	return (0);
1952 }
1953 
1954 /*
1955  * Return the max supported MSI messages this device supports.
1956  * Basically, assuming the MD code can alloc messages, this function
1957  * should return the maximum value that pci_alloc_msi() can return.
1958  * Thus, it is subject to the tunables, etc.
1959  */
1960 int
1961 pci_msi_count_method(device_t dev, device_t child)
1962 {
1963 	struct pci_devinfo *dinfo = device_get_ivars(child);
1964 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1965 
1966 	if (pci_do_msi && msi->msi_location != 0)
1967 		return (msi->msi_msgnum);
1968 	return (0);
1969 }
1970 
1971 /* free pcicfgregs structure and all depending data structures */
1972 
1973 int
1974 pci_freecfg(struct pci_devinfo *dinfo)
1975 {
1976 	struct devlist *devlist_head;
1977 	int i;
1978 
1979 	devlist_head = &pci_devq;
1980 
1981 	if (dinfo->cfg.vpd.vpd_reg) {
1982 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1983 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1984 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1985 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1986 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1987 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1988 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1989 	}
1990 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1991 	free(dinfo, M_DEVBUF);
1992 
1993 	/* increment the generation count */
1994 	pci_generation++;
1995 
1996 	/* we're losing one device */
1997 	pci_numdevs--;
1998 	return (0);
1999 }
2000 
2001 /*
2002  * PCI power manangement
2003  */
2004 int
2005 pci_set_powerstate_method(device_t dev, device_t child, int state)
2006 {
2007 	struct pci_devinfo *dinfo = device_get_ivars(child);
2008 	pcicfgregs *cfg = &dinfo->cfg;
2009 	uint16_t status;
2010 	int result, oldstate, highest, delay;
2011 
2012 	if (cfg->pp.pp_cap == 0)
2013 		return (EOPNOTSUPP);
2014 
2015 	/*
2016 	 * Optimize a no state change request away.  While it would be OK to
2017 	 * write to the hardware in theory, some devices have shown odd
2018 	 * behavior when going from D3 -> D3.
2019 	 */
2020 	oldstate = pci_get_powerstate(child);
2021 	if (oldstate == state)
2022 		return (0);
2023 
2024 	/*
2025 	 * The PCI power management specification states that after a state
2026 	 * transition between PCI power states, system software must
2027 	 * guarantee a minimal delay before the function accesses the device.
2028 	 * Compute the worst case delay that we need to guarantee before we
2029 	 * access the device.  Many devices will be responsive much more
2030 	 * quickly than this delay, but there are some that don't respond
2031 	 * instantly to state changes.  Transitions to/from D3 state require
2032 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2033 	 * is done below with DELAY rather than a sleeper function because
2034 	 * this function can be called from contexts where we cannot sleep.
2035 	 */
2036 	highest = (oldstate > state) ? oldstate : state;
2037 	if (highest == PCI_POWERSTATE_D3)
2038 	    delay = 10000;
2039 	else if (highest == PCI_POWERSTATE_D2)
2040 	    delay = 200;
2041 	else
2042 	    delay = 0;
2043 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2044 	    & ~PCIM_PSTAT_DMASK;
2045 	result = 0;
2046 	switch (state) {
2047 	case PCI_POWERSTATE_D0:
2048 		status |= PCIM_PSTAT_D0;
2049 		break;
2050 	case PCI_POWERSTATE_D1:
2051 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2052 			return (EOPNOTSUPP);
2053 		status |= PCIM_PSTAT_D1;
2054 		break;
2055 	case PCI_POWERSTATE_D2:
2056 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2057 			return (EOPNOTSUPP);
2058 		status |= PCIM_PSTAT_D2;
2059 		break;
2060 	case PCI_POWERSTATE_D3:
2061 		status |= PCIM_PSTAT_D3;
2062 		break;
2063 	default:
2064 		return (EINVAL);
2065 	}
2066 
2067 	if (bootverbose)
2068 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2069 		    state);
2070 
2071 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2072 	if (delay)
2073 		DELAY(delay);
2074 	return (0);
2075 }
2076 
2077 int
2078 pci_get_powerstate_method(device_t dev, device_t child)
2079 {
2080 	struct pci_devinfo *dinfo = device_get_ivars(child);
2081 	pcicfgregs *cfg = &dinfo->cfg;
2082 	uint16_t status;
2083 	int result;
2084 
2085 	if (cfg->pp.pp_cap != 0) {
2086 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2087 		switch (status & PCIM_PSTAT_DMASK) {
2088 		case PCIM_PSTAT_D0:
2089 			result = PCI_POWERSTATE_D0;
2090 			break;
2091 		case PCIM_PSTAT_D1:
2092 			result = PCI_POWERSTATE_D1;
2093 			break;
2094 		case PCIM_PSTAT_D2:
2095 			result = PCI_POWERSTATE_D2;
2096 			break;
2097 		case PCIM_PSTAT_D3:
2098 			result = PCI_POWERSTATE_D3;
2099 			break;
2100 		default:
2101 			result = PCI_POWERSTATE_UNKNOWN;
2102 			break;
2103 		}
2104 	} else {
2105 		/* No support, device is always at D0 */
2106 		result = PCI_POWERSTATE_D0;
2107 	}
2108 	return (result);
2109 }
2110 
2111 /*
2112  * Some convenience functions for PCI device drivers.
2113  */
2114 
2115 static __inline void
2116 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2117 {
2118 	uint16_t	command;
2119 
2120 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2121 	command |= bit;
2122 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2123 }
2124 
2125 static __inline void
2126 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2127 {
2128 	uint16_t	command;
2129 
2130 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2131 	command &= ~bit;
2132 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2133 }
2134 
2135 int
2136 pci_enable_busmaster_method(device_t dev, device_t child)
2137 {
2138 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2139 	return (0);
2140 }
2141 
2142 int
2143 pci_disable_busmaster_method(device_t dev, device_t child)
2144 {
2145 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2146 	return (0);
2147 }
2148 
2149 int
2150 pci_enable_io_method(device_t dev, device_t child, int space)
2151 {
2152 	uint16_t command;
2153 	uint16_t bit;
2154 	char *error;
2155 
2156 	bit = 0;
2157 	error = NULL;
2158 
2159 	switch(space) {
2160 	case SYS_RES_IOPORT:
2161 		bit = PCIM_CMD_PORTEN;
2162 		error = "port";
2163 		break;
2164 	case SYS_RES_MEMORY:
2165 		bit = PCIM_CMD_MEMEN;
2166 		error = "memory";
2167 		break;
2168 	default:
2169 		return (EINVAL);
2170 	}
2171 	pci_set_command_bit(dev, child, bit);
2172 	/* Some devices seem to need a brief stall here, what do to? */
2173 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2174 	if (command & bit)
2175 		return (0);
2176 	device_printf(child, "failed to enable %s mapping!\n", error);
2177 	return (ENXIO);
2178 }
2179 
2180 int
2181 pci_disable_io_method(device_t dev, device_t child, int space)
2182 {
2183 	uint16_t command;
2184 	uint16_t bit;
2185 	char *error;
2186 
2187 	bit = 0;
2188 	error = NULL;
2189 
2190 	switch(space) {
2191 	case SYS_RES_IOPORT:
2192 		bit = PCIM_CMD_PORTEN;
2193 		error = "port";
2194 		break;
2195 	case SYS_RES_MEMORY:
2196 		bit = PCIM_CMD_MEMEN;
2197 		error = "memory";
2198 		break;
2199 	default:
2200 		return (EINVAL);
2201 	}
2202 	pci_clear_command_bit(dev, child, bit);
2203 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2204 	if (command & bit) {
2205 		device_printf(child, "failed to disable %s mapping!\n", error);
2206 		return (ENXIO);
2207 	}
2208 	return (0);
2209 }
2210 
2211 /*
2212  * New style pci driver.  Parent device is either a pci-host-bridge or a
2213  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2214  */
2215 
2216 void
2217 pci_print_verbose(struct pci_devinfo *dinfo)
2218 {
2219 
2220 	if (bootverbose) {
2221 		pcicfgregs *cfg = &dinfo->cfg;
2222 
2223 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2224 		    cfg->vendor, cfg->device, cfg->revid);
2225 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2226 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2227 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2228 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2229 		    cfg->mfdev);
2230 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2231 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2232 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2233 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2234 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2235 		if (cfg->intpin > 0)
2236 			printf("\tintpin=%c, irq=%d\n",
2237 			    cfg->intpin +'a' -1, cfg->intline);
2238 		if (cfg->pp.pp_cap) {
2239 			uint16_t status;
2240 
2241 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2242 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2243 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2244 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2245 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2246 			    status & PCIM_PSTAT_DMASK);
2247 		}
2248 		if (cfg->msi.msi_location) {
2249 			int ctrl;
2250 
2251 			ctrl = cfg->msi.msi_ctrl;
2252 			printf("\tMSI supports %d message%s%s%s\n",
2253 			    cfg->msi.msi_msgnum,
2254 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2255 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2256 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2257 		}
2258 		if (cfg->msix.msix_location) {
2259 			printf("\tMSI-X supports %d message%s ",
2260 			    cfg->msix.msix_msgnum,
2261 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2262 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2263 				printf("in map 0x%x\n",
2264 				    cfg->msix.msix_table_bar);
2265 			else
2266 				printf("in maps 0x%x and 0x%x\n",
2267 				    cfg->msix.msix_table_bar,
2268 				    cfg->msix.msix_pba_bar);
2269 		}
2270 	}
2271 }
2272 
2273 static int
2274 pci_porten(device_t dev)
2275 {
2276 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2277 }
2278 
2279 static int
2280 pci_memen(device_t dev)
2281 {
2282 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2283 }
2284 
2285 static void
2286 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2287 {
2288 	pci_addr_t map, testval;
2289 	int ln2range;
2290 	uint16_t cmd;
2291 
2292 	map = pci_read_config(dev, reg, 4);
2293 	ln2range = pci_maprange(map);
2294 	if (ln2range == 64)
2295 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2296 
2297 	/*
2298 	 * Disable decoding via the command register before
2299 	 * determining the BAR's length since we will be placing it in
2300 	 * a weird state.
2301 	 */
2302 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2303 	pci_write_config(dev, PCIR_COMMAND,
2304 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2305 
2306 	/*
2307 	 * Determine the BAR's length by writing all 1's.  The bottom
2308 	 * log_2(size) bits of the BAR will stick as 0 when we read
2309 	 * the value back.
2310 	 */
2311 	pci_write_config(dev, reg, 0xffffffff, 4);
2312 	testval = pci_read_config(dev, reg, 4);
2313 	if (ln2range == 64) {
2314 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2315 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2316 	}
2317 
2318 	/*
2319 	 * Restore the original value of the BAR.  We may have reprogrammed
2320 	 * the BAR of the low-level console device and when booting verbose,
2321 	 * we need the console device addressable.
2322 	 */
2323 	pci_write_config(dev, reg, map, 4);
2324 	if (ln2range == 64)
2325 		pci_write_config(dev, reg + 4, map >> 32, 4);
2326 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2327 
2328 	*mapp = map;
2329 	*testvalp = testval;
2330 }
2331 
2332 static void
2333 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2334 {
2335 	pci_addr_t map;
2336 	int ln2range;
2337 
2338 	map = pci_read_config(dev, reg, 4);
2339 	ln2range = pci_maprange(map);
2340 	pci_write_config(dev, reg, base, 4);
2341 	if (ln2range == 64)
2342 		pci_write_config(dev, reg + 4, base >> 32, 4);
2343 }
2344 
2345 /*
2346  * Add a resource based on a pci map register. Return 1 if the map
2347  * register is a 32bit map register or 2 if it is a 64bit register.
2348  */
2349 static int
2350 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2351     int force, int prefetch)
2352 {
2353 	pci_addr_t base, map, testval;
2354 	pci_addr_t start, end, count;
2355 	int barlen, maprange, mapsize, type;
2356 	uint16_t cmd;
2357 	struct resource *res;
2358 
2359 	pci_read_bar(dev, reg, &map, &testval);
2360 	if (PCI_BAR_MEM(map)) {
2361 		type = SYS_RES_MEMORY;
2362 		if (map & PCIM_BAR_MEM_PREFETCH)
2363 			prefetch = 1;
2364 	} else
2365 		type = SYS_RES_IOPORT;
2366 	mapsize = pci_mapsize(testval);
2367 	base = pci_mapbase(map);
2368 	maprange = pci_maprange(map);
2369 	barlen = maprange == 64 ? 2 : 1;
2370 
2371 	/*
2372 	 * For I/O registers, if bottom bit is set, and the next bit up
2373 	 * isn't clear, we know we have a BAR that doesn't conform to the
2374 	 * spec, so ignore it.  Also, sanity check the size of the data
2375 	 * areas to the type of memory involved.  Memory must be at least
2376 	 * 16 bytes in size, while I/O ranges must be at least 4.
2377 	 */
2378 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2379 		return (barlen);
2380 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2381 	    (type == SYS_RES_IOPORT && mapsize < 2))
2382 		return (barlen);
2383 
2384 	if (bootverbose) {
2385 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2386 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2387 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2388 			printf(", port disabled\n");
2389 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2390 			printf(", memory disabled\n");
2391 		else
2392 			printf(", enabled\n");
2393 	}
2394 
2395 	/*
2396 	 * If base is 0, then we have problems.  It is best to ignore
2397 	 * such entries for the moment.  These will be allocated later if
2398 	 * the driver specifically requests them.  However, some
2399 	 * removable busses look better when all resources are allocated,
2400 	 * so allow '0' to be overriden.
2401 	 *
2402 	 * Similarly treat maps whose values is the same as the test value
2403 	 * read back.  These maps have had all f's written to them by the
2404 	 * BIOS in an attempt to disable the resources.
2405 	 */
2406 	if (!force && (base == 0 || map == testval))
2407 		return (barlen);
2408 	if ((u_long)base != base) {
2409 		device_printf(bus,
2410 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2411 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2412 		    pci_get_function(dev), reg);
2413 		return (barlen);
2414 	}
2415 
2416 	/*
2417 	 * This code theoretically does the right thing, but has
2418 	 * undesirable side effects in some cases where peripherals
2419 	 * respond oddly to having these bits enabled.  Let the user
2420 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2421 	 * default).
2422 	 */
2423 	if (pci_enable_io_modes) {
2424 		/* Turn on resources that have been left off by a lazy BIOS */
2425 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2426 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2427 			cmd |= PCIM_CMD_PORTEN;
2428 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2429 		}
2430 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2431 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2432 			cmd |= PCIM_CMD_MEMEN;
2433 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2434 		}
2435 	} else {
2436 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2437 			return (barlen);
2438 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2439 			return (barlen);
2440 	}
2441 
2442 	count = 1 << mapsize;
2443 	if (base == 0 || base == pci_mapbase(testval)) {
2444 		start = 0;	/* Let the parent decide. */
2445 		end = ~0ULL;
2446 	} else {
2447 		start = base;
2448 		end = base + (1 << mapsize) - 1;
2449 	}
2450 	resource_list_add(rl, type, reg, start, end, count);
2451 
2452 	/*
2453 	 * Try to allocate the resource for this BAR from our parent
2454 	 * so that this resource range is already reserved.  The
2455 	 * driver for this device will later inherit this resource in
2456 	 * pci_alloc_resource().
2457 	 */
2458 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2459 	    prefetch ? RF_PREFETCHABLE : 0);
2460 	if (res == NULL) {
2461 		/*
2462 		 * If the allocation fails, clear the BAR and delete
2463 		 * the resource list entry to force
2464 		 * pci_alloc_resource() to allocate resources from the
2465 		 * parent.
2466 		 */
2467 		resource_list_delete(rl, type, reg);
2468 		start = 0;
2469 	} else {
2470 		start = rman_get_start(res);
2471 		rman_set_device(res, bus);
2472 	}
2473 	pci_write_bar(dev, reg, start);
2474 	return (barlen);
2475 }
2476 
2477 /*
2478  * For ATA devices we need to decide early what addressing mode to use.
2479  * Legacy demands that the primary and secondary ATA ports sits on the
2480  * same addresses that old ISA hardware did. This dictates that we use
2481  * those addresses and ignore the BAR's if we cannot set PCI native
2482  * addressing mode.
2483  */
2484 static void
2485 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2486     uint32_t prefetchmask)
2487 {
2488 	struct resource *r;
2489 	int rid, type, progif;
2490 #if 0
2491 	/* if this device supports PCI native addressing use it */
2492 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2493 	if ((progif & 0x8a) == 0x8a) {
2494 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2495 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2496 			printf("Trying ATA native PCI addressing mode\n");
2497 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2498 		}
2499 	}
2500 #endif
2501 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2502 	type = SYS_RES_IOPORT;
2503 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2504 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2505 		    prefetchmask & (1 << 0));
2506 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2507 		    prefetchmask & (1 << 1));
2508 	} else {
2509 		rid = PCIR_BAR(0);
2510 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2511 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2512 		    8, 0);
2513 		rman_set_device(r, bus);
2514 		rid = PCIR_BAR(1);
2515 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2516 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2517 		    1, 0);
2518 		rman_set_device(r, bus);
2519 	}
2520 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2521 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2522 		    prefetchmask & (1 << 2));
2523 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2524 		    prefetchmask & (1 << 3));
2525 	} else {
2526 		rid = PCIR_BAR(2);
2527 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2528 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2529 		    8, 0);
2530 		rman_set_device(r, bus);
2531 		rid = PCIR_BAR(3);
2532 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2533 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2534 		    1, 0);
2535 		rman_set_device(r, bus);
2536 	}
2537 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2538 	    prefetchmask & (1 << 4));
2539 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2540 	    prefetchmask & (1 << 5));
2541 }
2542 
2543 static void
2544 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2545 {
2546 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2547 	pcicfgregs *cfg = &dinfo->cfg;
2548 	char tunable_name[64];
2549 	int irq;
2550 
2551 	/* Has to have an intpin to have an interrupt. */
2552 	if (cfg->intpin == 0)
2553 		return;
2554 
2555 	/* Let the user override the IRQ with a tunable. */
2556 	irq = PCI_INVALID_IRQ;
2557 	snprintf(tunable_name, sizeof(tunable_name),
2558 	    "hw.pci%d.%d.%d.INT%c.irq",
2559 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2560 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2561 		irq = PCI_INVALID_IRQ;
2562 
2563 	/*
2564 	 * If we didn't get an IRQ via the tunable, then we either use the
2565 	 * IRQ value in the intline register or we ask the bus to route an
2566 	 * interrupt for us.  If force_route is true, then we only use the
2567 	 * value in the intline register if the bus was unable to assign an
2568 	 * IRQ.
2569 	 */
2570 	if (!PCI_INTERRUPT_VALID(irq)) {
2571 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2572 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2573 		if (!PCI_INTERRUPT_VALID(irq))
2574 			irq = cfg->intline;
2575 	}
2576 
2577 	/* If after all that we don't have an IRQ, just bail. */
2578 	if (!PCI_INTERRUPT_VALID(irq))
2579 		return;
2580 
2581 	/* Update the config register if it changed. */
2582 	if (irq != cfg->intline) {
2583 		cfg->intline = irq;
2584 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2585 	}
2586 
2587 	/* Add this IRQ as rid 0 interrupt resource. */
2588 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2589 }
2590 
2591 void
2592 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2593 {
2594 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2595 	pcicfgregs *cfg = &dinfo->cfg;
2596 	struct resource_list *rl = &dinfo->resources;
2597 	struct pci_quirk *q;
2598 	int i;
2599 
2600 	/* ATA devices needs special map treatment */
2601 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2602 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2603 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2604 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2605 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2606 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
2607 	else
2608 		for (i = 0; i < cfg->nummaps;)
2609 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
2610 			    prefetchmask & (1 << i));
2611 
2612 	/*
2613 	 * Add additional, quirked resources.
2614 	 */
2615 	for (q = &pci_quirks[0]; q->devid; q++) {
2616 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2617 		    && q->type == PCI_QUIRK_MAP_REG)
2618 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
2619 	}
2620 
2621 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2622 #ifdef __PCI_REROUTE_INTERRUPT
2623 		/*
2624 		 * Try to re-route interrupts. Sometimes the BIOS or
2625 		 * firmware may leave bogus values in these registers.
2626 		 * If the re-route fails, then just stick with what we
2627 		 * have.
2628 		 */
2629 		pci_assign_interrupt(bus, dev, 1);
2630 #else
2631 		pci_assign_interrupt(bus, dev, 0);
2632 #endif
2633 	}
2634 }
2635 
2636 void
2637 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2638 {
2639 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2640 	device_t pcib = device_get_parent(dev);
2641 	struct pci_devinfo *dinfo;
2642 	int maxslots;
2643 	int s, f, pcifunchigh;
2644 	uint8_t hdrtype;
2645 
2646 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2647 	    ("dinfo_size too small"));
2648 	maxslots = PCIB_MAXSLOTS(pcib);
2649 	for (s = 0; s <= maxslots; s++) {
2650 		pcifunchigh = 0;
2651 		f = 0;
2652 		DELAY(1);
2653 		hdrtype = REG(PCIR_HDRTYPE, 1);
2654 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2655 			continue;
2656 		if (hdrtype & PCIM_MFDEV)
2657 			pcifunchigh = PCI_FUNCMAX;
2658 		for (f = 0; f <= pcifunchigh; f++) {
2659 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2660 			    dinfo_size);
2661 			if (dinfo != NULL) {
2662 				pci_add_child(dev, dinfo);
2663 			}
2664 		}
2665 	}
2666 #undef REG
2667 }
2668 
2669 void
2670 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2671 {
2672 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2673 	device_set_ivars(dinfo->cfg.dev, dinfo);
2674 	resource_list_init(&dinfo->resources);
2675 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2676 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2677 	pci_print_verbose(dinfo);
2678 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2679 }
2680 
2681 static int
2682 pci_probe(device_t dev)
2683 {
2684 
2685 	device_set_desc(dev, "PCI bus");
2686 
2687 	/* Allow other subclasses to override this driver. */
2688 	return (BUS_PROBE_GENERIC);
2689 }
2690 
2691 static int
2692 pci_attach(device_t dev)
2693 {
2694 	int busno, domain;
2695 
2696 	/*
2697 	 * Since there can be multiple independantly numbered PCI
2698 	 * busses on systems with multiple PCI domains, we can't use
2699 	 * the unit number to decide which bus we are probing. We ask
2700 	 * the parent pcib what our domain and bus numbers are.
2701 	 */
2702 	domain = pcib_get_domain(dev);
2703 	busno = pcib_get_bus(dev);
2704 	if (bootverbose)
2705 		device_printf(dev, "domain=%d, physical bus=%d\n",
2706 		    domain, busno);
2707 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2708 	return (bus_generic_attach(dev));
2709 }
2710 
2711 int
2712 pci_suspend(device_t dev)
2713 {
2714 	int dstate, error, i, numdevs;
2715 	device_t acpi_dev, child, *devlist;
2716 	struct pci_devinfo *dinfo;
2717 
2718 	/*
2719 	 * Save the PCI configuration space for each child and set the
2720 	 * device in the appropriate power state for this sleep state.
2721 	 */
2722 	acpi_dev = NULL;
2723 	if (pci_do_power_resume)
2724 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2725 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2726 		return (error);
2727 	for (i = 0; i < numdevs; i++) {
2728 		child = devlist[i];
2729 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2730 		pci_cfg_save(child, dinfo, 0);
2731 	}
2732 
2733 	/* Suspend devices before potentially powering them down. */
2734 	error = bus_generic_suspend(dev);
2735 	if (error) {
2736 		free(devlist, M_TEMP);
2737 		return (error);
2738 	}
2739 
2740 	/*
2741 	 * Always set the device to D3.  If ACPI suggests a different
2742 	 * power state, use it instead.  If ACPI is not present, the
2743 	 * firmware is responsible for managing device power.  Skip
2744 	 * children who aren't attached since they are powered down
2745 	 * separately.  Only manage type 0 devices for now.
2746 	 */
2747 	for (i = 0; acpi_dev && i < numdevs; i++) {
2748 		child = devlist[i];
2749 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2750 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2751 			dstate = PCI_POWERSTATE_D3;
2752 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2753 			pci_set_powerstate(child, dstate);
2754 		}
2755 	}
2756 	free(devlist, M_TEMP);
2757 	return (0);
2758 }
2759 
2760 int
2761 pci_resume(device_t dev)
2762 {
2763 	int i, numdevs, error;
2764 	device_t acpi_dev, child, *devlist;
2765 	struct pci_devinfo *dinfo;
2766 
2767 	/*
2768 	 * Set each child to D0 and restore its PCI configuration space.
2769 	 */
2770 	acpi_dev = NULL;
2771 	if (pci_do_power_resume)
2772 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2773 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2774 		return (error);
2775 	for (i = 0; i < numdevs; i++) {
2776 		/*
2777 		 * Notify ACPI we're going to D0 but ignore the result.  If
2778 		 * ACPI is not present, the firmware is responsible for
2779 		 * managing device power.  Only manage type 0 devices for now.
2780 		 */
2781 		child = devlist[i];
2782 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2783 		if (acpi_dev && device_is_attached(child) &&
2784 		    dinfo->cfg.hdrtype == 0) {
2785 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2786 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2787 		}
2788 
2789 		/* Now the device is powered up, restore its config space. */
2790 		pci_cfg_restore(child, dinfo);
2791 	}
2792 	free(devlist, M_TEMP);
2793 	return (bus_generic_resume(dev));
2794 }
2795 
2796 static void
2797 pci_load_vendor_data(void)
2798 {
2799 	caddr_t vendordata, info;
2800 
2801 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2802 		info = preload_search_info(vendordata, MODINFO_ADDR);
2803 		pci_vendordata = *(char **)info;
2804 		info = preload_search_info(vendordata, MODINFO_SIZE);
2805 		pci_vendordata_size = *(size_t *)info;
2806 		/* terminate the database */
2807 		pci_vendordata[pci_vendordata_size] = '\n';
2808 	}
2809 }
2810 
2811 void
2812 pci_driver_added(device_t dev, driver_t *driver)
2813 {
2814 	int numdevs;
2815 	device_t *devlist;
2816 	device_t child;
2817 	struct pci_devinfo *dinfo;
2818 	int i;
2819 
2820 	if (bootverbose)
2821 		device_printf(dev, "driver added\n");
2822 	DEVICE_IDENTIFY(driver, dev);
2823 	if (device_get_children(dev, &devlist, &numdevs) != 0)
2824 		return;
2825 	for (i = 0; i < numdevs; i++) {
2826 		child = devlist[i];
2827 		if (device_get_state(child) != DS_NOTPRESENT)
2828 			continue;
2829 		dinfo = device_get_ivars(child);
2830 		pci_print_verbose(dinfo);
2831 		if (bootverbose)
2832 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
2833 		pci_cfg_restore(child, dinfo);
2834 		if (device_probe_and_attach(child) != 0)
2835 			pci_cfg_save(child, dinfo, 1);
2836 	}
2837 	free(devlist, M_TEMP);
2838 }
2839 
2840 int
2841 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
2842     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
2843 {
2844 	struct pci_devinfo *dinfo;
2845 	struct msix_table_entry *mte;
2846 	struct msix_vector *mv;
2847 	uint64_t addr;
2848 	uint32_t data;
2849 	void *cookie;
2850 	int error, rid;
2851 
2852 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
2853 	    arg, &cookie);
2854 	if (error)
2855 		return (error);
2856 
2857 	/* If this is not a direct child, just bail out. */
2858 	if (device_get_parent(child) != dev) {
2859 		*cookiep = cookie;
2860 		return(0);
2861 	}
2862 
2863 	rid = rman_get_rid(irq);
2864 	if (rid == 0) {
2865 		/* Make sure that INTx is enabled */
2866 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
2867 	} else {
2868 		/*
2869 		 * Check to see if the interrupt is MSI or MSI-X.
2870 		 * Ask our parent to map the MSI and give
2871 		 * us the address and data register values.
2872 		 * If we fail for some reason, teardown the
2873 		 * interrupt handler.
2874 		 */
2875 		dinfo = device_get_ivars(child);
2876 		if (dinfo->cfg.msi.msi_alloc > 0) {
2877 			if (dinfo->cfg.msi.msi_addr == 0) {
2878 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
2879 			    ("MSI has handlers, but vectors not mapped"));
2880 				error = PCIB_MAP_MSI(device_get_parent(dev),
2881 				    child, rman_get_start(irq), &addr, &data);
2882 				if (error)
2883 					goto bad;
2884 				dinfo->cfg.msi.msi_addr = addr;
2885 				dinfo->cfg.msi.msi_data = data;
2886 				pci_enable_msi(child, addr, data);
2887 			}
2888 			dinfo->cfg.msi.msi_handlers++;
2889 		} else {
2890 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2891 			    ("No MSI or MSI-X interrupts allocated"));
2892 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2893 			    ("MSI-X index too high"));
2894 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2895 			KASSERT(mte->mte_vector != 0, ("no message vector"));
2896 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
2897 			KASSERT(mv->mv_irq == rman_get_start(irq),
2898 			    ("IRQ mismatch"));
2899 			if (mv->mv_address == 0) {
2900 				KASSERT(mte->mte_handlers == 0,
2901 		    ("MSI-X table entry has handlers, but vector not mapped"));
2902 				error = PCIB_MAP_MSI(device_get_parent(dev),
2903 				    child, rman_get_start(irq), &addr, &data);
2904 				if (error)
2905 					goto bad;
2906 				mv->mv_address = addr;
2907 				mv->mv_data = data;
2908 			}
2909 			if (mte->mte_handlers == 0) {
2910 				pci_enable_msix(child, rid - 1, mv->mv_address,
2911 				    mv->mv_data);
2912 				pci_unmask_msix(child, rid - 1);
2913 			}
2914 			mte->mte_handlers++;
2915 		}
2916 
2917 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
2918 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2919 	bad:
2920 		if (error) {
2921 			(void)bus_generic_teardown_intr(dev, child, irq,
2922 			    cookie);
2923 			return (error);
2924 		}
2925 	}
2926 	*cookiep = cookie;
2927 	return (0);
2928 }
2929 
2930 int
2931 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2932     void *cookie)
2933 {
2934 	struct msix_table_entry *mte;
2935 	struct resource_list_entry *rle;
2936 	struct pci_devinfo *dinfo;
2937 	int error, rid;
2938 
2939 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2940 		return (EINVAL);
2941 
2942 	/* If this isn't a direct child, just bail out */
2943 	if (device_get_parent(child) != dev)
2944 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2945 
2946 	rid = rman_get_rid(irq);
2947 	if (rid == 0) {
2948 		/* Mask INTx */
2949 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2950 	} else {
2951 		/*
2952 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2953 		 * decrement the appropriate handlers count and mask the
2954 		 * MSI-X message, or disable MSI messages if the count
2955 		 * drops to 0.
2956 		 */
2957 		dinfo = device_get_ivars(child);
2958 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2959 		if (rle->res != irq)
2960 			return (EINVAL);
2961 		if (dinfo->cfg.msi.msi_alloc > 0) {
2962 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2963 			    ("MSI-X index too high"));
2964 			if (dinfo->cfg.msi.msi_handlers == 0)
2965 				return (EINVAL);
2966 			dinfo->cfg.msi.msi_handlers--;
2967 			if (dinfo->cfg.msi.msi_handlers == 0)
2968 				pci_disable_msi(child);
2969 		} else {
2970 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2971 			    ("No MSI or MSI-X interrupts allocated"));
2972 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2973 			    ("MSI-X index too high"));
2974 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2975 			if (mte->mte_handlers == 0)
2976 				return (EINVAL);
2977 			mte->mte_handlers--;
2978 			if (mte->mte_handlers == 0)
2979 				pci_mask_msix(child, rid - 1);
2980 		}
2981 	}
2982 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2983 	if (rid > 0)
2984 		KASSERT(error == 0,
2985 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2986 	return (error);
2987 }
2988 
2989 int
2990 pci_print_child(device_t dev, device_t child)
2991 {
2992 	struct pci_devinfo *dinfo;
2993 	struct resource_list *rl;
2994 	int retval = 0;
2995 
2996 	dinfo = device_get_ivars(child);
2997 	rl = &dinfo->resources;
2998 
2999 	retval += bus_print_child_header(dev, child);
3000 
3001 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3002 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3003 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3004 	if (device_get_flags(dev))
3005 		retval += printf(" flags %#x", device_get_flags(dev));
3006 
3007 	retval += printf(" at device %d.%d", pci_get_slot(child),
3008 	    pci_get_function(child));
3009 
3010 	retval += bus_print_child_footer(dev, child);
3011 
3012 	return (retval);
3013 }
3014 
/*
 * Generic class/subclass descriptions, used by pci_probe_nomatch() to
 * describe a device when no entry for it is found in the device database.
 *
 * An entry with a subclass of -1 supplies the description of the class
 * as a whole; the other entries describe one specific subclass of that
 * class.  The table is scanned from the start until an entry with a NULL
 * desc is reached.
 */
static struct
{
	int	class;		/* class code, compared with pci_get_class() */
	int	subclass;	/* subclass code, or -1 for the whole class */
	char	*desc;		/* description; NULL terminates the table */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	/* Sentinel: the NULL description ends the scan. */
	{0, 0,		NULL}
};
3106 
3107 void
3108 pci_probe_nomatch(device_t dev, device_t child)
3109 {
3110 	int	i;
3111 	char	*cp, *scp, *device;
3112 
3113 	/*
3114 	 * Look for a listing for this device in a loaded device database.
3115 	 */
3116 	if ((device = pci_describe_device(child)) != NULL) {
3117 		device_printf(dev, "<%s>", device);
3118 		free(device, M_DEVBUF);
3119 	} else {
3120 		/*
3121 		 * Scan the class/subclass descriptions for a general
3122 		 * description.
3123 		 */
3124 		cp = "unknown";
3125 		scp = NULL;
3126 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3127 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3128 				if (pci_nomatch_tab[i].subclass == -1) {
3129 					cp = pci_nomatch_tab[i].desc;
3130 				} else if (pci_nomatch_tab[i].subclass ==
3131 				    pci_get_subclass(child)) {
3132 					scp = pci_nomatch_tab[i].desc;
3133 				}
3134 			}
3135 		}
3136 		device_printf(dev, "<%s%s%s>",
3137 		    cp ? cp : "",
3138 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3139 		    scp ? scp : "");
3140 	}
3141 	printf(" at device %d.%d (no driver attached)\n",
3142 	    pci_get_slot(child), pci_get_function(child));
3143 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3144 	return;
3145 }
3146 
3147 /*
3148  * Parse the PCI device database, if loaded, and return a pointer to a
3149  * description of the device.
3150  *
3151  * The database is flat text formatted as follows:
3152  *
3153  * Any line not in a valid format is ignored.
3154  * Lines are terminated with newline '\n' characters.
3155  *
3156  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3157  * the vendor name.
3158  *
3159  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3160  * - devices cannot be listed without a corresponding VENDOR line.
3161  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3162  * another TAB, then the device name.
3163  */
3164 
3165 /*
3166  * Assuming (ptr) points to the beginning of a line in the database,
3167  * return the vendor or device and description of the next entry.
3168  * The value of (vendor) or (device) inappropriate for the entry type
3169  * is set to -1.  Returns nonzero at the end of the database.
3170  *
3171  * Note that this is slightly unrobust in the face of corrupt data;
3172  * we attempt to safeguard against this by spamming the end of the
3173  * database with a newline when we initialise.
3174  */
3175 static int
3176 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3177 {
3178 	char	*cp = *ptr;
3179 	int	left;
3180 
3181 	*device = -1;
3182 	*vendor = -1;
3183 	**desc = '\0';
3184 	for (;;) {
3185 		left = pci_vendordata_size - (cp - pci_vendordata);
3186 		if (left <= 0) {
3187 			*ptr = cp;
3188 			return(1);
3189 		}
3190 
3191 		/* vendor entry? */
3192 		if (*cp != '\t' &&
3193 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3194 			break;
3195 		/* device entry? */
3196 		if (*cp == '\t' &&
3197 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3198 			break;
3199 
3200 		/* skip to next line */
3201 		while (*cp != '\n' && left > 0) {
3202 			cp++;
3203 			left--;
3204 		}
3205 		if (*cp == '\n') {
3206 			cp++;
3207 			left--;
3208 		}
3209 	}
3210 	/* skip to next line */
3211 	while (*cp != '\n' && left > 0) {
3212 		cp++;
3213 		left--;
3214 	}
3215 	if (*cp == '\n' && left > 0)
3216 		cp++;
3217 	*ptr = cp;
3218 	return(0);
3219 }
3220 
3221 static char *
3222 pci_describe_device(device_t dev)
3223 {
3224 	int	vendor, device;
3225 	char	*desc, *vp, *dp, *line;
3226 
3227 	desc = vp = dp = NULL;
3228 
3229 	/*
3230 	 * If we have no vendor data, we can't do anything.
3231 	 */
3232 	if (pci_vendordata == NULL)
3233 		goto out;
3234 
3235 	/*
3236 	 * Scan the vendor data looking for this device
3237 	 */
3238 	line = pci_vendordata;
3239 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3240 		goto out;
3241 	for (;;) {
3242 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3243 			goto out;
3244 		if (vendor == pci_get_vendor(dev))
3245 			break;
3246 	}
3247 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3248 		goto out;
3249 	for (;;) {
3250 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3251 			*dp = 0;
3252 			break;
3253 		}
3254 		if (vendor != -1) {
3255 			*dp = 0;
3256 			break;
3257 		}
3258 		if (device == pci_get_device(dev))
3259 			break;
3260 	}
3261 	if (dp[0] == '\0')
3262 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3263 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3264 	    NULL)
3265 		sprintf(desc, "%s, %s", vp, dp);
3266  out:
3267 	if (vp != NULL)
3268 		free(vp, M_DEVBUF);
3269 	if (dp != NULL)
3270 		free(dp, M_DEVBUF);
3271 	return(desc);
3272 }
3273 
3274 int
3275 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3276 {
3277 	struct pci_devinfo *dinfo;
3278 	pcicfgregs *cfg;
3279 
3280 	dinfo = device_get_ivars(child);
3281 	cfg = &dinfo->cfg;
3282 
3283 	switch (which) {
3284 	case PCI_IVAR_ETHADDR:
3285 		/*
3286 		 * The generic accessor doesn't deal with failure, so
3287 		 * we set the return value, then return an error.
3288 		 */
3289 		*((uint8_t **) result) = NULL;
3290 		return (EINVAL);
3291 	case PCI_IVAR_SUBVENDOR:
3292 		*result = cfg->subvendor;
3293 		break;
3294 	case PCI_IVAR_SUBDEVICE:
3295 		*result = cfg->subdevice;
3296 		break;
3297 	case PCI_IVAR_VENDOR:
3298 		*result = cfg->vendor;
3299 		break;
3300 	case PCI_IVAR_DEVICE:
3301 		*result = cfg->device;
3302 		break;
3303 	case PCI_IVAR_DEVID:
3304 		*result = (cfg->device << 16) | cfg->vendor;
3305 		break;
3306 	case PCI_IVAR_CLASS:
3307 		*result = cfg->baseclass;
3308 		break;
3309 	case PCI_IVAR_SUBCLASS:
3310 		*result = cfg->subclass;
3311 		break;
3312 	case PCI_IVAR_PROGIF:
3313 		*result = cfg->progif;
3314 		break;
3315 	case PCI_IVAR_REVID:
3316 		*result = cfg->revid;
3317 		break;
3318 	case PCI_IVAR_INTPIN:
3319 		*result = cfg->intpin;
3320 		break;
3321 	case PCI_IVAR_IRQ:
3322 		*result = cfg->intline;
3323 		break;
3324 	case PCI_IVAR_DOMAIN:
3325 		*result = cfg->domain;
3326 		break;
3327 	case PCI_IVAR_BUS:
3328 		*result = cfg->bus;
3329 		break;
3330 	case PCI_IVAR_SLOT:
3331 		*result = cfg->slot;
3332 		break;
3333 	case PCI_IVAR_FUNCTION:
3334 		*result = cfg->func;
3335 		break;
3336 	case PCI_IVAR_CMDREG:
3337 		*result = cfg->cmdreg;
3338 		break;
3339 	case PCI_IVAR_CACHELNSZ:
3340 		*result = cfg->cachelnsz;
3341 		break;
3342 	case PCI_IVAR_MINGNT:
3343 		*result = cfg->mingnt;
3344 		break;
3345 	case PCI_IVAR_MAXLAT:
3346 		*result = cfg->maxlat;
3347 		break;
3348 	case PCI_IVAR_LATTIMER:
3349 		*result = cfg->lattimer;
3350 		break;
3351 	default:
3352 		return (ENOENT);
3353 	}
3354 	return (0);
3355 }
3356 
3357 int
3358 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3359 {
3360 	struct pci_devinfo *dinfo;
3361 
3362 	dinfo = device_get_ivars(child);
3363 
3364 	switch (which) {
3365 	case PCI_IVAR_INTPIN:
3366 		dinfo->cfg.intpin = value;
3367 		return (0);
3368 	case PCI_IVAR_ETHADDR:
3369 	case PCI_IVAR_SUBVENDOR:
3370 	case PCI_IVAR_SUBDEVICE:
3371 	case PCI_IVAR_VENDOR:
3372 	case PCI_IVAR_DEVICE:
3373 	case PCI_IVAR_DEVID:
3374 	case PCI_IVAR_CLASS:
3375 	case PCI_IVAR_SUBCLASS:
3376 	case PCI_IVAR_PROGIF:
3377 	case PCI_IVAR_REVID:
3378 	case PCI_IVAR_IRQ:
3379 	case PCI_IVAR_DOMAIN:
3380 	case PCI_IVAR_BUS:
3381 	case PCI_IVAR_SLOT:
3382 	case PCI_IVAR_FUNCTION:
3383 		return (EINVAL);	/* disallow for now */
3384 
3385 	default:
3386 		return (ENOENT);
3387 	}
3388 }
3389 
3390 
3391 #include "opt_ddb.h"
3392 #ifdef DDB
3393 #include <ddb/ddb.h>
3394 #include <sys/cons.h>
3395 
3396 /*
3397  * List resources based on pci map registers, used for within ddb
3398  */
3399 
3400 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3401 {
3402 	struct pci_devinfo *dinfo;
3403 	struct devlist *devlist_head;
3404 	struct pci_conf *p;
3405 	const char *name;
3406 	int i, error, none_count;
3407 
3408 	none_count = 0;
3409 	/* get the head of the device queue */
3410 	devlist_head = &pci_devq;
3411 
3412 	/*
3413 	 * Go through the list of devices and print out devices
3414 	 */
3415 	for (error = 0, i = 0,
3416 	     dinfo = STAILQ_FIRST(devlist_head);
3417 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3418 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3419 
3420 		/* Populate pd_name and pd_unit */
3421 		name = NULL;
3422 		if (dinfo->cfg.dev)
3423 			name = device_get_name(dinfo->cfg.dev);
3424 
3425 		p = &dinfo->conf;
3426 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3427 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3428 			(name && *name) ? name : "none",
3429 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3430 			none_count++,
3431 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3432 			p->pc_sel.pc_func, (p->pc_class << 16) |
3433 			(p->pc_subclass << 8) | p->pc_progif,
3434 			(p->pc_subdevice << 16) | p->pc_subvendor,
3435 			(p->pc_device << 16) | p->pc_vendor,
3436 			p->pc_revid, p->pc_hdr);
3437 	}
3438 }
3439 #endif /* DDB */
3440 
3441 static struct resource *
3442 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3443     u_long start, u_long end, u_long count, u_int flags)
3444 {
3445 	struct pci_devinfo *dinfo = device_get_ivars(child);
3446 	struct resource_list *rl = &dinfo->resources;
3447 	struct resource_list_entry *rle;
3448 	struct resource *res;
3449 	pci_addr_t map, testval;
3450 	int mapsize;
3451 
3452 	/*
3453 	 * Weed out the bogons, and figure out how large the BAR/map
3454 	 * is.  Bars that read back 0 here are bogus and unimplemented.
3455 	 * Note: atapci in legacy mode are special and handled elsewhere
3456 	 * in the code.  If you have a atapci device in legacy mode and
3457 	 * it fails here, that other code is broken.
3458 	 */
3459 	res = NULL;
3460 	pci_read_bar(child, *rid, &map, &testval);
3461 
3462 	/* Ignore a BAR with a base of 0. */
3463 	if (pci_mapbase(testval) == 0)
3464 		goto out;
3465 
3466 	if (PCI_BAR_MEM(testval)) {
3467 		if (type != SYS_RES_MEMORY) {
3468 			if (bootverbose)
3469 				device_printf(dev,
3470 				    "child %s requested type %d for rid %#x,"
3471 				    " but the BAR says it is an memio\n",
3472 				    device_get_nameunit(child), type, *rid);
3473 			goto out;
3474 		}
3475 	} else {
3476 		if (type != SYS_RES_IOPORT) {
3477 			if (bootverbose)
3478 				device_printf(dev,
3479 				    "child %s requested type %d for rid %#x,"
3480 				    " but the BAR says it is an ioport\n",
3481 				    device_get_nameunit(child), type, *rid);
3482 			goto out;
3483 		}
3484 	}
3485 
3486 	/*
3487 	 * For real BARs, we need to override the size that
3488 	 * the driver requests, because that's what the BAR
3489 	 * actually uses and we would otherwise have a
3490 	 * situation where we might allocate the excess to
3491 	 * another driver, which won't work.
3492 	 */
3493 	mapsize = pci_mapsize(testval);
3494 	count = 1UL << mapsize;
3495 	if (RF_ALIGNMENT(flags) < mapsize)
3496 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3497 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3498 		flags |= RF_PREFETCHABLE;
3499 
3500 	/*
3501 	 * Allocate enough resource, and then write back the
3502 	 * appropriate bar for that resource.
3503 	 */
3504 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3505 	    start, end, count, flags & ~RF_ACTIVE);
3506 	if (res == NULL) {
3507 		device_printf(child,
3508 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3509 		    count, *rid, type, start, end);
3510 		goto out;
3511 	}
3512 	rman_set_device(res, dev);
3513 	resource_list_add(rl, type, *rid, start, end, count);
3514 	rle = resource_list_find(rl, type, *rid);
3515 	if (rle == NULL)
3516 		panic("pci_alloc_map: unexpectedly can't find resource.");
3517 	rle->res = res;
3518 	rle->start = rman_get_start(res);
3519 	rle->end = rman_get_end(res);
3520 	rle->count = count;
3521 	if (bootverbose)
3522 		device_printf(child,
3523 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3524 		    count, *rid, type, rman_get_start(res));
3525 	map = rman_get_start(res);
3526 	pci_write_bar(child, *rid, map);
3527 out:;
3528 	return (res);
3529 }
3530 
3531 
3532 struct resource *
3533 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3534 		   u_long start, u_long end, u_long count, u_int flags)
3535 {
3536 	struct pci_devinfo *dinfo = device_get_ivars(child);
3537 	struct resource_list *rl = &dinfo->resources;
3538 	struct resource_list_entry *rle;
3539 	struct resource *res;
3540 	pcicfgregs *cfg = &dinfo->cfg;
3541 
3542 	if (device_get_parent(child) != dev)
3543 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3544 		    type, rid, start, end, count, flags));
3545 
3546 	/*
3547 	 * Perform lazy resource allocation
3548 	 */
3549 	switch (type) {
3550 	case SYS_RES_IRQ:
3551 		/*
3552 		 * Can't alloc legacy interrupt once MSI messages have
3553 		 * been allocated.
3554 		 */
3555 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3556 		    cfg->msix.msix_alloc > 0))
3557 			return (NULL);
3558 
3559 		/*
3560 		 * If the child device doesn't have an interrupt
3561 		 * routed and is deserving of an interrupt, try to
3562 		 * assign it one.
3563 		 */
3564 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3565 		    (cfg->intpin != 0))
3566 			pci_assign_interrupt(dev, child, 0);
3567 		break;
3568 	case SYS_RES_IOPORT:
3569 	case SYS_RES_MEMORY:
3570 		/* Allocate resources for this BAR if needed. */
3571 		rle = resource_list_find(rl, type, *rid);
3572 		if (rle == NULL) {
3573 			res = pci_alloc_map(dev, child, type, rid, start, end,
3574 			    count, flags);
3575 			if (res == NULL)
3576 				return (NULL);
3577 			rle = resource_list_find(rl, type, *rid);
3578 		}
3579 
3580 		/*
3581 		 * If the resource belongs to the bus, then give it to
3582 		 * the child.  We need to activate it if requested
3583 		 * since the bus always allocates inactive resources.
3584 		 */
3585 		if (rle != NULL && rle->res != NULL &&
3586 		    rman_get_device(rle->res) == dev) {
3587 			if (bootverbose)
3588 				device_printf(child,
3589 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3590 				    rman_get_size(rle->res), *rid, type,
3591 				    rman_get_start(rle->res));
3592 			rman_set_device(rle->res, child);
3593 			if ((flags & RF_ACTIVE) &&
3594 			    bus_activate_resource(child, type, *rid,
3595 			    rle->res) != 0)
3596 				return (NULL);
3597 			return (rle->res);
3598 		}
3599 	}
3600 	return (resource_list_alloc(rl, dev, child, type, rid,
3601 	    start, end, count, flags));
3602 }
3603 
3604 int
3605 pci_release_resource(device_t dev, device_t child, int type, int rid,
3606     struct resource *r)
3607 {
3608 	int error;
3609 
3610 	if (device_get_parent(child) != dev)
3611 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3612 		    type, rid, r));
3613 
3614 	/*
3615 	 * For BARs we don't actually want to release the resource.
3616 	 * Instead, we deactivate the resource if needed and then give
3617 	 * ownership of the BAR back to the bus.
3618 	 */
3619 	switch (type) {
3620 	case SYS_RES_IOPORT:
3621 	case SYS_RES_MEMORY:
3622 		if (rman_get_device(r) != child)
3623 			return (EINVAL);
3624 		if (rman_get_flags(r) & RF_ACTIVE) {
3625 			error = bus_deactivate_resource(child, type, rid, r);
3626 			if (error)
3627 				return (error);
3628 		}
3629 		rman_set_device(r, dev);
3630 		return (0);
3631 	}
3632 	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3633 }
3634 
3635 int
3636 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3637     struct resource *r)
3638 {
3639 	int error;
3640 
3641 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3642 	if (error)
3643 		return (error);
3644 
3645 	/* Enable decoding in the command register when activating BARs. */
3646 	if (device_get_parent(child) == dev) {
3647 		switch (type) {
3648 		case SYS_RES_IOPORT:
3649 		case SYS_RES_MEMORY:
3650 			error = PCI_ENABLE_IO(dev, child, type);
3651 			break;
3652 		}
3653 	}
3654 	return (error);
3655 }
3656 
3657 void
3658 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3659 {
3660 	struct pci_devinfo *dinfo;
3661 	struct resource_list *rl;
3662 	struct resource_list_entry *rle;
3663 
3664 	if (device_get_parent(child) != dev)
3665 		return;
3666 
3667 	dinfo = device_get_ivars(child);
3668 	rl = &dinfo->resources;
3669 	rle = resource_list_find(rl, type, rid);
3670 	if (rle == NULL)
3671 		return;
3672 
3673 	if (rle->res) {
3674 		if (rman_get_device(rle->res) != dev ||
3675 		    rman_get_flags(rle->res) & RF_ACTIVE) {
3676 			device_printf(dev, "delete_resource: "
3677 			    "Resource still owned by child, oops. "
3678 			    "(type=%d, rid=%d, addr=%lx)\n",
3679 			    rle->type, rle->rid,
3680 			    rman_get_start(rle->res));
3681 			return;
3682 		}
3683 
3684 		/*
3685 		 * If this is a BAR, clear the BAR so it stops
3686 		 * decoding before releasing the resource.
3687 		 */
3688 		switch (type) {
3689 		case SYS_RES_IOPORT:
3690 		case SYS_RES_MEMORY:
3691 			pci_write_bar(child, rid, 0);
3692 			break;
3693 		}
3694 		bus_release_resource(dev, type, rid, rle->res);
3695 	}
3696 	resource_list_delete(rl, type, rid);
3697 }
3698 
3699 struct resource_list *
3700 pci_get_resource_list (device_t dev, device_t child)
3701 {
3702 	struct pci_devinfo *dinfo = device_get_ivars(child);
3703 
3704 	return (&dinfo->resources);
3705 }
3706 
3707 uint32_t
3708 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3709 {
3710 	struct pci_devinfo *dinfo = device_get_ivars(child);
3711 	pcicfgregs *cfg = &dinfo->cfg;
3712 
3713 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3714 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3715 }
3716 
3717 void
3718 pci_write_config_method(device_t dev, device_t child, int reg,
3719     uint32_t val, int width)
3720 {
3721 	struct pci_devinfo *dinfo = device_get_ivars(child);
3722 	pcicfgregs *cfg = &dinfo->cfg;
3723 
3724 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3725 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3726 }
3727 
3728 int
3729 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3730     size_t buflen)
3731 {
3732 
3733 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3734 	    pci_get_function(child));
3735 	return (0);
3736 }
3737 
3738 int
3739 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3740     size_t buflen)
3741 {
3742 	struct pci_devinfo *dinfo;
3743 	pcicfgregs *cfg;
3744 
3745 	dinfo = device_get_ivars(child);
3746 	cfg = &dinfo->cfg;
3747 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3748 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3749 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3750 	    cfg->progif);
3751 	return (0);
3752 }
3753 
3754 int
3755 pci_assign_interrupt_method(device_t dev, device_t child)
3756 {
3757 	struct pci_devinfo *dinfo = device_get_ivars(child);
3758 	pcicfgregs *cfg = &dinfo->cfg;
3759 
3760 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3761 	    cfg->intpin));
3762 }
3763 
3764 static int
3765 pci_modevent(module_t mod, int what, void *arg)
3766 {
3767 	static struct cdev *pci_cdev;
3768 
3769 	switch (what) {
3770 	case MOD_LOAD:
3771 		STAILQ_INIT(&pci_devq);
3772 		pci_generation = 0;
3773 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3774 		    "pci");
3775 		pci_load_vendor_data();
3776 		break;
3777 
3778 	case MOD_UNLOAD:
3779 		destroy_dev(pci_cdev);
3780 		break;
3781 	}
3782 
3783 	return (0);
3784 }
3785 
3786 void
3787 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3788 {
3789 	int i;
3790 
3791 	/*
3792 	 * Only do header type 0 devices.  Type 1 devices are bridges,
3793 	 * which we know need special treatment.  Type 2 devices are
3794 	 * cardbus bridges which also require special treatment.
3795 	 * Other types are unknown, and we err on the side of safety
3796 	 * by ignoring them.
3797 	 */
3798 	if (dinfo->cfg.hdrtype != 0)
3799 		return;
3800 
3801 	/*
3802 	 * Restore the device to full power mode.  We must do this
3803 	 * before we restore the registers because moving from D3 to
3804 	 * D0 will cause the chip's BARs and some other registers to
3805 	 * be reset to some unknown power on reset values.  Cut down
3806 	 * the noise on boot by doing nothing if we are already in
3807 	 * state D0.
3808 	 */
3809 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3810 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3811 	}
3812 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3813 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3814 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3815 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3816 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3817 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3818 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3819 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3820 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3821 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3822 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3823 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3824 
3825 	/* Restore MSI and MSI-X configurations if they are present. */
3826 	if (dinfo->cfg.msi.msi_location != 0)
3827 		pci_resume_msi(dev);
3828 	if (dinfo->cfg.msix.msix_location != 0)
3829 		pci_resume_msix(dev);
3830 }
3831 
3832 void
3833 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3834 {
3835 	int i;
3836 	uint32_t cls;
3837 	int ps;
3838 
3839 	/*
3840 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3841 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3842 	 * which also require special treatment.  Other types are unknown, and
3843 	 * we err on the side of safety by ignoring them.  Powering down
3844 	 * bridges should not be undertaken lightly.
3845 	 */
3846 	if (dinfo->cfg.hdrtype != 0)
3847 		return;
3848 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3849 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3850 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3851 
3852 	/*
3853 	 * Some drivers apparently write to these registers w/o updating our
3854 	 * cached copy.  No harm happens if we update the copy, so do so here
3855 	 * so we can restore them.  The COMMAND register is modified by the
3856 	 * bus w/o updating the cache.  This should represent the normally
3857 	 * writable portion of the 'defined' part of type 0 headers.  In
3858 	 * theory we also need to save/restore the PCI capability structures
3859 	 * we know about, but apart from power we don't know any that are
3860 	 * writable.
3861 	 */
3862 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3863 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3864 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3865 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3866 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3867 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3868 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3869 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3870 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3871 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3872 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3873 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3874 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3875 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3876 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3877 
3878 	/*
3879 	 * don't set the state for display devices, base peripherals and
3880 	 * memory devices since bad things happen when they are powered down.
3881 	 * We should (a) have drivers that can easily detach and (b) use
3882 	 * generic drivers for these devices so that some device actually
3883 	 * attaches.  We need to make sure that when we implement (a) we don't
3884 	 * power the device down on a reattach.
3885 	 */
3886 	cls = pci_get_class(dev);
3887 	if (!setstate)
3888 		return;
3889 	switch (pci_do_power_nodriver)
3890 	{
3891 		case 0:		/* NO powerdown at all */
3892 			return;
3893 		case 1:		/* Conservative about what to power down */
3894 			if (cls == PCIC_STORAGE)
3895 				return;
3896 			/*FALLTHROUGH*/
3897 		case 2:		/* Agressive about what to power down */
3898 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3899 			    cls == PCIC_BASEPERIPH)
3900 				return;
3901 			/*FALLTHROUGH*/
3902 		case 3:		/* Power down everything */
3903 			break;
3904 	}
3905 	/*
3906 	 * PCI spec says we can only go into D3 state from D0 state.
3907 	 * Transition from D[12] into D0 before going to D3 state.
3908 	 */
3909 	ps = pci_get_powerstate(dev);
3910 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3911 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3912 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3913 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3914 }
3915