xref: /freebsd/sys/dev/pci/pci.c (revision 25dd52cdb10d223b9258836e23cc6ae4ea333b86)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static pci_addr_t	pci_mapbase(uint64_t mapreg);
75 static const char	*pci_maptype(uint64_t mapreg);
76 static int		pci_mapsize(uint64_t testval);
77 static int		pci_maprange(uint64_t mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
80 
81 static int		pci_porten(device_t dev);
82 static int		pci_memen(device_t dev);
83 static void		pci_assign_interrupt(device_t bus, device_t dev,
84 			    int force_route);
85 static int		pci_add_map(device_t bus, device_t dev, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 
/*
 * Kernel object method table for the PCI bus driver.  It wires the
 * generic device and bus interfaces to the PCI-specific implementations
 * defined in this file; methods with no PCI-specific behavior fall
 * through to the bus_generic_* helpers.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* required NULL terminator */
};
166 
/* Declare the pci driver class and attach it to pcib (PCI bridge) buses. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database loaded by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175 
176 
/*
 * A device-specific quirk entry.  'devid' is the combined ID as read from
 * PCIR_DEVVENDOR: device ID in the upper 16 bits, vendor ID in the lower
 * 16 bits (see the table below, e.g. 0x71138086 = Intel 0x8086 / 0x7113).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
185 
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminating entry (devid == 0 ends the scan) */
};
220 
221 /* map register information */
222 #define	PCI_MAPMEM	0x01	/* memory map */
223 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
224 #define	PCI_MAPPORT	0x04	/* port map */
225 
/* Global list of all discovered PCI devices, plus bookkeeping counters. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped on every device add/remove */
uint32_t pci_numdevs = 0;
/* Set when at least one PCIe / PCI-X capability is seen during scan. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271 
272 /* Find a device_t by bus/slot/function in domain 0 */
273 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Compatibility wrapper: search in PCI domain (segment) 0 only. */
	return (pci_find_dbsf(0, bus, slot, func));
}
280 
281 /* Find a device_t by domain/bus/slot/function */
282 
283 device_t
284 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285 {
286 	struct pci_devinfo *dinfo;
287 
288 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289 		if ((dinfo->cfg.domain == domain) &&
290 		    (dinfo->cfg.bus == bus) &&
291 		    (dinfo->cfg.slot == slot) &&
292 		    (dinfo->cfg.func == func)) {
293 			return (dinfo->cfg.dev);
294 		}
295 	}
296 
297 	return (NULL);
298 }
299 
300 /* Find a device_t by vendor/device ID */
301 
302 device_t
303 pci_find_device(uint16_t vendor, uint16_t device)
304 {
305 	struct pci_devinfo *dinfo;
306 
307 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308 		if ((dinfo->cfg.vendor == vendor) &&
309 		    (dinfo->cfg.device == device)) {
310 			return (dinfo->cfg.dev);
311 		}
312 	}
313 
314 	return (NULL);
315 }
316 
317 static int
318 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
319 {
320 	va_list ap;
321 	int retval;
322 
323 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
324 	    cfg->func);
325 	va_start(ap, fmt);
326 	retval += vprintf(fmt, ap);
327 	va_end(ap);
328 	return (retval);
329 }
330 
331 /* return base address of memory or port map */
332 
333 static pci_addr_t
334 pci_mapbase(uint64_t mapreg)
335 {
336 
337 	if (PCI_BAR_MEM(mapreg))
338 		return (mapreg & PCIM_BAR_MEM_BASE);
339 	else
340 		return (mapreg & PCIM_BAR_IO_BASE);
341 }
342 
343 /* return map type of memory or port map */
344 
345 static const char *
346 pci_maptype(uint64_t mapreg)
347 {
348 
349 	if (PCI_BAR_IO(mapreg))
350 		return ("I/O Port");
351 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
352 		return ("Prefetchable Memory");
353 	return ("Memory");
354 }
355 
356 /* return log2 of map size decoded for memory or port map */
357 
358 static int
359 pci_mapsize(uint64_t testval)
360 {
361 	int ln2size;
362 
363 	testval = pci_mapbase(testval);
364 	ln2size = 0;
365 	if (testval != 0) {
366 		while ((testval & 1) == 0)
367 		{
368 			ln2size++;
369 			testval >>= 1;
370 		}
371 	}
372 	return (ln2size);
373 }
374 
375 /* return log2 of address range supported by map register */
376 
377 static int
378 pci_maprange(uint64_t mapreg)
379 {
380 	int ln2range = 0;
381 
382 	if (PCI_BAR_IO(mapreg))
383 		ln2range = 32;
384 	else
385 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
386 		case PCIM_BAR_MEM_32:
387 			ln2range = 32;
388 			break;
389 		case PCIM_BAR_MEM_1MB:
390 			ln2range = 20;
391 			break;
392 		case PCIM_BAR_MEM_64:
393 			ln2range = 64;
394 			break;
395 		}
396 	return (ln2range);
397 }
398 
399 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
400 
401 static void
402 pci_fixancient(pcicfgregs *cfg)
403 {
404 	if (cfg->hdrtype != 0)
405 		return;
406 
407 	/* PCI to PCI bridges use header type 1 */
408 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
409 		cfg->hdrtype = 1;
410 }
411 
412 /* extract header type specific config data */
413 
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* standard device header */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor/subdevice read here */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* CardBus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
435 
436 /* read configuration header into pcicfgregs structure */
/*
 * Probe config space at domain d, bus b, slot s, function f.  If a device
 * responds (vendor/device not all-ones), allocate a pci_devinfo of the
 * requested 'size' (callers may embed it in a larger structure), fill in
 * both the raw cfg registers and the pc_conf summary, link it onto the
 * global device list, and return it.  Returns NULL if no device responds.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the pciio conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
510 
/*
 * Walk the device's capability list and cache the capabilities this
 * driver cares about (power management, HT MSI mapping, MSI, MSI-X,
 * VPD, subvendor, PCI-X, PCI express) into cfg.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer's location depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* VPD is parsed lazily; just remember the offset. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
644 
645 /*
646  * PCI Vital Product Data
647  */
648 
649 #define	PCI_VPD_TIMEOUT		1000000
650 
651 static int
652 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
653 {
654 	int count = PCI_VPD_TIMEOUT;
655 
656 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
657 
658 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
659 
660 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
661 		if (--count < 0)
662 			return (ENXIO);
663 		DELAY(1);	/* limit looping */
664 	}
665 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
666 
667 	return (0);
668 }
669 
#if 0
/*
 * Write one aligned 32-bit word to the device's VPD: store the data
 * word, then write the address with bit 15 set to start the write and
 * poll until the device clears it.  Returns ENXIO on timeout.
 * (Currently unused, hence compiled out.)
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
689 
690 #undef PCI_VPD_TIMEOUT
691 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* VPD offset of the next word */
	uint8_t		cksum;		/* running byte sum for RV check */
};
700 
701 static int
702 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
703 {
704 	uint32_t reg;
705 	uint8_t byte;
706 
707 	if (vrs->bytesinval == 0) {
708 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
709 			return (ENXIO);
710 		vrs->val = le32toh(reg);
711 		vrs->off += 4;
712 		byte = vrs->val & 0xff;
713 		vrs->bytesinval = 3;
714 	} else {
715 		vrs->val = vrs->val >> 8;
716 		byte = vrs->val & 0xff;
717 		vrs->bytesinval--;
718 	}
719 
720 	vrs->cksum += byte;
721 	*data = byte;
722 	return (0);
723 }
724 
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array, and the read/write
 * (VPD-W) keyword array.  Implemented as a state machine over the byte
 * stream from vpd_nextbyte():
 *
 *   state 0  - resource item header (small or large form)
 *   state 1  - identifier string bytes
 *   state 2  - VPD-R keyword header (2-byte keyword + length)
 *   state 3  - VPD-R keyword value bytes (validates the RV checksum)
 *   state 4  - skip bytes of an item we don't store
 *   state 5  - VPD-W keyword header
 *   state 6  - VPD-W keyword value bytes
 *   state -1 - normal termination
 *   state -2 - I/O error from vpd_nextbyte
 *
 * On checksum failure the read-only data is discarded; on I/O error all
 * parsed data is discarded.  Sets vpd_cached either way so the parse is
 * attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian len. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* Length can't exceed the VPD address space. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte completes the
			 * checksum: the running sum of all bytes up to and
			 * including it must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip bytes of an item we don't store. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this value starts within the VPD. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1004 
1005 int
1006 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1007 {
1008 	struct pci_devinfo *dinfo = device_get_ivars(child);
1009 	pcicfgregs *cfg = &dinfo->cfg;
1010 
1011 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1012 		pci_read_vpd(device_get_parent(dev), cfg);
1013 
1014 	*identptr = cfg->vpd.vpd_ident;
1015 
1016 	if (*identptr == NULL)
1017 		return (ENXIO);
1018 
1019 	return (0);
1020 }
1021 
1022 int
1023 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1024 	const char **vptr)
1025 {
1026 	struct pci_devinfo *dinfo = device_get_ivars(child);
1027 	pcicfgregs *cfg = &dinfo->cfg;
1028 	int i;
1029 
1030 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1031 		pci_read_vpd(device_get_parent(dev), cfg);
1032 
1033 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1034 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1035 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1036 			*vptr = cfg->vpd.vpd_ros[i].value;
1037 		}
1038 
1039 	if (i != cfg->vpd.vpd_rocnt)
1040 		return (0);
1041 
1042 	*vptr = NULL;
1043 	return (ENXIO);
1044 }
1045 
1046 /*
1047  * Find the requested extended capability and return the offset in
1048  * configuration space via the pointer provided. The function returns
1049  * 0 on success and error code otherwise.
1050  */
1051 int
1052 pci_find_extcap_method(device_t dev, device_t child, int capability,
1053     int *capreg)
1054 {
1055 	struct pci_devinfo *dinfo = device_get_ivars(child);
1056 	pcicfgregs *cfg = &dinfo->cfg;
1057 	u_int32_t status;
1058 	u_int8_t ptr;
1059 
1060 	/*
1061 	 * Check the CAP_LIST bit of the PCI status register first.
1062 	 */
1063 	status = pci_read_config(child, PCIR_STATUS, 2);
1064 	if (!(status & PCIM_STATUS_CAPPRESENT))
1065 		return (ENXIO);
1066 
1067 	/*
1068 	 * Determine the start pointer of the capabilities list.
1069 	 */
1070 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1071 	case 0:
1072 	case 1:
1073 		ptr = PCIR_CAP_PTR;
1074 		break;
1075 	case 2:
1076 		ptr = PCIR_CAP_PTR_2;
1077 		break;
1078 	default:
1079 		/* XXX: panic? */
1080 		return (ENXIO);		/* no extended capabilities support */
1081 	}
1082 	ptr = pci_read_config(child, ptr, 1);
1083 
1084 	/*
1085 	 * Traverse the capabilities list.
1086 	 */
1087 	while (ptr != 0) {
1088 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1089 			if (capreg != NULL)
1090 				*capreg = ptr;
1091 			return (0);
1092 		}
1093 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1094 	}
1095 
1096 	return (ENOENT);
1097 }
1098 
1099 /*
1100  * Support for MSI-X message interrupts.
1101  */
1102 void
1103 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1104 {
1105 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1106 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1107 	uint32_t offset;
1108 
1109 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1110 	offset = msix->msix_table_offset + index * 16;
1111 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1112 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1113 	bus_write_4(msix->msix_table_res, offset + 8, data);
1114 
1115 	/* Enable MSI -> HT mapping. */
1116 	pci_ht_map_msi(dev, address);
1117 }
1118 
1119 void
1120 pci_mask_msix(device_t dev, u_int index)
1121 {
1122 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1123 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1124 	uint32_t offset, val;
1125 
1126 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1127 	offset = msix->msix_table_offset + index * 16 + 12;
1128 	val = bus_read_4(msix->msix_table_res, offset);
1129 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1130 		val |= PCIM_MSIX_VCTRL_MASK;
1131 		bus_write_4(msix->msix_table_res, offset, val);
1132 	}
1133 }
1134 
1135 void
1136 pci_unmask_msix(device_t dev, u_int index)
1137 {
1138 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1139 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1140 	uint32_t offset, val;
1141 
1142 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1143 	offset = msix->msix_table_offset + index * 16 + 12;
1144 	val = bus_read_4(msix->msix_table_res, offset);
1145 	if (val & PCIM_MSIX_VCTRL_MASK) {
1146 		val &= ~PCIM_MSIX_VCTRL_MASK;
1147 		bus_write_4(msix->msix_table_res, offset, val);
1148 	}
1149 }
1150 
1151 int
1152 pci_pending_msix(device_t dev, u_int index)
1153 {
1154 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1155 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1156 	uint32_t offset, bit;
1157 
1158 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1159 	offset = msix->msix_pba_offset + (index / 32) * 4;
1160 	bit = 1 << index % 32;
1161 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1162 }
1163 
1164 /*
1165  * Restore MSI-X registers and table during resume.  If MSI-X is
1166  * enabled then walk the virtual table to restore the actual MSI-X
1167  * table.
1168  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Restore the saved message control register last. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1196 
1197 /*
1198  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1199  * returned in *count.  After this function returns, each message will be
1200  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1201  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still names that BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		/* Expose the message to the driver as SYS_RES_IRQ rid i+1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages the bridge actually gave us. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: table entry i uses vector i+1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1333 
1334 /*
1335  * By default, pci_alloc_msix() will assign the allocated IRQ
1336  * resources consecutively to the first N messages in the MSI-X table.
1337  * However, device drivers may want to use different layouts if they
1338  * either receive fewer messages than they asked for, or they wish to
1339  * populate the MSI-X table sparsely.  This method allows the driver
1340  * to specify what layout it wants.  It must be called after a
1341  * successful pci_alloc_msix() but before any of the associated
1342  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1343  *
1344  * The 'vectors' array contains 'count' message vectors.  The array
1345  * maps directly to the MSI-X table in that index 0 in the array
1346  * specifies the vector for the first message in the MSI-X table, etc.
1347  * The vector value in each array index can either be 0 to indicate
1348  * that no vector should be assigned to a message slot, or it can be a
1349  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1351  * vector (IRQ) to be used for the corresponding message.
1352  *
1353  * On successful return, each message with a non-zero vector will have
1354  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1355  * 1.  Additionally, if any of the IRQs allocated via the previous
1356  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1357  * will be freed back to the system automatically.
1358  *
1359  * For example, suppose a driver has a MSI-X table with 6 messages and
1360  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1361  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1362  * C.  After the call to pci_alloc_msix(), the device will be setup to
1363  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1365  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1366  * be freed back to the system.  This device will also have valid
1367  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1368  *
1369  * In any case, the SYS_RES_IRQ rid X will always map to the message
1370  * at MSI-X table index X - 1 and will only be valid if a vector is
1371  * assigned to that table entry.
1372  */
1373 int
1374 pci_remap_msix_method(device_t dev, device_t child, int count,
1375     const u_int *vectors)
1376 {
1377 	struct pci_devinfo *dinfo = device_get_ivars(child);
1378 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1379 	struct resource_list_entry *rle;
1380 	int i, irq, j, *used;
1381 
1382 	/*
1383 	 * Have to have at least one message in the table but the
1384 	 * table can't be bigger than the actual MSI-X table in the
1385 	 * device.
1386 	 */
1387 	if (count == 0 || count > msix->msix_msgnum)
1388 		return (EINVAL);
1389 
1390 	/* Sanity check the vectors. */
1391 	for (i = 0; i < count; i++)
1392 		if (vectors[i] > msix->msix_alloc)
1393 			return (EINVAL);
1394 
1395 	/*
1396 	 * Make sure there aren't any holes in the vectors to be used.
1397 	 * It's a big pain to support it, and it doesn't really make
1398 	 * sense anyway.  Also, at least one vector must be used.
1399 	 */
1400 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1401 	    M_ZERO);
1402 	for (i = 0; i < count; i++)
1403 		if (vectors[i] != 0)
1404 			used[vectors[i] - 1] = 1;
1405 	for (i = 0; i < msix->msix_alloc - 1; i++)
1406 		if (used[i] == 0 && used[i + 1] == 1) {
1407 			free(used, M_DEVBUF);
1408 			return (EINVAL);
1409 		}
1410 	if (used[0] != 1) {
1411 		free(used, M_DEVBUF);
1412 		return (EINVAL);
1413 	}
1414 
1415 	/* Make sure none of the resources are allocated. */
1416 	for (i = 0; i < msix->msix_table_len; i++) {
1417 		if (msix->msix_table[i].mte_vector == 0)
1418 			continue;
1419 		if (msix->msix_table[i].mte_handlers > 0)
1420 			return (EBUSY);
1421 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1422 		KASSERT(rle != NULL, ("missing resource"));
1423 		if (rle->res != NULL)
1424 			return (EBUSY);
1425 	}
1426 
1427 	/* Free the existing resource list entries. */
1428 	for (i = 0; i < msix->msix_table_len; i++) {
1429 		if (msix->msix_table[i].mte_vector == 0)
1430 			continue;
1431 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1432 	}
1433 
1434 	/*
1435 	 * Build the new virtual table keeping track of which vectors are
1436 	 * used.
1437 	 */
1438 	free(msix->msix_table, M_DEVBUF);
1439 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1440 	    M_DEVBUF, M_WAITOK | M_ZERO);
1441 	for (i = 0; i < count; i++)
1442 		msix->msix_table[i].mte_vector = vectors[i];
1443 	msix->msix_table_len = count;
1444 
1445 	/* Free any unused IRQs and resize the vectors array if necessary. */
1446 	j = msix->msix_alloc - 1;
1447 	if (used[j] == 0) {
1448 		struct msix_vector *vec;
1449 
1450 		while (used[j] == 0) {
1451 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1452 			    msix->msix_vectors[j].mv_irq);
1453 			j--;
1454 		}
1455 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1456 		    M_WAITOK);
1457 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1458 		    (j + 1));
1459 		free(msix->msix_vectors, M_DEVBUF);
1460 		msix->msix_vectors = vec;
1461 		msix->msix_alloc = j + 1;
1462 	}
1463 	free(used, M_DEVBUF);
1464 
1465 	/* Map the IRQs onto the rids. */
1466 	for (i = 0; i < count; i++) {
1467 		if (vectors[i] == 0)
1468 			continue;
1469 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1470 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1471 		    irq, 1);
1472 	}
1473 
1474 	if (bootverbose) {
1475 		device_printf(child, "Remapped MSI-X IRQs as: ");
1476 		for (i = 0; i < count; i++) {
1477 			if (i != 0)
1478 				printf(", ");
1479 			if (vectors[i] == 0)
1480 				printf("---");
1481 			else
1482 				printf("%d",
1483 				    msix->msix_vectors[vectors[i]].mv_irq);
1484 		}
1485 		printf("\n");
1486 	}
1487 
1488 	return (0);
1489 }
1490 
/*
 * Release all allocated MSI-X messages for 'child': disable MSI-X in
 * the message control register, delete the SYS_RES_IRQ resource list
 * entries, and hand the IRQs back to the parent bridge.  Returns
 * ENODEV if nothing is allocated and EBUSY if any message is still in
 * use (active handler or allocated resource).
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1537 
1538 /*
1539  * Return the max supported MSI-X messages this device supports.
1540  * Basically, assuming the MD code can alloc messages, this function
1541  * should return the maximum value that pci_alloc_msix() can return.
1542  * Thus, it is subject to the tunables, etc.
1543  */
1544 int
1545 pci_msix_count_method(device_t dev, device_t child)
1546 {
1547 	struct pci_devinfo *dinfo = device_get_ivars(child);
1548 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1549 
1550 	if (pci_do_msix && msix->msix_location != 0)
1551 		return (msix->msix_msgnum);
1552 	return (0);
1553 }
1554 
1555 /*
1556  * HyperTransport MSI mapping control
1557  */
1558 void
1559 pci_ht_map_msi(device_t dev, uint64_t addr)
1560 {
1561 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1562 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1563 
1564 	if (!ht->ht_msimap)
1565 		return;
1566 
1567 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1568 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1569 		/* Enable MSI -> HT mapping. */
1570 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1571 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1572 		    ht->ht_msictrl, 2);
1573 	}
1574 
1575 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1576 		/* Disable MSI -> HT mapping. */
1577 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1578 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1579 		    ht->ht_msictrl, 2);
1580 	}
1581 }
1582 
1583 /*
1584  * Support for MSI message signalled interrupts.
1585  */
1586 void
1587 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1588 {
1589 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1590 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1591 
1592 	/* Write data and address values. */
1593 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1594 	    address & 0xffffffff, 4);
1595 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1596 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1597 		    address >> 32, 4);
1598 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1599 		    data, 2);
1600 	} else
1601 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1602 		    2);
1603 
1604 	/* Enable MSI in the control register. */
1605 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1606 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1607 	    2);
1608 
1609 	/* Enable MSI -> HT mapping. */
1610 	pci_ht_map_msi(dev, address);
1611 }
1612 
1613 void
1614 pci_disable_msi(device_t dev)
1615 {
1616 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1617 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1618 
1619 	/* Disable MSI -> HT mapping. */
1620 	pci_ht_map_msi(dev, 0);
1621 
1622 	/* Disable MSI in the control register. */
1623 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1624 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1625 	    2);
1626 }
1627 
1628 /*
1629  * Restore MSI registers during resume.  If MSI is enabled then
1630  * restore the data and address registers in addition to the control
1631  * register.
1632  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Rewrite address/data only if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capable: data follows the high address. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the saved message control register. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1658 
/*
 * Look up 'irq' among this device's allocated MSI or MSI-X vectors,
 * request a fresh address/data pair from the parent bridge, and
 * re-program the hardware to match.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Reprogram with MSI disabled, then re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is 1-based (i + 1). */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1734 
1735 /*
1736  * Returns true if the specified device is blacklisted because MSI
1737  * doesn't work.
1738  */
1739 int
1740 pci_msi_device_blacklisted(device_t dev)
1741 {
1742 	struct pci_quirk *q;
1743 
1744 	if (!pci_honor_msi_blacklist)
1745 		return (0);
1746 
1747 	for (q = &pci_quirks[0]; q->devid; q++) {
1748 		if (q->devid == pci_get_devid(dev) &&
1749 		    q->type == PCI_QUIRK_DISABLE_MSI)
1750 			return (1);
1751 	}
1752 	return (0);
1753 }
1754 
1755 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1757  * we just check for blacklisted chipsets as represented by the
1758  * host-PCI bridge at device 0:0:0.  In the future, it may become
1759  * necessary to check other system attributes, such as the kenv values
1760  * that give the motherboard manufacturer and model number.
1761  */
1762 static int
1763 pci_msi_blacklisted(void)
1764 {
1765 	device_t dev;
1766 
1767 	if (!pci_honor_msi_blacklist)
1768 		return (0);
1769 
1770 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1771 	if (!(pcie_chipset || pcix_chipset))
1772 		return (1);
1773 
1774 	dev = pci_find_bsf(0, 0, 0);
1775 	if (dev != NULL)
1776 		return (pci_msi_device_blacklisted(dev));
1777 	return (0);
1778 }
1779 
1780 /*
1781  * Attempt to allocate *count MSI messages.  The actual number allocated is
1782  * returned in *count.  After this function returns, each message will be
1783  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1784  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME field encodes log2 of the message count (actual is 2^k). */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1903 
1904 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember each IRQ so we can hand it back to the bridge. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1952 
1953 /*
1954  * Return the max supported MSI messages this device supports.
1955  * Basically, assuming the MD code can alloc messages, this function
1956  * should return the maximum value that pci_alloc_msi() can return.
1957  * Thus, it is subject to the tunables, etc.
1958  */
1959 int
1960 pci_msi_count_method(device_t dev, device_t child)
1961 {
1962 	struct pci_devinfo *dinfo = device_get_ivars(child);
1963 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1964 
1965 	if (pci_do_msi && msi->msi_location != 0)
1966 		return (msi->msi_msgnum);
1967 	return (0);
1968 }
1969 
1970 /* free pcicfgregs structure and all depending data structures */
1971 
1972 int
1973 pci_freecfg(struct pci_devinfo *dinfo)
1974 {
1975 	struct devlist *devlist_head;
1976 	int i;
1977 
1978 	devlist_head = &pci_devq;
1979 
1980 	if (dinfo->cfg.vpd.vpd_reg) {
1981 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1982 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1983 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1984 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1985 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1986 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1987 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1988 	}
1989 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1990 	free(dinfo, M_DEVBUF);
1991 
1992 	/* increment the generation count */
1993 	pci_generation++;
1994 
1995 	/* we're losing one device */
1996 	pci_numdevs--;
1997 	return (0);
1998 }
1999 
2000 /*
 * PCI power management
2002  */
2003 int
2004 pci_set_powerstate_method(device_t dev, device_t child, int state)
2005 {
2006 	struct pci_devinfo *dinfo = device_get_ivars(child);
2007 	pcicfgregs *cfg = &dinfo->cfg;
2008 	uint16_t status;
2009 	int result, oldstate, highest, delay;
2010 
2011 	if (cfg->pp.pp_cap == 0)
2012 		return (EOPNOTSUPP);
2013 
2014 	/*
2015 	 * Optimize a no state change request away.  While it would be OK to
2016 	 * write to the hardware in theory, some devices have shown odd
2017 	 * behavior when going from D3 -> D3.
2018 	 */
2019 	oldstate = pci_get_powerstate(child);
2020 	if (oldstate == state)
2021 		return (0);
2022 
2023 	/*
2024 	 * The PCI power management specification states that after a state
2025 	 * transition between PCI power states, system software must
2026 	 * guarantee a minimal delay before the function accesses the device.
2027 	 * Compute the worst case delay that we need to guarantee before we
2028 	 * access the device.  Many devices will be responsive much more
2029 	 * quickly than this delay, but there are some that don't respond
2030 	 * instantly to state changes.  Transitions to/from D3 state require
2031 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2032 	 * is done below with DELAY rather than a sleeper function because
2033 	 * this function can be called from contexts where we cannot sleep.
2034 	 */
2035 	highest = (oldstate > state) ? oldstate : state;
2036 	if (highest == PCI_POWERSTATE_D3)
2037 	    delay = 10000;
2038 	else if (highest == PCI_POWERSTATE_D2)
2039 	    delay = 200;
2040 	else
2041 	    delay = 0;
2042 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2043 	    & ~PCIM_PSTAT_DMASK;
2044 	result = 0;
2045 	switch (state) {
2046 	case PCI_POWERSTATE_D0:
2047 		status |= PCIM_PSTAT_D0;
2048 		break;
2049 	case PCI_POWERSTATE_D1:
2050 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2051 			return (EOPNOTSUPP);
2052 		status |= PCIM_PSTAT_D1;
2053 		break;
2054 	case PCI_POWERSTATE_D2:
2055 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2056 			return (EOPNOTSUPP);
2057 		status |= PCIM_PSTAT_D2;
2058 		break;
2059 	case PCI_POWERSTATE_D3:
2060 		status |= PCIM_PSTAT_D3;
2061 		break;
2062 	default:
2063 		return (EINVAL);
2064 	}
2065 
2066 	if (bootverbose)
2067 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2068 		    state);
2069 
2070 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2071 	if (delay)
2072 		DELAY(delay);
2073 	return (0);
2074 }
2075 
2076 int
2077 pci_get_powerstate_method(device_t dev, device_t child)
2078 {
2079 	struct pci_devinfo *dinfo = device_get_ivars(child);
2080 	pcicfgregs *cfg = &dinfo->cfg;
2081 	uint16_t status;
2082 	int result;
2083 
2084 	if (cfg->pp.pp_cap != 0) {
2085 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2086 		switch (status & PCIM_PSTAT_DMASK) {
2087 		case PCIM_PSTAT_D0:
2088 			result = PCI_POWERSTATE_D0;
2089 			break;
2090 		case PCIM_PSTAT_D1:
2091 			result = PCI_POWERSTATE_D1;
2092 			break;
2093 		case PCIM_PSTAT_D2:
2094 			result = PCI_POWERSTATE_D2;
2095 			break;
2096 		case PCIM_PSTAT_D3:
2097 			result = PCI_POWERSTATE_D3;
2098 			break;
2099 		default:
2100 			result = PCI_POWERSTATE_UNKNOWN;
2101 			break;
2102 		}
2103 	} else {
2104 		/* No support, device is always at D0 */
2105 		result = PCI_POWERSTATE_D0;
2106 	}
2107 	return (result);
2108 }
2109 
2110 /*
2111  * Some convenience functions for PCI device drivers.
2112  */
2113 
2114 static __inline void
2115 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2116 {
2117 	uint16_t	command;
2118 
2119 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2120 	command |= bit;
2121 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2122 }
2123 
2124 static __inline void
2125 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2126 {
2127 	uint16_t	command;
2128 
2129 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2130 	command &= ~bit;
2131 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2132 }
2133 
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	/* Turn on bus mastering in the child's command register. */
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2140 
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	/* Turn off bus mastering in the child's command register. */
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2147 
2148 int
2149 pci_enable_io_method(device_t dev, device_t child, int space)
2150 {
2151 	uint16_t command;
2152 	uint16_t bit;
2153 	char *error;
2154 
2155 	bit = 0;
2156 	error = NULL;
2157 
2158 	switch(space) {
2159 	case SYS_RES_IOPORT:
2160 		bit = PCIM_CMD_PORTEN;
2161 		error = "port";
2162 		break;
2163 	case SYS_RES_MEMORY:
2164 		bit = PCIM_CMD_MEMEN;
2165 		error = "memory";
2166 		break;
2167 	default:
2168 		return (EINVAL);
2169 	}
2170 	pci_set_command_bit(dev, child, bit);
2171 	/* Some devices seem to need a brief stall here, what do to? */
2172 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2173 	if (command & bit)
2174 		return (0);
2175 	device_printf(child, "failed to enable %s mapping!\n", error);
2176 	return (ENXIO);
2177 }
2178 
2179 int
2180 pci_disable_io_method(device_t dev, device_t child, int space)
2181 {
2182 	uint16_t command;
2183 	uint16_t bit;
2184 	char *error;
2185 
2186 	bit = 0;
2187 	error = NULL;
2188 
2189 	switch(space) {
2190 	case SYS_RES_IOPORT:
2191 		bit = PCIM_CMD_PORTEN;
2192 		error = "port";
2193 		break;
2194 	case SYS_RES_MEMORY:
2195 		bit = PCIM_CMD_MEMEN;
2196 		error = "memory";
2197 		break;
2198 	default:
2199 		return (EINVAL);
2200 	}
2201 	pci_clear_command_bit(dev, child, bit);
2202 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2203 	if (command & bit) {
2204 		device_printf(child, "failed to disable %s mapping!\n", error);
2205 		return (ENXIO);
2206 	}
2207 	return (0);
2208 }
2209 
2210 /*
2211  * New style pci driver.  Parent device is either a pci-host-bridge or a
2212  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2213  */
2214 
/*
 * Dump a device's config-space summary to the console when booting
 * verbose: IDs, location, class, command/status, latency/grant values,
 * interrupt pin, and the power-management, MSI, and MSI-X capabilities
 * if present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 1..4 maps to INTa..INTd. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state for the report. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2271 
2272 static int
2273 pci_porten(device_t dev)
2274 {
2275 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2276 }
2277 
2278 static int
2279 pci_memen(device_t dev)
2280 {
2281 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2282 }
2283 
/*
 * Probe a BAR: return its current raw value in *mapp and the value
 * read back after writing all 1's in *testvalp (from which the BAR's
 * size can be derived).  Decoding is disabled during the probe and the
 * BAR is restored to its original contents before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	/* A 64-bit BAR occupies this register and the following one. */
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds its original value. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2330 
2331 static void
2332 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2333 {
2334 	pci_addr_t map;
2335 	int ln2range;
2336 
2337 	map = pci_read_config(dev, reg, 4);
2338 	ln2range = pci_maprange(map);
2339 	pci_write_config(dev, reg, base, 4);
2340 	if (ln2range == 64)
2341 		pci_write_config(dev, reg + 4, base >> 32, 4);
2342 }
2343 
2344 /*
2345  * Add a resource based on a pci map register. Return 1 if the map
2346  * register is a 32bit map register or 2 if it is a 64bit register.
2347  */
2348 static int
2349 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2350     int force, int prefetch)
2351 {
2352 	pci_addr_t base, map, testval;
2353 	pci_addr_t start, end, count;
2354 	int barlen, maprange, mapsize, type;
2355 	uint16_t cmd;
2356 	struct resource *res;
2357 
2358 	pci_read_bar(dev, reg, &map, &testval);
2359 	if (PCI_BAR_MEM(map)) {
2360 		type = SYS_RES_MEMORY;
2361 		if (map & PCIM_BAR_MEM_PREFETCH)
2362 			prefetch = 1;
2363 	} else
2364 		type = SYS_RES_IOPORT;
2365 	mapsize = pci_mapsize(testval);
2366 	base = pci_mapbase(map);
2367 	maprange = pci_maprange(map);
2368 	barlen = maprange == 64 ? 2 : 1;
2369 
2370 	/*
2371 	 * For I/O registers, if bottom bit is set, and the next bit up
2372 	 * isn't clear, we know we have a BAR that doesn't conform to the
2373 	 * spec, so ignore it.  Also, sanity check the size of the data
2374 	 * areas to the type of memory involved.  Memory must be at least
2375 	 * 16 bytes in size, while I/O ranges must be at least 4.
2376 	 */
2377 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2378 		return (barlen);
2379 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2380 	    (type == SYS_RES_IOPORT && mapsize < 2))
2381 		return (barlen);
2382 
2383 	if (bootverbose) {
2384 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2385 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2386 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2387 			printf(", port disabled\n");
2388 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2389 			printf(", memory disabled\n");
2390 		else
2391 			printf(", enabled\n");
2392 	}
2393 
2394 	/*
2395 	 * If base is 0, then we have problems.  It is best to ignore
2396 	 * such entries for the moment.  These will be allocated later if
2397 	 * the driver specifically requests them.  However, some
2398 	 * removable busses look better when all resources are allocated,
2399 	 * so allow '0' to be overriden.
2400 	 *
2401 	 * Similarly treat maps whose values is the same as the test value
2402 	 * read back.  These maps have had all f's written to them by the
2403 	 * BIOS in an attempt to disable the resources.
2404 	 */
2405 	if (!force && (base == 0 || map == testval))
2406 		return (barlen);
2407 	if ((u_long)base != base) {
2408 		device_printf(bus,
2409 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2410 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2411 		    pci_get_function(dev), reg);
2412 		return (barlen);
2413 	}
2414 
2415 	/*
2416 	 * This code theoretically does the right thing, but has
2417 	 * undesirable side effects in some cases where peripherals
2418 	 * respond oddly to having these bits enabled.  Let the user
2419 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2420 	 * default).
2421 	 */
2422 	if (pci_enable_io_modes) {
2423 		/* Turn on resources that have been left off by a lazy BIOS */
2424 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2425 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2426 			cmd |= PCIM_CMD_PORTEN;
2427 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2428 		}
2429 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2430 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2431 			cmd |= PCIM_CMD_MEMEN;
2432 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2433 		}
2434 	} else {
2435 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2436 			return (barlen);
2437 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2438 			return (barlen);
2439 	}
2440 
2441 	count = 1 << mapsize;
2442 	if (base == 0 || base == pci_mapbase(testval)) {
2443 		start = 0;	/* Let the parent decide. */
2444 		end = ~0ULL;
2445 	} else {
2446 		start = base;
2447 		end = base + (1 << mapsize) - 1;
2448 	}
2449 	resource_list_add(rl, type, reg, start, end, count);
2450 
2451 	/*
2452 	 * Try to allocate the resource for this BAR from our parent
2453 	 * so that this resource range is already reserved.  The
2454 	 * driver for this device will later inherit this resource in
2455 	 * pci_alloc_resource().
2456 	 */
2457 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2458 	    prefetch ? RF_PREFETCHABLE : 0);
2459 	if (res == NULL) {
2460 		/*
2461 		 * If the allocation fails, clear the BAR and delete
2462 		 * the resource list entry to force
2463 		 * pci_alloc_resource() to allocate resources from the
2464 		 * parent.
2465 		 */
2466 		resource_list_delete(rl, type, reg);
2467 		start = 0;
2468 	} else {
2469 		start = rman_get_start(res);
2470 		rman_set_device(res, bus);
2471 	}
2472 	pci_write_bar(dev, reg, start);
2473 	return (barlen);
2474 }
2475 
2476 /*
2477  * For ATA devices we need to decide early what addressing mode to use.
2478  * Legacy demands that the primary and secondary ATA ports sits on the
2479  * same addresses that old ISA hardware did. This dictates that we use
2480  * those addresses and ignore the BAR's if we cannot set PCI native
2481  * addressing mode.
2482  */
2483 static void
2484 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2485     uint32_t prefetchmask)
2486 {
2487 	struct resource *r;
2488 	int rid, type, progif;
2489 #if 0
2490 	/* if this device supports PCI native addressing use it */
2491 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2492 	if ((progif & 0x8a) == 0x8a) {
2493 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2494 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2495 			printf("Trying ATA native PCI addressing mode\n");
2496 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2497 		}
2498 	}
2499 #endif
2500 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2501 	type = SYS_RES_IOPORT;
2502 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2503 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2504 		    prefetchmask & (1 << 0));
2505 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2506 		    prefetchmask & (1 << 1));
2507 	} else {
2508 		rid = PCIR_BAR(0);
2509 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2510 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2511 		    8, 0);
2512 		rman_set_device(r, bus);
2513 		rid = PCIR_BAR(1);
2514 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2515 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2516 		    1, 0);
2517 		rman_set_device(r, bus);
2518 	}
2519 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2520 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2521 		    prefetchmask & (1 << 2));
2522 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2523 		    prefetchmask & (1 << 3));
2524 	} else {
2525 		rid = PCIR_BAR(2);
2526 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2527 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2528 		    8, 0);
2529 		rman_set_device(r, bus);
2530 		rid = PCIR_BAR(3);
2531 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2532 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2533 		    1, 0);
2534 		rman_set_device(r, bus);
2535 	}
2536 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2537 	    prefetchmask & (1 << 4));
2538 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2539 	    prefetchmask & (1 << 5));
2540 }
2541 
/*
 * Determine the IRQ for a device's legacy INTx pin and record it as
 * the rid 0 interrupt resource.  The IRQ may come from a user tunable,
 * the bus's interrupt routing, or the intline register, in that order
 * of preference.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2589 
/*
 * Populate a device's resource list: decode its BARs (with special
 * handling for legacy-mode ATA controllers), add any quirked extra
 * map registers, and assign its INTx interrupt.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many BAR registers it consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2634 
/*
 * Enumerate every slot and function on the given bus, creating a child
 * device for each function whose config space responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	/* dinfo_size lets subclasses embed pci_devinfo in a larger struct. */
	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip empty slots and unknown header types. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2667 
/*
 * Create the device_t for a probed function and populate its ivars
 * and resource list.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * NOTE(review): save followed by an immediate restore appears to
	 * prime the saved-config cache and normalize device state before
	 * resources are probed — confirm against pci_cfg_save/restore.
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2679 
/* Generic probe method for the PCI bus driver. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
2689 
2690 static int
2691 pci_attach(device_t dev)
2692 {
2693 	int busno, domain;
2694 
2695 	/*
2696 	 * Since there can be multiple independantly numbered PCI
2697 	 * busses on systems with multiple PCI domains, we can't use
2698 	 * the unit number to decide which bus we are probing. We ask
2699 	 * the parent pcib what our domain and bus numbers are.
2700 	 */
2701 	domain = pcib_get_domain(dev);
2702 	busno = pcib_get_bus(dev);
2703 	if (bootverbose)
2704 		device_printf(dev, "domain=%d, physical bus=%d\n",
2705 		    domain, busno);
2706 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2707 	return (bus_generic_attach(dev));
2708 }
2709 
/*
 * Suspend the bus: save each child's config space, suspend the
 * children, then (when ACPI is present and power-resume is enabled)
 * place attached type 0 children in their sleep power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			/* Default to D3; ACPI may override dstate. */
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2758 
/*
 * Resume the bus: power each child back to D0 (via ACPI when present),
 * restore its saved config space, then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2794 
/*
 * Locate the loader-preloaded PCI vendor database and record its
 * address and size for later ID-to-name lookups.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t vendordata, info;

	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
		/*
		 * NOTE(review): preload_search_info() results are
		 * dereferenced without a NULL check — presumably ADDR/SIZE
		 * are always present for a preloaded module; confirm.
		 */
		info = preload_search_info(vendordata, MODINFO_ADDR);
		pci_vendordata = *(char **)info;
		info = preload_search_info(vendordata, MODINFO_SIZE);
		pci_vendordata_size = *(size_t *)info;
		/* terminate the database */
		pci_vendordata[pci_vendordata_size] = '\n';
	}
}
2809 
/*
 * A new driver was registered against this bus: re-probe any children
 * that have no driver yet, restoring their config state first and
 * powering them back down if the probe/attach fails.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reconsider children that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		/* On failure, save state and power the device down again. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2838 
/*
 * Set up an interrupt handler for a child.  For direct children this
 * also manages the interrupt mode: rid 0 means legacy INTx (which is
 * unmasked here), while rid > 0 names an MSI or MSI-X message whose
 * address/data are lazily mapped via the parent bridge on first use.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI lazily on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector lazily on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2928 
2929 int
2930 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2931     void *cookie)
2932 {
2933 	struct msix_table_entry *mte;
2934 	struct resource_list_entry *rle;
2935 	struct pci_devinfo *dinfo;
2936 	int error, rid;
2937 
2938 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2939 		return (EINVAL);
2940 
2941 	/* If this isn't a direct child, just bail out */
2942 	if (device_get_parent(child) != dev)
2943 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2944 
2945 	rid = rman_get_rid(irq);
2946 	if (rid == 0) {
2947 		/* Mask INTx */
2948 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2949 	} else {
2950 		/*
2951 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2952 		 * decrement the appropriate handlers count and mask the
2953 		 * MSI-X message, or disable MSI messages if the count
2954 		 * drops to 0.
2955 		 */
2956 		dinfo = device_get_ivars(child);
2957 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2958 		if (rle->res != irq)
2959 			return (EINVAL);
2960 		if (dinfo->cfg.msi.msi_alloc > 0) {
2961 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2962 			    ("MSI-X index too high"));
2963 			if (dinfo->cfg.msi.msi_handlers == 0)
2964 				return (EINVAL);
2965 			dinfo->cfg.msi.msi_handlers--;
2966 			if (dinfo->cfg.msi.msi_handlers == 0)
2967 				pci_disable_msi(child);
2968 		} else {
2969 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2970 			    ("No MSI or MSI-X interrupts allocated"));
2971 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2972 			    ("MSI-X index too high"));
2973 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2974 			if (mte->mte_handlers == 0)
2975 				return (EINVAL);
2976 			mte->mte_handlers--;
2977 			if (mte->mte_handlers == 0)
2978 				pci_mask_msix(child, rid - 1);
2979 		}
2980 	}
2981 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2982 	if (rid > 0)
2983 		KASSERT(error == 0,
2984 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2985 	return (error);
2986 }
2987 
2988 int
2989 pci_print_child(device_t dev, device_t child)
2990 {
2991 	struct pci_devinfo *dinfo;
2992 	struct resource_list *rl;
2993 	int retval = 0;
2994 
2995 	dinfo = device_get_ivars(child);
2996 	rl = &dinfo->resources;
2997 
2998 	retval += bus_print_child_header(dev, child);
2999 
3000 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3001 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3002 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3003 	if (device_get_flags(dev))
3004 		retval += printf(" flags %#x", device_get_flags(dev));
3005 
3006 	retval += printf(" at device %d.%d", pci_get_slot(child),
3007 	    pci_get_function(child));
3008 
3009 	retval += bus_print_child_footer(dev, child);
3010 
3011 	return (retval);
3012 }
3013 
/*
 * Table mapping PCI class (and optionally subclass) codes to short
 * human-readable descriptions, used by pci_probe_nomatch() when no
 * driver attaches.  A subclass of -1 names the whole class; the table
 * is terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3105 
3106 void
3107 pci_probe_nomatch(device_t dev, device_t child)
3108 {
3109 	int	i;
3110 	char	*cp, *scp, *device;
3111 
3112 	/*
3113 	 * Look for a listing for this device in a loaded device database.
3114 	 */
3115 	if ((device = pci_describe_device(child)) != NULL) {
3116 		device_printf(dev, "<%s>", device);
3117 		free(device, M_DEVBUF);
3118 	} else {
3119 		/*
3120 		 * Scan the class/subclass descriptions for a general
3121 		 * description.
3122 		 */
3123 		cp = "unknown";
3124 		scp = NULL;
3125 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3126 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3127 				if (pci_nomatch_tab[i].subclass == -1) {
3128 					cp = pci_nomatch_tab[i].desc;
3129 				} else if (pci_nomatch_tab[i].subclass ==
3130 				    pci_get_subclass(child)) {
3131 					scp = pci_nomatch_tab[i].desc;
3132 				}
3133 			}
3134 		}
3135 		device_printf(dev, "<%s%s%s>",
3136 		    cp ? cp : "",
3137 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3138 		    scp ? scp : "");
3139 	}
3140 	printf(" at device %d.%d (no driver attached)\n",
3141 	    pci_get_slot(child), pci_get_function(child));
3142 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3143 	return;
3144 }
3145 
3146 /*
3147  * Parse the PCI device database, if loaded, and return a pointer to a
3148  * description of the device.
3149  *
3150  * The database is flat text formatted as follows:
3151  *
3152  * Any line not in a valid format is ignored.
3153  * Lines are terminated with newline '\n' characters.
3154  *
3155  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3156  * the vendor name.
3157  *
3158  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3159  * - devices cannot be listed without a corresponding VENDOR line.
3160  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3161  * another TAB, then the device name.
3162  */
3163 
3164 /*
3165  * Assuming (ptr) points to the beginning of a line in the database,
3166  * return the vendor or device and description of the next entry.
3167  * The value of (vendor) or (device) inappropriate for the entry type
3168  * is set to -1.  Returns nonzero at the end of the database.
3169  *
 * Note that this is somewhat fragile in the face of corrupt data;
3171  * we attempt to safeguard against this by spamming the end of the
3172  * database with a newline when we initialise.
3173  */
3174 static int
3175 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3176 {
3177 	char	*cp = *ptr;
3178 	int	left;
3179 
3180 	*device = -1;
3181 	*vendor = -1;
3182 	**desc = '\0';
3183 	for (;;) {
3184 		left = pci_vendordata_size - (cp - pci_vendordata);
3185 		if (left <= 0) {
3186 			*ptr = cp;
3187 			return(1);
3188 		}
3189 
3190 		/* vendor entry? */
3191 		if (*cp != '\t' &&
3192 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3193 			break;
3194 		/* device entry? */
3195 		if (*cp == '\t' &&
3196 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3197 			break;
3198 
3199 		/* skip to next line */
3200 		while (*cp != '\n' && left > 0) {
3201 			cp++;
3202 			left--;
3203 		}
3204 		if (*cp == '\n') {
3205 			cp++;
3206 			left--;
3207 		}
3208 	}
3209 	/* skip to next line */
3210 	while (*cp != '\n' && left > 0) {
3211 		cp++;
3212 		left--;
3213 	}
3214 	if (*cp == '\n' && left > 0)
3215 		cp++;
3216 	*ptr = cp;
3217 	return(0);
3218 }
3219 
/*
 * Build a "vendor, device" description string for (dev) from the
 * loaded vendor database.  Returns a malloc'd string that the caller
 * must free, or NULL if no database is loaded, the vendor is not
 * listed, or an allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor description. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* 80-byte scratch buffer for the device description. */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			/* Hit the end of the database: no device match. */
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			/* Reached the next vendor section: no device match. */
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No textual match; fall back to the numeric device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator plus the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3272 
3273 int
3274 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3275 {
3276 	struct pci_devinfo *dinfo;
3277 	pcicfgregs *cfg;
3278 
3279 	dinfo = device_get_ivars(child);
3280 	cfg = &dinfo->cfg;
3281 
3282 	switch (which) {
3283 	case PCI_IVAR_ETHADDR:
3284 		/*
3285 		 * The generic accessor doesn't deal with failure, so
3286 		 * we set the return value, then return an error.
3287 		 */
3288 		*((uint8_t **) result) = NULL;
3289 		return (EINVAL);
3290 	case PCI_IVAR_SUBVENDOR:
3291 		*result = cfg->subvendor;
3292 		break;
3293 	case PCI_IVAR_SUBDEVICE:
3294 		*result = cfg->subdevice;
3295 		break;
3296 	case PCI_IVAR_VENDOR:
3297 		*result = cfg->vendor;
3298 		break;
3299 	case PCI_IVAR_DEVICE:
3300 		*result = cfg->device;
3301 		break;
3302 	case PCI_IVAR_DEVID:
3303 		*result = (cfg->device << 16) | cfg->vendor;
3304 		break;
3305 	case PCI_IVAR_CLASS:
3306 		*result = cfg->baseclass;
3307 		break;
3308 	case PCI_IVAR_SUBCLASS:
3309 		*result = cfg->subclass;
3310 		break;
3311 	case PCI_IVAR_PROGIF:
3312 		*result = cfg->progif;
3313 		break;
3314 	case PCI_IVAR_REVID:
3315 		*result = cfg->revid;
3316 		break;
3317 	case PCI_IVAR_INTPIN:
3318 		*result = cfg->intpin;
3319 		break;
3320 	case PCI_IVAR_IRQ:
3321 		*result = cfg->intline;
3322 		break;
3323 	case PCI_IVAR_DOMAIN:
3324 		*result = cfg->domain;
3325 		break;
3326 	case PCI_IVAR_BUS:
3327 		*result = cfg->bus;
3328 		break;
3329 	case PCI_IVAR_SLOT:
3330 		*result = cfg->slot;
3331 		break;
3332 	case PCI_IVAR_FUNCTION:
3333 		*result = cfg->func;
3334 		break;
3335 	case PCI_IVAR_CMDREG:
3336 		*result = cfg->cmdreg;
3337 		break;
3338 	case PCI_IVAR_CACHELNSZ:
3339 		*result = cfg->cachelnsz;
3340 		break;
3341 	case PCI_IVAR_MINGNT:
3342 		*result = cfg->mingnt;
3343 		break;
3344 	case PCI_IVAR_MAXLAT:
3345 		*result = cfg->maxlat;
3346 		break;
3347 	case PCI_IVAR_LATTIMER:
3348 		*result = cfg->lattimer;
3349 		break;
3350 	default:
3351 		return (ENOENT);
3352 	}
3353 	return (0);
3354 }
3355 
3356 int
3357 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3358 {
3359 	struct pci_devinfo *dinfo;
3360 
3361 	dinfo = device_get_ivars(child);
3362 
3363 	switch (which) {
3364 	case PCI_IVAR_INTPIN:
3365 		dinfo->cfg.intpin = value;
3366 		return (0);
3367 	case PCI_IVAR_ETHADDR:
3368 	case PCI_IVAR_SUBVENDOR:
3369 	case PCI_IVAR_SUBDEVICE:
3370 	case PCI_IVAR_VENDOR:
3371 	case PCI_IVAR_DEVICE:
3372 	case PCI_IVAR_DEVID:
3373 	case PCI_IVAR_CLASS:
3374 	case PCI_IVAR_SUBCLASS:
3375 	case PCI_IVAR_PROGIF:
3376 	case PCI_IVAR_REVID:
3377 	case PCI_IVAR_IRQ:
3378 	case PCI_IVAR_DOMAIN:
3379 	case PCI_IVAR_BUS:
3380 	case PCI_IVAR_SLOT:
3381 	case PCI_IVAR_FUNCTION:
3382 		return (EINVAL);	/* disallow for now */
3383 
3384 	default:
3385 		return (ENOENT);
3386 	}
3387 }
3388 
3389 
3390 #include "opt_ddb.h"
3391 #ifdef DDB
3392 #include <ddb/ddb.h>
3393 #include <sys/cons.h>
3394 
3395 /*
3396  * List resources based on pci map registers, used for within ddb
3397  */
3398 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (driver name/unit, selector,
 * class, subsystem/chip IDs, revision, and header type).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used as a pseudo-unit for devices with no driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3438 #endif /* DDB */
3439 
/*
 * Lazily size and allocate the resource backing a BAR.  The BAR is
 * probed (via pci_read_bar()) to determine its type and size, the
 * driver-requested size/alignment is overridden with the BAR's real
 * values, a resource is allocated from the parent bus, recorded in
 * the child's resource list, and the allocated base address is
 * programmed back into the BAR.  Returns the resource, or NULL on
 * any failure or bogus BAR.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The requested resource type must match the BAR's actual type. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* The bus owns the resource until the child allocates it. */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the allocated base address into the BAR. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3529 
3530 
/*
 * Allocate a resource for a child device.  Requests from
 * grandchildren are passed straight up the tree.  For direct
 * children, IRQ 0 may be routed on demand, and BAR resources are
 * lazily sized/allocated (pci_alloc_map()) and then handed from the
 * bus to the child.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			/* Re-lookup: pci_alloc_map() added the entry. */
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3602 
3603 int
3604 pci_release_resource(device_t dev, device_t child, int type, int rid,
3605     struct resource *r)
3606 {
3607 	int error;
3608 
3609 	if (device_get_parent(child) != dev)
3610 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3611 		    type, rid, r));
3612 
3613 	/*
3614 	 * For BARs we don't actually want to release the resource.
3615 	 * Instead, we deactivate the resource if needed and then give
3616 	 * ownership of the BAR back to the bus.
3617 	 */
3618 	switch (type) {
3619 	case SYS_RES_IOPORT:
3620 	case SYS_RES_MEMORY:
3621 		if (rman_get_device(r) != child)
3622 			return (EINVAL);
3623 		if (rman_get_flags(r) & RF_ACTIVE) {
3624 			error = bus_deactivate_resource(child, type, rid, r);
3625 			if (error)
3626 				return (error);
3627 		}
3628 		rman_set_device(r, dev);
3629 		return (0);
3630 	}
3631 	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3632 }
3633 
3634 int
3635 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3636     struct resource *r)
3637 {
3638 	int error;
3639 
3640 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3641 	if (error)
3642 		return (error);
3643 
3644 	/* Enable decoding in the command register when activating BARs. */
3645 	if (device_get_parent(child) == dev) {
3646 		switch (type) {
3647 		case SYS_RES_IOPORT:
3648 		case SYS_RES_MEMORY:
3649 			error = PCI_ENABLE_IO(dev, child, type);
3650 			break;
3651 		}
3652 	}
3653 	return (error);
3654 }
3655 
/*
 * Delete a resource entry from a direct child's resource list,
 * releasing the underlying resource if the bus owns it.  BARs are
 * cleared first so the device stops decoding the range.  Silently
 * does nothing for grandchildren or unknown entries.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		/* Refuse to delete a resource the child still owns or uses. */
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3697 
3698 struct resource_list *
3699 pci_get_resource_list (device_t dev, device_t child)
3700 {
3701 	struct pci_devinfo *dinfo = device_get_ivars(child);
3702 
3703 	return (&dinfo->resources);
3704 }
3705 
3706 uint32_t
3707 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3708 {
3709 	struct pci_devinfo *dinfo = device_get_ivars(child);
3710 	pcicfgregs *cfg = &dinfo->cfg;
3711 
3712 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3713 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3714 }
3715 
3716 void
3717 pci_write_config_method(device_t dev, device_t child, int reg,
3718     uint32_t val, int width)
3719 {
3720 	struct pci_devinfo *dinfo = device_get_ivars(child);
3721 	pcicfgregs *cfg = &dinfo->cfg;
3722 
3723 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3724 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3725 }
3726 
3727 int
3728 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3729     size_t buflen)
3730 {
3731 
3732 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3733 	    pci_get_function(child));
3734 	return (0);
3735 }
3736 
3737 int
3738 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3739     size_t buflen)
3740 {
3741 	struct pci_devinfo *dinfo;
3742 	pcicfgregs *cfg;
3743 
3744 	dinfo = device_get_ivars(child);
3745 	cfg = &dinfo->cfg;
3746 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3747 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3748 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3749 	    cfg->progif);
3750 	return (0);
3751 }
3752 
3753 int
3754 pci_assign_interrupt_method(device_t dev, device_t child)
3755 {
3756 	struct pci_devinfo *dinfo = device_get_ivars(child);
3757 	pcicfgregs *cfg = &dinfo->cfg;
3758 
3759 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3760 	    cfg->intpin));
3761 }
3762 
/*
 * Module event handler.  On load: initialize the global device queue,
 * create the /dev/pci control device, and load the vendor description
 * database.  On unload: destroy the control device.  Always returns 0.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* Control device node; persists between load and unload. */
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
3784 
/*
 * Restore the saved config-space registers of (dev) from the cached
 * copies in (dinfo), typically after a resume or power transition.
 * Only header type 0 devices are handled.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite BARs and the writable type-0 header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3830 
/*
 * Save the config-space registers of (dev) into the cached copies in
 * (dinfo) and, if (setstate) is set and the pci_do_power_nodriver
 * policy allows it, place the device into the D3 power state.  Only
 * header type 0 devices are handled.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3914