xref: /freebsd/sys/dev/pci/pci.c (revision 9517e866259191fcd39434a97ad849a9b59b9b9f)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include "pcib_if.h"
66 #include "pci_if.h"
67 
68 #ifdef __HAVE_ACPI
69 #include <contrib/dev/acpica/include/acpi.h>
70 #include "acpi_if.h"
71 #else
72 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
73 #endif
74 
75 static pci_addr_t	pci_mapbase(uint64_t mapreg);
76 static const char	*pci_maptype(uint64_t mapreg);
77 static int		pci_mapsize(uint64_t testval);
78 static int		pci_maprange(uint64_t mapreg);
79 static void		pci_fixancient(pcicfgregs *cfg);
80 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
81 
82 static int		pci_porten(device_t dev);
83 static int		pci_memen(device_t dev);
84 static void		pci_assign_interrupt(device_t bus, device_t dev,
85 			    int force_route);
86 static int		pci_add_map(device_t bus, device_t dev, int reg,
87 			    struct resource_list *rl, int force, int prefetch);
88 static int		pci_probe(device_t dev);
89 static int		pci_attach(device_t dev);
90 static void		pci_load_vendor_data(void);
91 static int		pci_describe_parse_line(char **ptr, int *vendor,
92 			    int *device, char **desc);
93 static char		*pci_describe_device(device_t dev);
94 static int		pci_modevent(module_t mod, int what, void *arg);
95 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
96 			    pcicfgregs *cfg);
97 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
98 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
99 			    int reg, uint32_t *data);
100 #if 0
101 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
102 			    int reg, uint32_t data);
103 #endif
104 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
105 static void		pci_disable_msi(device_t dev);
106 static void		pci_enable_msi(device_t dev, uint64_t address,
107 			    uint16_t data);
108 static void		pci_enable_msix(device_t dev, u_int index,
109 			    uint64_t address, uint32_t data);
110 static void		pci_mask_msix(device_t dev, u_int index);
111 static void		pci_unmask_msix(device_t dev, u_int index);
112 static int		pci_msi_blacklisted(void);
113 static void		pci_resume_msi(device_t dev);
114 static void		pci_resume_msix(device_t dev);
115 
/*
 * Method table for the pci bus driver.  Each DEVMETHOD entry binds one
 * method of the device, bus, or PCI interface to the function in this
 * driver that implements it (or to a bus_generic_* default).  The table
 * is terminated by the all-zero entry.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: resource management */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* End-of-table marker */
	{ 0, 0 }
};
167 
168 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
169 
170 static devclass_t pci_devclass;
171 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
172 MODULE_VERSION(pci, 1);
173 
174 static char	*pci_vendordata;
175 static size_t	pci_vendordata_size;
176 
177 
/*
 * One entry in the table of per-device workarounds.  'type' selects the
 * kind of quirk; the meaning of arg1/arg2 depends on the type (for the
 * PCI_QUIRK_MAP_REG entries in pci_quirks[], arg1 holds the config-space
 * offset of the extra map register).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* One of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* Type-specific argument */
	int	arg2;	/* Type-specific argument */
};
186 
/*
 * Table of known device quirks, keyed by the combined device/vendor ID
 * (device ID in the upper 16 bits, vendor ID in the lower 16 bits).
 * The list ends with an all-zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* End-of-table marker */
	{ 0 }
};
221 
222 /* map register information */
223 #define	PCI_MAPMEM	0x01	/* memory map */
224 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
225 #define	PCI_MAPPORT	0x04	/* port map */
226 
227 struct devlist pci_devq;
228 uint32_t pci_generation;
229 uint32_t pci_numdevs = 0;
230 static int pcie_chipset, pcix_chipset;
231 
/*
 * sysctl vars
 *
 * Everything below hangs off the hw.pci sysctl node.  Each knob is a
 * static int that is also registered as a loader tunable of the same
 * name, so it can be preset before the sysctl exists.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes (read/write, default 1) */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* hw.pci.do_power_nodriver (read/write, default 0); levels 0-3 as described */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* hw.pci.do_power_resume (read/write, default 1) */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.enable_msi (read/write, default 1) */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix (read/write, default 1) */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/*
 * hw.pci.honor_msi_blacklist (default 1).  Exported read-only via sysctl,
 * so it can only be changed through the tunable.
 */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
272 
273 /* Find a device_t by bus/slot/function in domain 0 */
274 
275 device_t
276 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
277 {
278 
279 	return (pci_find_dbsf(0, bus, slot, func));
280 }
281 
282 /* Find a device_t by domain/bus/slot/function */
283 
284 device_t
285 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
286 {
287 	struct pci_devinfo *dinfo;
288 
289 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
290 		if ((dinfo->cfg.domain == domain) &&
291 		    (dinfo->cfg.bus == bus) &&
292 		    (dinfo->cfg.slot == slot) &&
293 		    (dinfo->cfg.func == func)) {
294 			return (dinfo->cfg.dev);
295 		}
296 	}
297 
298 	return (NULL);
299 }
300 
301 /* Find a device_t by vendor/device ID */
302 
303 device_t
304 pci_find_device(uint16_t vendor, uint16_t device)
305 {
306 	struct pci_devinfo *dinfo;
307 
308 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
309 		if ((dinfo->cfg.vendor == vendor) &&
310 		    (dinfo->cfg.device == device)) {
311 			return (dinfo->cfg.dev);
312 		}
313 	}
314 
315 	return (NULL);
316 }
317 
318 static int
319 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
320 {
321 	va_list ap;
322 	int retval;
323 
324 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
325 	    cfg->func);
326 	va_start(ap, fmt);
327 	retval += vprintf(fmt, ap);
328 	va_end(ap);
329 	return (retval);
330 }
331 
332 /* return base address of memory or port map */
333 
334 static pci_addr_t
335 pci_mapbase(uint64_t mapreg)
336 {
337 
338 	if (PCI_BAR_MEM(mapreg))
339 		return (mapreg & PCIM_BAR_MEM_BASE);
340 	else
341 		return (mapreg & PCIM_BAR_IO_BASE);
342 }
343 
344 /* return map type of memory or port map */
345 
346 static const char *
347 pci_maptype(uint64_t mapreg)
348 {
349 
350 	if (PCI_BAR_IO(mapreg))
351 		return ("I/O Port");
352 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
353 		return ("Prefetchable Memory");
354 	return ("Memory");
355 }
356 
357 /* return log2 of map size decoded for memory or port map */
358 
359 static int
360 pci_mapsize(uint64_t testval)
361 {
362 	int ln2size;
363 
364 	testval = pci_mapbase(testval);
365 	ln2size = 0;
366 	if (testval != 0) {
367 		while ((testval & 1) == 0)
368 		{
369 			ln2size++;
370 			testval >>= 1;
371 		}
372 	}
373 	return (ln2size);
374 }
375 
376 /* return log2 of address range supported by map register */
377 
378 static int
379 pci_maprange(uint64_t mapreg)
380 {
381 	int ln2range = 0;
382 
383 	if (PCI_BAR_IO(mapreg))
384 		ln2range = 32;
385 	else
386 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
387 		case PCIM_BAR_MEM_32:
388 			ln2range = 32;
389 			break;
390 		case PCIM_BAR_MEM_1MB:
391 			ln2range = 20;
392 			break;
393 		case PCIM_BAR_MEM_64:
394 			ln2range = 64;
395 			break;
396 		}
397 	return (ln2range);
398 }
399 
400 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
401 
402 static void
403 pci_fixancient(pcicfgregs *cfg)
404 {
405 	if (cfg->hdrtype != 0)
406 		return;
407 
408 	/* PCI to PCI bridges use header type 1 */
409 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
410 		cfg->hdrtype = 1;
411 }
412 
413 /* extract header type specific config data */
414 
415 static void
416 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
417 {
418 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
419 	switch (cfg->hdrtype) {
420 	case 0:
421 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
422 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
423 		cfg->nummaps	    = PCI_MAXMAPS_0;
424 		break;
425 	case 1:
426 		cfg->nummaps	    = PCI_MAXMAPS_1;
427 		break;
428 	case 2:
429 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
430 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
431 		cfg->nummaps	    = PCI_MAXMAPS_2;
432 		break;
433 	}
434 #undef REG
435 }
436 
437 /* read configuration header into pcicfgregs structure */
438 struct pci_devinfo *
439 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
440 {
441 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
442 	pcicfgregs *cfg = NULL;
443 	struct pci_devinfo *devlist_entry;
444 	struct devlist *devlist_head;
445 
446 	devlist_head = &pci_devq;
447 
448 	devlist_entry = NULL;
449 
450 	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
451 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
452 		if (devlist_entry == NULL)
453 			return (NULL);
454 
455 		cfg = &devlist_entry->cfg;
456 
457 		cfg->domain		= d;
458 		cfg->bus		= b;
459 		cfg->slot		= s;
460 		cfg->func		= f;
461 		cfg->vendor		= REG(PCIR_VENDOR, 2);
462 		cfg->device		= REG(PCIR_DEVICE, 2);
463 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
464 		cfg->statreg		= REG(PCIR_STATUS, 2);
465 		cfg->baseclass		= REG(PCIR_CLASS, 1);
466 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
467 		cfg->progif		= REG(PCIR_PROGIF, 1);
468 		cfg->revid		= REG(PCIR_REVID, 1);
469 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
470 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
471 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
472 		cfg->intpin		= REG(PCIR_INTPIN, 1);
473 		cfg->intline		= REG(PCIR_INTLINE, 1);
474 
475 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
476 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
477 
478 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
479 		cfg->hdrtype		&= ~PCIM_MFDEV;
480 
481 		pci_fixancient(cfg);
482 		pci_hdrtypedata(pcib, b, s, f, cfg);
483 
484 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
485 			pci_read_extcap(pcib, cfg);
486 
487 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
488 
489 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
490 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
491 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
492 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
493 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
494 
495 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
496 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
497 		devlist_entry->conf.pc_vendor = cfg->vendor;
498 		devlist_entry->conf.pc_device = cfg->device;
499 
500 		devlist_entry->conf.pc_class = cfg->baseclass;
501 		devlist_entry->conf.pc_subclass = cfg->subclass;
502 		devlist_entry->conf.pc_progif = cfg->progif;
503 		devlist_entry->conf.pc_revid = cfg->revid;
504 
505 		pci_numdevs++;
506 		pci_generation++;
507 	}
508 	return (devlist_entry);
509 #undef REG
510 }
511 
/*
 * Walk a function's capability list and cache what is found in cfg.
 *
 * pcib is the bridge used for config-space access; cfg identifies the
 * function (bus/slot/func) and receives the results.  The list head is
 * read from the capability pointer register appropriate to the header
 * type; header types other than 0, 1 and 2 are ignored.  For each entry
 * the capability ID selects what gets recorded: power management
 * register offsets, HyperTransport MSI mapping (i386/amd64 only), MSI
 * and MSI-X parameters, the VPD register offset, the subvendor IDs of a
 * type 1 header, and the file-global pcix_chipset / pcie_chipset flags.
 *
 * NOTE: the REG and WREG macros defined here are deliberately left
 * defined on exit; the VPD helpers that follow use them and
 * pci_read_vpd() undefines them.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* Pick the register that holds the head of the capability list. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.  A next pointer of 0 ends the list.
	 */
	while (nextptr != 0) {
		/*
		 * Sanity check.
		 * NOTE(review): nextptr comes from a 1-byte config read, so
		 * this presumably cannot trigger; confirm.
		 */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only the first power management capability is used. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				/*
				 * Record the data register only when the gap
				 * to the next capability is larger than its
				 * offset.
				 */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				/* Remember where the mapping lives and its state. */
				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is 2^(MMC field of the control word). */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			/* The table size field holds the message count minus one. */
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table register: BAR indicator bits plus table offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			/* PBA register uses the same layout. */
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* The VPD contents themselves are read lazily later. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
645 
646 /*
647  * PCI Vital Product Data
648  */
649 
650 #define	PCI_VPD_TIMEOUT		1000000
651 
652 static int
653 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
654 {
655 	int count = PCI_VPD_TIMEOUT;
656 
657 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
658 
659 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
660 
661 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
662 		if (--count < 0)
663 			return (ENXIO);
664 		DELAY(1);	/* limit looping */
665 	}
666 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
667 
668 	return (0);
669 }
670 
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * Stores 'data' in the VPD data register, then writes the 4-byte-aligned
 * offset 'reg' with bit 0x8000 set to the VPD address register and polls
 * until that bit reads back clear.  Returns 0 on success, or ENXIO if the
 * bit is still set after PCI_VPD_TIMEOUT polls.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
690 
691 #undef PCI_VPD_TIMEOUT
692 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while reading the device 32 bits at a time.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* function whose VPD is being read */
	uint32_t	val;		/* current word; low byte is the last one returned */
	int		bytesinval;	/* bytes of val not yet returned */
	int		off;		/* VPD offset of the next word to fetch */
	uint8_t		cksum;		/* running 8-bit sum of every byte returned */
};
701 
702 static int
703 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
704 {
705 	uint32_t reg;
706 	uint8_t byte;
707 
708 	if (vrs->bytesinval == 0) {
709 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
710 			return (ENXIO);
711 		vrs->val = le32toh(reg);
712 		vrs->off += 4;
713 		byte = vrs->val & 0xff;
714 		vrs->bytesinval = 3;
715 	} else {
716 		vrs->val = vrs->val >> 8;
717 		byte = vrs->val & 0xff;
718 		vrs->bytesinval--;
719 	}
720 
721 	vrs->cksum += byte;
722 	*data = byte;
723 	return (0);
724 }
725 
726 static void
727 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
728 {
729 	struct vpd_readstate vrs;
730 	int state;
731 	int name;
732 	int remain;
733 	int i;
734 	int alloc, off;		/* alloc/off for RO/W arrays */
735 	int cksumvalid;
736 	int dflen;
737 	uint8_t byte;
738 	uint8_t byte2;
739 
740 	/* init vpd reader */
741 	vrs.bytesinval = 0;
742 	vrs.off = 0;
743 	vrs.pcib = pcib;
744 	vrs.cfg = cfg;
745 	vrs.cksum = 0;
746 
747 	state = 0;
748 	name = remain = i = 0;	/* shut up stupid gcc */
749 	alloc = off = 0;	/* shut up stupid gcc */
750 	dflen = 0;		/* shut up stupid gcc */
751 	cksumvalid = -1;
752 	while (state >= 0) {
753 		if (vpd_nextbyte(&vrs, &byte)) {
754 			state = -2;
755 			break;
756 		}
757 #if 0
758 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
759 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
760 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
761 #endif
762 		switch (state) {
763 		case 0:		/* item name */
764 			if (byte & 0x80) {
765 				if (vpd_nextbyte(&vrs, &byte2)) {
766 					state = -2;
767 					break;
768 				}
769 				remain = byte2;
770 				if (vpd_nextbyte(&vrs, &byte2)) {
771 					state = -2;
772 					break;
773 				}
774 				remain |= byte2 << 8;
775 				if (remain > (0x7f*4 - vrs.off)) {
776 					state = -1;
777 					printf(
778 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
779 					    cfg->domain, cfg->bus, cfg->slot,
780 					    cfg->func, remain);
781 				}
782 				name = byte & 0x7f;
783 			} else {
784 				remain = byte & 0x7;
785 				name = (byte >> 3) & 0xf;
786 			}
787 			switch (name) {
788 			case 0x2:	/* String */
789 				cfg->vpd.vpd_ident = malloc(remain + 1,
790 				    M_DEVBUF, M_WAITOK);
791 				i = 0;
792 				state = 1;
793 				break;
794 			case 0xf:	/* End */
795 				state = -1;
796 				break;
797 			case 0x10:	/* VPD-R */
798 				alloc = 8;
799 				off = 0;
800 				cfg->vpd.vpd_ros = malloc(alloc *
801 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
802 				    M_WAITOK | M_ZERO);
803 				state = 2;
804 				break;
805 			case 0x11:	/* VPD-W */
806 				alloc = 8;
807 				off = 0;
808 				cfg->vpd.vpd_w = malloc(alloc *
809 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
810 				    M_WAITOK | M_ZERO);
811 				state = 5;
812 				break;
813 			default:	/* Invalid data, abort */
814 				state = -1;
815 				break;
816 			}
817 			break;
818 
819 		case 1:	/* Identifier String */
820 			cfg->vpd.vpd_ident[i++] = byte;
821 			remain--;
822 			if (remain == 0)  {
823 				cfg->vpd.vpd_ident[i] = '\0';
824 				state = 0;
825 			}
826 			break;
827 
828 		case 2:	/* VPD-R Keyword Header */
829 			if (off == alloc) {
830 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
831 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
832 				    M_DEVBUF, M_WAITOK | M_ZERO);
833 			}
834 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
835 			if (vpd_nextbyte(&vrs, &byte2)) {
836 				state = -2;
837 				break;
838 			}
839 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
840 			if (vpd_nextbyte(&vrs, &byte2)) {
841 				state = -2;
842 				break;
843 			}
844 			dflen = byte2;
845 			if (dflen == 0 &&
846 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
847 			    2) == 0) {
848 				/*
849 				 * if this happens, we can't trust the rest
850 				 * of the VPD.
851 				 */
852 				printf(
853 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
854 				    cfg->domain, cfg->bus, cfg->slot,
855 				    cfg->func, dflen);
856 				cksumvalid = 0;
857 				state = -1;
858 				break;
859 			} else if (dflen == 0) {
860 				cfg->vpd.vpd_ros[off].value = malloc(1 *
861 				    sizeof(*cfg->vpd.vpd_ros[off].value),
862 				    M_DEVBUF, M_WAITOK);
863 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
864 			} else
865 				cfg->vpd.vpd_ros[off].value = malloc(
866 				    (dflen + 1) *
867 				    sizeof(*cfg->vpd.vpd_ros[off].value),
868 				    M_DEVBUF, M_WAITOK);
869 			remain -= 3;
870 			i = 0;
871 			/* keep in sync w/ state 3's transistions */
872 			if (dflen == 0 && remain == 0)
873 				state = 0;
874 			else if (dflen == 0)
875 				state = 2;
876 			else
877 				state = 3;
878 			break;
879 
880 		case 3:	/* VPD-R Keyword Value */
881 			cfg->vpd.vpd_ros[off].value[i++] = byte;
882 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
883 			    "RV", 2) == 0 && cksumvalid == -1) {
884 				if (vrs.cksum == 0)
885 					cksumvalid = 1;
886 				else {
887 					if (bootverbose)
888 						printf(
889 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
890 						    cfg->domain, cfg->bus,
891 						    cfg->slot, cfg->func,
892 						    vrs.cksum);
893 					cksumvalid = 0;
894 					state = -1;
895 					break;
896 				}
897 			}
898 			dflen--;
899 			remain--;
900 			/* keep in sync w/ state 2's transistions */
901 			if (dflen == 0)
902 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
903 			if (dflen == 0 && remain == 0) {
904 				cfg->vpd.vpd_rocnt = off;
905 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
906 				    off * sizeof(*cfg->vpd.vpd_ros),
907 				    M_DEVBUF, M_WAITOK | M_ZERO);
908 				state = 0;
909 			} else if (dflen == 0)
910 				state = 2;
911 			break;
912 
913 		case 4:
914 			remain--;
915 			if (remain == 0)
916 				state = 0;
917 			break;
918 
919 		case 5:	/* VPD-W Keyword Header */
920 			if (off == alloc) {
921 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
922 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
923 				    M_DEVBUF, M_WAITOK | M_ZERO);
924 			}
925 			cfg->vpd.vpd_w[off].keyword[0] = byte;
926 			if (vpd_nextbyte(&vrs, &byte2)) {
927 				state = -2;
928 				break;
929 			}
930 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
931 			if (vpd_nextbyte(&vrs, &byte2)) {
932 				state = -2;
933 				break;
934 			}
935 			cfg->vpd.vpd_w[off].len = dflen = byte2;
936 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
937 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
938 			    sizeof(*cfg->vpd.vpd_w[off].value),
939 			    M_DEVBUF, M_WAITOK);
940 			remain -= 3;
941 			i = 0;
942 			/* keep in sync w/ state 6's transistions */
943 			if (dflen == 0 && remain == 0)
944 				state = 0;
945 			else if (dflen == 0)
946 				state = 5;
947 			else
948 				state = 6;
949 			break;
950 
951 		case 6:	/* VPD-W Keyword Value */
952 			cfg->vpd.vpd_w[off].value[i++] = byte;
953 			dflen--;
954 			remain--;
955 			/* keep in sync w/ state 5's transistions */
956 			if (dflen == 0)
957 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
958 			if (dflen == 0 && remain == 0) {
959 				cfg->vpd.vpd_wcnt = off;
960 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
961 				    off * sizeof(*cfg->vpd.vpd_w),
962 				    M_DEVBUF, M_WAITOK | M_ZERO);
963 				state = 0;
964 			} else if (dflen == 0)
965 				state = 5;
966 			break;
967 
968 		default:
969 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
970 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
971 			    state);
972 			state = -1;
973 			break;
974 		}
975 	}
976 
977 	if (cksumvalid == 0 || state < -1) {
978 		/* read-only data bad, clean up */
979 		if (cfg->vpd.vpd_ros != NULL) {
980 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
981 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
982 			free(cfg->vpd.vpd_ros, M_DEVBUF);
983 			cfg->vpd.vpd_ros = NULL;
984 		}
985 	}
986 	if (state < -1) {
987 		/* I/O error, clean up */
988 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
989 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
990 		if (cfg->vpd.vpd_ident != NULL) {
991 			free(cfg->vpd.vpd_ident, M_DEVBUF);
992 			cfg->vpd.vpd_ident = NULL;
993 		}
994 		if (cfg->vpd.vpd_w != NULL) {
995 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
996 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
997 			free(cfg->vpd.vpd_w, M_DEVBUF);
998 			cfg->vpd.vpd_w = NULL;
999 		}
1000 	}
1001 	cfg->vpd.vpd_cached = 1;
1002 #undef REG
1003 #undef WREG
1004 }
1005 
1006 int
1007 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1008 {
1009 	struct pci_devinfo *dinfo = device_get_ivars(child);
1010 	pcicfgregs *cfg = &dinfo->cfg;
1011 
1012 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1013 		pci_read_vpd(device_get_parent(dev), cfg);
1014 
1015 	*identptr = cfg->vpd.vpd_ident;
1016 
1017 	if (*identptr == NULL)
1018 		return (ENXIO);
1019 
1020 	return (0);
1021 }
1022 
1023 int
1024 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1025 	const char **vptr)
1026 {
1027 	struct pci_devinfo *dinfo = device_get_ivars(child);
1028 	pcicfgregs *cfg = &dinfo->cfg;
1029 	int i;
1030 
1031 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1032 		pci_read_vpd(device_get_parent(dev), cfg);
1033 
1034 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1035 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1036 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1037 			*vptr = cfg->vpd.vpd_ros[i].value;
1038 		}
1039 
1040 	if (i != cfg->vpd.vpd_rocnt)
1041 		return (0);
1042 
1043 	*vptr = NULL;
1044 	return (ENXIO);
1045 }
1046 
1047 /*
1048  * Find the requested extended capability and return the offset in
1049  * configuration space via the pointer provided. The function returns
1050  * 0 on success and error code otherwise.
1051  */
1052 int
1053 pci_find_extcap_method(device_t dev, device_t child, int capability,
1054     int *capreg)
1055 {
1056 	struct pci_devinfo *dinfo = device_get_ivars(child);
1057 	pcicfgregs *cfg = &dinfo->cfg;
1058 	u_int32_t status;
1059 	u_int8_t ptr;
1060 
1061 	/*
1062 	 * Check the CAP_LIST bit of the PCI status register first.
1063 	 */
1064 	status = pci_read_config(child, PCIR_STATUS, 2);
1065 	if (!(status & PCIM_STATUS_CAPPRESENT))
1066 		return (ENXIO);
1067 
1068 	/*
1069 	 * Determine the start pointer of the capabilities list.
1070 	 */
1071 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1072 	case 0:
1073 	case 1:
1074 		ptr = PCIR_CAP_PTR;
1075 		break;
1076 	case 2:
1077 		ptr = PCIR_CAP_PTR_2;
1078 		break;
1079 	default:
1080 		/* XXX: panic? */
1081 		return (ENXIO);		/* no extended capabilities support */
1082 	}
1083 	ptr = pci_read_config(child, ptr, 1);
1084 
1085 	/*
1086 	 * Traverse the capabilities list.
1087 	 */
1088 	while (ptr != 0) {
1089 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1090 			if (capreg != NULL)
1091 				*capreg = ptr;
1092 			return (0);
1093 		}
1094 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1095 	}
1096 
1097 	return (ENOENT);
1098 }
1099 
1100 /*
1101  * Support for MSI-X message interrupts.
1102  */
1103 void
1104 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1105 {
1106 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1107 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1108 	uint32_t offset;
1109 
1110 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1111 	offset = msix->msix_table_offset + index * 16;
1112 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1113 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1114 	bus_write_4(msix->msix_table_res, offset + 8, data);
1115 
1116 	/* Enable MSI -> HT mapping. */
1117 	pci_ht_map_msi(dev, address);
1118 }
1119 
1120 void
1121 pci_mask_msix(device_t dev, u_int index)
1122 {
1123 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125 	uint32_t offset, val;
1126 
1127 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1128 	offset = msix->msix_table_offset + index * 16 + 12;
1129 	val = bus_read_4(msix->msix_table_res, offset);
1130 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1131 		val |= PCIM_MSIX_VCTRL_MASK;
1132 		bus_write_4(msix->msix_table_res, offset, val);
1133 	}
1134 }
1135 
1136 void
1137 pci_unmask_msix(device_t dev, u_int index)
1138 {
1139 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1140 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1141 	uint32_t offset, val;
1142 
1143 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1144 	offset = msix->msix_table_offset + index * 16 + 12;
1145 	val = bus_read_4(msix->msix_table_res, offset);
1146 	if (val & PCIM_MSIX_VCTRL_MASK) {
1147 		val &= ~PCIM_MSIX_VCTRL_MASK;
1148 		bus_write_4(msix->msix_table_res, offset, val);
1149 	}
1150 }
1151 
1152 int
1153 pci_pending_msix(device_t dev, u_int index)
1154 {
1155 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1156 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1157 	uint32_t offset, bit;
1158 
1159 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1160 	offset = msix->msix_pba_offset + (index / 32) * 4;
1161 	bit = 1 << index % 32;
1162 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1163 }
1164 
1165 /*
1166  * Restore MSI-X registers and table during resume.  If MSI-X is
1167  * enabled then walk the virtual table to restore the actual MSI-X
1168  * table.
1169  */
1170 static void
1171 pci_resume_msix(device_t dev)
1172 {
1173 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1174 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1175 	struct msix_table_entry *mte;
1176 	struct msix_vector *mv;
1177 	int i;
1178 
1179 	if (msix->msix_alloc > 0) {
1180 		/* First, mask all vectors. */
1181 		for (i = 0; i < msix->msix_msgnum; i++)
1182 			pci_mask_msix(dev, i);
1183 
1184 		/* Second, program any messages with at least one handler. */
1185 		for (i = 0; i < msix->msix_table_len; i++) {
1186 			mte = &msix->msix_table[i];
1187 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1188 				continue;
1189 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1190 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1191 			pci_unmask_msix(dev, i);
1192 		}
1193 	}
1194 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1195 	    msix->msix_ctrl, 2);
1196 }
1197 
1198 /*
1199  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1200  * returned in *count.  After this function returns, each message will be
1201  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1202  */
1203 int
1204 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1205 {
1206 	struct pci_devinfo *dinfo = device_get_ivars(child);
1207 	pcicfgregs *cfg = &dinfo->cfg;
1208 	struct resource_list_entry *rle;
1209 	int actual, error, i, irq, max;
1210 
1211 	/* Don't let count == 0 get us into trouble. */
1212 	if (*count == 0)
1213 		return (EINVAL);
1214 
1215 	/* If rid 0 is allocated, then fail. */
1216 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1217 	if (rle != NULL && rle->res != NULL)
1218 		return (ENXIO);
1219 
1220 	/* Already have allocated messages? */
1221 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1222 		return (ENXIO);
1223 
1224 	/* If MSI is blacklisted for this system, fail. */
1225 	if (pci_msi_blacklisted())
1226 		return (ENXIO);
1227 
1228 	/* MSI-X capability present? */
1229 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1230 		return (ENODEV);
1231 
1232 	/* Make sure the appropriate BARs are mapped. */
1233 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1234 	    cfg->msix.msix_table_bar);
1235 	if (rle == NULL || rle->res == NULL ||
1236 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1237 		return (ENXIO);
1238 	cfg->msix.msix_table_res = rle->res;
1239 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1240 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1241 		    cfg->msix.msix_pba_bar);
1242 		if (rle == NULL || rle->res == NULL ||
1243 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1244 			return (ENXIO);
1245 	}
1246 	cfg->msix.msix_pba_res = rle->res;
1247 
1248 	if (bootverbose)
1249 		device_printf(child,
1250 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1251 		    *count, cfg->msix.msix_msgnum);
1252 	max = min(*count, cfg->msix.msix_msgnum);
1253 	for (i = 0; i < max; i++) {
1254 		/* Allocate a message. */
1255 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1256 		if (error)
1257 			break;
1258 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1259 		    irq, 1);
1260 	}
1261 	actual = i;
1262 
1263 	if (bootverbose) {
1264 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1265 		if (actual == 1)
1266 			device_printf(child, "using IRQ %lu for MSI-X\n",
1267 			    rle->start);
1268 		else {
1269 			int run;
1270 
1271 			/*
1272 			 * Be fancy and try to print contiguous runs of
1273 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1274 			 * 'run' is true if we are in a range.
1275 			 */
1276 			device_printf(child, "using IRQs %lu", rle->start);
1277 			irq = rle->start;
1278 			run = 0;
1279 			for (i = 1; i < actual; i++) {
1280 				rle = resource_list_find(&dinfo->resources,
1281 				    SYS_RES_IRQ, i + 1);
1282 
1283 				/* Still in a run? */
1284 				if (rle->start == irq + 1) {
1285 					run = 1;
1286 					irq++;
1287 					continue;
1288 				}
1289 
1290 				/* Finish previous range. */
1291 				if (run) {
1292 					printf("-%d", irq);
1293 					run = 0;
1294 				}
1295 
1296 				/* Start new range. */
1297 				printf(",%lu", rle->start);
1298 				irq = rle->start;
1299 			}
1300 
1301 			/* Unfinished range? */
1302 			if (run)
1303 				printf("-%d", irq);
1304 			printf(" for MSI-X\n");
1305 		}
1306 	}
1307 
1308 	/* Mask all vectors. */
1309 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1310 		pci_mask_msix(child, i);
1311 
1312 	/* Allocate and initialize vector data and virtual table. */
1313 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1314 	    M_DEVBUF, M_WAITOK | M_ZERO);
1315 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1316 	    M_DEVBUF, M_WAITOK | M_ZERO);
1317 	for (i = 0; i < actual; i++) {
1318 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1319 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1320 		cfg->msix.msix_table[i].mte_vector = i + 1;
1321 	}
1322 
1323 	/* Update control register to enable MSI-X. */
1324 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1325 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1326 	    cfg->msix.msix_ctrl, 2);
1327 
1328 	/* Update counts of alloc'd messages. */
1329 	cfg->msix.msix_alloc = actual;
1330 	cfg->msix.msix_table_len = actual;
1331 	*count = actual;
1332 	return (0);
1333 }
1334 
1335 /*
1336  * By default, pci_alloc_msix() will assign the allocated IRQ
1337  * resources consecutively to the first N messages in the MSI-X table.
1338  * However, device drivers may want to use different layouts if they
1339  * either receive fewer messages than they asked for, or they wish to
1340  * populate the MSI-X table sparsely.  This method allows the driver
1341  * to specify what layout it wants.  It must be called after a
1342  * successful pci_alloc_msix() but before any of the associated
1343  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1344  *
1345  * The 'vectors' array contains 'count' message vectors.  The array
1346  * maps directly to the MSI-X table in that index 0 in the array
1347  * specifies the vector for the first message in the MSI-X table, etc.
1348  * The vector value in each array index can either be 0 to indicate
1349  * that no vector should be assigned to a message slot, or it can be a
1350  * number from 1 to N (where N is the count returned from a
1351  * succcessful call to pci_alloc_msix()) to indicate which message
1352  * vector (IRQ) to be used for the corresponding message.
1353  *
1354  * On successful return, each message with a non-zero vector will have
1355  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1356  * 1.  Additionally, if any of the IRQs allocated via the previous
1357  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1358  * will be freed back to the system automatically.
1359  *
1360  * For example, suppose a driver has a MSI-X table with 6 messages and
1361  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1362  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1363  * C.  After the call to pci_alloc_msix(), the device will be setup to
1364  * have an MSI-X table of ABC--- (where - means no vector assigned).
1365  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1366  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1367  * be freed back to the system.  This device will also have valid
1368  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1369  *
1370  * In any case, the SYS_RES_IRQ rid X will always map to the message
1371  * at MSI-X table index X - 1 and will only be valid if a vector is
1372  * assigned to that table entry.
1373  */
1374 int
1375 pci_remap_msix_method(device_t dev, device_t child, int count,
1376     const u_int *vectors)
1377 {
1378 	struct pci_devinfo *dinfo = device_get_ivars(child);
1379 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1380 	struct resource_list_entry *rle;
1381 	int i, irq, j, *used;
1382 
1383 	/*
1384 	 * Have to have at least one message in the table but the
1385 	 * table can't be bigger than the actual MSI-X table in the
1386 	 * device.
1387 	 */
1388 	if (count == 0 || count > msix->msix_msgnum)
1389 		return (EINVAL);
1390 
1391 	/* Sanity check the vectors. */
1392 	for (i = 0; i < count; i++)
1393 		if (vectors[i] > msix->msix_alloc)
1394 			return (EINVAL);
1395 
1396 	/*
1397 	 * Make sure there aren't any holes in the vectors to be used.
1398 	 * It's a big pain to support it, and it doesn't really make
1399 	 * sense anyway.  Also, at least one vector must be used.
1400 	 */
1401 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1402 	    M_ZERO);
1403 	for (i = 0; i < count; i++)
1404 		if (vectors[i] != 0)
1405 			used[vectors[i] - 1] = 1;
1406 	for (i = 0; i < msix->msix_alloc - 1; i++)
1407 		if (used[i] == 0 && used[i + 1] == 1) {
1408 			free(used, M_DEVBUF);
1409 			return (EINVAL);
1410 		}
1411 	if (used[0] != 1) {
1412 		free(used, M_DEVBUF);
1413 		return (EINVAL);
1414 	}
1415 
1416 	/* Make sure none of the resources are allocated. */
1417 	for (i = 0; i < msix->msix_table_len; i++) {
1418 		if (msix->msix_table[i].mte_vector == 0)
1419 			continue;
1420 		if (msix->msix_table[i].mte_handlers > 0)
1421 			return (EBUSY);
1422 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1423 		KASSERT(rle != NULL, ("missing resource"));
1424 		if (rle->res != NULL)
1425 			return (EBUSY);
1426 	}
1427 
1428 	/* Free the existing resource list entries. */
1429 	for (i = 0; i < msix->msix_table_len; i++) {
1430 		if (msix->msix_table[i].mte_vector == 0)
1431 			continue;
1432 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1433 	}
1434 
1435 	/*
1436 	 * Build the new virtual table keeping track of which vectors are
1437 	 * used.
1438 	 */
1439 	free(msix->msix_table, M_DEVBUF);
1440 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1441 	    M_DEVBUF, M_WAITOK | M_ZERO);
1442 	for (i = 0; i < count; i++)
1443 		msix->msix_table[i].mte_vector = vectors[i];
1444 	msix->msix_table_len = count;
1445 
1446 	/* Free any unused IRQs and resize the vectors array if necessary. */
1447 	j = msix->msix_alloc - 1;
1448 	if (used[j] == 0) {
1449 		struct msix_vector *vec;
1450 
1451 		while (used[j] == 0) {
1452 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1453 			    msix->msix_vectors[j].mv_irq);
1454 			j--;
1455 		}
1456 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1457 		    M_WAITOK);
1458 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1459 		    (j + 1));
1460 		free(msix->msix_vectors, M_DEVBUF);
1461 		msix->msix_vectors = vec;
1462 		msix->msix_alloc = j + 1;
1463 	}
1464 	free(used, M_DEVBUF);
1465 
1466 	/* Map the IRQs onto the rids. */
1467 	for (i = 0; i < count; i++) {
1468 		if (vectors[i] == 0)
1469 			continue;
1470 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1471 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1472 		    irq, 1);
1473 	}
1474 
1475 	if (bootverbose) {
1476 		device_printf(child, "Remapped MSI-X IRQs as: ");
1477 		for (i = 0; i < count; i++) {
1478 			if (i != 0)
1479 				printf(", ");
1480 			if (vectors[i] == 0)
1481 				printf("---");
1482 			else
1483 				printf("%d",
1484 				    msix->msix_vectors[vectors[i]].mv_irq);
1485 		}
1486 		printf("\n");
1487 	}
1488 
1489 	return (0);
1490 }
1491 
1492 static int
1493 pci_release_msix(device_t dev, device_t child)
1494 {
1495 	struct pci_devinfo *dinfo = device_get_ivars(child);
1496 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1497 	struct resource_list_entry *rle;
1498 	int i;
1499 
1500 	/* Do we have any messages to release? */
1501 	if (msix->msix_alloc == 0)
1502 		return (ENODEV);
1503 
1504 	/* Make sure none of the resources are allocated. */
1505 	for (i = 0; i < msix->msix_table_len; i++) {
1506 		if (msix->msix_table[i].mte_vector == 0)
1507 			continue;
1508 		if (msix->msix_table[i].mte_handlers > 0)
1509 			return (EBUSY);
1510 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1511 		KASSERT(rle != NULL, ("missing resource"));
1512 		if (rle->res != NULL)
1513 			return (EBUSY);
1514 	}
1515 
1516 	/* Update control register to disable MSI-X. */
1517 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1518 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1519 	    msix->msix_ctrl, 2);
1520 
1521 	/* Free the resource list entries. */
1522 	for (i = 0; i < msix->msix_table_len; i++) {
1523 		if (msix->msix_table[i].mte_vector == 0)
1524 			continue;
1525 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1526 	}
1527 	free(msix->msix_table, M_DEVBUF);
1528 	msix->msix_table_len = 0;
1529 
1530 	/* Release the IRQs. */
1531 	for (i = 0; i < msix->msix_alloc; i++)
1532 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1533 		    msix->msix_vectors[i].mv_irq);
1534 	free(msix->msix_vectors, M_DEVBUF);
1535 	msix->msix_alloc = 0;
1536 	return (0);
1537 }
1538 
1539 /*
1540  * Return the max supported MSI-X messages this device supports.
1541  * Basically, assuming the MD code can alloc messages, this function
1542  * should return the maximum value that pci_alloc_msix() can return.
1543  * Thus, it is subject to the tunables, etc.
1544  */
1545 int
1546 pci_msix_count_method(device_t dev, device_t child)
1547 {
1548 	struct pci_devinfo *dinfo = device_get_ivars(child);
1549 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1550 
1551 	if (pci_do_msix && msix->msix_location != 0)
1552 		return (msix->msix_msgnum);
1553 	return (0);
1554 }
1555 
1556 /*
1557  * HyperTransport MSI mapping control
1558  */
1559 void
1560 pci_ht_map_msi(device_t dev, uint64_t addr)
1561 {
1562 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1563 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1564 
1565 	if (!ht->ht_msimap)
1566 		return;
1567 
1568 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1569 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1570 		/* Enable MSI -> HT mapping. */
1571 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1572 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1573 		    ht->ht_msictrl, 2);
1574 	}
1575 
1576 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1577 		/* Disable MSI -> HT mapping. */
1578 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1579 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1580 		    ht->ht_msictrl, 2);
1581 	}
1582 }
1583 
1584 /*
1585  * Support for MSI message signalled interrupts.
1586  */
1587 void
1588 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1589 {
1590 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1591 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1592 
1593 	/* Write data and address values. */
1594 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1595 	    address & 0xffffffff, 4);
1596 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1597 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1598 		    address >> 32, 4);
1599 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1600 		    data, 2);
1601 	} else
1602 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1603 		    2);
1604 
1605 	/* Enable MSI in the control register. */
1606 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1607 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1608 	    2);
1609 
1610 	/* Enable MSI -> HT mapping. */
1611 	pci_ht_map_msi(dev, address);
1612 }
1613 
1614 void
1615 pci_disable_msi(device_t dev)
1616 {
1617 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1618 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1619 
1620 	/* Disable MSI -> HT mapping. */
1621 	pci_ht_map_msi(dev, 0);
1622 
1623 	/* Disable MSI in the control register. */
1624 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1625 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1626 	    2);
1627 }
1628 
1629 /*
1630  * Restore MSI registers during resume.  If MSI is enabled then
1631  * restore the data and address registers in addition to the control
1632  * register.
1633  */
1634 static void
1635 pci_resume_msi(device_t dev)
1636 {
1637 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1638 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1639 	uint64_t address;
1640 	uint16_t data;
1641 
1642 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1643 		address = msi->msi_addr;
1644 		data = msi->msi_data;
1645 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1646 		    address & 0xffffffff, 4);
1647 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1648 			pci_write_config(dev, msi->msi_location +
1649 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1650 			pci_write_config(dev, msi->msi_location +
1651 			    PCIR_MSI_DATA_64BIT, data, 2);
1652 		} else
1653 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1654 			    data, 2);
1655 	}
1656 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1657 	    2);
1658 }
1659 
1660 int
1661 pci_remap_msi_irq(device_t dev, u_int irq)
1662 {
1663 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1664 	pcicfgregs *cfg = &dinfo->cfg;
1665 	struct resource_list_entry *rle;
1666 	struct msix_table_entry *mte;
1667 	struct msix_vector *mv;
1668 	device_t bus;
1669 	uint64_t addr;
1670 	uint32_t data;
1671 	int error, i, j;
1672 
1673 	bus = device_get_parent(dev);
1674 
1675 	/*
1676 	 * Handle MSI first.  We try to find this IRQ among our list
1677 	 * of MSI IRQs.  If we find it, we request updated address and
1678 	 * data registers and apply the results.
1679 	 */
1680 	if (cfg->msi.msi_alloc > 0) {
1681 
1682 		/* If we don't have any active handlers, nothing to do. */
1683 		if (cfg->msi.msi_handlers == 0)
1684 			return (0);
1685 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1686 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1687 			    i + 1);
1688 			if (rle->start == irq) {
1689 				error = PCIB_MAP_MSI(device_get_parent(bus),
1690 				    dev, irq, &addr, &data);
1691 				if (error)
1692 					return (error);
1693 				pci_disable_msi(dev);
1694 				dinfo->cfg.msi.msi_addr = addr;
1695 				dinfo->cfg.msi.msi_data = data;
1696 				pci_enable_msi(dev, addr, data);
1697 				return (0);
1698 			}
1699 		}
1700 		return (ENOENT);
1701 	}
1702 
1703 	/*
1704 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1705 	 * we request the updated mapping info.  If that works, we go
1706 	 * through all the slots that use this IRQ and update them.
1707 	 */
1708 	if (cfg->msix.msix_alloc > 0) {
1709 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1710 			mv = &cfg->msix.msix_vectors[i];
1711 			if (mv->mv_irq == irq) {
1712 				error = PCIB_MAP_MSI(device_get_parent(bus),
1713 				    dev, irq, &addr, &data);
1714 				if (error)
1715 					return (error);
1716 				mv->mv_address = addr;
1717 				mv->mv_data = data;
1718 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1719 					mte = &cfg->msix.msix_table[j];
1720 					if (mte->mte_vector != i + 1)
1721 						continue;
1722 					if (mte->mte_handlers == 0)
1723 						continue;
1724 					pci_mask_msix(dev, j);
1725 					pci_enable_msix(dev, j, addr, data);
1726 					pci_unmask_msix(dev, j);
1727 				}
1728 			}
1729 		}
1730 		return (ENOENT);
1731 	}
1732 
1733 	return (ENOENT);
1734 }
1735 
1736 /*
1737  * Returns true if the specified device is blacklisted because MSI
1738  * doesn't work.
1739  */
1740 int
1741 pci_msi_device_blacklisted(device_t dev)
1742 {
1743 	struct pci_quirk *q;
1744 
1745 	if (!pci_honor_msi_blacklist)
1746 		return (0);
1747 
1748 	for (q = &pci_quirks[0]; q->devid; q++) {
1749 		if (q->devid == pci_get_devid(dev) &&
1750 		    q->type == PCI_QUIRK_DISABLE_MSI)
1751 			return (1);
1752 	}
1753 	return (0);
1754 }
1755 
1756 /*
1757  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1758  * we just check for blacklisted chipsets as represented by the
1759  * host-PCI bridge at device 0:0:0.  In the future, it may become
1760  * necessary to check other system attributes, such as the kenv values
1761  * that give the motherboard manufacturer and model number.
1762  */
1763 static int
1764 pci_msi_blacklisted(void)
1765 {
1766 	device_t dev;
1767 
1768 	if (!pci_honor_msi_blacklist)
1769 		return (0);
1770 
1771 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1772 	if (!(pcie_chipset || pcix_chipset))
1773 		return (1);
1774 
1775 	dev = pci_find_bsf(0, 0, 0);
1776 	if (dev != NULL)
1777 		return (pci_msi_device_blacklisted(dev));
1778 	return (0);
1779 }
1780 
1781 /*
1782  * Attempt to allocate *count MSI messages.  The actual number allocated is
1783  * returned in *count.  After this function returns, each message will be
1784  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1785  */
1786 int
1787 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1788 {
1789 	struct pci_devinfo *dinfo = device_get_ivars(child);
1790 	pcicfgregs *cfg = &dinfo->cfg;
1791 	struct resource_list_entry *rle;
1792 	int actual, error, i, irqs[32];
1793 	uint16_t ctrl;
1794 
1795 	/* Don't let count == 0 get us into trouble. */
1796 	if (*count == 0)
1797 		return (EINVAL);
1798 
1799 	/* If rid 0 is allocated, then fail. */
1800 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1801 	if (rle != NULL && rle->res != NULL)
1802 		return (ENXIO);
1803 
1804 	/* Already have allocated messages? */
1805 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1806 		return (ENXIO);
1807 
1808 	/* If MSI is blacklisted for this system, fail. */
1809 	if (pci_msi_blacklisted())
1810 		return (ENXIO);
1811 
1812 	/* MSI capability present? */
1813 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1814 		return (ENODEV);
1815 
1816 	if (bootverbose)
1817 		device_printf(child,
1818 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1819 		    *count, cfg->msi.msi_msgnum);
1820 
1821 	/* Don't ask for more than the device supports. */
1822 	actual = min(*count, cfg->msi.msi_msgnum);
1823 
1824 	/* Don't ask for more than 32 messages. */
1825 	actual = min(actual, 32);
1826 
1827 	/* MSI requires power of 2 number of messages. */
1828 	if (!powerof2(actual))
1829 		return (EINVAL);
1830 
1831 	for (;;) {
1832 		/* Try to allocate N messages. */
1833 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1834 		    cfg->msi.msi_msgnum, irqs);
1835 		if (error == 0)
1836 			break;
1837 		if (actual == 1)
1838 			return (error);
1839 
1840 		/* Try N / 2. */
1841 		actual >>= 1;
1842 	}
1843 
1844 	/*
1845 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1846 	 * resources in the irqs[] array, so add new resources
1847 	 * starting at rid 1.
1848 	 */
1849 	for (i = 0; i < actual; i++)
1850 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1851 		    irqs[i], irqs[i], 1);
1852 
1853 	if (bootverbose) {
1854 		if (actual == 1)
1855 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1856 		else {
1857 			int run;
1858 
1859 			/*
1860 			 * Be fancy and try to print contiguous runs
1861 			 * of IRQ values as ranges.  'run' is true if
1862 			 * we are in a range.
1863 			 */
1864 			device_printf(child, "using IRQs %d", irqs[0]);
1865 			run = 0;
1866 			for (i = 1; i < actual; i++) {
1867 
1868 				/* Still in a run? */
1869 				if (irqs[i] == irqs[i - 1] + 1) {
1870 					run = 1;
1871 					continue;
1872 				}
1873 
1874 				/* Finish previous range. */
1875 				if (run) {
1876 					printf("-%d", irqs[i - 1]);
1877 					run = 0;
1878 				}
1879 
1880 				/* Start new range. */
1881 				printf(",%d", irqs[i]);
1882 			}
1883 
1884 			/* Unfinished range? */
1885 			if (run)
1886 				printf("-%d", irqs[actual - 1]);
1887 			printf(" for MSI\n");
1888 		}
1889 	}
1890 
1891 	/* Update control register with actual count. */
1892 	ctrl = cfg->msi.msi_ctrl;
1893 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1894 	ctrl |= (ffs(actual) - 1) << 4;
1895 	cfg->msi.msi_ctrl = ctrl;
1896 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1897 
1898 	/* Update counts of alloc'd messages. */
1899 	cfg->msi.msi_alloc = actual;
1900 	cfg->msi.msi_handlers = 0;
1901 	*count = actual;
1902 	return (0);
1903 }
1904 
1905 /* Release the MSI messages associated with this device. */
1906 int
1907 pci_release_msi_method(device_t dev, device_t child)
1908 {
1909 	struct pci_devinfo *dinfo = device_get_ivars(child);
1910 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1911 	struct resource_list_entry *rle;
1912 	int error, i, irqs[32];
1913 
1914 	/* Try MSI-X first. */
1915 	error = pci_release_msix(dev, child);
1916 	if (error != ENODEV)
1917 		return (error);
1918 
1919 	/* Do we have any messages to release? */
1920 	if (msi->msi_alloc == 0)
1921 		return (ENODEV);
1922 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1923 
1924 	/* Make sure none of the resources are allocated. */
1925 	if (msi->msi_handlers > 0)
1926 		return (EBUSY);
1927 	for (i = 0; i < msi->msi_alloc; i++) {
1928 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1929 		KASSERT(rle != NULL, ("missing MSI resource"));
1930 		if (rle->res != NULL)
1931 			return (EBUSY);
1932 		irqs[i] = rle->start;
1933 	}
1934 
1935 	/* Update control register with 0 count. */
1936 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1937 	    ("%s: MSI still enabled", __func__));
1938 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1939 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1940 	    msi->msi_ctrl, 2);
1941 
1942 	/* Release the messages. */
1943 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1944 	for (i = 0; i < msi->msi_alloc; i++)
1945 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1946 
1947 	/* Update alloc count. */
1948 	msi->msi_alloc = 0;
1949 	msi->msi_addr = 0;
1950 	msi->msi_data = 0;
1951 	return (0);
1952 }
1953 
1954 /*
1955  * Return the max supported MSI messages this device supports.
1956  * Basically, assuming the MD code can alloc messages, this function
1957  * should return the maximum value that pci_alloc_msi() can return.
1958  * Thus, it is subject to the tunables, etc.
1959  */
1960 int
1961 pci_msi_count_method(device_t dev, device_t child)
1962 {
1963 	struct pci_devinfo *dinfo = device_get_ivars(child);
1964 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1965 
1966 	if (pci_do_msi && msi->msi_location != 0)
1967 		return (msi->msi_msgnum);
1968 	return (0);
1969 }
1970 
1971 /* free pcicfgregs structure and all depending data structures */
1972 
1973 int
1974 pci_freecfg(struct pci_devinfo *dinfo)
1975 {
1976 	struct devlist *devlist_head;
1977 	int i;
1978 
1979 	devlist_head = &pci_devq;
1980 
1981 	if (dinfo->cfg.vpd.vpd_reg) {
1982 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1983 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1984 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1985 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1986 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1987 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1988 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1989 	}
1990 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1991 	free(dinfo, M_DEVBUF);
1992 
1993 	/* increment the generation count */
1994 	pci_generation++;
1995 
1996 	/* we're losing one device */
1997 	pci_numdevs--;
1998 	return (0);
1999 }
2000 
2001 /*
 * PCI power management
2003  */
2004 int
2005 pci_set_powerstate_method(device_t dev, device_t child, int state)
2006 {
2007 	struct pci_devinfo *dinfo = device_get_ivars(child);
2008 	pcicfgregs *cfg = &dinfo->cfg;
2009 	uint16_t status;
2010 	int result, oldstate, highest, delay;
2011 
2012 	if (cfg->pp.pp_cap == 0)
2013 		return (EOPNOTSUPP);
2014 
2015 	/*
2016 	 * Optimize a no state change request away.  While it would be OK to
2017 	 * write to the hardware in theory, some devices have shown odd
2018 	 * behavior when going from D3 -> D3.
2019 	 */
2020 	oldstate = pci_get_powerstate(child);
2021 	if (oldstate == state)
2022 		return (0);
2023 
2024 	/*
2025 	 * The PCI power management specification states that after a state
2026 	 * transition between PCI power states, system software must
2027 	 * guarantee a minimal delay before the function accesses the device.
2028 	 * Compute the worst case delay that we need to guarantee before we
2029 	 * access the device.  Many devices will be responsive much more
2030 	 * quickly than this delay, but there are some that don't respond
2031 	 * instantly to state changes.  Transitions to/from D3 state require
2032 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2033 	 * is done below with DELAY rather than a sleeper function because
2034 	 * this function can be called from contexts where we cannot sleep.
2035 	 */
2036 	highest = (oldstate > state) ? oldstate : state;
2037 	if (highest == PCI_POWERSTATE_D3)
2038 	    delay = 10000;
2039 	else if (highest == PCI_POWERSTATE_D2)
2040 	    delay = 200;
2041 	else
2042 	    delay = 0;
2043 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2044 	    & ~PCIM_PSTAT_DMASK;
2045 	result = 0;
2046 	switch (state) {
2047 	case PCI_POWERSTATE_D0:
2048 		status |= PCIM_PSTAT_D0;
2049 		break;
2050 	case PCI_POWERSTATE_D1:
2051 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2052 			return (EOPNOTSUPP);
2053 		status |= PCIM_PSTAT_D1;
2054 		break;
2055 	case PCI_POWERSTATE_D2:
2056 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2057 			return (EOPNOTSUPP);
2058 		status |= PCIM_PSTAT_D2;
2059 		break;
2060 	case PCI_POWERSTATE_D3:
2061 		status |= PCIM_PSTAT_D3;
2062 		break;
2063 	default:
2064 		return (EINVAL);
2065 	}
2066 
2067 	if (bootverbose)
2068 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2069 		    state);
2070 
2071 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2072 	if (delay)
2073 		DELAY(delay);
2074 	return (0);
2075 }
2076 
2077 int
2078 pci_get_powerstate_method(device_t dev, device_t child)
2079 {
2080 	struct pci_devinfo *dinfo = device_get_ivars(child);
2081 	pcicfgregs *cfg = &dinfo->cfg;
2082 	uint16_t status;
2083 	int result;
2084 
2085 	if (cfg->pp.pp_cap != 0) {
2086 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2087 		switch (status & PCIM_PSTAT_DMASK) {
2088 		case PCIM_PSTAT_D0:
2089 			result = PCI_POWERSTATE_D0;
2090 			break;
2091 		case PCIM_PSTAT_D1:
2092 			result = PCI_POWERSTATE_D1;
2093 			break;
2094 		case PCIM_PSTAT_D2:
2095 			result = PCI_POWERSTATE_D2;
2096 			break;
2097 		case PCIM_PSTAT_D3:
2098 			result = PCI_POWERSTATE_D3;
2099 			break;
2100 		default:
2101 			result = PCI_POWERSTATE_UNKNOWN;
2102 			break;
2103 		}
2104 	} else {
2105 		/* No support, device is always at D0 */
2106 		result = PCI_POWERSTATE_D0;
2107 	}
2108 	return (result);
2109 }
2110 
2111 /*
2112  * Some convenience functions for PCI device drivers.
2113  */
2114 
2115 static __inline void
2116 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2117 {
2118 	uint16_t	command;
2119 
2120 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2121 	command |= bit;
2122 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2123 }
2124 
2125 static __inline void
2126 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2127 {
2128 	uint16_t	command;
2129 
2130 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2131 	command &= ~bit;
2132 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2133 }
2134 
2135 int
2136 pci_enable_busmaster_method(device_t dev, device_t child)
2137 {
2138 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2139 	return (0);
2140 }
2141 
2142 int
2143 pci_disable_busmaster_method(device_t dev, device_t child)
2144 {
2145 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2146 	return (0);
2147 }
2148 
2149 int
2150 pci_enable_io_method(device_t dev, device_t child, int space)
2151 {
2152 	uint16_t command;
2153 	uint16_t bit;
2154 	char *error;
2155 
2156 	bit = 0;
2157 	error = NULL;
2158 
2159 	switch(space) {
2160 	case SYS_RES_IOPORT:
2161 		bit = PCIM_CMD_PORTEN;
2162 		error = "port";
2163 		break;
2164 	case SYS_RES_MEMORY:
2165 		bit = PCIM_CMD_MEMEN;
2166 		error = "memory";
2167 		break;
2168 	default:
2169 		return (EINVAL);
2170 	}
2171 	pci_set_command_bit(dev, child, bit);
2172 	/* Some devices seem to need a brief stall here, what do to? */
2173 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2174 	if (command & bit)
2175 		return (0);
2176 	device_printf(child, "failed to enable %s mapping!\n", error);
2177 	return (ENXIO);
2178 }
2179 
2180 int
2181 pci_disable_io_method(device_t dev, device_t child, int space)
2182 {
2183 	uint16_t command;
2184 	uint16_t bit;
2185 	char *error;
2186 
2187 	bit = 0;
2188 	error = NULL;
2189 
2190 	switch(space) {
2191 	case SYS_RES_IOPORT:
2192 		bit = PCIM_CMD_PORTEN;
2193 		error = "port";
2194 		break;
2195 	case SYS_RES_MEMORY:
2196 		bit = PCIM_CMD_MEMEN;
2197 		error = "memory";
2198 		break;
2199 	default:
2200 		return (EINVAL);
2201 	}
2202 	pci_clear_command_bit(dev, child, bit);
2203 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2204 	if (command & bit) {
2205 		device_printf(child, "failed to disable %s mapping!\n", error);
2206 		return (ENXIO);
2207 	}
2208 	return (0);
2209 }
2210 
2211 /*
2212  * New style pci driver.  Parent device is either a pci-host-bridge or a
2213  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2214  */
2215 
2216 void
2217 pci_print_verbose(struct pci_devinfo *dinfo)
2218 {
2219 
2220 	if (bootverbose) {
2221 		pcicfgregs *cfg = &dinfo->cfg;
2222 
2223 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2224 		    cfg->vendor, cfg->device, cfg->revid);
2225 		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2226 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2227 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2228 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2229 		    cfg->mfdev);
2230 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2231 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2232 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2233 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2234 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2235 		if (cfg->intpin > 0)
2236 			printf("\tintpin=%c, irq=%d\n",
2237 			    cfg->intpin +'a' -1, cfg->intline);
2238 		if (cfg->pp.pp_cap) {
2239 			uint16_t status;
2240 
2241 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2242 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2243 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2244 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2245 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2246 			    status & PCIM_PSTAT_DMASK);
2247 		}
2248 		if (cfg->msi.msi_location) {
2249 			int ctrl;
2250 
2251 			ctrl = cfg->msi.msi_ctrl;
2252 			printf("\tMSI supports %d message%s%s%s\n",
2253 			    cfg->msi.msi_msgnum,
2254 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2255 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2256 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2257 		}
2258 		if (cfg->msix.msix_location) {
2259 			printf("\tMSI-X supports %d message%s ",
2260 			    cfg->msix.msix_msgnum,
2261 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2262 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2263 				printf("in map 0x%x\n",
2264 				    cfg->msix.msix_table_bar);
2265 			else
2266 				printf("in maps 0x%x and 0x%x\n",
2267 				    cfg->msix.msix_table_bar,
2268 				    cfg->msix.msix_pba_bar);
2269 		}
2270 	}
2271 }
2272 
2273 static int
2274 pci_porten(device_t dev)
2275 {
2276 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2277 }
2278 
2279 static int
2280 pci_memen(device_t dev)
2281 {
2282 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2283 }
2284 
2285 static void
2286 pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
2287 {
2288 	pci_addr_t map, testval;
2289 	int ln2range;
2290 	uint16_t cmd;
2291 
2292 	map = pci_read_config(dev, reg, 4);
2293 	ln2range = pci_maprange(map);
2294 	if (ln2range == 64)
2295 		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2296 
2297 	/*
2298 	 * Disable decoding via the command register before
2299 	 * determining the BAR's length since we will be placing it in
2300 	 * a weird state.
2301 	 */
2302 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2303 	pci_write_config(dev, PCIR_COMMAND,
2304 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2305 
2306 	/*
2307 	 * Determine the BAR's length by writing all 1's.  The bottom
2308 	 * log_2(size) bits of the BAR will stick as 0 when we read
2309 	 * the value back.
2310 	 */
2311 	pci_write_config(dev, reg, 0xffffffff, 4);
2312 	testval = pci_read_config(dev, reg, 4);
2313 	if (ln2range == 64) {
2314 		pci_write_config(dev, reg + 4, 0xffffffff, 4);
2315 		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
2316 	}
2317 
2318 	/*
2319 	 * Restore the original value of the BAR.  We may have reprogrammed
2320 	 * the BAR of the low-level console device and when booting verbose,
2321 	 * we need the console device addressable.
2322 	 */
2323 	pci_write_config(dev, reg, map, 4);
2324 	if (ln2range == 64)
2325 		pci_write_config(dev, reg + 4, map >> 32, 4);
2326 	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2327 
2328 	*mapp = map;
2329 	*testvalp = testval;
2330 }
2331 
2332 static void
2333 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2334 {
2335 	pci_addr_t map;
2336 	int ln2range;
2337 
2338 	map = pci_read_config(dev, reg, 4);
2339 	ln2range = pci_maprange(map);
2340 	pci_write_config(dev, reg, base, 4);
2341 	if (ln2range == 64)
2342 		pci_write_config(dev, reg + 4, base >> 32, 4);
2343 }
2344 
2345 /*
2346  * Add a resource based on a pci map register. Return 1 if the map
2347  * register is a 32bit map register or 2 if it is a 64bit register.
2348  */
2349 static int
2350 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2351     int force, int prefetch)
2352 {
2353 	pci_addr_t base, map, testval;
2354 	pci_addr_t start, end, count;
2355 	int barlen, basezero, maprange, mapsize, type;
2356 	uint16_t cmd;
2357 	struct resource *res;
2358 
2359 	pci_read_bar(dev, reg, &map, &testval);
2360 	if (PCI_BAR_MEM(map)) {
2361 		type = SYS_RES_MEMORY;
2362 		if (map & PCIM_BAR_MEM_PREFETCH)
2363 			prefetch = 1;
2364 	} else
2365 		type = SYS_RES_IOPORT;
2366 	mapsize = pci_mapsize(testval);
2367 	base = pci_mapbase(map);
2368 #ifdef __PCI_BAR_ZERO_VALID
2369 	basezero = 0;
2370 #else
2371 	basezero = base == 0;
2372 #endif
2373 	maprange = pci_maprange(map);
2374 	barlen = maprange == 64 ? 2 : 1;
2375 
2376 	/*
2377 	 * For I/O registers, if bottom bit is set, and the next bit up
2378 	 * isn't clear, we know we have a BAR that doesn't conform to the
2379 	 * spec, so ignore it.  Also, sanity check the size of the data
2380 	 * areas to the type of memory involved.  Memory must be at least
2381 	 * 16 bytes in size, while I/O ranges must be at least 4.
2382 	 */
2383 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2384 		return (barlen);
2385 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2386 	    (type == SYS_RES_IOPORT && mapsize < 2))
2387 		return (barlen);
2388 
2389 	if (bootverbose) {
2390 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2391 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2392 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2393 			printf(", port disabled\n");
2394 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2395 			printf(", memory disabled\n");
2396 		else
2397 			printf(", enabled\n");
2398 	}
2399 
2400 	/*
2401 	 * If base is 0, then we have problems if this architecture does
2402 	 * not allow that.  It is best to ignore such entries for the
2403 	 * moment.  These will be allocated later if the driver specifically
2404 	 * requests them.  However, some removable busses look better when
2405 	 * all resources are allocated, so allow '0' to be overriden.
2406 	 *
2407 	 * Similarly treat maps whose values is the same as the test value
2408 	 * read back.  These maps have had all f's written to them by the
2409 	 * BIOS in an attempt to disable the resources.
2410 	 */
2411 	if (!force && (basezero || map == testval))
2412 		return (barlen);
2413 	if ((u_long)base != base) {
2414 		device_printf(bus,
2415 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2416 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2417 		    pci_get_function(dev), reg);
2418 		return (barlen);
2419 	}
2420 
2421 	/*
2422 	 * This code theoretically does the right thing, but has
2423 	 * undesirable side effects in some cases where peripherals
2424 	 * respond oddly to having these bits enabled.  Let the user
2425 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2426 	 * default).
2427 	 */
2428 	if (pci_enable_io_modes) {
2429 		/* Turn on resources that have been left off by a lazy BIOS */
2430 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2431 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2432 			cmd |= PCIM_CMD_PORTEN;
2433 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2434 		}
2435 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2436 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2437 			cmd |= PCIM_CMD_MEMEN;
2438 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2439 		}
2440 	} else {
2441 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2442 			return (barlen);
2443 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2444 			return (barlen);
2445 	}
2446 
2447 	count = 1 << mapsize;
2448 	if (basezero || base == pci_mapbase(testval)) {
2449 		start = 0;	/* Let the parent decide. */
2450 		end = ~0ULL;
2451 	} else {
2452 		start = base;
2453 		end = base + (1 << mapsize) - 1;
2454 	}
2455 	resource_list_add(rl, type, reg, start, end, count);
2456 
2457 	/*
2458 	 * Try to allocate the resource for this BAR from our parent
2459 	 * so that this resource range is already reserved.  The
2460 	 * driver for this device will later inherit this resource in
2461 	 * pci_alloc_resource().
2462 	 */
2463 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2464 	    prefetch ? RF_PREFETCHABLE : 0);
2465 	if (res == NULL) {
2466 		/*
2467 		 * If the allocation fails, clear the BAR and delete
2468 		 * the resource list entry to force
2469 		 * pci_alloc_resource() to allocate resources from the
2470 		 * parent.
2471 		 */
2472 		resource_list_delete(rl, type, reg);
2473 		start = 0;
2474 	} else {
2475 		start = rman_get_start(res);
2476 		rman_set_device(res, bus);
2477 	}
2478 	pci_write_bar(dev, reg, start);
2479 	return (barlen);
2480 }
2481 
2482 /*
2483  * For ATA devices we need to decide early what addressing mode to use.
2484  * Legacy demands that the primary and secondary ATA ports sits on the
2485  * same addresses that old ISA hardware did. This dictates that we use
2486  * those addresses and ignore the BAR's if we cannot set PCI native
2487  * addressing mode.
2488  */
2489 static void
2490 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2491     uint32_t prefetchmask)
2492 {
2493 	struct resource *r;
2494 	int rid, type, progif;
2495 #if 0
2496 	/* if this device supports PCI native addressing use it */
2497 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2498 	if ((progif & 0x8a) == 0x8a) {
2499 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2500 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2501 			printf("Trying ATA native PCI addressing mode\n");
2502 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2503 		}
2504 	}
2505 #endif
2506 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2507 	type = SYS_RES_IOPORT;
2508 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2509 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2510 		    prefetchmask & (1 << 0));
2511 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2512 		    prefetchmask & (1 << 1));
2513 	} else {
2514 		rid = PCIR_BAR(0);
2515 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2516 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2517 		    8, 0);
2518 		rman_set_device(r, bus);
2519 		rid = PCIR_BAR(1);
2520 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2521 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2522 		    1, 0);
2523 		rman_set_device(r, bus);
2524 	}
2525 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2526 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2527 		    prefetchmask & (1 << 2));
2528 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2529 		    prefetchmask & (1 << 3));
2530 	} else {
2531 		rid = PCIR_BAR(2);
2532 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2533 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2534 		    8, 0);
2535 		rman_set_device(r, bus);
2536 		rid = PCIR_BAR(3);
2537 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2538 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2539 		    1, 0);
2540 		rman_set_device(r, bus);
2541 	}
2542 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2543 	    prefetchmask & (1 << 4));
2544 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2545 	    prefetchmask & (1 << 5));
2546 }
2547 
2548 static void
2549 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2550 {
2551 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2552 	pcicfgregs *cfg = &dinfo->cfg;
2553 	char tunable_name[64];
2554 	int irq;
2555 
2556 	/* Has to have an intpin to have an interrupt. */
2557 	if (cfg->intpin == 0)
2558 		return;
2559 
2560 	/* Let the user override the IRQ with a tunable. */
2561 	irq = PCI_INVALID_IRQ;
2562 	snprintf(tunable_name, sizeof(tunable_name),
2563 	    "hw.pci%d.%d.%d.INT%c.irq",
2564 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2565 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2566 		irq = PCI_INVALID_IRQ;
2567 
2568 	/*
2569 	 * If we didn't get an IRQ via the tunable, then we either use the
2570 	 * IRQ value in the intline register or we ask the bus to route an
2571 	 * interrupt for us.  If force_route is true, then we only use the
2572 	 * value in the intline register if the bus was unable to assign an
2573 	 * IRQ.
2574 	 */
2575 	if (!PCI_INTERRUPT_VALID(irq)) {
2576 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2577 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2578 		if (!PCI_INTERRUPT_VALID(irq))
2579 			irq = cfg->intline;
2580 	}
2581 
2582 	/* If after all that we don't have an IRQ, just bail. */
2583 	if (!PCI_INTERRUPT_VALID(irq))
2584 		return;
2585 
2586 	/* Update the config register if it changed. */
2587 	if (irq != cfg->intline) {
2588 		cfg->intline = irq;
2589 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2590 	}
2591 
2592 	/* Add this IRQ as rid 0 interrupt resource. */
2593 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2594 }
2595 
2596 void
2597 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2598 {
2599 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2600 	pcicfgregs *cfg = &dinfo->cfg;
2601 	struct resource_list *rl = &dinfo->resources;
2602 	struct pci_quirk *q;
2603 	int i;
2604 
2605 	/* ATA devices needs special map treatment */
2606 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2607 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2608 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2609 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2610 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2611 		pci_ata_maps(bus, dev, rl, force, prefetchmask);
2612 	else
2613 		for (i = 0; i < cfg->nummaps;)
2614 			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
2615 			    prefetchmask & (1 << i));
2616 
2617 	/*
2618 	 * Add additional, quirked resources.
2619 	 */
2620 	for (q = &pci_quirks[0]; q->devid; q++) {
2621 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2622 		    && q->type == PCI_QUIRK_MAP_REG)
2623 			pci_add_map(bus, dev, q->arg1, rl, force, 0);
2624 	}
2625 
2626 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2627 #ifdef __PCI_REROUTE_INTERRUPT
2628 		/*
2629 		 * Try to re-route interrupts. Sometimes the BIOS or
2630 		 * firmware may leave bogus values in these registers.
2631 		 * If the re-route fails, then just stick with what we
2632 		 * have.
2633 		 */
2634 		pci_assign_interrupt(bus, dev, 1);
2635 #else
2636 		pci_assign_interrupt(bus, dev, 0);
2637 #endif
2638 	}
2639 }
2640 
2641 void
2642 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2643 {
2644 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2645 	device_t pcib = device_get_parent(dev);
2646 	struct pci_devinfo *dinfo;
2647 	int maxslots;
2648 	int s, f, pcifunchigh;
2649 	uint8_t hdrtype;
2650 
2651 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2652 	    ("dinfo_size too small"));
2653 	maxslots = PCIB_MAXSLOTS(pcib);
2654 	for (s = 0; s <= maxslots; s++) {
2655 		pcifunchigh = 0;
2656 		f = 0;
2657 		DELAY(1);
2658 		hdrtype = REG(PCIR_HDRTYPE, 1);
2659 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2660 			continue;
2661 		if (hdrtype & PCIM_MFDEV)
2662 			pcifunchigh = PCI_FUNCMAX;
2663 		for (f = 0; f <= pcifunchigh; f++) {
2664 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2665 			    dinfo_size);
2666 			if (dinfo != NULL) {
2667 				pci_add_child(dev, dinfo);
2668 			}
2669 		}
2670 	}
2671 #undef REG
2672 }
2673 
2674 void
2675 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2676 {
2677 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2678 	device_set_ivars(dinfo->cfg.dev, dinfo);
2679 	resource_list_init(&dinfo->resources);
2680 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2681 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2682 	pci_print_verbose(dinfo);
2683 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2684 }
2685 
2686 static int
2687 pci_probe(device_t dev)
2688 {
2689 
2690 	device_set_desc(dev, "PCI bus");
2691 
2692 	/* Allow other subclasses to override this driver. */
2693 	return (BUS_PROBE_GENERIC);
2694 }
2695 
2696 static int
2697 pci_attach(device_t dev)
2698 {
2699 	int busno, domain;
2700 
2701 	/*
2702 	 * Since there can be multiple independantly numbered PCI
2703 	 * busses on systems with multiple PCI domains, we can't use
2704 	 * the unit number to decide which bus we are probing. We ask
2705 	 * the parent pcib what our domain and bus numbers are.
2706 	 */
2707 	domain = pcib_get_domain(dev);
2708 	busno = pcib_get_bus(dev);
2709 	if (bootverbose)
2710 		device_printf(dev, "domain=%d, physical bus=%d\n",
2711 		    domain, busno);
2712 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2713 	return (bus_generic_attach(dev));
2714 }
2715 
2716 int
2717 pci_suspend(device_t dev)
2718 {
2719 	int dstate, error, i, numdevs;
2720 	device_t acpi_dev, child, *devlist;
2721 	struct pci_devinfo *dinfo;
2722 
2723 	/*
2724 	 * Save the PCI configuration space for each child and set the
2725 	 * device in the appropriate power state for this sleep state.
2726 	 */
2727 	acpi_dev = NULL;
2728 	if (pci_do_power_resume)
2729 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2730 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2731 		return (error);
2732 	for (i = 0; i < numdevs; i++) {
2733 		child = devlist[i];
2734 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2735 		pci_cfg_save(child, dinfo, 0);
2736 	}
2737 
2738 	/* Suspend devices before potentially powering them down. */
2739 	error = bus_generic_suspend(dev);
2740 	if (error) {
2741 		free(devlist, M_TEMP);
2742 		return (error);
2743 	}
2744 
2745 	/*
2746 	 * Always set the device to D3.  If ACPI suggests a different
2747 	 * power state, use it instead.  If ACPI is not present, the
2748 	 * firmware is responsible for managing device power.  Skip
2749 	 * children who aren't attached since they are powered down
2750 	 * separately.  Only manage type 0 devices for now.
2751 	 */
2752 	for (i = 0; acpi_dev && i < numdevs; i++) {
2753 		child = devlist[i];
2754 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2755 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2756 			dstate = PCI_POWERSTATE_D3;
2757 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2758 			pci_set_powerstate(child, dstate);
2759 		}
2760 	}
2761 	free(devlist, M_TEMP);
2762 	return (0);
2763 }
2764 
2765 int
2766 pci_resume(device_t dev)
2767 {
2768 	int i, numdevs, error;
2769 	device_t acpi_dev, child, *devlist;
2770 	struct pci_devinfo *dinfo;
2771 
2772 	/*
2773 	 * Set each child to D0 and restore its PCI configuration space.
2774 	 */
2775 	acpi_dev = NULL;
2776 	if (pci_do_power_resume)
2777 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2778 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
2779 		return (error);
2780 	for (i = 0; i < numdevs; i++) {
2781 		/*
2782 		 * Notify ACPI we're going to D0 but ignore the result.  If
2783 		 * ACPI is not present, the firmware is responsible for
2784 		 * managing device power.  Only manage type 0 devices for now.
2785 		 */
2786 		child = devlist[i];
2787 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2788 		if (acpi_dev && device_is_attached(child) &&
2789 		    dinfo->cfg.hdrtype == 0) {
2790 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2791 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2792 		}
2793 
2794 		/* Now the device is powered up, restore its config space. */
2795 		pci_cfg_restore(child, dinfo);
2796 	}
2797 	free(devlist, M_TEMP);
2798 	return (bus_generic_resume(dev));
2799 }
2800 
2801 static void
2802 pci_load_vendor_data(void)
2803 {
2804 	caddr_t vendordata, info;
2805 
2806 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2807 		info = preload_search_info(vendordata, MODINFO_ADDR);
2808 		pci_vendordata = *(char **)info;
2809 		info = preload_search_info(vendordata, MODINFO_SIZE);
2810 		pci_vendordata_size = *(size_t *)info;
2811 		/* terminate the database */
2812 		pci_vendordata[pci_vendordata_size] = '\n';
2813 	}
2814 }
2815 
2816 void
2817 pci_driver_added(device_t dev, driver_t *driver)
2818 {
2819 	int numdevs;
2820 	device_t *devlist;
2821 	device_t child;
2822 	struct pci_devinfo *dinfo;
2823 	int i;
2824 
2825 	if (bootverbose)
2826 		device_printf(dev, "driver added\n");
2827 	DEVICE_IDENTIFY(driver, dev);
2828 	if (device_get_children(dev, &devlist, &numdevs) != 0)
2829 		return;
2830 	for (i = 0; i < numdevs; i++) {
2831 		child = devlist[i];
2832 		if (device_get_state(child) != DS_NOTPRESENT)
2833 			continue;
2834 		dinfo = device_get_ivars(child);
2835 		pci_print_verbose(dinfo);
2836 		if (bootverbose)
2837 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
2838 		pci_cfg_restore(child, dinfo);
2839 		if (device_probe_and_attach(child) != 0)
2840 			pci_cfg_save(child, dinfo, 1);
2841 	}
2842 	free(devlist, M_TEMP);
2843 }
2844 
2845 int
2846 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
2847     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
2848 {
2849 	struct pci_devinfo *dinfo;
2850 	struct msix_table_entry *mte;
2851 	struct msix_vector *mv;
2852 	uint64_t addr;
2853 	uint32_t data;
2854 	void *cookie;
2855 	int error, rid;
2856 
2857 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
2858 	    arg, &cookie);
2859 	if (error)
2860 		return (error);
2861 
2862 	/* If this is not a direct child, just bail out. */
2863 	if (device_get_parent(child) != dev) {
2864 		*cookiep = cookie;
2865 		return(0);
2866 	}
2867 
2868 	rid = rman_get_rid(irq);
2869 	if (rid == 0) {
2870 		/* Make sure that INTx is enabled */
2871 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
2872 	} else {
2873 		/*
2874 		 * Check to see if the interrupt is MSI or MSI-X.
2875 		 * Ask our parent to map the MSI and give
2876 		 * us the address and data register values.
2877 		 * If we fail for some reason, teardown the
2878 		 * interrupt handler.
2879 		 */
2880 		dinfo = device_get_ivars(child);
2881 		if (dinfo->cfg.msi.msi_alloc > 0) {
2882 			if (dinfo->cfg.msi.msi_addr == 0) {
2883 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
2884 			    ("MSI has handlers, but vectors not mapped"));
2885 				error = PCIB_MAP_MSI(device_get_parent(dev),
2886 				    child, rman_get_start(irq), &addr, &data);
2887 				if (error)
2888 					goto bad;
2889 				dinfo->cfg.msi.msi_addr = addr;
2890 				dinfo->cfg.msi.msi_data = data;
2891 			}
2892 			if (dinfo->cfg.msi.msi_handlers == 0)
2893 				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
2894 				    dinfo->cfg.msi.msi_data);
2895 			dinfo->cfg.msi.msi_handlers++;
2896 		} else {
2897 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2898 			    ("No MSI or MSI-X interrupts allocated"));
2899 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2900 			    ("MSI-X index too high"));
2901 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2902 			KASSERT(mte->mte_vector != 0, ("no message vector"));
2903 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
2904 			KASSERT(mv->mv_irq == rman_get_start(irq),
2905 			    ("IRQ mismatch"));
2906 			if (mv->mv_address == 0) {
2907 				KASSERT(mte->mte_handlers == 0,
2908 		    ("MSI-X table entry has handlers, but vector not mapped"));
2909 				error = PCIB_MAP_MSI(device_get_parent(dev),
2910 				    child, rman_get_start(irq), &addr, &data);
2911 				if (error)
2912 					goto bad;
2913 				mv->mv_address = addr;
2914 				mv->mv_data = data;
2915 			}
2916 			if (mte->mte_handlers == 0) {
2917 				pci_enable_msix(child, rid - 1, mv->mv_address,
2918 				    mv->mv_data);
2919 				pci_unmask_msix(child, rid - 1);
2920 			}
2921 			mte->mte_handlers++;
2922 		}
2923 
2924 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
2925 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2926 	bad:
2927 		if (error) {
2928 			(void)bus_generic_teardown_intr(dev, child, irq,
2929 			    cookie);
2930 			return (error);
2931 		}
2932 	}
2933 	*cookiep = cookie;
2934 	return (0);
2935 }
2936 
2937 int
2938 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2939     void *cookie)
2940 {
2941 	struct msix_table_entry *mte;
2942 	struct resource_list_entry *rle;
2943 	struct pci_devinfo *dinfo;
2944 	int error, rid;
2945 
2946 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2947 		return (EINVAL);
2948 
2949 	/* If this isn't a direct child, just bail out */
2950 	if (device_get_parent(child) != dev)
2951 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2952 
2953 	rid = rman_get_rid(irq);
2954 	if (rid == 0) {
2955 		/* Mask INTx */
2956 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2957 	} else {
2958 		/*
2959 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2960 		 * decrement the appropriate handlers count and mask the
2961 		 * MSI-X message, or disable MSI messages if the count
2962 		 * drops to 0.
2963 		 */
2964 		dinfo = device_get_ivars(child);
2965 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2966 		if (rle->res != irq)
2967 			return (EINVAL);
2968 		if (dinfo->cfg.msi.msi_alloc > 0) {
2969 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2970 			    ("MSI-X index too high"));
2971 			if (dinfo->cfg.msi.msi_handlers == 0)
2972 				return (EINVAL);
2973 			dinfo->cfg.msi.msi_handlers--;
2974 			if (dinfo->cfg.msi.msi_handlers == 0)
2975 				pci_disable_msi(child);
2976 		} else {
2977 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2978 			    ("No MSI or MSI-X interrupts allocated"));
2979 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2980 			    ("MSI-X index too high"));
2981 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2982 			if (mte->mte_handlers == 0)
2983 				return (EINVAL);
2984 			mte->mte_handlers--;
2985 			if (mte->mte_handlers == 0)
2986 				pci_mask_msix(child, rid - 1);
2987 		}
2988 	}
2989 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2990 	if (rid > 0)
2991 		KASSERT(error == 0,
2992 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2993 	return (error);
2994 }
2995 
2996 int
2997 pci_print_child(device_t dev, device_t child)
2998 {
2999 	struct pci_devinfo *dinfo;
3000 	struct resource_list *rl;
3001 	int retval = 0;
3002 
3003 	dinfo = device_get_ivars(child);
3004 	rl = &dinfo->resources;
3005 
3006 	retval += bus_print_child_header(dev, child);
3007 
3008 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3009 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3010 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3011 	if (device_get_flags(dev))
3012 		retval += printf(" flags %#x", device_get_flags(dev));
3013 
3014 	retval += printf(" at device %d.%d", pci_get_slot(child),
3015 	    pci_get_function(child));
3016 
3017 	retval += bus_print_child_footer(dev, child);
3018 
3019 	return (retval);
3020 }
3021 
/*
 * Generic descriptions for devices that no driver claimed, keyed by PCI
 * class and subclass.  An entry whose subclass is -1 names the class as
 * a whole; any other entry names one subclass within that class.  The
 * table is terminated by an entry whose desc is NULL, which is what
 * pci_probe_nomatch() tests for when scanning it.
 */
static struct
{
	int	class;		/* PCIC_* class code */
	int	subclass;	/* PCIS_* subclass code, or -1 for the class */
	char	*desc;		/* description; NULL marks the end */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	/* End-of-table marker: desc == NULL. */
	{0, 0,		NULL}
};
3113 
3114 void
3115 pci_probe_nomatch(device_t dev, device_t child)
3116 {
3117 	int	i;
3118 	char	*cp, *scp, *device;
3119 
3120 	/*
3121 	 * Look for a listing for this device in a loaded device database.
3122 	 */
3123 	if ((device = pci_describe_device(child)) != NULL) {
3124 		device_printf(dev, "<%s>", device);
3125 		free(device, M_DEVBUF);
3126 	} else {
3127 		/*
3128 		 * Scan the class/subclass descriptions for a general
3129 		 * description.
3130 		 */
3131 		cp = "unknown";
3132 		scp = NULL;
3133 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3134 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3135 				if (pci_nomatch_tab[i].subclass == -1) {
3136 					cp = pci_nomatch_tab[i].desc;
3137 				} else if (pci_nomatch_tab[i].subclass ==
3138 				    pci_get_subclass(child)) {
3139 					scp = pci_nomatch_tab[i].desc;
3140 				}
3141 			}
3142 		}
3143 		device_printf(dev, "<%s%s%s>",
3144 		    cp ? cp : "",
3145 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3146 		    scp ? scp : "");
3147 	}
3148 	printf(" at device %d.%d (no driver attached)\n",
3149 	    pci_get_slot(child), pci_get_function(child));
3150 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3151 	return;
3152 }
3153 
3154 /*
3155  * Parse the PCI device database, if loaded, and return a pointer to a
3156  * description of the device.
3157  *
3158  * The database is flat text formatted as follows:
3159  *
3160  * Any line not in a valid format is ignored.
3161  * Lines are terminated with newline '\n' characters.
3162  *
3163  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3164  * the vendor name.
3165  *
3166  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3167  * - devices cannot be listed without a corresponding VENDOR line.
3168  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3169  * another TAB, then the device name.
3170  */
3171 
3172 /*
3173  * Assuming (ptr) points to the beginning of a line in the database,
3174  * return the vendor or device and description of the next entry.
3175  * The value of (vendor) or (device) inappropriate for the entry type
3176  * is set to -1.  Returns nonzero at the end of the database.
3177  *
3178  * Note that this is slightly unrobust in the face of corrupt data;
3179  * we attempt to safeguard against this by spamming the end of the
3180  * database with a newline when we initialise.
3181  */
3182 static int
3183 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3184 {
3185 	char	*cp = *ptr;
3186 	int	left;
3187 
3188 	*device = -1;
3189 	*vendor = -1;
3190 	**desc = '\0';
3191 	for (;;) {
3192 		left = pci_vendordata_size - (cp - pci_vendordata);
3193 		if (left <= 0) {
3194 			*ptr = cp;
3195 			return(1);
3196 		}
3197 
3198 		/* vendor entry? */
3199 		if (*cp != '\t' &&
3200 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3201 			break;
3202 		/* device entry? */
3203 		if (*cp == '\t' &&
3204 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3205 			break;
3206 
3207 		/* skip to next line */
3208 		while (*cp != '\n' && left > 0) {
3209 			cp++;
3210 			left--;
3211 		}
3212 		if (*cp == '\n') {
3213 			cp++;
3214 			left--;
3215 		}
3216 	}
3217 	/* skip to next line */
3218 	while (*cp != '\n' && left > 0) {
3219 		cp++;
3220 		left--;
3221 	}
3222 	if (*cp == '\n' && left > 0)
3223 		cp++;
3224 	*ptr = cp;
3225 	return(0);
3226 }
3227 
3228 static char *
3229 pci_describe_device(device_t dev)
3230 {
3231 	int	vendor, device;
3232 	char	*desc, *vp, *dp, *line;
3233 
3234 	desc = vp = dp = NULL;
3235 
3236 	/*
3237 	 * If we have no vendor data, we can't do anything.
3238 	 */
3239 	if (pci_vendordata == NULL)
3240 		goto out;
3241 
3242 	/*
3243 	 * Scan the vendor data looking for this device
3244 	 */
3245 	line = pci_vendordata;
3246 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3247 		goto out;
3248 	for (;;) {
3249 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3250 			goto out;
3251 		if (vendor == pci_get_vendor(dev))
3252 			break;
3253 	}
3254 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3255 		goto out;
3256 	for (;;) {
3257 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3258 			*dp = 0;
3259 			break;
3260 		}
3261 		if (vendor != -1) {
3262 			*dp = 0;
3263 			break;
3264 		}
3265 		if (device == pci_get_device(dev))
3266 			break;
3267 	}
3268 	if (dp[0] == '\0')
3269 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3270 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3271 	    NULL)
3272 		sprintf(desc, "%s, %s", vp, dp);
3273  out:
3274 	if (vp != NULL)
3275 		free(vp, M_DEVBUF);
3276 	if (dp != NULL)
3277 		free(dp, M_DEVBUF);
3278 	return(desc);
3279 }
3280 
3281 int
3282 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3283 {
3284 	struct pci_devinfo *dinfo;
3285 	pcicfgregs *cfg;
3286 
3287 	dinfo = device_get_ivars(child);
3288 	cfg = &dinfo->cfg;
3289 
3290 	switch (which) {
3291 	case PCI_IVAR_ETHADDR:
3292 		/*
3293 		 * The generic accessor doesn't deal with failure, so
3294 		 * we set the return value, then return an error.
3295 		 */
3296 		*((uint8_t **) result) = NULL;
3297 		return (EINVAL);
3298 	case PCI_IVAR_SUBVENDOR:
3299 		*result = cfg->subvendor;
3300 		break;
3301 	case PCI_IVAR_SUBDEVICE:
3302 		*result = cfg->subdevice;
3303 		break;
3304 	case PCI_IVAR_VENDOR:
3305 		*result = cfg->vendor;
3306 		break;
3307 	case PCI_IVAR_DEVICE:
3308 		*result = cfg->device;
3309 		break;
3310 	case PCI_IVAR_DEVID:
3311 		*result = (cfg->device << 16) | cfg->vendor;
3312 		break;
3313 	case PCI_IVAR_CLASS:
3314 		*result = cfg->baseclass;
3315 		break;
3316 	case PCI_IVAR_SUBCLASS:
3317 		*result = cfg->subclass;
3318 		break;
3319 	case PCI_IVAR_PROGIF:
3320 		*result = cfg->progif;
3321 		break;
3322 	case PCI_IVAR_REVID:
3323 		*result = cfg->revid;
3324 		break;
3325 	case PCI_IVAR_INTPIN:
3326 		*result = cfg->intpin;
3327 		break;
3328 	case PCI_IVAR_IRQ:
3329 		*result = cfg->intline;
3330 		break;
3331 	case PCI_IVAR_DOMAIN:
3332 		*result = cfg->domain;
3333 		break;
3334 	case PCI_IVAR_BUS:
3335 		*result = cfg->bus;
3336 		break;
3337 	case PCI_IVAR_SLOT:
3338 		*result = cfg->slot;
3339 		break;
3340 	case PCI_IVAR_FUNCTION:
3341 		*result = cfg->func;
3342 		break;
3343 	case PCI_IVAR_CMDREG:
3344 		*result = cfg->cmdreg;
3345 		break;
3346 	case PCI_IVAR_CACHELNSZ:
3347 		*result = cfg->cachelnsz;
3348 		break;
3349 	case PCI_IVAR_MINGNT:
3350 		*result = cfg->mingnt;
3351 		break;
3352 	case PCI_IVAR_MAXLAT:
3353 		*result = cfg->maxlat;
3354 		break;
3355 	case PCI_IVAR_LATTIMER:
3356 		*result = cfg->lattimer;
3357 		break;
3358 	default:
3359 		return (ENOENT);
3360 	}
3361 	return (0);
3362 }
3363 
3364 int
3365 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3366 {
3367 	struct pci_devinfo *dinfo;
3368 
3369 	dinfo = device_get_ivars(child);
3370 
3371 	switch (which) {
3372 	case PCI_IVAR_INTPIN:
3373 		dinfo->cfg.intpin = value;
3374 		return (0);
3375 	case PCI_IVAR_ETHADDR:
3376 	case PCI_IVAR_SUBVENDOR:
3377 	case PCI_IVAR_SUBDEVICE:
3378 	case PCI_IVAR_VENDOR:
3379 	case PCI_IVAR_DEVICE:
3380 	case PCI_IVAR_DEVID:
3381 	case PCI_IVAR_CLASS:
3382 	case PCI_IVAR_SUBCLASS:
3383 	case PCI_IVAR_PROGIF:
3384 	case PCI_IVAR_REVID:
3385 	case PCI_IVAR_IRQ:
3386 	case PCI_IVAR_DOMAIN:
3387 	case PCI_IVAR_BUS:
3388 	case PCI_IVAR_SLOT:
3389 	case PCI_IVAR_FUNCTION:
3390 		return (EINVAL);	/* disallow for now */
3391 
3392 	default:
3393 		return (ENOENT);
3394 	}
3395 }
3396 
3397 
3398 #include "opt_ddb.h"
3399 #ifdef DDB
3400 #include <ddb/ddb.h>
3401 #include <sys/cons.h>
3402 
3403 /*
3404  * List resources based on pci map registers, used for within ddb
3405  */
3406 
3407 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3408 {
3409 	struct pci_devinfo *dinfo;
3410 	struct devlist *devlist_head;
3411 	struct pci_conf *p;
3412 	const char *name;
3413 	int i, error, none_count;
3414 
3415 	none_count = 0;
3416 	/* get the head of the device queue */
3417 	devlist_head = &pci_devq;
3418 
3419 	/*
3420 	 * Go through the list of devices and print out devices
3421 	 */
3422 	for (error = 0, i = 0,
3423 	     dinfo = STAILQ_FIRST(devlist_head);
3424 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3425 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3426 
3427 		/* Populate pd_name and pd_unit */
3428 		name = NULL;
3429 		if (dinfo->cfg.dev)
3430 			name = device_get_name(dinfo->cfg.dev);
3431 
3432 		p = &dinfo->conf;
3433 		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3434 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3435 			(name && *name) ? name : "none",
3436 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3437 			none_count++,
3438 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3439 			p->pc_sel.pc_func, (p->pc_class << 16) |
3440 			(p->pc_subclass << 8) | p->pc_progif,
3441 			(p->pc_subdevice << 16) | p->pc_subvendor,
3442 			(p->pc_device << 16) | p->pc_vendor,
3443 			p->pc_revid, p->pc_hdr);
3444 	}
3445 }
3446 #endif /* DDB */
3447 
3448 static struct resource *
3449 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3450     u_long start, u_long end, u_long count, u_int flags)
3451 {
3452 	struct pci_devinfo *dinfo = device_get_ivars(child);
3453 	struct resource_list *rl = &dinfo->resources;
3454 	struct resource_list_entry *rle;
3455 	struct resource *res;
3456 	pci_addr_t map, testval;
3457 	int mapsize;
3458 
3459 	/*
3460 	 * Weed out the bogons, and figure out how large the BAR/map
3461 	 * is.  Bars that read back 0 here are bogus and unimplemented.
3462 	 * Note: atapci in legacy mode are special and handled elsewhere
3463 	 * in the code.  If you have a atapci device in legacy mode and
3464 	 * it fails here, that other code is broken.
3465 	 */
3466 	res = NULL;
3467 	pci_read_bar(child, *rid, &map, &testval);
3468 
3469 	/* Ignore a BAR with a base of 0. */
3470 	if (pci_mapbase(testval) == 0)
3471 		goto out;
3472 
3473 	if (PCI_BAR_MEM(testval)) {
3474 		if (type != SYS_RES_MEMORY) {
3475 			if (bootverbose)
3476 				device_printf(dev,
3477 				    "child %s requested type %d for rid %#x,"
3478 				    " but the BAR says it is an memio\n",
3479 				    device_get_nameunit(child), type, *rid);
3480 			goto out;
3481 		}
3482 	} else {
3483 		if (type != SYS_RES_IOPORT) {
3484 			if (bootverbose)
3485 				device_printf(dev,
3486 				    "child %s requested type %d for rid %#x,"
3487 				    " but the BAR says it is an ioport\n",
3488 				    device_get_nameunit(child), type, *rid);
3489 			goto out;
3490 		}
3491 	}
3492 
3493 	/*
3494 	 * For real BARs, we need to override the size that
3495 	 * the driver requests, because that's what the BAR
3496 	 * actually uses and we would otherwise have a
3497 	 * situation where we might allocate the excess to
3498 	 * another driver, which won't work.
3499 	 */
3500 	mapsize = pci_mapsize(testval);
3501 	count = 1UL << mapsize;
3502 	if (RF_ALIGNMENT(flags) < mapsize)
3503 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3504 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3505 		flags |= RF_PREFETCHABLE;
3506 
3507 	/*
3508 	 * Allocate enough resource, and then write back the
3509 	 * appropriate bar for that resource.
3510 	 */
3511 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3512 	    start, end, count, flags & ~RF_ACTIVE);
3513 	if (res == NULL) {
3514 		device_printf(child,
3515 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3516 		    count, *rid, type, start, end);
3517 		goto out;
3518 	}
3519 	rman_set_device(res, dev);
3520 	resource_list_add(rl, type, *rid, start, end, count);
3521 	rle = resource_list_find(rl, type, *rid);
3522 	if (rle == NULL)
3523 		panic("pci_alloc_map: unexpectedly can't find resource.");
3524 	rle->res = res;
3525 	rle->start = rman_get_start(res);
3526 	rle->end = rman_get_end(res);
3527 	rle->count = count;
3528 	if (bootverbose)
3529 		device_printf(child,
3530 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3531 		    count, *rid, type, rman_get_start(res));
3532 	map = rman_get_start(res);
3533 	pci_write_bar(child, *rid, map);
3534 out:;
3535 	return (res);
3536 }
3537 
3538 
3539 struct resource *
3540 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3541 		   u_long start, u_long end, u_long count, u_int flags)
3542 {
3543 	struct pci_devinfo *dinfo = device_get_ivars(child);
3544 	struct resource_list *rl = &dinfo->resources;
3545 	struct resource_list_entry *rle;
3546 	struct resource *res;
3547 	pcicfgregs *cfg = &dinfo->cfg;
3548 
3549 	if (device_get_parent(child) != dev)
3550 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
3551 		    type, rid, start, end, count, flags));
3552 
3553 	/*
3554 	 * Perform lazy resource allocation
3555 	 */
3556 	switch (type) {
3557 	case SYS_RES_IRQ:
3558 		/*
3559 		 * Can't alloc legacy interrupt once MSI messages have
3560 		 * been allocated.
3561 		 */
3562 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3563 		    cfg->msix.msix_alloc > 0))
3564 			return (NULL);
3565 
3566 		/*
3567 		 * If the child device doesn't have an interrupt
3568 		 * routed and is deserving of an interrupt, try to
3569 		 * assign it one.
3570 		 */
3571 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3572 		    (cfg->intpin != 0))
3573 			pci_assign_interrupt(dev, child, 0);
3574 		break;
3575 	case SYS_RES_IOPORT:
3576 	case SYS_RES_MEMORY:
3577 		/* Allocate resources for this BAR if needed. */
3578 		rle = resource_list_find(rl, type, *rid);
3579 		if (rle == NULL) {
3580 			res = pci_alloc_map(dev, child, type, rid, start, end,
3581 			    count, flags);
3582 			if (res == NULL)
3583 				return (NULL);
3584 			rle = resource_list_find(rl, type, *rid);
3585 		}
3586 
3587 		/*
3588 		 * If the resource belongs to the bus, then give it to
3589 		 * the child.  We need to activate it if requested
3590 		 * since the bus always allocates inactive resources.
3591 		 */
3592 		if (rle != NULL && rle->res != NULL &&
3593 		    rman_get_device(rle->res) == dev) {
3594 			if (bootverbose)
3595 				device_printf(child,
3596 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3597 				    rman_get_size(rle->res), *rid, type,
3598 				    rman_get_start(rle->res));
3599 			rman_set_device(rle->res, child);
3600 			if ((flags & RF_ACTIVE) &&
3601 			    bus_activate_resource(child, type, *rid,
3602 			    rle->res) != 0)
3603 				return (NULL);
3604 			return (rle->res);
3605 		}
3606 	}
3607 	return (resource_list_alloc(rl, dev, child, type, rid,
3608 	    start, end, count, flags));
3609 }
3610 
3611 int
3612 pci_release_resource(device_t dev, device_t child, int type, int rid,
3613     struct resource *r)
3614 {
3615 	int error;
3616 
3617 	if (device_get_parent(child) != dev)
3618 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3619 		    type, rid, r));
3620 
3621 	/*
3622 	 * For BARs we don't actually want to release the resource.
3623 	 * Instead, we deactivate the resource if needed and then give
3624 	 * ownership of the BAR back to the bus.
3625 	 */
3626 	switch (type) {
3627 	case SYS_RES_IOPORT:
3628 	case SYS_RES_MEMORY:
3629 		if (rman_get_device(r) != child)
3630 			return (EINVAL);
3631 		if (rman_get_flags(r) & RF_ACTIVE) {
3632 			error = bus_deactivate_resource(child, type, rid, r);
3633 			if (error)
3634 				return (error);
3635 		}
3636 		rman_set_device(r, dev);
3637 		return (0);
3638 	}
3639 	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3640 }
3641 
3642 int
3643 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3644     struct resource *r)
3645 {
3646 	int error;
3647 
3648 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3649 	if (error)
3650 		return (error);
3651 
3652 	/* Enable decoding in the command register when activating BARs. */
3653 	if (device_get_parent(child) == dev) {
3654 		switch (type) {
3655 		case SYS_RES_IOPORT:
3656 		case SYS_RES_MEMORY:
3657 			error = PCI_ENABLE_IO(dev, child, type);
3658 			break;
3659 		}
3660 	}
3661 	return (error);
3662 }
3663 
3664 void
3665 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3666 {
3667 	struct pci_devinfo *dinfo;
3668 	struct resource_list *rl;
3669 	struct resource_list_entry *rle;
3670 
3671 	if (device_get_parent(child) != dev)
3672 		return;
3673 
3674 	dinfo = device_get_ivars(child);
3675 	rl = &dinfo->resources;
3676 	rle = resource_list_find(rl, type, rid);
3677 	if (rle == NULL)
3678 		return;
3679 
3680 	if (rle->res) {
3681 		if (rman_get_device(rle->res) != dev ||
3682 		    rman_get_flags(rle->res) & RF_ACTIVE) {
3683 			device_printf(dev, "delete_resource: "
3684 			    "Resource still owned by child, oops. "
3685 			    "(type=%d, rid=%d, addr=%lx)\n",
3686 			    rle->type, rle->rid,
3687 			    rman_get_start(rle->res));
3688 			return;
3689 		}
3690 
3691 #ifndef __PCI_BAR_ZERO_VALID
3692 		/*
3693 		 * If this is a BAR, clear the BAR so it stops
3694 		 * decoding before releasing the resource.
3695 		 */
3696 		switch (type) {
3697 		case SYS_RES_IOPORT:
3698 		case SYS_RES_MEMORY:
3699 			pci_write_bar(child, rid, 0);
3700 			break;
3701 		}
3702 #endif
3703 		bus_release_resource(dev, type, rid, rle->res);
3704 	}
3705 	resource_list_delete(rl, type, rid);
3706 }
3707 
3708 struct resource_list *
3709 pci_get_resource_list (device_t dev, device_t child)
3710 {
3711 	struct pci_devinfo *dinfo = device_get_ivars(child);
3712 
3713 	return (&dinfo->resources);
3714 }
3715 
3716 uint32_t
3717 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3718 {
3719 	struct pci_devinfo *dinfo = device_get_ivars(child);
3720 	pcicfgregs *cfg = &dinfo->cfg;
3721 
3722 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3723 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3724 }
3725 
3726 void
3727 pci_write_config_method(device_t dev, device_t child, int reg,
3728     uint32_t val, int width)
3729 {
3730 	struct pci_devinfo *dinfo = device_get_ivars(child);
3731 	pcicfgregs *cfg = &dinfo->cfg;
3732 
3733 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3734 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3735 }
3736 
3737 int
3738 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3739     size_t buflen)
3740 {
3741 
3742 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3743 	    pci_get_function(child));
3744 	return (0);
3745 }
3746 
3747 int
3748 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3749     size_t buflen)
3750 {
3751 	struct pci_devinfo *dinfo;
3752 	pcicfgregs *cfg;
3753 
3754 	dinfo = device_get_ivars(child);
3755 	cfg = &dinfo->cfg;
3756 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3757 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3758 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3759 	    cfg->progif);
3760 	return (0);
3761 }
3762 
3763 int
3764 pci_assign_interrupt_method(device_t dev, device_t child)
3765 {
3766 	struct pci_devinfo *dinfo = device_get_ivars(child);
3767 	pcicfgregs *cfg = &dinfo->cfg;
3768 
3769 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3770 	    cfg->intpin));
3771 }
3772 
3773 static int
3774 pci_modevent(module_t mod, int what, void *arg)
3775 {
3776 	static struct cdev *pci_cdev;
3777 
3778 	switch (what) {
3779 	case MOD_LOAD:
3780 		STAILQ_INIT(&pci_devq);
3781 		pci_generation = 0;
3782 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3783 		    "pci");
3784 		pci_load_vendor_data();
3785 		break;
3786 
3787 	case MOD_UNLOAD:
3788 		destroy_dev(pci_cdev);
3789 		break;
3790 	}
3791 
3792 	return (0);
3793 }
3794 
/*
 * Write the configuration register values cached in (dinfo) back to
 * (dev).  Only header type 0 devices are touched.  The device is moved
 * to power state D0 first if it is not already there, then the BARs
 * and the remaining cached registers are written, and finally MSI and
 * MSI-X state is resumed for devices that have those capabilities.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Each BAR is written back as a 32-bit register. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3840 
3841 void
3842 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3843 {
3844 	int i;
3845 	uint32_t cls;
3846 	int ps;
3847 
3848 	/*
3849 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3850 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3851 	 * which also require special treatment.  Other types are unknown, and
3852 	 * we err on the side of safety by ignoring them.  Powering down
3853 	 * bridges should not be undertaken lightly.
3854 	 */
3855 	if (dinfo->cfg.hdrtype != 0)
3856 		return;
3857 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3858 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3859 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3860 
3861 	/*
3862 	 * Some drivers apparently write to these registers w/o updating our
3863 	 * cached copy.  No harm happens if we update the copy, so do so here
3864 	 * so we can restore them.  The COMMAND register is modified by the
3865 	 * bus w/o updating the cache.  This should represent the normally
3866 	 * writable portion of the 'defined' part of type 0 headers.  In
3867 	 * theory we also need to save/restore the PCI capability structures
3868 	 * we know about, but apart from power we don't know any that are
3869 	 * writable.
3870 	 */
3871 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3872 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3873 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3874 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3875 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3876 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3877 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3878 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3879 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3880 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3881 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3882 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3883 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3884 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3885 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3886 
3887 	/*
3888 	 * don't set the state for display devices, base peripherals and
3889 	 * memory devices since bad things happen when they are powered down.
3890 	 * We should (a) have drivers that can easily detach and (b) use
3891 	 * generic drivers for these devices so that some device actually
3892 	 * attaches.  We need to make sure that when we implement (a) we don't
3893 	 * power the device down on a reattach.
3894 	 */
3895 	cls = pci_get_class(dev);
3896 	if (!setstate)
3897 		return;
3898 	switch (pci_do_power_nodriver)
3899 	{
3900 		case 0:		/* NO powerdown at all */
3901 			return;
3902 		case 1:		/* Conservative about what to power down */
3903 			if (cls == PCIC_STORAGE)
3904 				return;
3905 			/*FALLTHROUGH*/
3906 		case 2:		/* Agressive about what to power down */
3907 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3908 			    cls == PCIC_BASEPERIPH)
3909 				return;
3910 			/*FALLTHROUGH*/
3911 		case 3:		/* Power down everything */
3912 			break;
3913 	}
3914 	/*
3915 	 * PCI spec says we can only go into D3 state from D0 state.
3916 	 * Transition from D[12] into D0 before going to D3 state.
3917 	 */
3918 	ps = pci_get_powerstate(dev);
3919 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3920 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3921 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3922 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3923 }
3924