xref: /freebsd/sys/dev/pci/pci.c (revision dc60165b73e4c4d829a2cb9fed5cce585e93d9a9)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
/*
 * Prototypes for the file-local helpers defined below.  BAR decoding,
 * device enumeration, capability/VPD parsing, and MSI/MSI-X plumbing.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
/*
 * Method table for the pci bus driver: standard device/bus interfaces
 * plus the PCI-specific kobj interface declared in pci_if.m.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Resource management; alloc/activate are PCI-aware, rest generic. */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
166 
/* Declare the "pci" driver class and attach it under pcib (PCI bridges). */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * NOTE(review): appears to hold the vendor/device description database
 * loaded by pci_load_vendor_data() (prototype above; body not in view).
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175 
176 
/*
 * Quirk table for devices needing special handling.  Entries are matched
 * on the combined device(high 16)/vendor(low 16) config-space ID.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* list terminator */
};
220 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of enumerated PCI devices, its length and generation count. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set in pci_read_extcap() when a PCI-X / PCI-express capability is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271 
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper around the domain-aware lookup. */
	return (pci_find_dbsf(0, bus, slot, func));
}
280 
281 /* Find a device_t by domain/bus/slot/function */
282 
283 device_t
284 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285 {
286 	struct pci_devinfo *dinfo;
287 
288 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289 		if ((dinfo->cfg.domain == domain) &&
290 		    (dinfo->cfg.bus == bus) &&
291 		    (dinfo->cfg.slot == slot) &&
292 		    (dinfo->cfg.func == func)) {
293 			return (dinfo->cfg.dev);
294 		}
295 	}
296 
297 	return (NULL);
298 }
299 
300 /* Find a device_t by vendor/device ID */
301 
302 device_t
303 pci_find_device(uint16_t vendor, uint16_t device)
304 {
305 	struct pci_devinfo *dinfo;
306 
307 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308 		if ((dinfo->cfg.vendor == vendor) &&
309 		    (dinfo->cfg.device == device)) {
310 			return (dinfo->cfg.dev);
311 		}
312 	}
313 
314 	return (NULL);
315 }
316 
317 /* return base address of memory or port map */
318 
319 static pci_addr_t
320 pci_mapbase(uint64_t mapreg)
321 {
322 
323 	if (PCI_BAR_MEM(mapreg))
324 		return (mapreg & PCIM_BAR_MEM_BASE);
325 	else
326 		return (mapreg & PCIM_BAR_IO_BASE);
327 }
328 
329 /* return map type of memory or port map */
330 
331 static const char *
332 pci_maptype(uint64_t mapreg)
333 {
334 
335 	if (PCI_BAR_IO(mapreg))
336 		return ("I/O Port");
337 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338 		return ("Prefetchable Memory");
339 	return ("Memory");
340 }
341 
342 /* return log2 of map size decoded for memory or port map */
343 
344 static int
345 pci_mapsize(uint64_t testval)
346 {
347 	int ln2size;
348 
349 	testval = pci_mapbase(testval);
350 	ln2size = 0;
351 	if (testval != 0) {
352 		while ((testval & 1) == 0)
353 		{
354 			ln2size++;
355 			testval >>= 1;
356 		}
357 	}
358 	return (ln2size);
359 }
360 
361 /* return log2 of address range supported by map register */
362 
363 static int
364 pci_maprange(uint64_t mapreg)
365 {
366 	int ln2range = 0;
367 
368 	if (PCI_BAR_IO(mapreg))
369 		ln2range = 32;
370 	else
371 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372 		case PCIM_BAR_MEM_32:
373 			ln2range = 32;
374 			break;
375 		case PCIM_BAR_MEM_1MB:
376 			ln2range = 20;
377 			break;
378 		case PCIM_BAR_MEM_64:
379 			ln2range = 64;
380 			break;
381 		}
382 	return (ln2range);
383 }
384 
385 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386 
387 static void
388 pci_fixancient(pcicfgregs *cfg)
389 {
390 	if (cfg->hdrtype != 0)
391 		return;
392 
393 	/* PCI to PCI bridges use header type 1 */
394 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395 		cfg->hdrtype = 1;
396 }
397 
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device: subvendor IDs at the type-0 offsets */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
421 
/* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones in the vendor/device register means no device here. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* size is caller-supplied so larger structures can embed us. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Latch the standard configuration header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* The MFDEV bit of the header type flags multi-function. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the user-visible pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
496 
/*
 * Walk the device's PCI capability list and record the capabilities
 * this driver cares about (power management, HyperTransport MSI
 * mapping, MSI, MSI-X, VPD, subvendor IDs) into *cfg.  Also sets the
 * file-global pcix_chipset / pcie_chipset hints.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: pointers are single bytes in config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations: BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			/* Just remember the offset; parsed lazily later. */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG deliberately stay defined; the VPD helpers below use them. */
}
630 
631 /*
632  * PCI Vital Product Data
633  */
634 
635 #define	PCI_VPD_TIMEOUT		1000000
636 
637 static int
638 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
639 {
640 	int count = PCI_VPD_TIMEOUT;
641 
642 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
643 
644 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
645 
646 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
647 		if (--count < 0)
648 			return (ENXIO);
649 		DELAY(1);	/* limit looping */
650 	}
651 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
652 
653 	return (0);
654 }
655 
#if 0
/*
 * Counterpart of pci_read_vpd_reg(): write one aligned VPD dword and
 * poll until flag bit 15 clears, indicating the write has completed.
 * Currently unused; kept compiled-out for reference.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
675 
676 #undef PCI_VPD_TIMEOUT
677 
/*
 * Cursor state for the VPD parser: fetches 32-bit words through
 * pci_read_vpd_reg() and hands them out one byte at a time.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current word, shifted as consumed */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* byte offset of the next word */
	uint8_t		cksum;		/* running sum for the RV checksum */
};
686 
687 static int
688 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
689 {
690 	uint32_t reg;
691 	uint8_t byte;
692 
693 	if (vrs->bytesinval == 0) {
694 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
695 			return (ENXIO);
696 		vrs->val = le32toh(reg);
697 		vrs->off += 4;
698 		byte = vrs->val & 0xff;
699 		vrs->bytesinval = 3;
700 	} else {
701 		vrs->val = vrs->val >> 8;
702 		byte = vrs->val & 0xff;
703 		vrs->bytesinval--;
704 	}
705 
706 	vrs->cksum += byte;
707 	*data = byte;
708 	return (0);
709 }
710 
/*
 * Parse the device's Vital Product Data into cfg->vpd.
 *
 * Implemented as a state machine over the VPD byte stream:
 *   state 0   - resource tag / header byte
 *   state 1   - identifier string body
 *   state 2/3 - VPD-R keyword header / keyword value
 *   state 4   - byte-skipping state
 *   state 5/6 - VPD-W keyword header / keyword value
 * Negative states terminate the loop: -1 = finished or parse error,
 * -2 = config-space read (I/O) error.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD cannot extend past 0x7f dwords. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Double the read-only array when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* The RV field validates the running checksum. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array down to the entries used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip 'remain' bytes, then resume tag scanning. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the parse attempted so queries don't re-read every time. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
990 
991 int
992 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
993 {
994 	struct pci_devinfo *dinfo = device_get_ivars(child);
995 	pcicfgregs *cfg = &dinfo->cfg;
996 
997 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
998 		pci_read_vpd(device_get_parent(dev), cfg);
999 
1000 	*identptr = cfg->vpd.vpd_ident;
1001 
1002 	if (*identptr == NULL)
1003 		return (ENXIO);
1004 
1005 	return (0);
1006 }
1007 
1008 int
1009 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1010 	const char **vptr)
1011 {
1012 	struct pci_devinfo *dinfo = device_get_ivars(child);
1013 	pcicfgregs *cfg = &dinfo->cfg;
1014 	int i;
1015 
1016 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1017 		pci_read_vpd(device_get_parent(dev), cfg);
1018 
1019 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1020 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1021 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1022 			*vptr = cfg->vpd.vpd_ros[i].value;
1023 		}
1024 
1025 	if (i != cfg->vpd.vpd_rocnt)
1026 		return (0);
1027 
1028 	*vptr = NULL;
1029 	return (ENXIO);
1030 }
1031 
1032 /*
1033  * Find the requested extended capability and return the offset in
1034  * configuration space via the pointer provided. The function returns
1035  * 0 on success and error code otherwise.
1036  */
1037 int
1038 pci_find_extcap_method(device_t dev, device_t child, int capability,
1039     int *capreg)
1040 {
1041 	struct pci_devinfo *dinfo = device_get_ivars(child);
1042 	pcicfgregs *cfg = &dinfo->cfg;
1043 	u_int32_t status;
1044 	u_int8_t ptr;
1045 
1046 	/*
1047 	 * Check the CAP_LIST bit of the PCI status register first.
1048 	 */
1049 	status = pci_read_config(child, PCIR_STATUS, 2);
1050 	if (!(status & PCIM_STATUS_CAPPRESENT))
1051 		return (ENXIO);
1052 
1053 	/*
1054 	 * Determine the start pointer of the capabilities list.
1055 	 */
1056 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1057 	case 0:
1058 	case 1:
1059 		ptr = PCIR_CAP_PTR;
1060 		break;
1061 	case 2:
1062 		ptr = PCIR_CAP_PTR_2;
1063 		break;
1064 	default:
1065 		/* XXX: panic? */
1066 		return (ENXIO);		/* no extended capabilities support */
1067 	}
1068 	ptr = pci_read_config(child, ptr, 1);
1069 
1070 	/*
1071 	 * Traverse the capabilities list.
1072 	 */
1073 	while (ptr != 0) {
1074 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1075 			if (capreg != NULL)
1076 				*capreg = ptr;
1077 			return (0);
1078 		}
1079 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1080 	}
1081 
1082 	return (ENOENT);
1083 }
1084 
1085 /*
1086  * Support for MSI-X message interrupts.
1087  */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	uint32_t offset;

	KASSERT(msix->msix_table_len > index, ("bogus index"));
	/*
	 * Each MSI-X table entry is 16 bytes: address low, address
	 * high, message data, vector control.  Program the first
	 * three; vector control (masking) is handled separately by
	 * pci_mask_msix()/pci_unmask_msix().
	 */
	offset = msix->msix_table_offset + index * 16;
	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
	bus_write_4(msix->msix_table_res, offset + 8, data);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1104 
1105 void
1106 pci_mask_msix(device_t dev, u_int index)
1107 {
1108 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1109 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1110 	uint32_t offset, val;
1111 
1112 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1113 	offset = msix->msix_table_offset + index * 16 + 12;
1114 	val = bus_read_4(msix->msix_table_res, offset);
1115 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1116 		val |= PCIM_MSIX_VCTRL_MASK;
1117 		bus_write_4(msix->msix_table_res, offset, val);
1118 	}
1119 }
1120 
1121 void
1122 pci_unmask_msix(device_t dev, u_int index)
1123 {
1124 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1125 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1126 	uint32_t offset, val;
1127 
1128 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1129 	offset = msix->msix_table_offset + index * 16 + 12;
1130 	val = bus_read_4(msix->msix_table_res, offset);
1131 	if (val & PCIM_MSIX_VCTRL_MASK) {
1132 		val &= ~PCIM_MSIX_VCTRL_MASK;
1133 		bus_write_4(msix->msix_table_res, offset, val);
1134 	}
1135 }
1136 
1137 int
1138 pci_pending_msix(device_t dev, u_int index)
1139 {
1140 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1141 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1142 	uint32_t offset, bit;
1143 
1144 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1145 	offset = msix->msix_pba_offset + (index / 32) * 4;
1146 	bit = 1 << index % 32;
1147 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1148 }
1149 
1150 /*
1151  * Restore MSI-X registers and table during resume.  If MSI-X is
1152  * enabled then walk the virtual table to restore the actual MSI-X
1153  * table.
1154  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based into msix_vectors[]. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/*
	 * Restore the saved control register; this re-enables MSI-X
	 * in the capability if it was enabled before suspend.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1182 
1183 /*
1184  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1185  * returned in *count.  After this function returns, each message will be
1186  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1187  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BAR(s)
	 * holding the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* rle is either the table BAR's entry or the separate PBA one. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages were successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity mapping: message i uses vector i + 1 (1-based). */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1319 
1320 /*
1321  * By default, pci_alloc_msix() will assign the allocated IRQ
1322  * resources consecutively to the first N messages in the MSI-X table.
1323  * However, device drivers may want to use different layouts if they
1324  * either receive fewer messages than they asked for, or they wish to
1325  * populate the MSI-X table sparsely.  This method allows the driver
1326  * to specify what layout it wants.  It must be called after a
1327  * successful pci_alloc_msix() but before any of the associated
1328  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1329  *
1330  * The 'vectors' array contains 'count' message vectors.  The array
1331  * maps directly to the MSI-X table in that index 0 in the array
1332  * specifies the vector for the first message in the MSI-X table, etc.
1333  * The vector value in each array index can either be 0 to indicate
1334  * that no vector should be assigned to a message slot, or it can be a
1335  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1337  * vector (IRQ) to be used for the corresponding message.
1338  *
1339  * On successful return, each message with a non-zero vector will have
1340  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1341  * 1.  Additionally, if any of the IRQs allocated via the previous
1342  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1343  * will be freed back to the system automatically.
1344  *
1345  * For example, suppose a driver has a MSI-X table with 6 messages and
1346  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1347  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1348  * C.  After the call to pci_alloc_msix(), the device will be setup to
1349  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1351  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1352  * be freed back to the system.  This device will also have valid
1353  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1354  *
1355  * In any case, the SYS_RES_IRQ rid X will always map to the message
1356  * at MSI-X table index X - 1 and will only be valid if a vector is
1357  * assigned to that table entry.
1358  */
1359 int
1360 pci_remap_msix_method(device_t dev, device_t child, int count,
1361     const u_int *vectors)
1362 {
1363 	struct pci_devinfo *dinfo = device_get_ivars(child);
1364 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1365 	struct resource_list_entry *rle;
1366 	int i, irq, j, *used;
1367 
1368 	/*
1369 	 * Have to have at least one message in the table but the
1370 	 * table can't be bigger than the actual MSI-X table in the
1371 	 * device.
1372 	 */
1373 	if (count == 0 || count > msix->msix_msgnum)
1374 		return (EINVAL);
1375 
1376 	/* Sanity check the vectors. */
1377 	for (i = 0; i < count; i++)
1378 		if (vectors[i] > msix->msix_alloc)
1379 			return (EINVAL);
1380 
1381 	/*
1382 	 * Make sure there aren't any holes in the vectors to be used.
1383 	 * It's a big pain to support it, and it doesn't really make
1384 	 * sense anyway.  Also, at least one vector must be used.
1385 	 */
1386 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1387 	    M_ZERO);
1388 	for (i = 0; i < count; i++)
1389 		if (vectors[i] != 0)
1390 			used[vectors[i] - 1] = 1;
1391 	for (i = 0; i < msix->msix_alloc - 1; i++)
1392 		if (used[i] == 0 && used[i + 1] == 1) {
1393 			free(used, M_DEVBUF);
1394 			return (EINVAL);
1395 		}
1396 	if (used[0] != 1) {
1397 		free(used, M_DEVBUF);
1398 		return (EINVAL);
1399 	}
1400 
1401 	/* Make sure none of the resources are allocated. */
1402 	for (i = 0; i < msix->msix_table_len; i++) {
1403 		if (msix->msix_table[i].mte_vector == 0)
1404 			continue;
1405 		if (msix->msix_table[i].mte_handlers > 0)
1406 			return (EBUSY);
1407 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1408 		KASSERT(rle != NULL, ("missing resource"));
1409 		if (rle->res != NULL)
1410 			return (EBUSY);
1411 	}
1412 
1413 	/* Free the existing resource list entries. */
1414 	for (i = 0; i < msix->msix_table_len; i++) {
1415 		if (msix->msix_table[i].mte_vector == 0)
1416 			continue;
1417 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1418 	}
1419 
1420 	/*
1421 	 * Build the new virtual table keeping track of which vectors are
1422 	 * used.
1423 	 */
1424 	free(msix->msix_table, M_DEVBUF);
1425 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1426 	    M_DEVBUF, M_WAITOK | M_ZERO);
1427 	for (i = 0; i < count; i++)
1428 		msix->msix_table[i].mte_vector = vectors[i];
1429 	msix->msix_table_len = count;
1430 
1431 	/* Free any unused IRQs and resize the vectors array if necessary. */
1432 	j = msix->msix_alloc - 1;
1433 	if (used[j] == 0) {
1434 		struct msix_vector *vec;
1435 
1436 		while (used[j] == 0) {
1437 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1438 			    msix->msix_vectors[j].mv_irq);
1439 			j--;
1440 		}
1441 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1442 		    M_WAITOK);
1443 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1444 		    (j + 1));
1445 		free(msix->msix_vectors, M_DEVBUF);
1446 		msix->msix_vectors = vec;
1447 		msix->msix_alloc = j + 1;
1448 	}
1449 	free(used, M_DEVBUF);
1450 
1451 	/* Map the IRQs onto the rids. */
1452 	for (i = 0; i < count; i++) {
1453 		if (vectors[i] == 0)
1454 			continue;
1455 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1456 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1457 		    irq, 1);
1458 	}
1459 
1460 	if (bootverbose) {
1461 		device_printf(child, "Remapped MSI-X IRQs as: ");
1462 		for (i = 0; i < count; i++) {
1463 			if (i != 0)
1464 				printf(", ");
1465 			if (vectors[i] == 0)
1466 				printf("---");
1467 			else
1468 				printf("%d",
1469 				    msix->msix_vectors[vectors[i]].mv_irq);
1470 		}
1471 		printf("\n");
1472 	}
1473 
1474 	return (0);
1475 }
1476 
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated: every active
	 * message must have no handlers and its SYS_RES_IRQ resource
	 * must not be held by the driver.
	 */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1523 
1524 /*
1525  * Return the max supported MSI-X messages this device supports.
1526  * Basically, assuming the MD code can alloc messages, this function
1527  * should return the maximum value that pci_alloc_msix() can return.
1528  * Thus, it is subject to the tunables, etc.
1529  */
1530 int
1531 pci_msix_count_method(device_t dev, device_t child)
1532 {
1533 	struct pci_devinfo *dinfo = device_get_ivars(child);
1534 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1535 
1536 	if (pci_do_msix && msix->msix_location != 0)
1537 		return (msix->msix_msgnum);
1538 	return (0);
1539 }
1540 
1541 /*
1542  * HyperTransport MSI mapping control
1543  */
1544 void
1545 pci_ht_map_msi(device_t dev, uint64_t addr)
1546 {
1547 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1548 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1549 
1550 	if (!ht->ht_msimap)
1551 		return;
1552 
1553 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1554 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1555 		/* Enable MSI -> HT mapping. */
1556 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1557 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1558 		    ht->ht_msictrl, 2);
1559 	}
1560 
1561 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1562 		/* Disable MSI -> HT mapping. */
1563 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1564 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1565 		    ht->ht_msictrl, 2);
1566 	}
1567 }
1568 
1569 /*
1570  * Support for MSI message signalled interrupts.
1571  */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values before setting the enable bit. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capability: data register follows the high address. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1598 
1599 void
1600 pci_disable_msi(device_t dev)
1601 {
1602 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1603 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1604 
1605 	/* Disable MSI -> HT mapping. */
1606 	pci_ht_map_msi(dev, 0);
1607 
1608 	/* Disable MSI in the control register. */
1609 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1610 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1611 	    2);
1612 }
1613 
1614 /*
1615  * Restore MSI registers during resume.  If MSI is enabled then
1616  * restore the data and address registers in addition to the control
1617  * register.
1618  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved address/data before re-enabling MSI. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1644 
1645 int
1646 pci_remap_msi_irq(device_t dev, u_int irq)
1647 {
1648 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1649 	pcicfgregs *cfg = &dinfo->cfg;
1650 	struct resource_list_entry *rle;
1651 	struct msix_table_entry *mte;
1652 	struct msix_vector *mv;
1653 	device_t bus;
1654 	uint64_t addr;
1655 	uint32_t data;
1656 	int error, i, j;
1657 
1658 	bus = device_get_parent(dev);
1659 
1660 	/*
1661 	 * Handle MSI first.  We try to find this IRQ among our list
1662 	 * of MSI IRQs.  If we find it, we request updated address and
1663 	 * data registers and apply the results.
1664 	 */
1665 	if (cfg->msi.msi_alloc > 0) {
1666 
1667 		/* If we don't have any active handlers, nothing to do. */
1668 		if (cfg->msi.msi_handlers == 0)
1669 			return (0);
1670 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1671 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1672 			    i + 1);
1673 			if (rle->start == irq) {
1674 				error = PCIB_MAP_MSI(device_get_parent(bus),
1675 				    dev, irq, &addr, &data);
1676 				if (error)
1677 					return (error);
1678 				pci_disable_msi(dev);
1679 				dinfo->cfg.msi.msi_addr = addr;
1680 				dinfo->cfg.msi.msi_data = data;
1681 				pci_enable_msi(dev, addr, data);
1682 				return (0);
1683 			}
1684 		}
1685 		return (ENOENT);
1686 	}
1687 
1688 	/*
1689 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1690 	 * we request the updated mapping info.  If that works, we go
1691 	 * through all the slots that use this IRQ and update them.
1692 	 */
1693 	if (cfg->msix.msix_alloc > 0) {
1694 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1695 			mv = &cfg->msix.msix_vectors[i];
1696 			if (mv->mv_irq == irq) {
1697 				error = PCIB_MAP_MSI(device_get_parent(bus),
1698 				    dev, irq, &addr, &data);
1699 				if (error)
1700 					return (error);
1701 				mv->mv_address = addr;
1702 				mv->mv_data = data;
1703 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1704 					mte = &cfg->msix.msix_table[j];
1705 					if (mte->mte_vector != i + 1)
1706 						continue;
1707 					if (mte->mte_handlers == 0)
1708 						continue;
1709 					pci_mask_msix(dev, j);
1710 					pci_enable_msix(dev, j, addr, data);
1711 					pci_unmask_msix(dev, j);
1712 				}
1713 			}
1714 		}
1715 		return (ENOENT);
1716 	}
1717 
1718 	return (ENOENT);
1719 }
1720 
1721 /*
1722  * Returns true if the specified device is blacklisted because MSI
1723  * doesn't work.
1724  */
1725 int
1726 pci_msi_device_blacklisted(device_t dev)
1727 {
1728 	struct pci_quirk *q;
1729 
1730 	if (!pci_honor_msi_blacklist)
1731 		return (0);
1732 
1733 	for (q = &pci_quirks[0]; q->devid; q++) {
1734 		if (q->devid == pci_get_devid(dev) &&
1735 		    q->type == PCI_QUIRK_DISABLE_MSI)
1736 			return (1);
1737 	}
1738 	return (0);
1739 }
1740 
1741 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1743  * we just check for blacklisted chipsets as represented by the
1744  * host-PCI bridge at device 0:0:0.  In the future, it may become
1745  * necessary to check other system attributes, such as the kenv values
1746  * that give the motherboard manufacturer and model number.
1747  */
1748 static int
1749 pci_msi_blacklisted(void)
1750 {
1751 	device_t dev;
1752 
1753 	if (!pci_honor_msi_blacklist)
1754 		return (0);
1755 
1756 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1757 	if (!(pcie_chipset || pcix_chipset))
1758 		return (1);
1759 
1760 	dev = pci_find_bsf(0, 0, 0);
1761 	if (dev != NULL)
1762 		return (pci_msi_device_blacklisted(dev));
1763 	return (0);
1764 }
1765 
1766 /*
1767  * Attempt to allocate *count MSI messages.  The actual number allocated is
1768  * returned in *count.  After this function returns, each message will be
1769  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1770  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2.  (Halving preserves the power-of-2 invariant.) */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field encodes the count as log2.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1889 
1890 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated; collect the
	 * IRQ numbers for the release call below while checking.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1938 
1939 /*
1940  * Return the max supported MSI messages this device supports.
1941  * Basically, assuming the MD code can alloc messages, this function
1942  * should return the maximum value that pci_alloc_msi() can return.
1943  * Thus, it is subject to the tunables, etc.
1944  */
1945 int
1946 pci_msi_count_method(device_t dev, device_t child)
1947 {
1948 	struct pci_devinfo *dinfo = device_get_ivars(child);
1949 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1950 
1951 	if (pci_do_msi && msi->msi_location != 0)
1952 		return (msi->msi_msgnum);
1953 	return (0);
1954 }
1955 
1956 /* free pcicfgregs structure and all depending data structures */
1957 
1958 int
1959 pci_freecfg(struct pci_devinfo *dinfo)
1960 {
1961 	struct devlist *devlist_head;
1962 	int i;
1963 
1964 	devlist_head = &pci_devq;
1965 
1966 	if (dinfo->cfg.vpd.vpd_reg) {
1967 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1968 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1969 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1970 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1971 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1972 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1973 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1974 	}
1975 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1976 	free(dinfo, M_DEVBUF);
1977 
1978 	/* increment the generation count */
1979 	pci_generation++;
1980 
1981 	/* we're losing one device */
1982 	pci_numdevs--;
1983 	return (0);
1984 }
1985 
1986 /*
 * PCI power management
1988  */
1989 int
1990 pci_set_powerstate_method(device_t dev, device_t child, int state)
1991 {
1992 	struct pci_devinfo *dinfo = device_get_ivars(child);
1993 	pcicfgregs *cfg = &dinfo->cfg;
1994 	uint16_t status;
1995 	int result, oldstate, highest, delay;
1996 
1997 	if (cfg->pp.pp_cap == 0)
1998 		return (EOPNOTSUPP);
1999 
2000 	/*
2001 	 * Optimize a no state change request away.  While it would be OK to
2002 	 * write to the hardware in theory, some devices have shown odd
2003 	 * behavior when going from D3 -> D3.
2004 	 */
2005 	oldstate = pci_get_powerstate(child);
2006 	if (oldstate == state)
2007 		return (0);
2008 
2009 	/*
2010 	 * The PCI power management specification states that after a state
2011 	 * transition between PCI power states, system software must
2012 	 * guarantee a minimal delay before the function accesses the device.
2013 	 * Compute the worst case delay that we need to guarantee before we
2014 	 * access the device.  Many devices will be responsive much more
2015 	 * quickly than this delay, but there are some that don't respond
2016 	 * instantly to state changes.  Transitions to/from D3 state require
2017 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2018 	 * is done below with DELAY rather than a sleeper function because
2019 	 * this function can be called from contexts where we cannot sleep.
2020 	 */
2021 	highest = (oldstate > state) ? oldstate : state;
2022 	if (highest == PCI_POWERSTATE_D3)
2023 	    delay = 10000;
2024 	else if (highest == PCI_POWERSTATE_D2)
2025 	    delay = 200;
2026 	else
2027 	    delay = 0;
2028 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2029 	    & ~PCIM_PSTAT_DMASK;
2030 	result = 0;
2031 	switch (state) {
2032 	case PCI_POWERSTATE_D0:
2033 		status |= PCIM_PSTAT_D0;
2034 		break;
2035 	case PCI_POWERSTATE_D1:
2036 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2037 			return (EOPNOTSUPP);
2038 		status |= PCIM_PSTAT_D1;
2039 		break;
2040 	case PCI_POWERSTATE_D2:
2041 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2042 			return (EOPNOTSUPP);
2043 		status |= PCIM_PSTAT_D2;
2044 		break;
2045 	case PCI_POWERSTATE_D3:
2046 		status |= PCIM_PSTAT_D3;
2047 		break;
2048 	default:
2049 		return (EINVAL);
2050 	}
2051 
2052 	if (bootverbose)
2053 		printf(
2054 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2055 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2056 		    dinfo->cfg.func, oldstate, state);
2057 
2058 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2059 	if (delay)
2060 		DELAY(delay);
2061 	return (0);
2062 }
2063 
2064 int
2065 pci_get_powerstate_method(device_t dev, device_t child)
2066 {
2067 	struct pci_devinfo *dinfo = device_get_ivars(child);
2068 	pcicfgregs *cfg = &dinfo->cfg;
2069 	uint16_t status;
2070 	int result;
2071 
2072 	if (cfg->pp.pp_cap != 0) {
2073 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2074 		switch (status & PCIM_PSTAT_DMASK) {
2075 		case PCIM_PSTAT_D0:
2076 			result = PCI_POWERSTATE_D0;
2077 			break;
2078 		case PCIM_PSTAT_D1:
2079 			result = PCI_POWERSTATE_D1;
2080 			break;
2081 		case PCIM_PSTAT_D2:
2082 			result = PCI_POWERSTATE_D2;
2083 			break;
2084 		case PCIM_PSTAT_D3:
2085 			result = PCI_POWERSTATE_D3;
2086 			break;
2087 		default:
2088 			result = PCI_POWERSTATE_UNKNOWN;
2089 			break;
2090 		}
2091 	} else {
2092 		/* No support, device is always at D0 */
2093 		result = PCI_POWERSTATE_D0;
2094 	}
2095 	return (result);
2096 }
2097 
2098 /*
2099  * Some convenience functions for PCI device drivers.
2100  */
2101 
2102 static __inline void
2103 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2104 {
2105 	uint16_t	command;
2106 
2107 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2108 	command |= bit;
2109 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2110 }
2111 
2112 static __inline void
2113 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2114 {
2115 	uint16_t	command;
2116 
2117 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2118 	command &= ~bit;
2119 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2120 }
2121 
2122 int
2123 pci_enable_busmaster_method(device_t dev, device_t child)
2124 {
2125 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2126 	return (0);
2127 }
2128 
2129 int
2130 pci_disable_busmaster_method(device_t dev, device_t child)
2131 {
2132 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2133 	return (0);
2134 }
2135 
2136 int
2137 pci_enable_io_method(device_t dev, device_t child, int space)
2138 {
2139 	uint16_t command;
2140 	uint16_t bit;
2141 	char *error;
2142 
2143 	bit = 0;
2144 	error = NULL;
2145 
2146 	switch(space) {
2147 	case SYS_RES_IOPORT:
2148 		bit = PCIM_CMD_PORTEN;
2149 		error = "port";
2150 		break;
2151 	case SYS_RES_MEMORY:
2152 		bit = PCIM_CMD_MEMEN;
2153 		error = "memory";
2154 		break;
2155 	default:
2156 		return (EINVAL);
2157 	}
2158 	pci_set_command_bit(dev, child, bit);
2159 	/* Some devices seem to need a brief stall here, what do to? */
2160 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2161 	if (command & bit)
2162 		return (0);
2163 	device_printf(child, "failed to enable %s mapping!\n", error);
2164 	return (ENXIO);
2165 }
2166 
2167 int
2168 pci_disable_io_method(device_t dev, device_t child, int space)
2169 {
2170 	uint16_t command;
2171 	uint16_t bit;
2172 	char *error;
2173 
2174 	bit = 0;
2175 	error = NULL;
2176 
2177 	switch(space) {
2178 	case SYS_RES_IOPORT:
2179 		bit = PCIM_CMD_PORTEN;
2180 		error = "port";
2181 		break;
2182 	case SYS_RES_MEMORY:
2183 		bit = PCIM_CMD_MEMEN;
2184 		error = "memory";
2185 		break;
2186 	default:
2187 		return (EINVAL);
2188 	}
2189 	pci_clear_command_bit(dev, child, bit);
2190 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2191 	if (command & bit) {
2192 		device_printf(child, "failed to disable %s mapping!\n", error);
2193 		return (ENXIO);
2194 	}
2195 	return (0);
2196 }
2197 
2198 /*
2199  * New style pci driver.  Parent device is either a pci-host-bridge or a
2200  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2201  */
2202 
/*
 * Dump the parsed config-space fields of a device to the console.
 * Emits output only when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management: supported and current D-states. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			/* MSI capability: message count and feature bits. */
			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/* MSI-X capability: table/PBA BAR placement. */
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2259 
2260 static int
2261 pci_porten(device_t pcib, int b, int s, int f)
2262 {
2263 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2264 		& PCIM_CMD_PORTEN) != 0;
2265 }
2266 
2267 static int
2268 pci_memen(device_t pcib, int b, int s, int f)
2269 {
2270 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2271 		& PCIM_CMD_MEMEN) != 0;
2272 }
2273 
2274 /*
2275  * Add a resource based on a pci map register. Return 1 if the map
2276  * register is a 32bit map register or 2 if it is a 64bit register.
2277  */
2278 static int
2279 pci_add_map(device_t pcib, device_t bus, device_t dev,
2280     int b, int s, int f, int reg, struct resource_list *rl, int force,
2281     int prefetch)
2282 {
2283 	pci_addr_t base, map;
2284 	pci_addr_t start, end, count;
2285 	uint8_t ln2size;
2286 	uint8_t ln2range;
2287 	uint32_t testval;
2288 	uint16_t cmd;
2289 	int type;
2290 	int barlen;
2291 	struct resource *res;
2292 
2293 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2294 	ln2range = pci_maprange(map);
2295 	if (ln2range == 64)
2296 		map |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) <<
2297 		    32;
2298 
2299 	/*
2300 	 * Disable decoding via the command register before
2301 	 * determining the BAR's length since we will be placing it in
2302 	 * a weird state.
2303 	 */
2304 	cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2305 	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND,
2306 	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);
2307 
2308 	/*
2309 	 * Determine the BAR's length by writing all 1's.  The bottom
2310 	 * log_2(size) bits of the BAR will stick as 0 when we read
2311 	 * the value back.
2312 	 */
2313 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2314 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2315 	if (ln2range == 64) {
2316 		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, 0xffffffff, 4);
2317 		testval |= (uint64_t)PCIB_READ_CONFIG(pcib, b, s, f, reg + 4,
2318 		    4) << 32;
2319 	}
2320 
2321 	/* Restore the BAR and command register. */
2322 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2323 	if (ln2range == 64)
2324 		PCIB_WRITE_CONFIG(pcib, b, s, f, reg + 4, map >> 32, 4);
2325 	PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2326 
2327 	if (PCI_BAR_MEM(map)) {
2328 		type = SYS_RES_MEMORY;
2329 		if (map & PCIM_BAR_MEM_PREFETCH)
2330 			prefetch = 1;
2331 	} else
2332 		type = SYS_RES_IOPORT;
2333 	ln2size = pci_mapsize(testval);
2334 	base = pci_mapbase(map);
2335 	barlen = ln2range == 64 ? 2 : 1;
2336 
2337 	/*
2338 	 * For I/O registers, if bottom bit is set, and the next bit up
2339 	 * isn't clear, we know we have a BAR that doesn't conform to the
2340 	 * spec, so ignore it.  Also, sanity check the size of the data
2341 	 * areas to the type of memory involved.  Memory must be at least
2342 	 * 16 bytes in size, while I/O ranges must be at least 4.
2343 	 */
2344 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2345 		return (barlen);
2346 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2347 	    (type == SYS_RES_IOPORT && ln2size < 2))
2348 		return (barlen);
2349 
2350 	if (bootverbose) {
2351 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2352 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2353 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2354 			printf(", port disabled\n");
2355 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2356 			printf(", memory disabled\n");
2357 		else
2358 			printf(", enabled\n");
2359 	}
2360 
2361 	/*
2362 	 * If base is 0, then we have problems.  It is best to ignore
2363 	 * such entries for the moment.  These will be allocated later if
2364 	 * the driver specifically requests them.  However, some
2365 	 * removable busses look better when all resources are allocated,
2366 	 * so allow '0' to be overriden.
2367 	 *
2368 	 * Similarly treat maps whose values is the same as the test value
2369 	 * read back.  These maps have had all f's written to them by the
2370 	 * BIOS in an attempt to disable the resources.
2371 	 */
2372 	if (!force && (base == 0 || map == testval))
2373 		return (barlen);
2374 	if ((u_long)base != base) {
2375 		device_printf(bus,
2376 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2377 		    pci_get_domain(dev), b, s, f, reg);
2378 		return (barlen);
2379 	}
2380 
2381 	/*
2382 	 * This code theoretically does the right thing, but has
2383 	 * undesirable side effects in some cases where peripherals
2384 	 * respond oddly to having these bits enabled.  Let the user
2385 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2386 	 * default).
2387 	 */
2388 	if (pci_enable_io_modes) {
2389 		/* Turn on resources that have been left off by a lazy BIOS */
2390 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2391 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2392 			cmd |= PCIM_CMD_PORTEN;
2393 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2394 		}
2395 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2396 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2397 			cmd |= PCIM_CMD_MEMEN;
2398 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2399 		}
2400 	} else {
2401 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2402 			return (barlen);
2403 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2404 			return (barlen);
2405 	}
2406 
2407 	count = 1 << ln2size;
2408 	if (base == 0 || base == pci_mapbase(testval)) {
2409 		start = 0;	/* Let the parent decide. */
2410 		end = ~0ULL;
2411 	} else {
2412 		start = base;
2413 		end = base + (1 << ln2size) - 1;
2414 	}
2415 	resource_list_add(rl, type, reg, start, end, count);
2416 
2417 	/*
2418 	 * Try to allocate the resource for this BAR from our parent
2419 	 * so that this resource range is already reserved.  The
2420 	 * driver for this device will later inherit this resource in
2421 	 * pci_alloc_resource().
2422 	 */
2423 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2424 	    prefetch ? RF_PREFETCHABLE : 0);
2425 	if (res == NULL) {
2426 		/*
2427 		 * If the allocation fails, clear the BAR and delete
2428 		 * the resource list entry to force
2429 		 * pci_alloc_resource() to allocate resources from the
2430 		 * parent.
2431 		 */
2432 		resource_list_delete(rl, type, reg);
2433 		start = 0;
2434 	} else {
2435 		start = rman_get_start(res);
2436 		rman_set_device(res, bus);
2437 	}
2438 	pci_write_config(dev, reg, start, 4);
2439 	if (ln2range == 64)
2440 		pci_write_config(dev, reg + 4, start >> 32, 4);
2441 	return (barlen);
2442 }
2443 
2444 /*
2445  * For ATA devices we need to decide early what addressing mode to use.
2446  * Legacy demands that the primary and secondary ATA ports sits on the
2447  * same addresses that old ISA hardware did. This dictates that we use
2448  * those addresses and ignore the BAR's if we cannot set PCI native
2449  * addressing mode.
2450  */
2451 static void
2452 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2453     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2454 {
2455 	struct resource *r;
2456 	int rid, type, progif;
2457 #if 0
2458 	/* if this device supports PCI native addressing use it */
2459 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2460 	if ((progif & 0x8a) == 0x8a) {
2461 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2462 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2463 			printf("Trying ATA native PCI addressing mode\n");
2464 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2465 		}
2466 	}
2467 #endif
2468 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2469 	type = SYS_RES_IOPORT;
2470 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2471 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2472 		    prefetchmask & (1 << 0));
2473 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2474 		    prefetchmask & (1 << 1));
2475 	} else {
2476 		rid = PCIR_BAR(0);
2477 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2478 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
2479 		    8, 0);
2480 		rman_set_device(r, bus);
2481 		rid = PCIR_BAR(1);
2482 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2483 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
2484 		    1, 0);
2485 		rman_set_device(r, bus);
2486 	}
2487 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2488 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2489 		    prefetchmask & (1 << 2));
2490 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2491 		    prefetchmask & (1 << 3));
2492 	} else {
2493 		rid = PCIR_BAR(2);
2494 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2495 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
2496 		    8, 0);
2497 		rman_set_device(r, bus);
2498 		rid = PCIR_BAR(3);
2499 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2500 		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
2501 		    1, 0);
2502 		rman_set_device(r, bus);
2503 	}
2504 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2505 	    prefetchmask & (1 << 4));
2506 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2507 	    prefetchmask & (1 << 5));
2508 }
2509 
2510 static void
2511 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2512 {
2513 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2514 	pcicfgregs *cfg = &dinfo->cfg;
2515 	char tunable_name[64];
2516 	int irq;
2517 
2518 	/* Has to have an intpin to have an interrupt. */
2519 	if (cfg->intpin == 0)
2520 		return;
2521 
2522 	/* Let the user override the IRQ with a tunable. */
2523 	irq = PCI_INVALID_IRQ;
2524 	snprintf(tunable_name, sizeof(tunable_name),
2525 	    "hw.pci%d.%d.%d.INT%c.irq",
2526 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2527 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2528 		irq = PCI_INVALID_IRQ;
2529 
2530 	/*
2531 	 * If we didn't get an IRQ via the tunable, then we either use the
2532 	 * IRQ value in the intline register or we ask the bus to route an
2533 	 * interrupt for us.  If force_route is true, then we only use the
2534 	 * value in the intline register if the bus was unable to assign an
2535 	 * IRQ.
2536 	 */
2537 	if (!PCI_INTERRUPT_VALID(irq)) {
2538 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2539 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2540 		if (!PCI_INTERRUPT_VALID(irq))
2541 			irq = cfg->intline;
2542 	}
2543 
2544 	/* If after all that we don't have an IRQ, just bail. */
2545 	if (!PCI_INTERRUPT_VALID(irq))
2546 		return;
2547 
2548 	/* Update the config register if it changed. */
2549 	if (irq != cfg->intline) {
2550 		cfg->intline = irq;
2551 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2552 	}
2553 
2554 	/* Add this IRQ as rid 0 interrupt resource. */
2555 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2556 }
2557 
/*
 * Populate a child device's resource list: scan its BARs (with special
 * handling for ATA controllers), add any quirked extra map registers,
 * and route its INTx interrupt if it has one.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/*
	 * ATA devices need special map treatment: controllers in legacy
	 * (master-device) mode, or with BARs 0 and 2 unprogrammed, use
	 * the fixed ISA-compatible port ranges instead of their BARs.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many BAR slots it consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2610 
2611 void
2612 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2613 {
2614 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2615 	device_t pcib = device_get_parent(dev);
2616 	struct pci_devinfo *dinfo;
2617 	int maxslots;
2618 	int s, f, pcifunchigh;
2619 	uint8_t hdrtype;
2620 
2621 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2622 	    ("dinfo_size too small"));
2623 	maxslots = PCIB_MAXSLOTS(pcib);
2624 	for (s = 0; s <= maxslots; s++) {
2625 		pcifunchigh = 0;
2626 		f = 0;
2627 		DELAY(1);
2628 		hdrtype = REG(PCIR_HDRTYPE, 1);
2629 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2630 			continue;
2631 		if (hdrtype & PCIM_MFDEV)
2632 			pcifunchigh = PCI_FUNCMAX;
2633 		for (f = 0; f <= pcifunchigh; f++) {
2634 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2635 			    dinfo_size);
2636 			if (dinfo != NULL) {
2637 				pci_add_child(dev, dinfo);
2638 			}
2639 		}
2640 	}
2641 #undef REG
2642 }
2643 
/*
 * Attach a discovered PCI function to the bus: create its device_t,
 * wire up its ivars, then save and restore its config state before
 * scanning its BARs.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Save then restore config state before probing resources.
	 * NOTE(review): the save/restore pair appears intended to leave
	 * the device in a known configuration — confirm against
	 * pci_cfg_save()/pci_cfg_restore() definitions elsewhere in
	 * this file.
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2655 
2656 static int
2657 pci_probe(device_t dev)
2658 {
2659 
2660 	device_set_desc(dev, "PCI bus");
2661 
2662 	/* Allow other subclasses to override this driver. */
2663 	return (BUS_PROBE_GENERIC);
2664 }
2665 
2666 static int
2667 pci_attach(device_t dev)
2668 {
2669 	int busno, domain;
2670 
2671 	/*
2672 	 * Since there can be multiple independantly numbered PCI
2673 	 * busses on systems with multiple PCI domains, we can't use
2674 	 * the unit number to decide which bus we are probing. We ask
2675 	 * the parent pcib what our domain and bus numbers are.
2676 	 */
2677 	domain = pcib_get_domain(dev);
2678 	busno = pcib_get_bus(dev);
2679 	if (bootverbose)
2680 		device_printf(dev, "domain=%d, physical bus=%d\n",
2681 		    domain, busno);
2682 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2683 	return (bus_generic_attach(dev));
2684 }
2685 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children via the generic bus code, then place attached type 0
 * children into a low-power state (D3 unless ACPI suggests otherwise).
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2734 
/*
 * Bus resume method: power attached type 0 children back to D0 (with
 * ACPI notification when available), restore each child's saved config
 * space, then resume the children via the generic bus code.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2770 
2771 static void
2772 pci_load_vendor_data(void)
2773 {
2774 	caddr_t vendordata, info;
2775 
2776 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2777 		info = preload_search_info(vendordata, MODINFO_ADDR);
2778 		pci_vendordata = *(char **)info;
2779 		info = preload_search_info(vendordata, MODINFO_SIZE);
2780 		pci_vendordata_size = *(size_t *)info;
2781 		/* terminate the database */
2782 		pci_vendordata[pci_vendordata_size] = '\n';
2783 	}
2784 }
2785 
2786 void
2787 pci_driver_added(device_t dev, driver_t *driver)
2788 {
2789 	int numdevs;
2790 	device_t *devlist;
2791 	device_t child;
2792 	struct pci_devinfo *dinfo;
2793 	int i;
2794 
2795 	if (bootverbose)
2796 		device_printf(dev, "driver added\n");
2797 	DEVICE_IDENTIFY(driver, dev);
2798 	if (device_get_children(dev, &devlist, &numdevs) != 0)
2799 		return;
2800 	for (i = 0; i < numdevs; i++) {
2801 		child = devlist[i];
2802 		if (device_get_state(child) != DS_NOTPRESENT)
2803 			continue;
2804 		dinfo = device_get_ivars(child);
2805 		pci_print_verbose(dinfo);
2806 		if (bootverbose)
2807 			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
2808 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2809 			    dinfo->cfg.func);
2810 		pci_cfg_restore(child, dinfo);
2811 		if (device_probe_and_attach(child) != 0)
2812 			pci_cfg_save(child, dinfo, 1);
2813 	}
2814 	free(devlist, M_TEMP);
2815 }
2816 
/*
 * Bus setup_intr method.  Installs the handler via the generic bus
 * code, then programs interrupt delivery for direct children: rid 0 is
 * the legacy INTx interrupt, while rids >= 1 are MSI or MSI-X messages
 * whose address/data pairs are obtained lazily from the parent bridge
 * via PCIB_MAP_MSI().
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the vectors on first handler setup. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: locate this rid's table entry and vector. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map this vector on its first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the message for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2906 
2907 int
2908 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2909     void *cookie)
2910 {
2911 	struct msix_table_entry *mte;
2912 	struct resource_list_entry *rle;
2913 	struct pci_devinfo *dinfo;
2914 	int error, rid;
2915 
2916 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2917 		return (EINVAL);
2918 
2919 	/* If this isn't a direct child, just bail out */
2920 	if (device_get_parent(child) != dev)
2921 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
2922 
2923 	rid = rman_get_rid(irq);
2924 	if (rid == 0) {
2925 		/* Mask INTx */
2926 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
2927 	} else {
2928 		/*
2929 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
2930 		 * decrement the appropriate handlers count and mask the
2931 		 * MSI-X message, or disable MSI messages if the count
2932 		 * drops to 0.
2933 		 */
2934 		dinfo = device_get_ivars(child);
2935 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2936 		if (rle->res != irq)
2937 			return (EINVAL);
2938 		if (dinfo->cfg.msi.msi_alloc > 0) {
2939 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2940 			    ("MSI-X index too high"));
2941 			if (dinfo->cfg.msi.msi_handlers == 0)
2942 				return (EINVAL);
2943 			dinfo->cfg.msi.msi_handlers--;
2944 			if (dinfo->cfg.msi.msi_handlers == 0)
2945 				pci_disable_msi(child);
2946 		} else {
2947 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2948 			    ("No MSI or MSI-X interrupts allocated"));
2949 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2950 			    ("MSI-X index too high"));
2951 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2952 			if (mte->mte_handlers == 0)
2953 				return (EINVAL);
2954 			mte->mte_handlers--;
2955 			if (mte->mte_handlers == 0)
2956 				pci_mask_msix(child, rid - 1);
2957 		}
2958 	}
2959 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2960 	if (rid > 0)
2961 		KASSERT(error == 0,
2962 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2963 	return (error);
2964 }
2965 
2966 int
2967 pci_print_child(device_t dev, device_t child)
2968 {
2969 	struct pci_devinfo *dinfo;
2970 	struct resource_list *rl;
2971 	int retval = 0;
2972 
2973 	dinfo = device_get_ivars(child);
2974 	rl = &dinfo->resources;
2975 
2976 	retval += bus_print_child_header(dev, child);
2977 
2978 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2979 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2980 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2981 	if (device_get_flags(dev))
2982 		retval += printf(" flags %#x", device_get_flags(dev));
2983 
2984 	retval += printf(" at device %d.%d", pci_get_slot(child),
2985 	    pci_get_function(child));
2986 
2987 	retval += bus_print_child_footer(dev, child);
2988 
2989 	return (retval);
2990 }
2991 
/*
 * Class/subclass to human-readable description table used by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry.  A subclass of -1 gives the generic description for the
 * whole class; the table is scanned linearly and terminated by a NULL
 * desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3083 
3084 void
3085 pci_probe_nomatch(device_t dev, device_t child)
3086 {
3087 	int	i;
3088 	char	*cp, *scp, *device;
3089 
3090 	/*
3091 	 * Look for a listing for this device in a loaded device database.
3092 	 */
3093 	if ((device = pci_describe_device(child)) != NULL) {
3094 		device_printf(dev, "<%s>", device);
3095 		free(device, M_DEVBUF);
3096 	} else {
3097 		/*
3098 		 * Scan the class/subclass descriptions for a general
3099 		 * description.
3100 		 */
3101 		cp = "unknown";
3102 		scp = NULL;
3103 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3104 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3105 				if (pci_nomatch_tab[i].subclass == -1) {
3106 					cp = pci_nomatch_tab[i].desc;
3107 				} else if (pci_nomatch_tab[i].subclass ==
3108 				    pci_get_subclass(child)) {
3109 					scp = pci_nomatch_tab[i].desc;
3110 				}
3111 			}
3112 		}
3113 		device_printf(dev, "<%s%s%s>",
3114 		    cp ? cp : "",
3115 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3116 		    scp ? scp : "");
3117 	}
3118 	printf(" at device %d.%d (no driver attached)\n",
3119 	    pci_get_slot(child), pci_get_function(child));
3120 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3121 	return;
3122 }
3123 
3124 /*
3125  * Parse the PCI device database, if loaded, and return a pointer to a
3126  * description of the device.
3127  *
3128  * The database is flat text formatted as follows:
3129  *
3130  * Any line not in a valid format is ignored.
3131  * Lines are terminated with newline '\n' characters.
3132  *
3133  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3134  * the vendor name.
3135  *
3136  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3137  * - devices cannot be listed without a corresponding VENDOR line.
3138  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3139  * another TAB, then the device name.
3140  */
3141 
3142 /*
3143  * Assuming (ptr) points to the beginning of a line in the database,
3144  * return the vendor or device and description of the next entry.
3145  * The value of (vendor) or (device) inappropriate for the entry type
3146  * is set to -1.  Returns nonzero at the end of the database.
3147  *
 * Note that this is not fully robust in the face of corrupt data;
3149  * we attempt to safeguard against this by spamming the end of the
3150  * database with a newline when we initialise.
3151  */
3152 static int
3153 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3154 {
3155 	char	*cp = *ptr;
3156 	int	left;
3157 
3158 	*device = -1;
3159 	*vendor = -1;
3160 	**desc = '\0';
3161 	for (;;) {
3162 		left = pci_vendordata_size - (cp - pci_vendordata);
3163 		if (left <= 0) {
3164 			*ptr = cp;
3165 			return(1);
3166 		}
3167 
3168 		/* vendor entry? */
3169 		if (*cp != '\t' &&
3170 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3171 			break;
3172 		/* device entry? */
3173 		if (*cp == '\t' &&
3174 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3175 			break;
3176 
3177 		/* skip to next line */
3178 		while (*cp != '\n' && left > 0) {
3179 			cp++;
3180 			left--;
3181 		}
3182 		if (*cp == '\n') {
3183 			cp++;
3184 			left--;
3185 		}
3186 	}
3187 	/* skip to next line */
3188 	while (*cp != '\n' && left > 0) {
3189 		cp++;
3190 		left--;
3191 	}
3192 	if (*cp == '\n' && left > 0)
3193 		cp++;
3194 	*ptr = cp;
3195 	return(0);
3196 }
3197 
/*
 * Build a "vendor, device" description string for 'dev' from the loaded
 * vendor database.  Returns a string allocated from M_DEVBUF which the
 * caller must free, or NULL if no vendor data is loaded or allocation
 * fails.  If the vendor is found but the device is not, the device part
 * falls back to the hex device ID.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte buffers match the limit used by pci_describe_parse_line() */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device match, use the hex fallback. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor entry: this vendor has no such device. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3250 
3251 int
3252 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3253 {
3254 	struct pci_devinfo *dinfo;
3255 	pcicfgregs *cfg;
3256 
3257 	dinfo = device_get_ivars(child);
3258 	cfg = &dinfo->cfg;
3259 
3260 	switch (which) {
3261 	case PCI_IVAR_ETHADDR:
3262 		/*
3263 		 * The generic accessor doesn't deal with failure, so
3264 		 * we set the return value, then return an error.
3265 		 */
3266 		*((uint8_t **) result) = NULL;
3267 		return (EINVAL);
3268 	case PCI_IVAR_SUBVENDOR:
3269 		*result = cfg->subvendor;
3270 		break;
3271 	case PCI_IVAR_SUBDEVICE:
3272 		*result = cfg->subdevice;
3273 		break;
3274 	case PCI_IVAR_VENDOR:
3275 		*result = cfg->vendor;
3276 		break;
3277 	case PCI_IVAR_DEVICE:
3278 		*result = cfg->device;
3279 		break;
3280 	case PCI_IVAR_DEVID:
3281 		*result = (cfg->device << 16) | cfg->vendor;
3282 		break;
3283 	case PCI_IVAR_CLASS:
3284 		*result = cfg->baseclass;
3285 		break;
3286 	case PCI_IVAR_SUBCLASS:
3287 		*result = cfg->subclass;
3288 		break;
3289 	case PCI_IVAR_PROGIF:
3290 		*result = cfg->progif;
3291 		break;
3292 	case PCI_IVAR_REVID:
3293 		*result = cfg->revid;
3294 		break;
3295 	case PCI_IVAR_INTPIN:
3296 		*result = cfg->intpin;
3297 		break;
3298 	case PCI_IVAR_IRQ:
3299 		*result = cfg->intline;
3300 		break;
3301 	case PCI_IVAR_DOMAIN:
3302 		*result = cfg->domain;
3303 		break;
3304 	case PCI_IVAR_BUS:
3305 		*result = cfg->bus;
3306 		break;
3307 	case PCI_IVAR_SLOT:
3308 		*result = cfg->slot;
3309 		break;
3310 	case PCI_IVAR_FUNCTION:
3311 		*result = cfg->func;
3312 		break;
3313 	case PCI_IVAR_CMDREG:
3314 		*result = cfg->cmdreg;
3315 		break;
3316 	case PCI_IVAR_CACHELNSZ:
3317 		*result = cfg->cachelnsz;
3318 		break;
3319 	case PCI_IVAR_MINGNT:
3320 		*result = cfg->mingnt;
3321 		break;
3322 	case PCI_IVAR_MAXLAT:
3323 		*result = cfg->maxlat;
3324 		break;
3325 	case PCI_IVAR_LATTIMER:
3326 		*result = cfg->lattimer;
3327 		break;
3328 	default:
3329 		return (ENOENT);
3330 	}
3331 	return (0);
3332 }
3333 
3334 int
3335 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3336 {
3337 	struct pci_devinfo *dinfo;
3338 
3339 	dinfo = device_get_ivars(child);
3340 
3341 	switch (which) {
3342 	case PCI_IVAR_INTPIN:
3343 		dinfo->cfg.intpin = value;
3344 		return (0);
3345 	case PCI_IVAR_ETHADDR:
3346 	case PCI_IVAR_SUBVENDOR:
3347 	case PCI_IVAR_SUBDEVICE:
3348 	case PCI_IVAR_VENDOR:
3349 	case PCI_IVAR_DEVICE:
3350 	case PCI_IVAR_DEVID:
3351 	case PCI_IVAR_CLASS:
3352 	case PCI_IVAR_SUBCLASS:
3353 	case PCI_IVAR_PROGIF:
3354 	case PCI_IVAR_REVID:
3355 	case PCI_IVAR_IRQ:
3356 	case PCI_IVAR_DOMAIN:
3357 	case PCI_IVAR_BUS:
3358 	case PCI_IVAR_SLOT:
3359 	case PCI_IVAR_FUNCTION:
3360 		return (EINVAL);	/* disallow for now */
3361 
3362 	default:
3363 		return (ENOENT);
3364 	}
3365 }
3366 
3367 
3368 #include "opt_ddb.h"
3369 #ifdef DDB
3370 #include <ddb/ddb.h>
3371 #include <sys/cons.h>
3372 
3373 /*
3374  * List resources based on pci map registers, used for within ddb
3375  */
3376 
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Devices without an attached driver are printed as "none<N>". */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices.  Stop
	 * early if the ddb pager is quit.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3416 #endif /* DDB */
3417 
/*
 * Lazily back a BAR with a resource.  *rid is the config-space offset of
 * the BAR.  The BAR is sized by writing all-ones while decoding is
 * disabled in the command register, the original value is restored, a
 * suitably sized and aligned resource is allocated from the parent bus
 * (owned by the bus, not the child), recorded on the child's resource
 * list, and finally the BAR is programmed with the chosen base address.
 * Returns NULL for unimplemented BARs or on type mismatch / allocation
 * failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	uint16_t cmd;
	int maprange, mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	maprange = pci_maprange(map);
	if (maprange == 64)
		/* 64-bit BAR: the upper half lives in the next register. */
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(child, PCIR_COMMAND, 2);
	pci_write_config(child, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/* Determine the BAR's length. */
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (maprange == 64) {
		pci_write_config(child, *rid + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) <<
		    32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);
	if (maprange == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	pci_write_config(child, PCIR_COMMAND, cmd, 2);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* The bus owns the resource until the child allocates it. */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address we actually obtained. */
	map = rman_get_start(res);
	pci_write_config(child, *rid, map, 4);
	if (maprange == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
out:;
	return (res);
}
3540 
3541 
/*
 * Bus resource allocation method.  For direct children, IRQ rid 0 may
 * trigger lazy interrupt routing (and is refused once MSI/MSI-X
 * messages are allocated), while memory/ioport rids are lazily backed
 * by pci_alloc_map(); BAR resources owned by the bus are then handed
 * over to the child.  Everything else falls through to the generic
 * resource-list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Requests for grandchildren are passed straight up the tree. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the standard resource-list allocation. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3613 
3614 int
3615 pci_release_resource(device_t dev, device_t child, int type, int rid,
3616     struct resource *r)
3617 {
3618 	int error;
3619 
3620 	if (device_get_parent(child) != dev)
3621 		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
3622 		    type, rid, r));
3623 
3624 	/*
3625 	 * For BARs we don't actually want to release the resource.
3626 	 * Instead, we deactivate the resource if needed and then give
3627 	 * ownership of the BAR back to the bus.
3628 	 */
3629 	switch (type) {
3630 	case SYS_RES_IOPORT:
3631 	case SYS_RES_MEMORY:
3632 		if (rman_get_device(r) != child)
3633 			return (EINVAL);
3634 		if (rman_get_flags(r) & RF_ACTIVE) {
3635 			error = bus_deactivate_resource(child, type, rid, r);
3636 			if (error)
3637 				return (error);
3638 		}
3639 		rman_set_device(r, dev);
3640 		return (0);
3641 	}
3642 	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
3643 }
3644 
3645 int
3646 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3647     struct resource *r)
3648 {
3649 	int error;
3650 
3651 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3652 	if (error)
3653 		return (error);
3654 
3655 	/* Enable decoding in the command register when activating BARs. */
3656 	if (device_get_parent(child) == dev) {
3657 		switch (type) {
3658 		case SYS_RES_IOPORT:
3659 		case SYS_RES_MEMORY:
3660 			error = PCI_ENABLE_IO(dev, child, type);
3661 			break;
3662 		}
3663 	}
3664 	return (error);
3665 }
3666 
/*
 * Remove a resource entry from a direct child's resource list.  Refuses
 * (with a diagnostic) if the child still owns or has activated the
 * resource.  For BAR resources the BAR register is cleared first so the
 * device stops decoding the range before the resource is released.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* XXX: 64-bit BARs? */
			pci_write_config(child, rid, 0, 4);
			break;
		}
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3709 
3710 struct resource_list *
3711 pci_get_resource_list (device_t dev, device_t child)
3712 {
3713 	struct pci_devinfo *dinfo = device_get_ivars(child);
3714 
3715 	return (&dinfo->resources);
3716 }
3717 
3718 uint32_t
3719 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3720 {
3721 	struct pci_devinfo *dinfo = device_get_ivars(child);
3722 	pcicfgregs *cfg = &dinfo->cfg;
3723 
3724 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3725 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3726 }
3727 
3728 void
3729 pci_write_config_method(device_t dev, device_t child, int reg,
3730     uint32_t val, int width)
3731 {
3732 	struct pci_devinfo *dinfo = device_get_ivars(child);
3733 	pcicfgregs *cfg = &dinfo->cfg;
3734 
3735 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3736 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3737 }
3738 
3739 int
3740 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3741     size_t buflen)
3742 {
3743 
3744 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3745 	    pci_get_function(child));
3746 	return (0);
3747 }
3748 
3749 int
3750 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3751     size_t buflen)
3752 {
3753 	struct pci_devinfo *dinfo;
3754 	pcicfgregs *cfg;
3755 
3756 	dinfo = device_get_ivars(child);
3757 	cfg = &dinfo->cfg;
3758 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3759 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3760 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3761 	    cfg->progif);
3762 	return (0);
3763 }
3764 
3765 int
3766 pci_assign_interrupt_method(device_t dev, device_t child)
3767 {
3768 	struct pci_devinfo *dinfo = device_get_ivars(child);
3769 	pcicfgregs *cfg = &dinfo->cfg;
3770 
3771 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3772 	    cfg->intpin));
3773 }
3774 
3775 static int
3776 pci_modevent(module_t mod, int what, void *arg)
3777 {
3778 	static struct cdev *pci_cdev;
3779 
3780 	switch (what) {
3781 	case MOD_LOAD:
3782 		STAILQ_INIT(&pci_devq);
3783 		pci_generation = 0;
3784 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3785 		    "pci");
3786 		pci_load_vendor_data();
3787 		break;
3788 
3789 	case MOD_UNLOAD:
3790 		destroy_dev(pci_cdev);
3791 		break;
3792 	}
3793 
3794 	return (0);
3795 }
3796 
/*
 * Re-program the type-0 configuration registers of 'dev' (BARs, command
 * register, interrupt routing, timing values) from the copies saved in
 * dinfo by pci_cfg_save(), after first forcing the device into D0, and
 * then restore any MSI/MSI-X state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3842 
/*
 * Snapshot the writable type-0 configuration registers of 'dev' into
 * dinfo so pci_cfg_restore() can re-program them later.  If 'setstate'
 * is non-zero, the device may additionally be powered down to D3
 * according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3926