xref: /freebsd/sys/dev/pci/pci.c (revision c0020399a650364d0134f79f3fa319f84064372d)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static pci_addr_t	pci_mapbase(uint64_t mapreg);
75 static const char	*pci_maptype(uint64_t mapreg);
76 static int		pci_mapsize(uint64_t testval);
77 static int		pci_maprange(uint64_t mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t dev);
81 static int		pci_memen(device_t dev);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t bus, device_t dev, int reg,
85 			    struct resource_list *rl, int force, int prefetch);
86 static int		pci_probe(device_t dev);
87 static int		pci_attach(device_t dev);
88 static void		pci_load_vendor_data(void);
89 static int		pci_describe_parse_line(char **ptr, int *vendor,
90 			    int *device, char **desc);
91 static char		*pci_describe_device(device_t dev);
92 static int		pci_modevent(module_t mod, int what, void *arg);
93 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
94 			    pcicfgregs *cfg);
95 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
96 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
97 			    int reg, uint32_t *data);
98 #if 0
99 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
100 			    int reg, uint32_t data);
101 #endif
102 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
103 static void		pci_disable_msi(device_t dev);
104 static void		pci_enable_msi(device_t dev, uint64_t address,
105 			    uint16_t data);
106 static void		pci_enable_msix(device_t dev, u_int index,
107 			    uint64_t address, uint32_t data);
108 static void		pci_mask_msix(device_t dev, u_int index);
109 static void		pci_unmask_msix(device_t dev, u_int index);
110 static int		pci_msi_blacklisted(void);
111 static void		pci_resume_msi(device_t dev);
112 static void		pci_resume_msix(device_t dev);
113 
/*
 * Method dispatch table for the "pci" bus driver.  The driver attaches
 * below each pcib (bridge) device and implements the generic device,
 * bus, and PCI kobj interfaces for the functions it enumerates.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Resource management, largely via per-child resource lists */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
165 
/* Declare the pci driver class and register it as a child of pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Backing store for the vendor/device description database; populated
 * on demand by pci_load_vendor_data() and parsed by
 * pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
174 
175 
/*
 * A device-specific workaround.  The type selects which quirk applies;
 * arg1/arg2 carry quirk-specific parameters (e.g. the config-space
 * offset of the extra map register for PCI_QUIRK_MAP_REG).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
184 
/* Table of known-broken devices, keyed by combined device/vendor ID. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* zero devid terminates the table */
};
219 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of enumerated PCI functions */
uint32_t pci_generation;	/* bumped on every list change */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set once any PCI-express / PCI-X capability is seen during scan. */
static int pcie_chipset, pcix_chipset;
229 
230 /* sysctl vars */
231 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
232 
233 static int pci_enable_io_modes = 1;
234 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
235 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
236     &pci_enable_io_modes, 1,
237     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
238 enable these bits correctly.  We'd like to do this all the time, but there\n\
239 are some peripherals that this causes problems with.");
240 
241 static int pci_do_power_nodriver = 0;
242 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
243 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
244     &pci_do_power_nodriver, 0,
245   "Place a function into D3 state when no driver attaches to it.  0 means\n\
246 disable.  1 means conservatively place devices into D3 state.  2 means\n\
247 agressively place devices into D3 state.  3 means put absolutely everything\n\
248 in D3 state.");
249 
250 static int pci_do_power_resume = 1;
251 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
252 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
253     &pci_do_power_resume, 1,
254   "Transition from D3 -> D0 on resume.");
255 
256 static int pci_do_msi = 1;
257 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
258 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
259     "Enable support for MSI interrupts");
260 
261 static int pci_do_msix = 1;
262 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
263 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
264     "Enable support for MSI-X interrupts");
265 
266 static int pci_honor_msi_blacklist = 1;
267 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
268 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
269     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
270 
/*
 * Find a device_t by bus/slot/function in domain 0; thin convenience
 * wrapper around pci_find_dbsf().  Returns NULL if no such function
 * has been enumerated.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
279 
280 /* Find a device_t by domain/bus/slot/function */
281 
282 device_t
283 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
284 {
285 	struct pci_devinfo *dinfo;
286 
287 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
288 		if ((dinfo->cfg.domain == domain) &&
289 		    (dinfo->cfg.bus == bus) &&
290 		    (dinfo->cfg.slot == slot) &&
291 		    (dinfo->cfg.func == func)) {
292 			return (dinfo->cfg.dev);
293 		}
294 	}
295 
296 	return (NULL);
297 }
298 
299 /* Find a device_t by vendor/device ID */
300 
301 device_t
302 pci_find_device(uint16_t vendor, uint16_t device)
303 {
304 	struct pci_devinfo *dinfo;
305 
306 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
307 		if ((dinfo->cfg.vendor == vendor) &&
308 		    (dinfo->cfg.device == device)) {
309 			return (dinfo->cfg.dev);
310 		}
311 	}
312 
313 	return (NULL);
314 }
315 
316 /* return base address of memory or port map */
317 
318 static pci_addr_t
319 pci_mapbase(uint64_t mapreg)
320 {
321 
322 	if (PCI_BAR_MEM(mapreg))
323 		return (mapreg & PCIM_BAR_MEM_BASE);
324 	else
325 		return (mapreg & PCIM_BAR_IO_BASE);
326 }
327 
328 /* return map type of memory or port map */
329 
330 static const char *
331 pci_maptype(uint64_t mapreg)
332 {
333 
334 	if (PCI_BAR_IO(mapreg))
335 		return ("I/O Port");
336 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
337 		return ("Prefetchable Memory");
338 	return ("Memory");
339 }
340 
341 /* return log2 of map size decoded for memory or port map */
342 
343 static int
344 pci_mapsize(uint64_t testval)
345 {
346 	int ln2size;
347 
348 	testval = pci_mapbase(testval);
349 	ln2size = 0;
350 	if (testval != 0) {
351 		while ((testval & 1) == 0)
352 		{
353 			ln2size++;
354 			testval >>= 1;
355 		}
356 	}
357 	return (ln2size);
358 }
359 
360 /* return log2 of address range supported by map register */
361 
362 static int
363 pci_maprange(uint64_t mapreg)
364 {
365 	int ln2range = 0;
366 
367 	if (PCI_BAR_IO(mapreg))
368 		ln2range = 32;
369 	else
370 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
371 		case PCIM_BAR_MEM_32:
372 			ln2range = 32;
373 			break;
374 		case PCIM_BAR_MEM_1MB:
375 			ln2range = 20;
376 			break;
377 		case PCIM_BAR_MEM_64:
378 			ln2range = 64;
379 			break;
380 		}
381 	return (ln2range);
382 }
383 
384 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
385 
386 static void
387 pci_fixancient(pcicfgregs *cfg)
388 {
389 	if (cfg->hdrtype != 0)
390 		return;
391 
392 	/* PCI to PCI bridges use header type 1 */
393 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
394 		cfg->hdrtype = 1;
395 }
396 
/* extract header type specific config data */

/*
 * Fill in the fields of *cfg whose config-space location depends on
 * the header type: the subvendor/subdevice IDs and the number of BARs.
 * Type 1 (PCI-PCI bridge) headers have no standard subvendor register.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
420 
/* read configuration header into pcicfgregs structure */

/*
 * Probe the function at domain d, bus b, slot s, function f through
 * bridge pcib.  If a device responds, allocate a pci_devinfo of
 * `size' bytes (callers may embed it in a larger structure), fill in
 * its config registers and pciconf snapshot, link it onto the global
 * device queue, and return it.  Returns NULL if no device is present.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID read means no function at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard (type-independent) header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multifunction flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciconf(8) snapshot. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
495 
/*
 * Walk the device's capability list and record the location and
 * contents of the capabilities this driver cares about (power
 * management, HyperTransport MSI mapping, MSI, MSI-X, VPD,
 * subvendor, PCI-X, PCI-express) into *cfg.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability list head pointer depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only record the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* Message count is encoded as a power of two. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* BIR field selects the BAR holding table/PBA. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
629 
630 /*
631  * PCI Vital Product Data
632  */
633 
634 #define	PCI_VPD_TIMEOUT		1000000
635 
636 static int
637 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
638 {
639 	int count = PCI_VPD_TIMEOUT;
640 
641 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
642 
643 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
644 
645 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
646 		if (--count < 0)
647 			return (ENXIO);
648 		DELAY(1);	/* limit looping */
649 	}
650 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
651 
652 	return (0);
653 }
654 
#if 0
/*
 * Write one 32-bit word of VPD data at VPD address `reg': load the
 * data register, then write the address with the flag bit set; the
 * device clears bit 15 when the write has completed.  Returns 0 on
 * success or ENXIO on timeout.  Currently unused (compiled out).
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
674 
675 #undef PCI_VPD_TIMEOUT
676 
/*
 * Cursor for sequential byte-at-a-time VPD reads.  Data is fetched a
 * 32-bit word at a time via pci_read_vpd_reg(); `val' buffers the
 * current word, `bytesinval' counts unconsumed bytes left in it, and
 * `off' is the next VPD address to fetch.  `cksum' keeps the running
 * byte sum used to validate the read-only VPD section.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recently fetched word */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
685 
686 static int
687 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
688 {
689 	uint32_t reg;
690 	uint8_t byte;
691 
692 	if (vrs->bytesinval == 0) {
693 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
694 			return (ENXIO);
695 		vrs->val = le32toh(reg);
696 		vrs->off += 4;
697 		byte = vrs->val & 0xff;
698 		vrs->bytesinval = 3;
699 	} else {
700 		vrs->val = vrs->val >> 8;
701 		byte = vrs->val & 0xff;
702 		vrs->bytesinval--;
703 	}
704 
705 	vrs->cksum += byte;
706 	*data = byte;
707 	return (0);
708 }
709 
/*
 * Parse the device's Vital Product Data into cfg->vpd.  This is a
 * small state machine driven one byte at a time by vpd_nextbyte():
 *
 *   state 0: resource item header (small or large form)
 *   state 1: identifier string body
 *   state 2: VPD-R (read-only) keyword header
 *   state 3: VPD-R keyword value
 *   state 4: skip bytes until the current item is exhausted
 *   state 5: VPD-W (read-write) keyword header
 *   state 6: VPD-W keyword value
 *   state -1: normal termination, state -2: I/O error
 *
 * On checksum failure the read-only data is discarded; on I/O error
 * everything is discarded.  cfg->vpd.vpd_cached is set regardless so
 * the (possibly failed) read is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is limited to 0x7f words of data. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the keyword array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The RV keyword's first byte makes the running
			 * sum of the whole read-only section come to 0
			 * when the data is intact.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip remaining bytes of an ignored item. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
989 
990 int
991 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
992 {
993 	struct pci_devinfo *dinfo = device_get_ivars(child);
994 	pcicfgregs *cfg = &dinfo->cfg;
995 
996 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
997 		pci_read_vpd(device_get_parent(dev), cfg);
998 
999 	*identptr = cfg->vpd.vpd_ident;
1000 
1001 	if (*identptr == NULL)
1002 		return (ENXIO);
1003 
1004 	return (0);
1005 }
1006 
1007 int
1008 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1009 	const char **vptr)
1010 {
1011 	struct pci_devinfo *dinfo = device_get_ivars(child);
1012 	pcicfgregs *cfg = &dinfo->cfg;
1013 	int i;
1014 
1015 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1016 		pci_read_vpd(device_get_parent(dev), cfg);
1017 
1018 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1019 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1020 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1021 			*vptr = cfg->vpd.vpd_ros[i].value;
1022 		}
1023 
1024 	if (i != cfg->vpd.vpd_rocnt)
1025 		return (0);
1026 
1027 	*vptr = NULL;
1028 	return (ENXIO);
1029 }
1030 
1031 /*
1032  * Find the requested extended capability and return the offset in
1033  * configuration space via the pointer provided. The function returns
1034  * 0 on success and error code otherwise.
1035  */
1036 int
1037 pci_find_extcap_method(device_t dev, device_t child, int capability,
1038     int *capreg)
1039 {
1040 	struct pci_devinfo *dinfo = device_get_ivars(child);
1041 	pcicfgregs *cfg = &dinfo->cfg;
1042 	u_int32_t status;
1043 	u_int8_t ptr;
1044 
1045 	/*
1046 	 * Check the CAP_LIST bit of the PCI status register first.
1047 	 */
1048 	status = pci_read_config(child, PCIR_STATUS, 2);
1049 	if (!(status & PCIM_STATUS_CAPPRESENT))
1050 		return (ENXIO);
1051 
1052 	/*
1053 	 * Determine the start pointer of the capabilities list.
1054 	 */
1055 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1056 	case 0:
1057 	case 1:
1058 		ptr = PCIR_CAP_PTR;
1059 		break;
1060 	case 2:
1061 		ptr = PCIR_CAP_PTR_2;
1062 		break;
1063 	default:
1064 		/* XXX: panic? */
1065 		return (ENXIO);		/* no extended capabilities support */
1066 	}
1067 	ptr = pci_read_config(child, ptr, 1);
1068 
1069 	/*
1070 	 * Traverse the capabilities list.
1071 	 */
1072 	while (ptr != 0) {
1073 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1074 			if (capreg != NULL)
1075 				*capreg = ptr;
1076 			return (0);
1077 		}
1078 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1079 	}
1080 
1081 	return (ENOENT);
1082 }
1083 
1084 /*
1085  * Support for MSI-X message interrupts.
1086  */
1087 void
1088 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1089 {
1090 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1091 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1092 	uint32_t offset;
1093 
1094 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1095 	offset = msix->msix_table_offset + index * 16;
1096 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1097 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1098 	bus_write_4(msix->msix_table_res, offset + 8, data);
1099 
1100 	/* Enable MSI -> HT mapping. */
1101 	pci_ht_map_msi(dev, address);
1102 }
1103 
1104 void
1105 pci_mask_msix(device_t dev, u_int index)
1106 {
1107 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1108 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1109 	uint32_t offset, val;
1110 
1111 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1112 	offset = msix->msix_table_offset + index * 16 + 12;
1113 	val = bus_read_4(msix->msix_table_res, offset);
1114 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1115 		val |= PCIM_MSIX_VCTRL_MASK;
1116 		bus_write_4(msix->msix_table_res, offset, val);
1117 	}
1118 }
1119 
1120 void
1121 pci_unmask_msix(device_t dev, u_int index)
1122 {
1123 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1124 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1125 	uint32_t offset, val;
1126 
1127 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1128 	offset = msix->msix_table_offset + index * 16 + 12;
1129 	val = bus_read_4(msix->msix_table_res, offset);
1130 	if (val & PCIM_MSIX_VCTRL_MASK) {
1131 		val &= ~PCIM_MSIX_VCTRL_MASK;
1132 		bus_write_4(msix->msix_table_res, offset, val);
1133 	}
1134 }
1135 
1136 int
1137 pci_pending_msix(device_t dev, u_int index)
1138 {
1139 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1140 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1141 	uint32_t offset, bit;
1142 
1143 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1144 	offset = msix->msix_pba_offset + (index / 32) * 4;
1145 	bit = 1 << index % 32;
1146 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1147 }
1148 
1149 /*
1150  * Restore MSI-X registers and table during resume.  If MSI-X is
1151  * enabled then walk the virtual table to restore the actual MSI-X
1152  * table.
1153  */
1154 static void
1155 pci_resume_msix(device_t dev)
1156 {
1157 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1158 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1159 	struct msix_table_entry *mte;
1160 	struct msix_vector *mv;
1161 	int i;
1162 
1163 	if (msix->msix_alloc > 0) {
1164 		/* First, mask all vectors. */
1165 		for (i = 0; i < msix->msix_msgnum; i++)
1166 			pci_mask_msix(dev, i);
1167 
1168 		/* Second, program any messages with at least one handler. */
1169 		for (i = 0; i < msix->msix_table_len; i++) {
1170 			mte = &msix->msix_table[i];
1171 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1172 				continue;
1173 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1174 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1175 			pci_unmask_msix(dev, i);
1176 		}
1177 	}
1178 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1179 	    msix->msix_ctrl, 2);
1180 }
1181 
1182 /*
1183  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1184  * returned in *count.  After this function returns, each message will be
1185  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1186  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BARs holding
	 * the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * When the PBA shares the table BAR, 'rle' still refers to the
	 * table BAR resource at this point, so this assignment is
	 * correct in both cases.
	 */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate up to the smaller of the request and the hw limit. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' may be fewer than requested if the parent ran out. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors before enabling MSI-X in the control register. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.  The
	 * default mapping assigns IRQ i+1 to table entry i; drivers may
	 * rearrange this later via pci_remap_msix().
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1318 
1319 /*
1320  * By default, pci_alloc_msix() will assign the allocated IRQ
1321  * resources consecutively to the first N messages in the MSI-X table.
1322  * However, device drivers may want to use different layouts if they
1323  * either receive fewer messages than they asked for, or they wish to
1324  * populate the MSI-X table sparsely.  This method allows the driver
1325  * to specify what layout it wants.  It must be called after a
1326  * successful pci_alloc_msix() but before any of the associated
1327  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1328  *
1329  * The 'vectors' array contains 'count' message vectors.  The array
1330  * maps directly to the MSI-X table in that index 0 in the array
1331  * specifies the vector for the first message in the MSI-X table, etc.
1332  * The vector value in each array index can either be 0 to indicate
1333  * that no vector should be assigned to a message slot, or it can be a
1334  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1336  * vector (IRQ) to be used for the corresponding message.
1337  *
1338  * On successful return, each message with a non-zero vector will have
1339  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1340  * 1.  Additionally, if any of the IRQs allocated via the previous
1341  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1342  * will be freed back to the system automatically.
1343  *
1344  * For example, suppose a driver has a MSI-X table with 6 messages and
1345  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1346  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1347  * C.  After the call to pci_alloc_msix(), the device will be setup to
1348  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1350  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1351  * be freed back to the system.  This device will also have valid
1352  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1353  *
1354  * In any case, the SYS_RES_IRQ rid X will always map to the message
1355  * at MSI-X table index X - 1 and will only be valid if a vector is
1356  * assigned to that table entry.
1357  */
1358 int
1359 pci_remap_msix_method(device_t dev, device_t child, int count,
1360     const u_int *vectors)
1361 {
1362 	struct pci_devinfo *dinfo = device_get_ivars(child);
1363 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1364 	struct resource_list_entry *rle;
1365 	int i, irq, j, *used;
1366 
1367 	/*
1368 	 * Have to have at least one message in the table but the
1369 	 * table can't be bigger than the actual MSI-X table in the
1370 	 * device.
1371 	 */
1372 	if (count == 0 || count > msix->msix_msgnum)
1373 		return (EINVAL);
1374 
1375 	/* Sanity check the vectors. */
1376 	for (i = 0; i < count; i++)
1377 		if (vectors[i] > msix->msix_alloc)
1378 			return (EINVAL);
1379 
1380 	/*
1381 	 * Make sure there aren't any holes in the vectors to be used.
1382 	 * It's a big pain to support it, and it doesn't really make
1383 	 * sense anyway.  Also, at least one vector must be used.
1384 	 */
1385 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1386 	    M_ZERO);
1387 	for (i = 0; i < count; i++)
1388 		if (vectors[i] != 0)
1389 			used[vectors[i] - 1] = 1;
1390 	for (i = 0; i < msix->msix_alloc - 1; i++)
1391 		if (used[i] == 0 && used[i + 1] == 1) {
1392 			free(used, M_DEVBUF);
1393 			return (EINVAL);
1394 		}
1395 	if (used[0] != 1) {
1396 		free(used, M_DEVBUF);
1397 		return (EINVAL);
1398 	}
1399 
1400 	/* Make sure none of the resources are allocated. */
1401 	for (i = 0; i < msix->msix_table_len; i++) {
1402 		if (msix->msix_table[i].mte_vector == 0)
1403 			continue;
1404 		if (msix->msix_table[i].mte_handlers > 0)
1405 			return (EBUSY);
1406 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1407 		KASSERT(rle != NULL, ("missing resource"));
1408 		if (rle->res != NULL)
1409 			return (EBUSY);
1410 	}
1411 
1412 	/* Free the existing resource list entries. */
1413 	for (i = 0; i < msix->msix_table_len; i++) {
1414 		if (msix->msix_table[i].mte_vector == 0)
1415 			continue;
1416 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1417 	}
1418 
1419 	/*
1420 	 * Build the new virtual table keeping track of which vectors are
1421 	 * used.
1422 	 */
1423 	free(msix->msix_table, M_DEVBUF);
1424 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1425 	    M_DEVBUF, M_WAITOK | M_ZERO);
1426 	for (i = 0; i < count; i++)
1427 		msix->msix_table[i].mte_vector = vectors[i];
1428 	msix->msix_table_len = count;
1429 
1430 	/* Free any unused IRQs and resize the vectors array if necessary. */
1431 	j = msix->msix_alloc - 1;
1432 	if (used[j] == 0) {
1433 		struct msix_vector *vec;
1434 
1435 		while (used[j] == 0) {
1436 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1437 			    msix->msix_vectors[j].mv_irq);
1438 			j--;
1439 		}
1440 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1441 		    M_WAITOK);
1442 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1443 		    (j + 1));
1444 		free(msix->msix_vectors, M_DEVBUF);
1445 		msix->msix_vectors = vec;
1446 		msix->msix_alloc = j + 1;
1447 	}
1448 	free(used, M_DEVBUF);
1449 
1450 	/* Map the IRQs onto the rids. */
1451 	for (i = 0; i < count; i++) {
1452 		if (vectors[i] == 0)
1453 			continue;
1454 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1455 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1456 		    irq, 1);
1457 	}
1458 
1459 	if (bootverbose) {
1460 		device_printf(child, "Remapped MSI-X IRQs as: ");
1461 		for (i = 0; i < count; i++) {
1462 			if (i != 0)
1463 				printf(", ");
1464 			if (vectors[i] == 0)
1465 				printf("---");
1466 			else
1467 				printf("%d",
1468 				    msix->msix_vectors[vectors[i]].mv_irq);
1469 		}
1470 		printf("\n");
1471 	}
1472 
1473 	return (0);
1474 }
1475 
/*
 * Disable MSI-X on 'child' and release all of its allocated messages.
 * Returns ENODEV when no MSI-X messages are allocated (so the caller,
 * pci_release_msi_method(), can fall through to plain MSI) and EBUSY if
 * any message still has a handler or an allocated IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries (rids map to table index + 1). */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs back to the parent bridge. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1522 
1523 /*
1524  * Return the max supported MSI-X messages this device supports.
1525  * Basically, assuming the MD code can alloc messages, this function
1526  * should return the maximum value that pci_alloc_msix() can return.
1527  * Thus, it is subject to the tunables, etc.
1528  */
1529 int
1530 pci_msix_count_method(device_t dev, device_t child)
1531 {
1532 	struct pci_devinfo *dinfo = device_get_ivars(child);
1533 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1534 
1535 	if (pci_do_msix && msix->msix_location != 0)
1536 		return (msix->msix_msgnum);
1537 	return (0);
1538 }
1539 
1540 /*
1541  * HyperTransport MSI mapping control
1542  */
1543 void
1544 pci_ht_map_msi(device_t dev, uint64_t addr)
1545 {
1546 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1547 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1548 
1549 	if (!ht->ht_msimap)
1550 		return;
1551 
1552 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1553 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1554 		/* Enable MSI -> HT mapping. */
1555 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1556 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1557 		    ht->ht_msictrl, 2);
1558 	}
1559 
1560 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1561 		/* Disable MSI -> HT mapping. */
1562 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1563 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1564 		    ht->ht_msictrl, 2);
1565 	}
1566 }
1567 
1568 /*
1569  * Support for MSI message signalled interrupts.
1570  */
1571 void
1572 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1573 {
1574 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1575 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1576 
1577 	/* Write data and address values. */
1578 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1579 	    address & 0xffffffff, 4);
1580 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1581 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1582 		    address >> 32, 4);
1583 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1584 		    data, 2);
1585 	} else
1586 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1587 		    2);
1588 
1589 	/* Enable MSI in the control register. */
1590 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1591 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1592 	    2);
1593 
1594 	/* Enable MSI -> HT mapping. */
1595 	pci_ht_map_msi(dev, address);
1596 }
1597 
1598 void
1599 pci_disable_msi(device_t dev)
1600 {
1601 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1602 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1603 
1604 	/* Disable MSI -> HT mapping. */
1605 	pci_ht_map_msi(dev, 0);
1606 
1607 	/* Disable MSI in the control register. */
1608 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1609 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1610 	    2);
1611 }
1612 
1613 /*
1614  * Restore MSI registers during resume.  If MSI is enabled then
1615  * restore the data and address registers in addition to the control
1616  * register.
1617  */
1618 static void
1619 pci_resume_msi(device_t dev)
1620 {
1621 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1622 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1623 	uint64_t address;
1624 	uint16_t data;
1625 
1626 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1627 		address = msi->msi_addr;
1628 		data = msi->msi_data;
1629 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1630 		    address & 0xffffffff, 4);
1631 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1632 			pci_write_config(dev, msi->msi_location +
1633 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1634 			pci_write_config(dev, msi->msi_location +
1635 			    PCIR_MSI_DATA_64BIT, data, 2);
1636 		} else
1637 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1638 			    data, 2);
1639 	}
1640 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1641 	    2);
1642 }
1643 
/*
 * Reprogram the address/data pair of an already-allocated MSI or MSI-X
 * IRQ after the upstream bridge has moved it (e.g. to another CPU).
 * Returns 0 on success, ENOENT if 'irq' is not one of this device's
 * messages, or the error from PCIB_MAP_MSI().
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		/* MSI rids start at 1; rid i+1 holds message i. */
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/*
				 * Disable, cache the new values, then
				 * re-enable with the updated programming.
				 */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/*
				 * One vector may feed several table slots;
				 * update every active slot using it.
				 * mte_vector is 1-based, hence i + 1.
				 */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1719 
1720 /*
1721  * Returns true if the specified device is blacklisted because MSI
1722  * doesn't work.
1723  */
1724 int
1725 pci_msi_device_blacklisted(device_t dev)
1726 {
1727 	struct pci_quirk *q;
1728 
1729 	if (!pci_honor_msi_blacklist)
1730 		return (0);
1731 
1732 	for (q = &pci_quirks[0]; q->devid; q++) {
1733 		if (q->devid == pci_get_devid(dev) &&
1734 		    q->type == PCI_QUIRK_DISABLE_MSI)
1735 			return (1);
1736 	}
1737 	return (0);
1738 }
1739 
1740 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1742  * we just check for blacklisted chipsets as represented by the
1743  * host-PCI bridge at device 0:0:0.  In the future, it may become
1744  * necessary to check other system attributes, such as the kenv values
1745  * that give the motherboard manufacturer and model number.
1746  */
1747 static int
1748 pci_msi_blacklisted(void)
1749 {
1750 	device_t dev;
1751 
1752 	if (!pci_honor_msi_blacklist)
1753 		return (0);
1754 
1755 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1756 	if (!(pcie_chipset || pcix_chipset))
1757 		return (1);
1758 
1759 	dev = pci_find_bsf(0, 0, 0);
1760 	if (dev != NULL)
1761 		return (pci_msi_device_blacklisted(dev));
1762 	return (0);
1763 }
1764 
1765 /*
1766  * Attempt to allocate *count MSI messages.  The actual number allocated is
1767  * returned in *count.  After this function returns, each message will be
1768  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1769  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages (the MSI architectural max). */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Keep halving the request until the parent bridge can satisfy
	 * it; give up only when even a single message fails.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes the message count as a power of two: log2(actual)
	 * in bits 6:4, hence (ffs(actual) - 1) << 4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1888 
1889 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV when no
	 * MSI-X messages are allocated, in which case we fall through
	 * to releasing plain MSI messages instead.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  While
	 * checking, collect the IRQ numbers so they can be handed back
	 * to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1937 
1938 /*
1939  * Return the max supported MSI messages this device supports.
1940  * Basically, assuming the MD code can alloc messages, this function
1941  * should return the maximum value that pci_alloc_msi() can return.
1942  * Thus, it is subject to the tunables, etc.
1943  */
1944 int
1945 pci_msi_count_method(device_t dev, device_t child)
1946 {
1947 	struct pci_devinfo *dinfo = device_get_ivars(child);
1948 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1949 
1950 	if (pci_do_msi && msi->msi_location != 0)
1951 		return (msi->msi_msgnum);
1952 	return (0);
1953 }
1954 
1955 /* free pcicfgregs structure and all depending data structures */
1956 
1957 int
1958 pci_freecfg(struct pci_devinfo *dinfo)
1959 {
1960 	struct devlist *devlist_head;
1961 	int i;
1962 
1963 	devlist_head = &pci_devq;
1964 
1965 	if (dinfo->cfg.vpd.vpd_reg) {
1966 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1967 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1968 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1969 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1970 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1971 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1972 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1973 	}
1974 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1975 	free(dinfo, M_DEVBUF);
1976 
1977 	/* increment the generation count */
1978 	pci_generation++;
1979 
1980 	/* we're losing one device */
1981 	pci_numdevs--;
1982 	return (0);
1983 }
1984 
1985 /*
1986  * PCI power manangement
1987  */
1988 int
1989 pci_set_powerstate_method(device_t dev, device_t child, int state)
1990 {
1991 	struct pci_devinfo *dinfo = device_get_ivars(child);
1992 	pcicfgregs *cfg = &dinfo->cfg;
1993 	uint16_t status;
1994 	int result, oldstate, highest, delay;
1995 
1996 	if (cfg->pp.pp_cap == 0)
1997 		return (EOPNOTSUPP);
1998 
1999 	/*
2000 	 * Optimize a no state change request away.  While it would be OK to
2001 	 * write to the hardware in theory, some devices have shown odd
2002 	 * behavior when going from D3 -> D3.
2003 	 */
2004 	oldstate = pci_get_powerstate(child);
2005 	if (oldstate == state)
2006 		return (0);
2007 
2008 	/*
2009 	 * The PCI power management specification states that after a state
2010 	 * transition between PCI power states, system software must
2011 	 * guarantee a minimal delay before the function accesses the device.
2012 	 * Compute the worst case delay that we need to guarantee before we
2013 	 * access the device.  Many devices will be responsive much more
2014 	 * quickly than this delay, but there are some that don't respond
2015 	 * instantly to state changes.  Transitions to/from D3 state require
2016 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2017 	 * is done below with DELAY rather than a sleeper function because
2018 	 * this function can be called from contexts where we cannot sleep.
2019 	 */
2020 	highest = (oldstate > state) ? oldstate : state;
2021 	if (highest == PCI_POWERSTATE_D3)
2022 	    delay = 10000;
2023 	else if (highest == PCI_POWERSTATE_D2)
2024 	    delay = 200;
2025 	else
2026 	    delay = 0;
2027 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2028 	    & ~PCIM_PSTAT_DMASK;
2029 	result = 0;
2030 	switch (state) {
2031 	case PCI_POWERSTATE_D0:
2032 		status |= PCIM_PSTAT_D0;
2033 		break;
2034 	case PCI_POWERSTATE_D1:
2035 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2036 			return (EOPNOTSUPP);
2037 		status |= PCIM_PSTAT_D1;
2038 		break;
2039 	case PCI_POWERSTATE_D2:
2040 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2041 			return (EOPNOTSUPP);
2042 		status |= PCIM_PSTAT_D2;
2043 		break;
2044 	case PCI_POWERSTATE_D3:
2045 		status |= PCIM_PSTAT_D3;
2046 		break;
2047 	default:
2048 		return (EINVAL);
2049 	}
2050 
2051 	if (bootverbose)
2052 		printf(
2053 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2054 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2055 		    dinfo->cfg.func, oldstate, state);
2056 
2057 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2058 	if (delay)
2059 		DELAY(delay);
2060 	return (0);
2061 }
2062 
2063 int
2064 pci_get_powerstate_method(device_t dev, device_t child)
2065 {
2066 	struct pci_devinfo *dinfo = device_get_ivars(child);
2067 	pcicfgregs *cfg = &dinfo->cfg;
2068 	uint16_t status;
2069 	int result;
2070 
2071 	if (cfg->pp.pp_cap != 0) {
2072 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2073 		switch (status & PCIM_PSTAT_DMASK) {
2074 		case PCIM_PSTAT_D0:
2075 			result = PCI_POWERSTATE_D0;
2076 			break;
2077 		case PCIM_PSTAT_D1:
2078 			result = PCI_POWERSTATE_D1;
2079 			break;
2080 		case PCIM_PSTAT_D2:
2081 			result = PCI_POWERSTATE_D2;
2082 			break;
2083 		case PCIM_PSTAT_D3:
2084 			result = PCI_POWERSTATE_D3;
2085 			break;
2086 		default:
2087 			result = PCI_POWERSTATE_UNKNOWN;
2088 			break;
2089 		}
2090 	} else {
2091 		/* No support, device is always at D0 */
2092 		result = PCI_POWERSTATE_D0;
2093 	}
2094 	return (result);
2095 }
2096 
2097 /*
2098  * Some convenience functions for PCI device drivers.
2099  */
2100 
2101 static __inline void
2102 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2103 {
2104 	uint16_t	command;
2105 
2106 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2107 	command |= bit;
2108 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2109 }
2110 
2111 static __inline void
2112 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2113 {
2114 	uint16_t	command;
2115 
2116 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2117 	command &= ~bit;
2118 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2119 }
2120 
2121 int
2122 pci_enable_busmaster_method(device_t dev, device_t child)
2123 {
2124 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2125 	return (0);
2126 }
2127 
2128 int
2129 pci_disable_busmaster_method(device_t dev, device_t child)
2130 {
2131 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2132 	return (0);
2133 }
2134 
2135 int
2136 pci_enable_io_method(device_t dev, device_t child, int space)
2137 {
2138 	uint16_t command;
2139 	uint16_t bit;
2140 	char *error;
2141 
2142 	bit = 0;
2143 	error = NULL;
2144 
2145 	switch(space) {
2146 	case SYS_RES_IOPORT:
2147 		bit = PCIM_CMD_PORTEN;
2148 		error = "port";
2149 		break;
2150 	case SYS_RES_MEMORY:
2151 		bit = PCIM_CMD_MEMEN;
2152 		error = "memory";
2153 		break;
2154 	default:
2155 		return (EINVAL);
2156 	}
2157 	pci_set_command_bit(dev, child, bit);
2158 	/* Some devices seem to need a brief stall here, what do to? */
2159 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2160 	if (command & bit)
2161 		return (0);
2162 	device_printf(child, "failed to enable %s mapping!\n", error);
2163 	return (ENXIO);
2164 }
2165 
2166 int
2167 pci_disable_io_method(device_t dev, device_t child, int space)
2168 {
2169 	uint16_t command;
2170 	uint16_t bit;
2171 	char *error;
2172 
2173 	bit = 0;
2174 	error = NULL;
2175 
2176 	switch(space) {
2177 	case SYS_RES_IOPORT:
2178 		bit = PCIM_CMD_PORTEN;
2179 		error = "port";
2180 		break;
2181 	case SYS_RES_MEMORY:
2182 		bit = PCIM_CMD_MEMEN;
2183 		error = "memory";
2184 		break;
2185 	default:
2186 		return (EINVAL);
2187 	}
2188 	pci_clear_command_bit(dev, child, bit);
2189 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2190 	if (command & bit) {
2191 		device_printf(child, "failed to disable %s mapping!\n", error);
2192 		return (ENXIO);
2193 	}
2194 	return (0);
2195 }
2196 
2197 /*
2198  * New style pci driver.  Parent device is either a pci-host-bridge or a
2199  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2200  */
2201 
/*
 * When booting verbose, dump the interesting parts of a device's
 * config header: IDs, location, class, command/status registers,
 * timing parameters, interrupt routing, and any power-management,
 * MSI, or MSI-X capabilities that were discovered.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 0 means the function uses no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live power state from the device. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may live in the same or separate BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2258 
2259 static int
2260 pci_porten(device_t dev)
2261 {
2262 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2263 }
2264 
2265 static int
2266 pci_memen(device_t dev)
2267 {
2268 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2269 }
2270 
/*
 * Read a BAR's current value and size it by writing all 1's.
 *
 * On return *mapp holds the original (possibly 64-bit) BAR value and
 * *testvalp the sizing probe result; the caller derives the BAR size
 * from the low bits that read back as zero.  Decoding is disabled in
 * the command register around the probe and the original BAR value
 * and command register are restored before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR's high half lives in the next dword register. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2317 
2318 static void
2319 pci_write_bar(device_t dev, int reg, pci_addr_t base)
2320 {
2321 	pci_addr_t map;
2322 	int ln2range;
2323 
2324 	map = pci_read_config(dev, reg, 4);
2325 	ln2range = pci_maprange(map);
2326 	pci_write_config(dev, reg, base, 4);
2327 	if (ln2range == 64)
2328 		pci_write_config(dev, reg + 4, base >> 32, 4);
2329 }
2330 
2331 /*
2332  * Add a resource based on a pci map register. Return 1 if the map
2333  * register is a 32bit map register or 2 if it is a 64bit register.
2334  */
2335 static int
2336 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2337     int force, int prefetch)
2338 {
2339 	pci_addr_t base, map, testval;
2340 	pci_addr_t start, end, count;
2341 	int barlen, maprange, mapsize, type;
2342 	uint16_t cmd;
2343 	struct resource *res;
2344 
2345 	pci_read_bar(dev, reg, &map, &testval);
2346 	if (PCI_BAR_MEM(map)) {
2347 		type = SYS_RES_MEMORY;
2348 		if (map & PCIM_BAR_MEM_PREFETCH)
2349 			prefetch = 1;
2350 	} else
2351 		type = SYS_RES_IOPORT;
2352 	mapsize = pci_mapsize(testval);
2353 	base = pci_mapbase(map);
2354 	maprange = pci_maprange(map);
2355 	barlen = maprange == 64 ? 2 : 1;
2356 
2357 	/*
2358 	 * For I/O registers, if bottom bit is set, and the next bit up
2359 	 * isn't clear, we know we have a BAR that doesn't conform to the
2360 	 * spec, so ignore it.  Also, sanity check the size of the data
2361 	 * areas to the type of memory involved.  Memory must be at least
2362 	 * 16 bytes in size, while I/O ranges must be at least 4.
2363 	 */
2364 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2365 		return (barlen);
2366 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2367 	    (type == SYS_RES_IOPORT && mapsize < 2))
2368 		return (barlen);
2369 
2370 	if (bootverbose) {
2371 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2372 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2373 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2374 			printf(", port disabled\n");
2375 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2376 			printf(", memory disabled\n");
2377 		else
2378 			printf(", enabled\n");
2379 	}
2380 
2381 	/*
2382 	 * If base is 0, then we have problems.  It is best to ignore
2383 	 * such entries for the moment.  These will be allocated later if
2384 	 * the driver specifically requests them.  However, some
2385 	 * removable busses look better when all resources are allocated,
2386 	 * so allow '0' to be overriden.
2387 	 *
2388 	 * Similarly treat maps whose values is the same as the test value
2389 	 * read back.  These maps have had all f's written to them by the
2390 	 * BIOS in an attempt to disable the resources.
2391 	 */
2392 	if (!force && (base == 0 || map == testval))
2393 		return (barlen);
2394 	if ((u_long)base != base) {
2395 		device_printf(bus,
2396 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2397 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2398 		    pci_get_function(dev), reg);
2399 		return (barlen);
2400 	}
2401 
2402 	/*
2403 	 * This code theoretically does the right thing, but has
2404 	 * undesirable side effects in some cases where peripherals
2405 	 * respond oddly to having these bits enabled.  Let the user
2406 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2407 	 * default).
2408 	 */
2409 	if (pci_enable_io_modes) {
2410 		/* Turn on resources that have been left off by a lazy BIOS */
2411 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2412 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2413 			cmd |= PCIM_CMD_PORTEN;
2414 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2415 		}
2416 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2417 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2418 			cmd |= PCIM_CMD_MEMEN;
2419 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2420 		}
2421 	} else {
2422 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2423 			return (barlen);
2424 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2425 			return (barlen);
2426 	}
2427 
2428 	count = 1 << mapsize;
2429 	if (base == 0 || base == pci_mapbase(testval)) {
2430 		start = 0;	/* Let the parent decide. */
2431 		end = ~0ULL;
2432 	} else {
2433 		start = base;
2434 		end = base + (1 << mapsize) - 1;
2435 	}
2436 	resource_list_add(rl, type, reg, start, end, count);
2437 
2438 	/*
2439 	 * Try to allocate the resource for this BAR from our parent
2440 	 * so that this resource range is already reserved.  The
2441 	 * driver for this device will later inherit this resource in
2442 	 * pci_alloc_resource().
2443 	 */
2444 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2445 	    prefetch ? RF_PREFETCHABLE : 0);
2446 	if (res == NULL) {
2447 		/*
2448 		 * If the allocation fails, clear the BAR and delete
2449 		 * the resource list entry to force
2450 		 * pci_alloc_resource() to allocate resources from the
2451 		 * parent.
2452 		 */
2453 		resource_list_delete(rl, type, reg);
2454 		start = 0;
2455 	} else {
2456 		start = rman_get_start(res);
2457 		rman_set_device(res, bus);
2458 	}
2459 	pci_write_bar(dev, reg, start);
2460 	return (barlen);
2461 }
2462 
/*
 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sits on the
 * same addresses that old ISA hardware did. This dictates that we use
 * those addresses and ignore the BAR's if we cannot set PCI native
 * addressing mode.
 */
static void
pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
    uint32_t prefetchmask)
{
	struct resource *r;
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: use the BARs in native mode, otherwise
	 * reserve the legacy ISA ranges (0x1f0-0x1f7 and 0x3f6).
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		/*
		 * NOTE(review): resource_list_alloc() can return NULL;
		 * rman_set_device(r, ...) is called without a check here —
		 * confirm the legacy ranges are always allocatable.
		 */
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7,
		    8, 0);
		rman_set_device(r, bus);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6,
		    1, 0);
		rman_set_device(r, bus);
	}
	/*
	 * Secondary channel: same choice with BAR(2)/BAR(3) vs the
	 * legacy 0x170-0x177 and 0x376 ranges.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177,
		    8, 0);
		rman_set_device(r, bus);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		r = resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376,
		    1, 0);
		rman_set_device(r, bus);
	}
	/* BAR(4)/BAR(5) (e.g. bus-master DMA registers) always come from BARs. */
	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2528 
/*
 * Pick the IRQ for a device's INTx pin and record it both in the
 * intline config register and as the rid 0 SYS_RES_IRQ resource.
 * Priority order: a user tunable, a routing request to the parent
 * bus (always attempted when force_route is set), then the existing
 * intline value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Discard tunable values outside 1..254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2576 
/*
 * Populate a device's resource list from its BARs.  ATA devices in
 * compatibility mode get legacy ISA ranges instead of their BARs,
 * quirked devices get extra map registers added, and a valid
 * intpin/intline pair is turned into the rid 0 IRQ resource.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns how many BAR registers it consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2621 
/*
 * Enumerate every slot/function on the given bus and add a child
 * device for each function found.  dinfo_size lets subclassed buses
 * embed struct pci_devinfo at the start of a larger per-device
 * structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots with a header type we don't understand. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may populate functions 1..7. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2654 
2655 void
2656 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2657 {
2658 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2659 	device_set_ivars(dinfo->cfg.dev, dinfo);
2660 	resource_list_init(&dinfo->resources);
2661 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2662 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2663 	pci_print_verbose(dinfo);
2664 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2665 }
2666 
2667 static int
2668 pci_probe(device_t dev)
2669 {
2670 
2671 	device_set_desc(dev, "PCI bus");
2672 
2673 	/* Allow other subclasses to override this driver. */
2674 	return (BUS_PROBE_GENERIC);
2675 }
2676 
2677 static int
2678 pci_attach(device_t dev)
2679 {
2680 	int busno, domain;
2681 
2682 	/*
2683 	 * Since there can be multiple independantly numbered PCI
2684 	 * busses on systems with multiple PCI domains, we can't use
2685 	 * the unit number to decide which bus we are probing. We ask
2686 	 * the parent pcib what our domain and bus numbers are.
2687 	 */
2688 	domain = pcib_get_domain(dev);
2689 	busno = pcib_get_bus(dev);
2690 	if (bootverbose)
2691 		device_printf(dev, "domain=%d, physical bus=%d\n",
2692 		    domain, busno);
2693 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2694 	return (bus_generic_attach(dev));
2695 }
2696 
/*
 * Suspend the PCI bus: save every child's config space, run the
 * generic suspend, then (when power management on resume is enabled
 * and ACPI is present) place attached type 0 children into the sleep
 * state ACPI suggests, defaulting to D3.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2745 
/*
 * Resume the PCI bus: bring attached type 0 children back to D0
 * (notifying ACPI when it is managing power), restore every child's
 * saved config space, then run the generic resume.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs, error;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2781 
2782 static void
2783 pci_load_vendor_data(void)
2784 {
2785 	caddr_t vendordata, info;
2786 
2787 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2788 		info = preload_search_info(vendordata, MODINFO_ADDR);
2789 		pci_vendordata = *(char **)info;
2790 		info = preload_search_info(vendordata, MODINFO_SIZE);
2791 		pci_vendordata_size = *(size_t *)info;
2792 		/* terminate the database */
2793 		pci_vendordata[pci_vendordata_size] = '\n';
2794 	}
2795 }
2796 
/*
 * Bus hook invoked when a new driver is registered: let the driver
 * identify children, then re-probe every child that is still
 * unattached, restoring its config space first and powering it back
 * down if probe/attach fails.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		/* If it still doesn't attach, save state and power down. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2827 
/*
 * Bus method: install an interrupt handler for a child device.
 *
 * After the generic setup succeeds, direct children also get the
 * hardware side programmed: rid 0 is legacy INTx (clear the INTx
 * disable bit), while rid > 0 is an MSI or MSI-X message whose
 * address/data pair is lazily mapped via the parent bridge and
 * written into the device on first use.  On a mapping failure the
 * just-installed handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map and enable MSI on the first handler only. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* MSI-X rids are 1-based; table entries 0-based. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector lazily the first time it is used. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask on the first handler only. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2917 
/*
 * Tear down an interrupt handler previously established with
 * pci_setup_intr().  For MSI/MSI-X vectors this also maintains the
 * per-vector (MSI-X) or per-device (MSI) handler counts, masking the
 * vector or disabling MSI when the last handler goes away.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	/* Only an active interrupt resource can have a handler set up. */
	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		/* The resource being torn down must be the one we tracked. */
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: rids 1..msi_alloc share one handler count. */
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			/* MSI-X: each vector has its own table entry. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	/*
	 * The MSI/MSI-X accounting above has already been committed, so
	 * generic teardown is expected to succeed for those rids.
	 */
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
2976 
2977 int
2978 pci_print_child(device_t dev, device_t child)
2979 {
2980 	struct pci_devinfo *dinfo;
2981 	struct resource_list *rl;
2982 	int retval = 0;
2983 
2984 	dinfo = device_get_ivars(child);
2985 	rl = &dinfo->resources;
2986 
2987 	retval += bus_print_child_header(dev, child);
2988 
2989 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2990 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2991 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2992 	if (device_get_flags(dev))
2993 		retval += printf(" flags %#x", device_get_flags(dev));
2994 
2995 	retval += printf(" at device %d.%d", pci_get_slot(child),
2996 	    pci_get_function(child));
2997 
2998 	retval += bus_print_child_footer(dev, child);
2999 
3000 	return (retval);
3001 }
3002 
/*
 * Class/subclass -> human readable description table, used by
 * pci_probe_nomatch() when the vendor database has no entry for a
 * device.  A subclass of -1 supplies the generic description for the
 * whole class; the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3094 
3095 void
3096 pci_probe_nomatch(device_t dev, device_t child)
3097 {
3098 	int	i;
3099 	char	*cp, *scp, *device;
3100 
3101 	/*
3102 	 * Look for a listing for this device in a loaded device database.
3103 	 */
3104 	if ((device = pci_describe_device(child)) != NULL) {
3105 		device_printf(dev, "<%s>", device);
3106 		free(device, M_DEVBUF);
3107 	} else {
3108 		/*
3109 		 * Scan the class/subclass descriptions for a general
3110 		 * description.
3111 		 */
3112 		cp = "unknown";
3113 		scp = NULL;
3114 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3115 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3116 				if (pci_nomatch_tab[i].subclass == -1) {
3117 					cp = pci_nomatch_tab[i].desc;
3118 				} else if (pci_nomatch_tab[i].subclass ==
3119 				    pci_get_subclass(child)) {
3120 					scp = pci_nomatch_tab[i].desc;
3121 				}
3122 			}
3123 		}
3124 		device_printf(dev, "<%s%s%s>",
3125 		    cp ? cp : "",
3126 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3127 		    scp ? scp : "");
3128 	}
3129 	printf(" at device %d.%d (no driver attached)\n",
3130 	    pci_get_slot(child), pci_get_function(child));
3131 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3132 	return;
3133 }
3134 
3135 /*
3136  * Parse the PCI device database, if loaded, and return a pointer to a
3137  * description of the device.
3138  *
3139  * The database is flat text formatted as follows:
3140  *
3141  * Any line not in a valid format is ignored.
3142  * Lines are terminated with newline '\n' characters.
3143  *
3144  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3145  * the vendor name.
3146  *
3147  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3148  * - devices cannot be listed without a corresponding VENDOR line.
3149  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3150  * another TAB, then the device name.
3151  */
3152 
3153 /*
3154  * Assuming (ptr) points to the beginning of a line in the database,
3155  * return the vendor or device and description of the next entry.
3156  * The value of (vendor) or (device) inappropriate for the entry type
3157  * is set to -1.  Returns nonzero at the end of the database.
3158  *
 * Note that this is slightly fragile in the face of corrupt data;
3160  * we attempt to safeguard against this by spamming the end of the
3161  * database with a newline when we initialise.
3162  */
3163 static int
3164 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3165 {
3166 	char	*cp = *ptr;
3167 	int	left;
3168 
3169 	*device = -1;
3170 	*vendor = -1;
3171 	**desc = '\0';
3172 	for (;;) {
3173 		left = pci_vendordata_size - (cp - pci_vendordata);
3174 		if (left <= 0) {
3175 			*ptr = cp;
3176 			return(1);
3177 		}
3178 
3179 		/* vendor entry? */
3180 		if (*cp != '\t' &&
3181 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3182 			break;
3183 		/* device entry? */
3184 		if (*cp == '\t' &&
3185 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3186 			break;
3187 
3188 		/* skip to next line */
3189 		while (*cp != '\n' && left > 0) {
3190 			cp++;
3191 			left--;
3192 		}
3193 		if (*cp == '\n') {
3194 			cp++;
3195 			left--;
3196 		}
3197 	}
3198 	/* skip to next line */
3199 	while (*cp != '\n' && left > 0) {
3200 		cp++;
3201 		left--;
3202 	}
3203 	if (*cp == '\n' && left > 0)
3204 		cp++;
3205 	*ptr = cp;
3206 	return(0);
3207 }
3208 
/*
 * Look up a device in the loaded vendor database and return a
 * malloc(9)ed "vendor, device" description string, or NULL if the
 * database is absent, the vendor is unknown, or allocation fails.
 * The caller frees the returned string with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers sized for pci_describe_parse_line(). */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database without a vendor match -> no description. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* Stop at end of database or at the next vendor entry. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Unknown device under a known vendor: show the raw device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3261 
3262 int
3263 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3264 {
3265 	struct pci_devinfo *dinfo;
3266 	pcicfgregs *cfg;
3267 
3268 	dinfo = device_get_ivars(child);
3269 	cfg = &dinfo->cfg;
3270 
3271 	switch (which) {
3272 	case PCI_IVAR_ETHADDR:
3273 		/*
3274 		 * The generic accessor doesn't deal with failure, so
3275 		 * we set the return value, then return an error.
3276 		 */
3277 		*((uint8_t **) result) = NULL;
3278 		return (EINVAL);
3279 	case PCI_IVAR_SUBVENDOR:
3280 		*result = cfg->subvendor;
3281 		break;
3282 	case PCI_IVAR_SUBDEVICE:
3283 		*result = cfg->subdevice;
3284 		break;
3285 	case PCI_IVAR_VENDOR:
3286 		*result = cfg->vendor;
3287 		break;
3288 	case PCI_IVAR_DEVICE:
3289 		*result = cfg->device;
3290 		break;
3291 	case PCI_IVAR_DEVID:
3292 		*result = (cfg->device << 16) | cfg->vendor;
3293 		break;
3294 	case PCI_IVAR_CLASS:
3295 		*result = cfg->baseclass;
3296 		break;
3297 	case PCI_IVAR_SUBCLASS:
3298 		*result = cfg->subclass;
3299 		break;
3300 	case PCI_IVAR_PROGIF:
3301 		*result = cfg->progif;
3302 		break;
3303 	case PCI_IVAR_REVID:
3304 		*result = cfg->revid;
3305 		break;
3306 	case PCI_IVAR_INTPIN:
3307 		*result = cfg->intpin;
3308 		break;
3309 	case PCI_IVAR_IRQ:
3310 		*result = cfg->intline;
3311 		break;
3312 	case PCI_IVAR_DOMAIN:
3313 		*result = cfg->domain;
3314 		break;
3315 	case PCI_IVAR_BUS:
3316 		*result = cfg->bus;
3317 		break;
3318 	case PCI_IVAR_SLOT:
3319 		*result = cfg->slot;
3320 		break;
3321 	case PCI_IVAR_FUNCTION:
3322 		*result = cfg->func;
3323 		break;
3324 	case PCI_IVAR_CMDREG:
3325 		*result = cfg->cmdreg;
3326 		break;
3327 	case PCI_IVAR_CACHELNSZ:
3328 		*result = cfg->cachelnsz;
3329 		break;
3330 	case PCI_IVAR_MINGNT:
3331 		*result = cfg->mingnt;
3332 		break;
3333 	case PCI_IVAR_MAXLAT:
3334 		*result = cfg->maxlat;
3335 		break;
3336 	case PCI_IVAR_LATTIMER:
3337 		*result = cfg->lattimer;
3338 		break;
3339 	default:
3340 		return (ENOENT);
3341 	}
3342 	return (0);
3343 }
3344 
3345 int
3346 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3347 {
3348 	struct pci_devinfo *dinfo;
3349 
3350 	dinfo = device_get_ivars(child);
3351 
3352 	switch (which) {
3353 	case PCI_IVAR_INTPIN:
3354 		dinfo->cfg.intpin = value;
3355 		return (0);
3356 	case PCI_IVAR_ETHADDR:
3357 	case PCI_IVAR_SUBVENDOR:
3358 	case PCI_IVAR_SUBDEVICE:
3359 	case PCI_IVAR_VENDOR:
3360 	case PCI_IVAR_DEVICE:
3361 	case PCI_IVAR_DEVID:
3362 	case PCI_IVAR_CLASS:
3363 	case PCI_IVAR_SUBCLASS:
3364 	case PCI_IVAR_PROGIF:
3365 	case PCI_IVAR_REVID:
3366 	case PCI_IVAR_IRQ:
3367 	case PCI_IVAR_DOMAIN:
3368 	case PCI_IVAR_BUS:
3369 	case PCI_IVAR_SLOT:
3370 	case PCI_IVAR_FUNCTION:
3371 		return (EINVAL);	/* disallow for now */
3372 
3373 	default:
3374 		return (ENOENT);
3375 	}
3376 }
3377 
3378 
3379 #include "opt_ddb.h"
3380 #ifdef DDB
3381 #include <ddb/ddb.h>
3382 #include <sys/cons.h>
3383 
3384 /*
3385  * List resources based on pci map registers, used for within ddb
3386  */
3387 
/*
 * "show pciregs" DDB command: walk the global PCI device queue and
 * print a one-line summary (name, selector, class, card, chip, rev,
 * header type) per device, similar to pciconf -l output.  Stops early
 * if the DDB pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without an attached driver print as "none<N>". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3427 #endif /* DDB */
3428 
/*
 * Lazily allocate the backing resource for a BAR.  The BAR is probed
 * for its real size, a suitably sized and naturally aligned resource
 * is allocated from the parent (owned by the bus, not the child),
 * recorded in the child's resource list, and the BAR is programmed
 * with the assigned address.  Returns NULL on any failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	pci_read_bar(child, *rid, &map, &testval);

	/* Ignore a BAR with a base of 0. */
	if (pci_mapbase(testval) == 0)
		goto out;

	/* The requested type must match what the BAR actually decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their decoded size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/*
	 * The bus owns the lazily allocated resource until the child
	 * actually allocates it via pci_alloc_resource().
	 */
	rman_set_device(res, dev);
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address that was assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, *rid, map);
out:;
	return (res);
}
3518 
3519 
/*
 * Allocate a resource for a child of the PCI bus.  Interrupt rids may
 * trigger lazy routing of the legacy INTx line; I/O and memory rids
 * trigger lazy BAR allocation via pci_alloc_map().  Resources held by
 * the bus from an earlier lazy allocation are handed over to the
 * child here.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	/* Pass requests for grandchildren straight up to our parent. */
	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Allocate resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_alloc_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
			rle = resource_list_find(rl, type, *rid);
		}

		/*
		 * If the resource belongs to the bus, then give it to
		 * the child.  We need to activate it if requested
		 * since the bus always allocates inactive resources.
		 */
		if (rle != NULL && rle->res != NULL &&
		    rman_get_device(rle->res) == dev) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			rman_set_device(rle->res, child);
			if ((flags & RF_ACTIVE) &&
			    bus_activate_resource(child, type, *rid,
			    rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3591 
/*
 * Release a resource held by a child.  BAR resources are not truly
 * released; they are deactivated if necessary and ownership is handed
 * back to the bus so the lazy allocation in pci_alloc_resource() can
 * reuse them.  All other resources are released generically.
 */
int
pci_release_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	int error;

	/* Pass requests for grandchildren straight up to our parent. */
	if (device_get_parent(child) != dev)
		return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child,
		    type, rid, r));

	/*
	 * For BARs we don't actually want to release the resource.
	 * Instead, we deactivate the resource if needed and then give
	 * ownership of the BAR back to the bus.
	 */
	switch (type) {
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		if (rman_get_device(r) != child)
			return (EINVAL);
		if (rman_get_flags(r) & RF_ACTIVE) {
			error = bus_deactivate_resource(child, type, rid, r);
			if (error)
				return (error);
		}
		rman_set_device(r, dev);
		return (0);
	}
	return (bus_generic_rl_release_resource(dev, child, type, rid, r));
}
3622 
3623 int
3624 pci_activate_resource(device_t dev, device_t child, int type, int rid,
3625     struct resource *r)
3626 {
3627 	int error;
3628 
3629 	error = bus_generic_activate_resource(dev, child, type, rid, r);
3630 	if (error)
3631 		return (error);
3632 
3633 	/* Enable decoding in the command register when activating BARs. */
3634 	if (device_get_parent(child) == dev) {
3635 		switch (type) {
3636 		case SYS_RES_IOPORT:
3637 		case SYS_RES_MEMORY:
3638 			error = PCI_ENABLE_IO(dev, child, type);
3639 			break;
3640 		}
3641 	}
3642 	return (error);
3643 }
3644 
/*
 * Delete a resource entry from a direct child's resource list.  Any
 * backing resource still owned by the bus is released first, clearing
 * the corresponding BAR so the device stops decoding the range.  A
 * resource still owned (or kept active) by the child is left alone
 * with a diagnostic.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only resources of direct children are tracked here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_device(rle->res) != dev ||
		    rman_get_flags(rle->res) & RF_ACTIVE) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    rle->type, rle->rid,
			    rman_get_start(rle->res));
			return;
		}

		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, rid, 0);
			break;
		}
		bus_release_resource(dev, type, rid, rle->res);
	}
	resource_list_delete(rl, type, rid);
}
3686 
3687 struct resource_list *
3688 pci_get_resource_list (device_t dev, device_t child)
3689 {
3690 	struct pci_devinfo *dinfo = device_get_ivars(child);
3691 
3692 	return (&dinfo->resources);
3693 }
3694 
3695 uint32_t
3696 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3697 {
3698 	struct pci_devinfo *dinfo = device_get_ivars(child);
3699 	pcicfgregs *cfg = &dinfo->cfg;
3700 
3701 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3702 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3703 }
3704 
3705 void
3706 pci_write_config_method(device_t dev, device_t child, int reg,
3707     uint32_t val, int width)
3708 {
3709 	struct pci_devinfo *dinfo = device_get_ivars(child);
3710 	pcicfgregs *cfg = &dinfo->cfg;
3711 
3712 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3713 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3714 }
3715 
3716 int
3717 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3718     size_t buflen)
3719 {
3720 
3721 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3722 	    pci_get_function(child));
3723 	return (0);
3724 }
3725 
3726 int
3727 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3728     size_t buflen)
3729 {
3730 	struct pci_devinfo *dinfo;
3731 	pcicfgregs *cfg;
3732 
3733 	dinfo = device_get_ivars(child);
3734 	cfg = &dinfo->cfg;
3735 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3736 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3737 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3738 	    cfg->progif);
3739 	return (0);
3740 }
3741 
3742 int
3743 pci_assign_interrupt_method(device_t dev, device_t child)
3744 {
3745 	struct pci_devinfo *dinfo = device_get_ivars(child);
3746 	pcicfgregs *cfg = &dinfo->cfg;
3747 
3748 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3749 	    cfg->intpin));
3750 }
3751 
3752 static int
3753 pci_modevent(module_t mod, int what, void *arg)
3754 {
3755 	static struct cdev *pci_cdev;
3756 
3757 	switch (what) {
3758 	case MOD_LOAD:
3759 		STAILQ_INIT(&pci_devq);
3760 		pci_generation = 0;
3761 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3762 		    "pci");
3763 		pci_load_vendor_data();
3764 		break;
3765 
3766 	case MOD_UNLOAD:
3767 		destroy_dev(pci_cdev);
3768 		break;
3769 	}
3770 
3771 	return (0);
3772 }
3773 
/*
 * Restore a device's saved configuration registers, typically after a
 * suspend/resume or power-state transition.  Only plain (header type
 * 0) devices are handled.  The device is returned to D0 first since
 * the BARs and other registers must be written while in full power.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the BARs and the writable header fields we cached. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3819 
/*
 * Save a device's configuration registers so pci_cfg_restore() can
 * reinstate them later, and optionally (setstate != 0) power the
 * device down to D3 according to the pci_do_power_nodriver policy.
 * Only plain (header type 0) devices are handled.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3903