xref: /freebsd/sys/dev/pci/pci.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static const char	*pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg, uint32_t *data);
99 #if 0
100 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 
/*
 * kobj dispatch table wiring the device, bus, and PCI interfaces to
 * this driver's implementations.  Methods not listed here fall back
 * to the interface defaults.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
166 
/* Driver registration: the "pci" driver attaches under "pcib" parents. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor/device description database, loaded on demand by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/* One entry in the per-device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};

/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all discovered PCI functions, with a generation counter. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set while walking capability lists if a PCI-e / PCI-X chipset is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271 
272 /* Find a device_t by bus/slot/function in domain 0 */
273 
/*
 * Look up the device_t for bus/slot/function in PCI domain 0.
 * Convenience wrapper around pci_find_dbsf() for callers that predate
 * PCI domain support.  Returns NULL if no such function is known.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
280 
281 /* Find a device_t by domain/bus/slot/function */
282 
283 device_t
284 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285 {
286 	struct pci_devinfo *dinfo;
287 
288 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289 		if ((dinfo->cfg.domain == domain) &&
290 		    (dinfo->cfg.bus == bus) &&
291 		    (dinfo->cfg.slot == slot) &&
292 		    (dinfo->cfg.func == func)) {
293 			return (dinfo->cfg.dev);
294 		}
295 	}
296 
297 	return (NULL);
298 }
299 
300 /* Find a device_t by vendor/device ID */
301 
302 device_t
303 pci_find_device(uint16_t vendor, uint16_t device)
304 {
305 	struct pci_devinfo *dinfo;
306 
307 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308 		if ((dinfo->cfg.vendor == vendor) &&
309 		    (dinfo->cfg.device == device)) {
310 			return (dinfo->cfg.dev);
311 		}
312 	}
313 
314 	return (NULL);
315 }
316 
317 /* return base address of memory or port map */
318 
319 static uint32_t
320 pci_mapbase(uint32_t mapreg)
321 {
322 
323 	if (PCI_BAR_MEM(mapreg))
324 		return (mapreg & PCIM_BAR_MEM_BASE);
325 	else
326 		return (mapreg & PCIM_BAR_IO_BASE);
327 }
328 
329 /* return map type of memory or port map */
330 
331 static const char *
332 pci_maptype(unsigned mapreg)
333 {
334 
335 	if (PCI_BAR_IO(mapreg))
336 		return ("I/O Port");
337 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338 		return ("Prefetchable Memory");
339 	return ("Memory");
340 }
341 
342 /* return log2 of map size decoded for memory or port map */
343 
344 static int
345 pci_mapsize(uint32_t testval)
346 {
347 	int ln2size;
348 
349 	testval = pci_mapbase(testval);
350 	ln2size = 0;
351 	if (testval != 0) {
352 		while ((testval & 1) == 0)
353 		{
354 			ln2size++;
355 			testval >>= 1;
356 		}
357 	}
358 	return (ln2size);
359 }
360 
361 /* return log2 of address range supported by map register */
362 
363 static int
364 pci_maprange(unsigned mapreg)
365 {
366 	int ln2range = 0;
367 
368 	if (PCI_BAR_IO(mapreg))
369 		ln2range = 32;
370 	else
371 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372 		case PCIM_BAR_MEM_32:
373 			ln2range = 32;
374 			break;
375 		case PCIM_BAR_MEM_1MB:
376 			ln2range = 20;
377 			break;
378 		case PCIM_BAR_MEM_64:
379 			ln2range = 64;
380 			break;
381 		}
382 	return (ln2range);
383 }
384 
385 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386 
387 static void
388 pci_fixancient(pcicfgregs *cfg)
389 {
390 	if (cfg->hdrtype != 0)
391 		return;
392 
393 	/* PCI to PCI bridges use header type 1 */
394 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395 		cfg->hdrtype = 1;
396 }
397 
398 /* extract header type specific config data */
399 
/*
 * Extract the header-type specific registers (subvendor/subdevice IDs
 * and the number of BARs) for the function at b/s/f below 'pcib' and
 * store them into *cfg.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers in type 1 */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
421 
422 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of the function at d/b/s/f below
 * 'pcib' into a freshly allocated pci_devinfo of 'size' bytes (which
 * lets subclassed buses allocate a larger structure).  The new entry
 * is appended to the global device list and the conf summary fields
 * are filled in.  Returns NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An absent function reads config space as all-ones. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Strip the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Cache capability locations if the device has a cap list. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the identity fields into the pciio conf summary. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
496 
497 static void
498 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
499 {
500 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
501 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
502 #if defined(__i386__) || defined(__amd64__)
503 	uint64_t addr;
504 #endif
505 	uint32_t val;
506 	int	ptr, nextptr, ptrptr;
507 
508 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
509 	case 0:
510 	case 1:
511 		ptrptr = PCIR_CAP_PTR;
512 		break;
513 	case 2:
514 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
515 		break;
516 	default:
517 		return;		/* no extended capabilities support */
518 	}
519 	nextptr = REG(ptrptr, 1);	/* sanity check? */
520 
521 	/*
522 	 * Read capability entries.
523 	 */
524 	while (nextptr != 0) {
525 		/* Sanity check */
526 		if (nextptr > 255) {
527 			printf("illegal PCI extended capability offset %d\n",
528 			    nextptr);
529 			return;
530 		}
531 		/* Find the next entry */
532 		ptr = nextptr;
533 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
534 
535 		/* Process this entry */
536 		switch (REG(ptr + PCICAP_ID, 1)) {
537 		case PCIY_PMG:		/* PCI power management */
538 			if (cfg->pp.pp_cap == 0) {
539 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
540 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
541 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
542 				if ((nextptr - ptr) > PCIR_POWER_DATA)
543 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
544 			}
545 			break;
546 #if defined(__i386__) || defined(__amd64__)
547 		case PCIY_HT:		/* HyperTransport */
548 			/* Determine HT-specific capability type. */
549 			val = REG(ptr + PCIR_HT_COMMAND, 2);
550 			switch (val & PCIM_HTCMD_CAP_MASK) {
551 			case PCIM_HTCAP_MSI_MAPPING:
552 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
553 					/* Sanity check the mapping window. */
554 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
555 					    4);
556 					addr <<= 32;
557 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
558 					    4);
559 					if (addr != MSI_INTEL_ADDR_BASE)
560 						device_printf(pcib,
561 	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
562 						    cfg->domain, cfg->bus,
563 						    cfg->slot, cfg->func,
564 						    (long long)addr);
565 				}
566 
567 				/* Enable MSI -> HT mapping. */
568 				val |= PCIM_HTCMD_MSI_ENABLE;
569 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
570 				break;
571 			}
572 			break;
573 #endif
574 		case PCIY_MSI:		/* PCI MSI */
575 			cfg->msi.msi_location = ptr;
576 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
577 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
578 						     PCIM_MSICTRL_MMC_MASK)>>1);
579 			break;
580 		case PCIY_MSIX:		/* PCI MSI-X */
581 			cfg->msix.msix_location = ptr;
582 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
583 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
584 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
585 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
586 			cfg->msix.msix_table_bar = PCIR_BAR(val &
587 			    PCIM_MSIX_BIR_MASK);
588 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
589 			val = REG(ptr + PCIR_MSIX_PBA, 4);
590 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
591 			    PCIM_MSIX_BIR_MASK);
592 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
593 			break;
594 		case PCIY_VPD:		/* PCI Vital Product Data */
595 			cfg->vpd.vpd_reg = ptr;
596 			break;
597 		case PCIY_SUBVENDOR:
598 			/* Should always be true. */
599 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
600 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
601 				cfg->subvendor = val & 0xffff;
602 				cfg->subdevice = val >> 16;
603 			}
604 			break;
605 		case PCIY_PCIX:		/* PCI-X */
606 			/*
607 			 * Assume we have a PCI-X chipset if we have
608 			 * at least one PCI-PCI bridge with a PCI-X
609 			 * capability.  Note that some systems with
610 			 * PCI-express or HT chipsets might match on
611 			 * this check as well.
612 			 */
613 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
614 				pcix_chipset = 1;
615 			break;
616 		case PCIY_EXPRESS:	/* PCI-express */
617 			/*
618 			 * Assume we have a PCI-express chipset if we have
619 			 * at least one PCI-express root port.
620 			 */
621 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
622 			if ((val & PCIM_EXP_FLAGS_TYPE) ==
623 			    PCIM_EXP_TYPE_ROOT_PORT)
624 				pcie_chipset = 1;
625 			break;
626 		default:
627 			break;
628 		}
629 	}
630 /* REG and WREG use carry through to next functions */
631 }
632 
/*
 * PCI Vital Product Data
 */

/* Spin limit (in 1us DELAYs) for a single VPD register transaction. */
#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one 4-byte-aligned 32-bit word of VPD at offset 'reg' into
 * *data.  Returns 0 on success or ENXIO if the device does not
 * complete the read within PCI_VPD_TIMEOUT microseconds.
 * (Uses the REG/WREG macros still defined by pci_read_extcap().)
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Writing the address with flag bit 15 clear starts a read. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* The device sets bit 15 when the data register becomes valid. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
657 
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD at offset 'reg'.
 * Currently unused; kept for reference.  Returns 0 on success or
 * ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	/* Writing the address with bit 15 set requests a write. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* The device clears bit 15 when the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
679 
/*
 * Cursor state for the byte-at-a-time VPD reader: words are fetched
 * 32 bits at a time by pci_read_vpd_reg() and doled out one byte at
 * a time by vpd_nextbyte().
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge to issue config cycles through */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last word read, shifted as consumed */
	int		bytesinval;	/* unconsumed bytes remaining in 'val' */
	int		off;		/* VPD offset of the next word to fetch */
	uint8_t		cksum;		/* running byte sum for the RV checksum */
};
688 
689 static int
690 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
691 {
692 	uint32_t reg;
693 	uint8_t byte;
694 
695 	if (vrs->bytesinval == 0) {
696 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
697 			return (ENXIO);
698 		vrs->val = le32toh(reg);
699 		vrs->off += 4;
700 		byte = vrs->val & 0xff;
701 		vrs->bytesinval = 3;
702 	} else {
703 		vrs->val = vrs->val >> 8;
704 		byte = vrs->val & 0xff;
705 		vrs->bytesinval--;
706 	}
707 
708 	vrs->cksum += byte;
709 	*data = byte;
710 	return (0);
711 }
712 
/*
 * Parse the device's entire VPD into cfg->vpd: the identifier string,
 * the read-only (VPD-R) keyword array, and the read/write (VPD-W)
 * keyword array.  Runs a small state machine over the byte stream
 * produced by vpd_nextbyte():
 *
 *   state 0: resource item header    state 1: identifier string bytes
 *   state 2: VPD-R keyword header    state 3: VPD-R keyword value bytes
 *   state 5: VPD-W keyword header    state 6: VPD-W keyword value bytes
 *   state -1: normal/parse-error stop; state -2: I/O error stop
 *
 * On a bad "RV" checksum the read-only data is discarded; on an I/O
 * error everything is discarded.  cfg->vpd.vpd_cached is set in all
 * cases so we never retry a failed read.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD cannot extend past 0x7f 32-bit words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when it fills. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* The RV keyword's first byte closes the checksum. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * NOTE(review): no transition above appears to set
			 * state 4; this skip state looks unreachable --
			 * confirm before relying on it.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark done (even on failure) so we never re-read the VPD. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
992 
993 int
994 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
995 {
996 	struct pci_devinfo *dinfo = device_get_ivars(child);
997 	pcicfgregs *cfg = &dinfo->cfg;
998 
999 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1000 		pci_read_vpd(device_get_parent(dev), cfg);
1001 
1002 	*identptr = cfg->vpd.vpd_ident;
1003 
1004 	if (*identptr == NULL)
1005 		return (ENXIO);
1006 
1007 	return (0);
1008 }
1009 
1010 int
1011 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1012 	const char **vptr)
1013 {
1014 	struct pci_devinfo *dinfo = device_get_ivars(child);
1015 	pcicfgregs *cfg = &dinfo->cfg;
1016 	int i;
1017 
1018 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1019 		pci_read_vpd(device_get_parent(dev), cfg);
1020 
1021 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1022 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1023 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1024 			*vptr = cfg->vpd.vpd_ros[i].value;
1025 		}
1026 
1027 	if (i != cfg->vpd.vpd_rocnt)
1028 		return (0);
1029 
1030 	*vptr = NULL;
1031 	return (ENXIO);
1032 }
1033 
1034 /*
1035  * Return the offset in configuration space of the requested extended
1036  * capability entry or 0 if the specified capability was not found.
1037  */
1038 int
1039 pci_find_extcap_method(device_t dev, device_t child, int capability,
1040     int *capreg)
1041 {
1042 	struct pci_devinfo *dinfo = device_get_ivars(child);
1043 	pcicfgregs *cfg = &dinfo->cfg;
1044 	u_int32_t status;
1045 	u_int8_t ptr;
1046 
1047 	/*
1048 	 * Check the CAP_LIST bit of the PCI status register first.
1049 	 */
1050 	status = pci_read_config(child, PCIR_STATUS, 2);
1051 	if (!(status & PCIM_STATUS_CAPPRESENT))
1052 		return (ENXIO);
1053 
1054 	/*
1055 	 * Determine the start pointer of the capabilities list.
1056 	 */
1057 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1058 	case 0:
1059 	case 1:
1060 		ptr = PCIR_CAP_PTR;
1061 		break;
1062 	case 2:
1063 		ptr = PCIR_CAP_PTR_2;
1064 		break;
1065 	default:
1066 		/* XXX: panic? */
1067 		return (ENXIO);		/* no extended capabilities support */
1068 	}
1069 	ptr = pci_read_config(child, ptr, 1);
1070 
1071 	/*
1072 	 * Traverse the capabilities list.
1073 	 */
1074 	while (ptr != 0) {
1075 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1076 			if (capreg != NULL)
1077 				*capreg = ptr;
1078 			return (0);
1079 		}
1080 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1081 	}
1082 
1083 	return (ENOENT);
1084 }
1085 
1086 /*
1087  * Support for MSI-X message interrupts.
1088  */
1089 void
1090 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1091 {
1092 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1093 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1094 	uint32_t offset;
1095 
1096 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1097 	offset = msix->msix_table_offset + index * 16;
1098 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1099 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1100 	bus_write_4(msix->msix_table_res, offset + 8, data);
1101 }
1102 
1103 void
1104 pci_mask_msix(device_t dev, u_int index)
1105 {
1106 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1107 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1108 	uint32_t offset, val;
1109 
1110 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1111 	offset = msix->msix_table_offset + index * 16 + 12;
1112 	val = bus_read_4(msix->msix_table_res, offset);
1113 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1114 		val |= PCIM_MSIX_VCTRL_MASK;
1115 		bus_write_4(msix->msix_table_res, offset, val);
1116 	}
1117 }
1118 
1119 void
1120 pci_unmask_msix(device_t dev, u_int index)
1121 {
1122 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1123 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1124 	uint32_t offset, val;
1125 
1126 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1127 	offset = msix->msix_table_offset + index * 16 + 12;
1128 	val = bus_read_4(msix->msix_table_res, offset);
1129 	if (val & PCIM_MSIX_VCTRL_MASK) {
1130 		val &= ~PCIM_MSIX_VCTRL_MASK;
1131 		bus_write_4(msix->msix_table_res, offset, val);
1132 	}
1133 }
1134 
1135 int
1136 pci_pending_msix(device_t dev, u_int index)
1137 {
1138 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1139 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1140 	uint32_t offset, bit;
1141 
1142 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1143 	offset = msix->msix_pba_offset + (index / 32) * 4;
1144 	bit = 1 << index % 32;
1145 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1146 }
1147 
1148 /*
1149  * Restore MSI-X registers and table during resume.  If MSI-X is
1150  * enabled then walk the virtual table to restore the actual MSI-X
1151  * table.
1152  */
1153 static void
1154 pci_resume_msix(device_t dev)
1155 {
1156 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1157 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1158 	struct msix_table_entry *mte;
1159 	struct msix_vector *mv;
1160 	int i;
1161 
1162 	if (msix->msix_alloc > 0) {
1163 		/* First, mask all vectors. */
1164 		for (i = 0; i < msix->msix_msgnum; i++)
1165 			pci_mask_msix(dev, i);
1166 
1167 		/* Second, program any messages with at least one handler. */
1168 		for (i = 0; i < msix->msix_table_len; i++) {
1169 			mte = &msix->msix_table[i];
1170 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1171 				continue;
1172 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1173 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1174 			pci_unmask_msix(dev, i);
1175 		}
1176 	}
1177 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1178 	    msix->msix_ctrl, 2);
1179 }
1180 
1181 /*
1182  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1183  * returned in *count.  After this function returns, each message will be
1184  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1185  */
1186 int
1187 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1188 {
1189 	struct pci_devinfo *dinfo = device_get_ivars(child);
1190 	pcicfgregs *cfg = &dinfo->cfg;
1191 	struct resource_list_entry *rle;
1192 	int actual, error, i, irq, max;
1193 
1194 	/* Don't let count == 0 get us into trouble. */
1195 	if (*count == 0)
1196 		return (EINVAL);
1197 
1198 	/* If rid 0 is allocated, then fail. */
1199 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1200 	if (rle != NULL && rle->res != NULL)
1201 		return (ENXIO);
1202 
1203 	/* Already have allocated messages? */
1204 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1205 		return (ENXIO);
1206 
1207 	/* If MSI is blacklisted for this system, fail. */
1208 	if (pci_msi_blacklisted())
1209 		return (ENXIO);
1210 
1211 	/* MSI-X capability present? */
1212 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1213 		return (ENODEV);
1214 
1215 	/* Make sure the appropriate BARs are mapped. */
1216 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1217 	    cfg->msix.msix_table_bar);
1218 	if (rle == NULL || rle->res == NULL ||
1219 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1220 		return (ENXIO);
1221 	cfg->msix.msix_table_res = rle->res;
1222 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1223 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1224 		    cfg->msix.msix_pba_bar);
1225 		if (rle == NULL || rle->res == NULL ||
1226 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1227 			return (ENXIO);
1228 	}
1229 	cfg->msix.msix_pba_res = rle->res;
1230 
1231 	if (bootverbose)
1232 		device_printf(child,
1233 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1234 		    *count, cfg->msix.msix_msgnum);
1235 	max = min(*count, cfg->msix.msix_msgnum);
1236 	for (i = 0; i < max; i++) {
1237 		/* Allocate a message. */
1238 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1239 		if (error)
1240 			break;
1241 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1242 		    irq, 1);
1243 	}
1244 	actual = i;
1245 
1246 	if (bootverbose) {
1247 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1248 		if (actual == 1)
1249 			device_printf(child, "using IRQ %lu for MSI-X\n",
1250 			    rle->start);
1251 		else {
1252 			int run;
1253 
1254 			/*
1255 			 * Be fancy and try to print contiguous runs of
1256 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1257 			 * 'run' is true if we are in a range.
1258 			 */
1259 			device_printf(child, "using IRQs %lu", rle->start);
1260 			irq = rle->start;
1261 			run = 0;
1262 			for (i = 1; i < actual; i++) {
1263 				rle = resource_list_find(&dinfo->resources,
1264 				    SYS_RES_IRQ, i + 1);
1265 
1266 				/* Still in a run? */
1267 				if (rle->start == irq + 1) {
1268 					run = 1;
1269 					irq++;
1270 					continue;
1271 				}
1272 
1273 				/* Finish previous range. */
1274 				if (run) {
1275 					printf("-%d", irq);
1276 					run = 0;
1277 				}
1278 
1279 				/* Start new range. */
1280 				printf(",%lu", rle->start);
1281 				irq = rle->start;
1282 			}
1283 
1284 			/* Unfinished range? */
1285 			if (run)
1286 				printf("-%d", irq);
1287 			printf(" for MSI-X\n");
1288 		}
1289 	}
1290 
1291 	/* Mask all vectors. */
1292 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1293 		pci_mask_msix(child, i);
1294 
1295 	/* Allocate and initialize vector data and virtual table. */
1296 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1297 	    M_DEVBUF, M_WAITOK | M_ZERO);
1298 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1299 	    M_DEVBUF, M_WAITOK | M_ZERO);
1300 	for (i = 0; i < actual; i++) {
1301 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1302 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1303 		cfg->msix.msix_table[i].mte_vector = i + 1;
1304 	}
1305 
1306 	/* Update control register to enable MSI-X. */
1307 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1308 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1309 	    cfg->msix.msix_ctrl, 2);
1310 
1311 	/* Update counts of alloc'd messages. */
1312 	cfg->msix.msix_alloc = actual;
1313 	cfg->msix.msix_table_len = actual;
1314 	*count = actual;
1315 	return (0);
1316 }
1317 
1318 /*
1319  * By default, pci_alloc_msix() will assign the allocated IRQ
1320  * resources consecutively to the first N messages in the MSI-X table.
1321  * However, device drivers may want to use different layouts if they
1322  * either receive fewer messages than they asked for, or they wish to
1323  * populate the MSI-X table sparsely.  This method allows the driver
1324  * to specify what layout it wants.  It must be called after a
1325  * successful pci_alloc_msix() but before any of the associated
1326  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1327  *
1328  * The 'vectors' array contains 'count' message vectors.  The array
1329  * maps directly to the MSI-X table in that index 0 in the array
1330  * specifies the vector for the first message in the MSI-X table, etc.
1331  * The vector value in each array index can either be 0 to indicate
1332  * that no vector should be assigned to a message slot, or it can be a
1333  * number from 1 to N (where N is the count returned from a
1334  * succcessful call to pci_alloc_msix()) to indicate which message
1335  * vector (IRQ) to be used for the corresponding message.
1336  *
1337  * On successful return, each message with a non-zero vector will have
1338  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1339  * 1.  Additionally, if any of the IRQs allocated via the previous
1340  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1341  * will be freed back to the system automatically.
1342  *
1343  * For example, suppose a driver has a MSI-X table with 6 messages and
1344  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1345  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1346  * C.  After the call to pci_alloc_msix(), the device will be setup to
1347  * have an MSI-X table of ABC--- (where - means no vector assigned).
1348  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1349  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1350  * be freed back to the system.  This device will also have valid
1351  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1352  *
1353  * In any case, the SYS_RES_IRQ rid X will always map to the message
1354  * at MSI-X table index X - 1 and will only be valid if a vector is
1355  * assigned to that table entry.
1356  */
1357 int
1358 pci_remap_msix_method(device_t dev, device_t child, int count,
1359     const u_int *vectors)
1360 {
1361 	struct pci_devinfo *dinfo = device_get_ivars(child);
1362 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1363 	struct resource_list_entry *rle;
1364 	int i, irq, j, *used;
1365 
1366 	/*
1367 	 * Have to have at least one message in the table but the
1368 	 * table can't be bigger than the actual MSI-X table in the
1369 	 * device.
1370 	 */
1371 	if (count == 0 || count > msix->msix_msgnum)
1372 		return (EINVAL);
1373 
1374 	/* Sanity check the vectors. */
1375 	for (i = 0; i < count; i++)
1376 		if (vectors[i] > msix->msix_alloc)
1377 			return (EINVAL);
1378 
1379 	/*
1380 	 * Make sure there aren't any holes in the vectors to be used.
1381 	 * It's a big pain to support it, and it doesn't really make
1382 	 * sense anyway.  Also, at least one vector must be used.
1383 	 */
1384 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1385 	    M_ZERO);
1386 	for (i = 0; i < count; i++)
1387 		if (vectors[i] != 0)
1388 			used[vectors[i] - 1] = 1;
1389 	for (i = 0; i < msix->msix_alloc - 1; i++)
1390 		if (used[i] == 0 && used[i + 1] == 1) {
1391 			free(used, M_DEVBUF);
1392 			return (EINVAL);
1393 		}
1394 	if (used[0] != 1) {
1395 		free(used, M_DEVBUF);
1396 		return (EINVAL);
1397 	}
1398 
1399 	/* Make sure none of the resources are allocated. */
1400 	for (i = 0; i < msix->msix_table_len; i++) {
1401 		if (msix->msix_table[i].mte_vector == 0)
1402 			continue;
1403 		if (msix->msix_table[i].mte_handlers > 0)
1404 			return (EBUSY);
1405 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1406 		KASSERT(rle != NULL, ("missing resource"));
1407 		if (rle->res != NULL)
1408 			return (EBUSY);
1409 	}
1410 
1411 	/* Free the existing resource list entries. */
1412 	for (i = 0; i < msix->msix_table_len; i++) {
1413 		if (msix->msix_table[i].mte_vector == 0)
1414 			continue;
1415 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1416 	}
1417 
1418 	/*
1419 	 * Build the new virtual table keeping track of which vectors are
1420 	 * used.
1421 	 */
1422 	free(msix->msix_table, M_DEVBUF);
1423 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1424 	    M_DEVBUF, M_WAITOK | M_ZERO);
1425 	for (i = 0; i < count; i++)
1426 		msix->msix_table[i].mte_vector = vectors[i];
1427 	msix->msix_table_len = count;
1428 
1429 	/* Free any unused IRQs and resize the vectors array if necessary. */
1430 	j = msix->msix_alloc - 1;
1431 	if (used[j] == 0) {
1432 		struct msix_vector *vec;
1433 
1434 		while (used[j] == 0) {
1435 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1436 			    msix->msix_vectors[j].mv_irq);
1437 			j--;
1438 		}
1439 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1440 		    M_WAITOK);
1441 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1442 		    (j + 1));
1443 		free(msix->msix_vectors, M_DEVBUF);
1444 		msix->msix_vectors = vec;
1445 		msix->msix_alloc = j + 1;
1446 	}
1447 	free(used, M_DEVBUF);
1448 
1449 	/* Map the IRQs onto the rids. */
1450 	for (i = 0; i < count; i++) {
1451 		if (vectors[i] == 0)
1452 			continue;
1453 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1454 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1455 		    irq, 1);
1456 	}
1457 
1458 	if (bootverbose) {
1459 		device_printf(child, "Remapped MSI-X IRQs as: ");
1460 		for (i = 0; i < count; i++) {
1461 			if (i != 0)
1462 				printf(", ");
1463 			if (vectors[i] == 0)
1464 				printf("---");
1465 			else
1466 				printf("%d",
1467 				    msix->msix_vectors[vectors[i]].mv_irq);
1468 		}
1469 		printf("\n");
1470 	}
1471 
1472 	return (0);
1473 }
1474 
/*
 * Release all MSI-X messages allocated to a device: disable MSI-X in
 * the control register, tear down the resource list entries and the
 * virtual table, and hand the IRQs back to the parent bridge.
 * Returns ENODEV when no MSI-X messages are allocated and EBUSY when
 * any message still has a handler or an allocated resource (nothing
 * has been modified at that point).
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1521 
1522 /*
1523  * Return the max supported MSI-X messages this device supports.
1524  * Basically, assuming the MD code can alloc messages, this function
1525  * should return the maximum value that pci_alloc_msix() can return.
1526  * Thus, it is subject to the tunables, etc.
1527  */
1528 int
1529 pci_msix_count_method(device_t dev, device_t child)
1530 {
1531 	struct pci_devinfo *dinfo = device_get_ivars(child);
1532 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1533 
1534 	if (pci_do_msix && msix->msix_location != 0)
1535 		return (msix->msix_msgnum);
1536 	return (0);
1537 }
1538 
1539 /*
1540  * Support for MSI message signalled interrupts.
1541  */
1542 void
1543 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1544 {
1545 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1546 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1547 
1548 	/* Write data and address values. */
1549 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1550 	    address & 0xffffffff, 4);
1551 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1552 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1553 		    address >> 32, 4);
1554 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1555 		    data, 2);
1556 	} else
1557 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1558 		    2);
1559 
1560 	/* Enable MSI in the control register. */
1561 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1562 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1563 	    2);
1564 }
1565 
1566 void
1567 pci_disable_msi(device_t dev)
1568 {
1569 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1570 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1571 
1572 	/* Disable MSI in the control register. */
1573 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1574 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1575 	    2);
1576 }
1577 
1578 /*
1579  * Restore MSI registers during resume.  If MSI is enabled then
1580  * restore the data and address registers in addition to the control
1581  * register.
1582  */
1583 static void
1584 pci_resume_msi(device_t dev)
1585 {
1586 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1587 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1588 	uint64_t address;
1589 	uint16_t data;
1590 
1591 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1592 		address = msi->msi_addr;
1593 		data = msi->msi_data;
1594 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1595 		    address & 0xffffffff, 4);
1596 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1597 			pci_write_config(dev, msi->msi_location +
1598 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1599 			pci_write_config(dev, msi->msi_location +
1600 			    PCIR_MSI_DATA_64BIT, data, 2);
1601 		} else
1602 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1603 			    data, 2);
1604 	}
1605 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1606 	    2);
1607 }
1608 
/*
 * Re-request the address/data mapping for an already-allocated MSI or
 * MSI-X IRQ on this device from the parent bridge and reprogram the
 * device with the result.  Returns ENOENT if 'irq' is not allocated
 * to this device, or an error from PCIB_MAP_MSI().
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable MSI while rewriting the registers. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 *
	 * NOTE(review): unlike the MSI path, a successful MSI-X remap
	 * still falls through to the ENOENT return below — confirm
	 * callers tolerate (or ignore) this return value.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is 1-based. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while the entry is rewritten. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1684 
1685 /*
1686  * Returns true if the specified device is blacklisted because MSI
1687  * doesn't work.
1688  */
1689 int
1690 pci_msi_device_blacklisted(device_t dev)
1691 {
1692 	struct pci_quirk *q;
1693 
1694 	if (!pci_honor_msi_blacklist)
1695 		return (0);
1696 
1697 	for (q = &pci_quirks[0]; q->devid; q++) {
1698 		if (q->devid == pci_get_devid(dev) &&
1699 		    q->type == PCI_QUIRK_DISABLE_MSI)
1700 			return (1);
1701 	}
1702 	return (0);
1703 }
1704 
1705 /*
1706  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1707  * we just check for blacklisted chipsets as represented by the
1708  * host-PCI bridge at device 0:0:0.  In the future, it may become
1709  * necessary to check other system attributes, such as the kenv values
1710  * that give the motherboard manufacturer and model number.
1711  */
1712 static int
1713 pci_msi_blacklisted(void)
1714 {
1715 	device_t dev;
1716 
1717 	if (!pci_honor_msi_blacklist)
1718 		return (0);
1719 
1720 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1721 	if (!(pcie_chipset || pcix_chipset))
1722 		return (1);
1723 
1724 	dev = pci_find_bsf(0, 0, 0);
1725 	if (dev != NULL)
1726 		return (pci_msi_device_blacklisted(dev));
1727 	return (0);
1728 }
1729 
1730 /*
1731  * Attempt to allocate *count MSI messages.  The actual number allocated is
1732  * returned in *count.  After this function returns, each message will be
1733  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1734  */
1735 int
1736 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1737 {
1738 	struct pci_devinfo *dinfo = device_get_ivars(child);
1739 	pcicfgregs *cfg = &dinfo->cfg;
1740 	struct resource_list_entry *rle;
1741 	int actual, error, i, irqs[32];
1742 	uint16_t ctrl;
1743 
1744 	/* Don't let count == 0 get us into trouble. */
1745 	if (*count == 0)
1746 		return (EINVAL);
1747 
1748 	/* If rid 0 is allocated, then fail. */
1749 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1750 	if (rle != NULL && rle->res != NULL)
1751 		return (ENXIO);
1752 
1753 	/* Already have allocated messages? */
1754 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1755 		return (ENXIO);
1756 
1757 	/* If MSI is blacklisted for this system, fail. */
1758 	if (pci_msi_blacklisted())
1759 		return (ENXIO);
1760 
1761 	/* MSI capability present? */
1762 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1763 		return (ENODEV);
1764 
1765 	if (bootverbose)
1766 		device_printf(child,
1767 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1768 		    *count, cfg->msi.msi_msgnum);
1769 
1770 	/* Don't ask for more than the device supports. */
1771 	actual = min(*count, cfg->msi.msi_msgnum);
1772 
1773 	/* Don't ask for more than 32 messages. */
1774 	actual = min(actual, 32);
1775 
1776 	/* MSI requires power of 2 number of messages. */
1777 	if (!powerof2(actual))
1778 		return (EINVAL);
1779 
1780 	for (;;) {
1781 		/* Try to allocate N messages. */
1782 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1783 		    cfg->msi.msi_msgnum, irqs);
1784 		if (error == 0)
1785 			break;
1786 		if (actual == 1)
1787 			return (error);
1788 
1789 		/* Try N / 2. */
1790 		actual >>= 1;
1791 	}
1792 
1793 	/*
1794 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1795 	 * resources in the irqs[] array, so add new resources
1796 	 * starting at rid 1.
1797 	 */
1798 	for (i = 0; i < actual; i++)
1799 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1800 		    irqs[i], irqs[i], 1);
1801 
1802 	if (bootverbose) {
1803 		if (actual == 1)
1804 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1805 		else {
1806 			int run;
1807 
1808 			/*
1809 			 * Be fancy and try to print contiguous runs
1810 			 * of IRQ values as ranges.  'run' is true if
1811 			 * we are in a range.
1812 			 */
1813 			device_printf(child, "using IRQs %d", irqs[0]);
1814 			run = 0;
1815 			for (i = 1; i < actual; i++) {
1816 
1817 				/* Still in a run? */
1818 				if (irqs[i] == irqs[i - 1] + 1) {
1819 					run = 1;
1820 					continue;
1821 				}
1822 
1823 				/* Finish previous range. */
1824 				if (run) {
1825 					printf("-%d", irqs[i - 1]);
1826 					run = 0;
1827 				}
1828 
1829 				/* Start new range. */
1830 				printf(",%d", irqs[i]);
1831 			}
1832 
1833 			/* Unfinished range? */
1834 			if (run)
1835 				printf("-%d", irqs[actual - 1]);
1836 			printf(" for MSI\n");
1837 		}
1838 	}
1839 
1840 	/* Update control register with actual count. */
1841 	ctrl = cfg->msi.msi_ctrl;
1842 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1843 	ctrl |= (ffs(actual) - 1) << 4;
1844 	cfg->msi.msi_ctrl = ctrl;
1845 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1846 
1847 	/* Update counts of alloc'd messages. */
1848 	cfg->msi.msi_alloc = actual;
1849 	cfg->msi.msi_handlers = 0;
1850 	*count = actual;
1851 	return (0);
1852 }
1853 
/*
 * Release the MSI (or MSI-X) messages associated with this device.
 * Returns ENODEV when nothing is allocated and EBUSY while any
 * message still has a handler or an allocated resource.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] below is sized for the 32-message MSI maximum. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1902 
1903 /*
1904  * Return the max supported MSI messages this device supports.
1905  * Basically, assuming the MD code can alloc messages, this function
1906  * should return the maximum value that pci_alloc_msi() can return.
1907  * Thus, it is subject to the tunables, etc.
1908  */
1909 int
1910 pci_msi_count_method(device_t dev, device_t child)
1911 {
1912 	struct pci_devinfo *dinfo = device_get_ivars(child);
1913 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1914 
1915 	if (pci_do_msi && msi->msi_location != 0)
1916 		return (msi->msi_msgnum);
1917 	return (0);
1918 }
1919 
/* free pcicfgregs structure and all depending data structures */

int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	if (dinfo->cfg.vpd.vpd_reg) {
		/*
		 * Free cached VPD data.  vpd_ident (and the arrays) may
		 * be NULL when the VPD read failed; free(9) is assumed
		 * to treat NULL as a no-op here — confirm on older
		 * branches if backporting.
		 */
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list and destroy. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
1949 
1950 /*
1951  * PCI power manangement
1952  */
1953 int
1954 pci_set_powerstate_method(device_t dev, device_t child, int state)
1955 {
1956 	struct pci_devinfo *dinfo = device_get_ivars(child);
1957 	pcicfgregs *cfg = &dinfo->cfg;
1958 	uint16_t status;
1959 	int result, oldstate, highest, delay;
1960 
1961 	if (cfg->pp.pp_cap == 0)
1962 		return (EOPNOTSUPP);
1963 
1964 	/*
1965 	 * Optimize a no state change request away.  While it would be OK to
1966 	 * write to the hardware in theory, some devices have shown odd
1967 	 * behavior when going from D3 -> D3.
1968 	 */
1969 	oldstate = pci_get_powerstate(child);
1970 	if (oldstate == state)
1971 		return (0);
1972 
1973 	/*
1974 	 * The PCI power management specification states that after a state
1975 	 * transition between PCI power states, system software must
1976 	 * guarantee a minimal delay before the function accesses the device.
1977 	 * Compute the worst case delay that we need to guarantee before we
1978 	 * access the device.  Many devices will be responsive much more
1979 	 * quickly than this delay, but there are some that don't respond
1980 	 * instantly to state changes.  Transitions to/from D3 state require
1981 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1982 	 * is done below with DELAY rather than a sleeper function because
1983 	 * this function can be called from contexts where we cannot sleep.
1984 	 */
1985 	highest = (oldstate > state) ? oldstate : state;
1986 	if (highest == PCI_POWERSTATE_D3)
1987 	    delay = 10000;
1988 	else if (highest == PCI_POWERSTATE_D2)
1989 	    delay = 200;
1990 	else
1991 	    delay = 0;
1992 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1993 	    & ~PCIM_PSTAT_DMASK;
1994 	result = 0;
1995 	switch (state) {
1996 	case PCI_POWERSTATE_D0:
1997 		status |= PCIM_PSTAT_D0;
1998 		break;
1999 	case PCI_POWERSTATE_D1:
2000 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2001 			return (EOPNOTSUPP);
2002 		status |= PCIM_PSTAT_D1;
2003 		break;
2004 	case PCI_POWERSTATE_D2:
2005 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2006 			return (EOPNOTSUPP);
2007 		status |= PCIM_PSTAT_D2;
2008 		break;
2009 	case PCI_POWERSTATE_D3:
2010 		status |= PCIM_PSTAT_D3;
2011 		break;
2012 	default:
2013 		return (EINVAL);
2014 	}
2015 
2016 	if (bootverbose)
2017 		printf(
2018 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2019 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2020 		    dinfo->cfg.func, oldstate, state);
2021 
2022 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2023 	if (delay)
2024 		DELAY(delay);
2025 	return (0);
2026 }
2027 
2028 int
2029 pci_get_powerstate_method(device_t dev, device_t child)
2030 {
2031 	struct pci_devinfo *dinfo = device_get_ivars(child);
2032 	pcicfgregs *cfg = &dinfo->cfg;
2033 	uint16_t status;
2034 	int result;
2035 
2036 	if (cfg->pp.pp_cap != 0) {
2037 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2038 		switch (status & PCIM_PSTAT_DMASK) {
2039 		case PCIM_PSTAT_D0:
2040 			result = PCI_POWERSTATE_D0;
2041 			break;
2042 		case PCIM_PSTAT_D1:
2043 			result = PCI_POWERSTATE_D1;
2044 			break;
2045 		case PCIM_PSTAT_D2:
2046 			result = PCI_POWERSTATE_D2;
2047 			break;
2048 		case PCIM_PSTAT_D3:
2049 			result = PCI_POWERSTATE_D3;
2050 			break;
2051 		default:
2052 			result = PCI_POWERSTATE_UNKNOWN;
2053 			break;
2054 		}
2055 	} else {
2056 		/* No support, device is always at D0 */
2057 		result = PCI_POWERSTATE_D0;
2058 	}
2059 	return (result);
2060 }
2061 
2062 /*
2063  * Some convenience functions for PCI device drivers.
2064  */
2065 
2066 static __inline void
2067 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2068 {
2069 	uint16_t	command;
2070 
2071 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2072 	command |= bit;
2073 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2074 }
2075 
2076 static __inline void
2077 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2078 {
2079 	uint16_t	command;
2080 
2081 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2082 	command &= ~bit;
2083 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2084 }
2085 
2086 int
2087 pci_enable_busmaster_method(device_t dev, device_t child)
2088 {
2089 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2090 	return (0);
2091 }
2092 
2093 int
2094 pci_disable_busmaster_method(device_t dev, device_t child)
2095 {
2096 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2097 	return (0);
2098 }
2099 
2100 int
2101 pci_enable_io_method(device_t dev, device_t child, int space)
2102 {
2103 	uint16_t command;
2104 	uint16_t bit;
2105 	char *error;
2106 
2107 	bit = 0;
2108 	error = NULL;
2109 
2110 	switch(space) {
2111 	case SYS_RES_IOPORT:
2112 		bit = PCIM_CMD_PORTEN;
2113 		error = "port";
2114 		break;
2115 	case SYS_RES_MEMORY:
2116 		bit = PCIM_CMD_MEMEN;
2117 		error = "memory";
2118 		break;
2119 	default:
2120 		return (EINVAL);
2121 	}
2122 	pci_set_command_bit(dev, child, bit);
2123 	/* Some devices seem to need a brief stall here, what do to? */
2124 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2125 	if (command & bit)
2126 		return (0);
2127 	device_printf(child, "failed to enable %s mapping!\n", error);
2128 	return (ENXIO);
2129 }
2130 
2131 int
2132 pci_disable_io_method(device_t dev, device_t child, int space)
2133 {
2134 	uint16_t command;
2135 	uint16_t bit;
2136 	char *error;
2137 
2138 	bit = 0;
2139 	error = NULL;
2140 
2141 	switch(space) {
2142 	case SYS_RES_IOPORT:
2143 		bit = PCIM_CMD_PORTEN;
2144 		error = "port";
2145 		break;
2146 	case SYS_RES_MEMORY:
2147 		bit = PCIM_CMD_MEMEN;
2148 		error = "memory";
2149 		break;
2150 	default:
2151 		return (EINVAL);
2152 	}
2153 	pci_clear_command_bit(dev, child, bit);
2154 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2155 	if (command & bit) {
2156 		device_printf(child, "failed to disable %s mapping!\n", error);
2157 		return (ENXIO);
2158 	}
2159 	return (0);
2160 }
2161 
2162 /*
2163  * New style pci driver.  Parent device is either a pci-host-bridge or a
2164  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2165  */
2166 
/*
 * Dump the interesting portions of a device's config header and
 * capabilities to the console.  Only does anything when booting
 * verbose; called as each device is discovered and again when a
 * driver is added.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identification and location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/*
		 * Timing registers are scaled to nanoseconds: lattimer is in
		 * PCI-clock units (~30 ns each at 33 MHz), mingnt/maxlat are
		 * in 250 ns units per the PCI spec.
		 */
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Interrupt pin (1 = INTA) printed as a letter. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability: supported and current states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability: message count and optional features. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability: message count and which BARs hold the
		 * vector table and pending-bit array. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2223 
2224 static int
2225 pci_porten(device_t pcib, int b, int s, int f)
2226 {
2227 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2228 		& PCIM_CMD_PORTEN) != 0;
2229 }
2230 
2231 static int
2232 pci_memen(device_t pcib, int b, int s, int f)
2233 {
2234 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2235 		& PCIM_CMD_MEMEN) != 0;
2236 }
2237 
2238 /*
2239  * Add a resource based on a pci map register. Return 1 if the map
2240  * register is a 32bit map register or 2 if it is a 64bit register.
2241  */
2242 static int
2243 pci_add_map(device_t pcib, device_t bus, device_t dev,
2244     int b, int s, int f, int reg, struct resource_list *rl, int force,
2245     int prefetch)
2246 {
2247 	uint32_t map;
2248 	pci_addr_t base;
2249 	pci_addr_t start, end, count;
2250 	uint8_t ln2size;
2251 	uint8_t ln2range;
2252 	uint32_t testval;
2253 	uint16_t cmd;
2254 	int type;
2255 	int barlen;
2256 	struct resource *res;
2257 
2258 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2259 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2260 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2261 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2262 
2263 	if (PCI_BAR_MEM(map))
2264 		type = SYS_RES_MEMORY;
2265 	else
2266 		type = SYS_RES_IOPORT;
2267 	ln2size = pci_mapsize(testval);
2268 	ln2range = pci_maprange(testval);
2269 	base = pci_mapbase(map);
2270 	barlen = ln2range == 64 ? 2 : 1;
2271 
2272 	/*
2273 	 * For I/O registers, if bottom bit is set, and the next bit up
2274 	 * isn't clear, we know we have a BAR that doesn't conform to the
2275 	 * spec, so ignore it.  Also, sanity check the size of the data
2276 	 * areas to the type of memory involved.  Memory must be at least
2277 	 * 16 bytes in size, while I/O ranges must be at least 4.
2278 	 */
2279 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2280 		return (barlen);
2281 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2282 	    (type == SYS_RES_IOPORT && ln2size < 2))
2283 		return (barlen);
2284 
2285 	if (ln2range == 64)
2286 		/* Read the other half of a 64bit map register */
2287 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2288 	if (bootverbose) {
2289 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2290 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2291 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2292 			printf(", port disabled\n");
2293 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2294 			printf(", memory disabled\n");
2295 		else
2296 			printf(", enabled\n");
2297 	}
2298 
2299 	/*
2300 	 * If base is 0, then we have problems.  It is best to ignore
2301 	 * such entries for the moment.  These will be allocated later if
2302 	 * the driver specifically requests them.  However, some
2303 	 * removable busses look better when all resources are allocated,
2304 	 * so allow '0' to be overriden.
2305 	 *
2306 	 * Similarly treat maps whose values is the same as the test value
2307 	 * read back.  These maps have had all f's written to them by the
2308 	 * BIOS in an attempt to disable the resources.
2309 	 */
2310 	if (!force && (base == 0 || map == testval))
2311 		return (barlen);
2312 	if ((u_long)base != base) {
2313 		device_printf(bus,
2314 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2315 		    pci_get_domain(dev), b, s, f, reg);
2316 		return (barlen);
2317 	}
2318 
2319 	/*
2320 	 * This code theoretically does the right thing, but has
2321 	 * undesirable side effects in some cases where peripherals
2322 	 * respond oddly to having these bits enabled.  Let the user
2323 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2324 	 * default).
2325 	 */
2326 	if (pci_enable_io_modes) {
2327 		/* Turn on resources that have been left off by a lazy BIOS */
2328 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2329 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2330 			cmd |= PCIM_CMD_PORTEN;
2331 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2332 		}
2333 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2334 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2335 			cmd |= PCIM_CMD_MEMEN;
2336 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2337 		}
2338 	} else {
2339 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2340 			return (barlen);
2341 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2342 			return (barlen);
2343 	}
2344 
2345 	count = 1 << ln2size;
2346 	if (base == 0 || base == pci_mapbase(testval)) {
2347 		start = 0;	/* Let the parent deside */
2348 		end = ~0ULL;
2349 	} else {
2350 		start = base;
2351 		end = base + (1 << ln2size) - 1;
2352 	}
2353 	resource_list_add(rl, type, reg, start, end, count);
2354 
2355 	/*
2356 	 * Not quite sure what to do on failure of allocating the resource
2357 	 * since I can postulate several right answers.
2358 	 */
2359 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2360 	    prefetch ? RF_PREFETCHABLE : 0);
2361 	if (res == NULL)
2362 		return (barlen);
2363 	start = rman_get_start(res);
2364 	if ((u_long)start != start) {
2365 		/* Wait a minute!  this platform can't do this address. */
2366 		device_printf(bus,
2367 		    "pci%d:%d.%d.%x bar %#x start %#jx, too many bits.",
2368 		    pci_get_domain(dev), b, s, f, reg, (uintmax_t)start);
2369 		resource_list_release(rl, bus, dev, type, reg, res);
2370 		return (barlen);
2371 	}
2372 	pci_write_config(dev, reg, start, 4);
2373 	if (ln2range == 64)
2374 		pci_write_config(dev, reg + 4, start >> 32, 4);
2375 	return (barlen);
2376 }
2377 
2378 /*
2379  * For ATA devices we need to decide early what addressing mode to use.
2380  * Legacy demands that the primary and secondary ATA ports sits on the
2381  * same addresses that old ISA hardware did. This dictates that we use
2382  * those addresses and ignore the BAR's if we cannot set PCI native
2383  * addressing mode.
2384  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	/*
	 * The programming interface register tells us, per channel,
	 * whether the controller is in native-PCI mode (use the BARs)
	 * or legacy compatibility mode (use the fixed ISA addresses).
	 */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: BARs 0 and 1 are valid. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Legacy mode: fixed ISA command (0x1f0-0x1f7) and
		 * control (0x3f6) port ranges for the primary channel. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: BARs 2 and 3 are valid. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Legacy mode: fixed ISA command (0x170-0x177) and
		 * control (0x376) port ranges for the secondary channel. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/* BARs 4 and 5 (e.g. busmaster DMA registers) are always mapped. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2438 
/*
 * Determine the IRQ for a device's legacy INTx interrupt and record it
 * as rid 0 in the device's resource list.  The IRQ may come from (in
 * order of preference) a loader tunable, the parent bus's interrupt
 * router, or the intline config register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable range 1..254. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2486 
/*
 * Populate a device's resource list from its BARs, any quirk-table
 * entries, and its legacy interrupt.  'force' and 'prefetchmask' are
 * passed through to pci_add_map() for each BAR.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2: 64-bit BARs occupy two
		 * map registers, so let it advance the index. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2539 
2540 void
2541 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2542 {
2543 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2544 	device_t pcib = device_get_parent(dev);
2545 	struct pci_devinfo *dinfo;
2546 	int maxslots;
2547 	int s, f, pcifunchigh;
2548 	uint8_t hdrtype;
2549 
2550 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2551 	    ("dinfo_size too small"));
2552 	maxslots = PCIB_MAXSLOTS(pcib);
2553 	for (s = 0; s <= maxslots; s++) {
2554 		pcifunchigh = 0;
2555 		f = 0;
2556 		DELAY(1);
2557 		hdrtype = REG(PCIR_HDRTYPE, 1);
2558 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2559 			continue;
2560 		if (hdrtype & PCIM_MFDEV)
2561 			pcifunchigh = PCI_FUNCMAX;
2562 		for (f = 0; f <= pcifunchigh; f++) {
2563 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2564 			    dinfo_size);
2565 			if (dinfo != NULL) {
2566 				pci_add_child(dev, dinfo);
2567 			}
2568 		}
2569 	}
2570 #undef REG
2571 }
2572 
/*
 * Attach a probed device to the bus: create the newbus child, hook up
 * its ivars and resource list, and add its BAR/interrupt resources.
 * The save/restore pair snapshots the config header and then reapplies
 * it through the common restore path before resources are added.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2584 
2585 static int
2586 pci_probe(device_t dev)
2587 {
2588 
2589 	device_set_desc(dev, "PCI bus");
2590 
2591 	/* Allow other subclasses to override this driver. */
2592 	return (-1000);
2593 }
2594 
2595 static int
2596 pci_attach(device_t dev)
2597 {
2598 	int busno, domain;
2599 
2600 	/*
2601 	 * Since there can be multiple independantly numbered PCI
2602 	 * busses on systems with multiple PCI domains, we can't use
2603 	 * the unit number to decide which bus we are probing. We ask
2604 	 * the parent pcib what our domain and bus numbers are.
2605 	 */
2606 	domain = pcib_get_domain(dev);
2607 	busno = pcib_get_bus(dev);
2608 	if (bootverbose)
2609 		device_printf(dev, "domain=%d, physical bus=%d\n",
2610 		    domain, busno);
2611 
2612 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2613 
2614 	return (bus_generic_attach(dev));
2615 }
2616 
/*
 * Suspend the bus: save each child's config space, suspend the
 * children, and then (when power management on resume is enabled and
 * ACPI is present) place each attached type-0 child in a low-power
 * state.  Returns 0 on success or the error from bus_generic_suspend().
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2664 
/*
 * Resume the bus: power each child back up to D0 (when power management
 * on resume is enabled and ACPI is present), restore its saved config
 * space, and then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2699 
2700 static void
2701 pci_load_vendor_data(void)
2702 {
2703 	caddr_t vendordata, info;
2704 
2705 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2706 		info = preload_search_info(vendordata, MODINFO_ADDR);
2707 		pci_vendordata = *(char **)info;
2708 		info = preload_search_info(vendordata, MODINFO_SIZE);
2709 		pci_vendordata_size = *(size_t *)info;
2710 		/* terminate the database */
2711 		pci_vendordata[pci_vendordata_size] = '\n';
2712 	}
2713 }
2714 
2715 void
2716 pci_driver_added(device_t dev, driver_t *driver)
2717 {
2718 	int numdevs;
2719 	device_t *devlist;
2720 	device_t child;
2721 	struct pci_devinfo *dinfo;
2722 	int i;
2723 
2724 	if (bootverbose)
2725 		device_printf(dev, "driver added\n");
2726 	DEVICE_IDENTIFY(driver, dev);
2727 	device_get_children(dev, &devlist, &numdevs);
2728 	for (i = 0; i < numdevs; i++) {
2729 		child = devlist[i];
2730 		if (device_get_state(child) != DS_NOTPRESENT)
2731 			continue;
2732 		dinfo = device_get_ivars(child);
2733 		pci_print_verbose(dinfo);
2734 		if (bootverbose)
2735 			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
2736 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2737 			    dinfo->cfg.func);
2738 		pci_cfg_restore(child, dinfo);
2739 		if (device_probe_and_attach(child) != 0)
2740 			pci_cfg_save(child, dinfo, 1);
2741 	}
2742 	free(devlist, M_TEMP);
2743 }
2744 
/*
 * Set up an interrupt handler for a child device.  For direct children
 * using MSI or MSI-X (rid > 0), also ask the parent bridge to map the
 * message and program the address/data pair into the device, tracking
 * handler counts so messages are enabled exactly once.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
	 * us the address and data register values.  If we fail for some
	 * reason, teardown the interrupt handler.
	 */
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map the message lazily on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N corresponds to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Map the vector lazily on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}
	bad:
		/* On mapping failure, undo the generic setup above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2821 
2822 int
2823 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2824     void *cookie)
2825 {
2826 	struct msix_table_entry *mte;
2827 	struct resource_list_entry *rle;
2828 	struct pci_devinfo *dinfo;
2829 	int error, rid;
2830 
2831 	/*
2832 	 * If this is a direct child, check to see if the interrupt is
2833 	 * MSI or MSI-X.  If so, decrement the appropriate handlers
2834 	 * count and mask the MSI-X message, or disable MSI messages
2835 	 * if the count drops to 0.
2836 	 */
2837 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2838 		return (EINVAL);
2839 	rid = rman_get_rid(irq);
2840 	if (device_get_parent(child) == dev && rid > 0) {
2841 		dinfo = device_get_ivars(child);
2842 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2843 		if (rle->res != irq)
2844 			return (EINVAL);
2845 		if (dinfo->cfg.msi.msi_alloc > 0) {
2846 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2847 			    ("MSI-X index too high"));
2848 			if (dinfo->cfg.msi.msi_handlers == 0)
2849 				return (EINVAL);
2850 			dinfo->cfg.msi.msi_handlers--;
2851 			if (dinfo->cfg.msi.msi_handlers == 0)
2852 				pci_disable_msi(child);
2853 		} else {
2854 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2855 			    ("No MSI or MSI-X interrupts allocated"));
2856 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2857 			    ("MSI-X index too high"));
2858 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2859 			if (mte->mte_handlers == 0)
2860 				return (EINVAL);
2861 			mte->mte_handlers--;
2862 			if (mte->mte_handlers == 0)
2863 				pci_mask_msix(child, rid - 1);
2864 		}
2865 	}
2866 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2867 	if (device_get_parent(child) == dev && rid > 0)
2868 		KASSERT(error == 0,
2869 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2870 	return (error);
2871 }
2872 
2873 int
2874 pci_print_child(device_t dev, device_t child)
2875 {
2876 	struct pci_devinfo *dinfo;
2877 	struct resource_list *rl;
2878 	int retval = 0;
2879 
2880 	dinfo = device_get_ivars(child);
2881 	rl = &dinfo->resources;
2882 
2883 	retval += bus_print_child_header(dev, child);
2884 
2885 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2886 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2887 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2888 	if (device_get_flags(dev))
2889 		retval += printf(" flags %#x", device_get_flags(dev));
2890 
2891 	retval += printf(" at device %d.%d", pci_get_slot(child),
2892 	    pci_get_function(child));
2893 
2894 	retval += bus_print_child_footer(dev, child);
2895 
2896 	return (retval);
2897 }
2898 
/*
 * Table of generic class/subclass descriptions used by pci_probe_nomatch()
 * when no driver attaches and the loaded device database has no entry for
 * the device.  An entry with subclass == -1 gives the default description
 * for the whole class; a matching specific subclass entry supplies an
 * additional, more precise string.  The table ends at the NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2985 
2986 void
2987 pci_probe_nomatch(device_t dev, device_t child)
2988 {
2989 	int	i;
2990 	char	*cp, *scp, *device;
2991 
2992 	/*
2993 	 * Look for a listing for this device in a loaded device database.
2994 	 */
2995 	if ((device = pci_describe_device(child)) != NULL) {
2996 		device_printf(dev, "<%s>", device);
2997 		free(device, M_DEVBUF);
2998 	} else {
2999 		/*
3000 		 * Scan the class/subclass descriptions for a general
3001 		 * description.
3002 		 */
3003 		cp = "unknown";
3004 		scp = NULL;
3005 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3006 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3007 				if (pci_nomatch_tab[i].subclass == -1) {
3008 					cp = pci_nomatch_tab[i].desc;
3009 				} else if (pci_nomatch_tab[i].subclass ==
3010 				    pci_get_subclass(child)) {
3011 					scp = pci_nomatch_tab[i].desc;
3012 				}
3013 			}
3014 		}
3015 		device_printf(dev, "<%s%s%s>",
3016 		    cp ? cp : "",
3017 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3018 		    scp ? scp : "");
3019 	}
3020 	printf(" at device %d.%d (no driver attached)\n",
3021 	    pci_get_slot(child), pci_get_function(child));
3022 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3023 	return;
3024 }
3025 
3026 /*
3027  * Parse the PCI device database, if loaded, and return a pointer to a
3028  * description of the device.
3029  *
3030  * The database is flat text formatted as follows:
3031  *
3032  * Any line not in a valid format is ignored.
3033  * Lines are terminated with newline '\n' characters.
3034  *
3035  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3036  * the vendor name.
3037  *
3038  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3039  * - devices cannot be listed without a corresponding VENDOR line.
3040  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3041  * another TAB, then the device name.
3042  */
3043 
3044 /*
3045  * Assuming (ptr) points to the beginning of a line in the database,
3046  * return the vendor or device and description of the next entry.
3047  * The value of (vendor) or (device) inappropriate for the entry type
3048  * is set to -1.  Returns nonzero at the end of the database.
3049  *
 * Note that this is somewhat fragile in the face of corrupt data; we
 * safeguard against running off the end by appending a newline to the
 * database when we initialise it.
3053  */
3054 static int
3055 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3056 {
3057 	char	*cp = *ptr;
3058 	int	left;
3059 
3060 	*device = -1;
3061 	*vendor = -1;
3062 	**desc = '\0';
3063 	for (;;) {
3064 		left = pci_vendordata_size - (cp - pci_vendordata);
3065 		if (left <= 0) {
3066 			*ptr = cp;
3067 			return(1);
3068 		}
3069 
3070 		/* vendor entry? */
3071 		if (*cp != '\t' &&
3072 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3073 			break;
3074 		/* device entry? */
3075 		if (*cp == '\t' &&
3076 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3077 			break;
3078 
3079 		/* skip to next line */
3080 		while (*cp != '\n' && left > 0) {
3081 			cp++;
3082 			left--;
3083 		}
3084 		if (*cp == '\n') {
3085 			cp++;
3086 			left--;
3087 		}
3088 	}
3089 	/* skip to next line */
3090 	while (*cp != '\n' && left > 0) {
3091 		cp++;
3092 		left--;
3093 	}
3094 	if (*cp == '\n' && left > 0)
3095 		cp++;
3096 	*ptr = cp;
3097 	return(0);
3098 }
3099 
/*
 * Return a malloc'ed "vendor, device" description string for 'dev' built
 * from the loaded vendor database, or NULL if no database is loaded, the
 * vendor is not listed, or memory cannot be allocated.  The caller owns
 * the result and must free it with free(..., M_DEVBUF).
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers for the vendor and device description. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry matched. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Reached the next vendor section: device not listed. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No textual description; fall back to the numeric device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 covers the ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3152 
/*
 * Bus-interface read_ivar method: export config-header fields cached in
 * the child's pci_devinfo.  Returns 0 on success, ENOENT for an unknown
 * ivar, and EINVAL for PCI_IVAR_ETHADDR, which the generic PCI bus
 * cannot supply.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device:vendor ID, matching pci_get_devid(). */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3235 
/*
 * Bus-interface write_ivar method.  Only the cached interrupt pin is
 * writable; the remaining config-derived ivars are read-only and return
 * EINVAL, and unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3268 
3269 
3270 #include "opt_ddb.h"
3271 #ifdef DDB
3272 #include <ddb/ddb.h>
3273 #include <sys/cons.h>
3274 
3275 /*
3276  * List resources based on pci map registers, used for within ddb
3277  */
3278 
/*
 * "show pciregs" DDB command: walk the global pci_devq list and print a
 * one-line summary (selector, class, subsystem, chip IDs, revision and
 * header type) for each known PCI device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices with no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3318 #endif /* DDB */
3319 
3320 static struct resource *
3321 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3322     u_long start, u_long end, u_long count, u_int flags)
3323 {
3324 	struct pci_devinfo *dinfo = device_get_ivars(child);
3325 	struct resource_list *rl = &dinfo->resources;
3326 	struct resource_list_entry *rle;
3327 	struct resource *res;
3328 	pci_addr_t map, testval;
3329 	int mapsize;
3330 
3331 	/*
3332 	 * Weed out the bogons, and figure out how large the BAR/map
3333 	 * is.  Bars that read back 0 here are bogus and unimplemented.
3334 	 * Note: atapci in legacy mode are special and handled elsewhere
3335 	 * in the code.  If you have a atapci device in legacy mode and
3336 	 * it fails here, that other code is broken.
3337 	 */
3338 	res = NULL;
3339 	map = pci_read_config(child, *rid, 4);
3340 	pci_write_config(child, *rid, 0xffffffff, 4);
3341 	testval = pci_read_config(child, *rid, 4);
3342 	if (pci_maprange(testval) == 64)
3343 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3344 	if (pci_mapbase(testval) == 0)
3345 		goto out;
3346 
3347 	/*
3348 	 * Restore the original value of the BAR.  We may have reprogrammed
3349 	 * the BAR of the low-level console device and when booting verbose,
3350 	 * we need the console device addressable.
3351 	 */
3352 	pci_write_config(child, *rid, map, 4);
3353 
3354 	if (PCI_BAR_MEM(testval)) {
3355 		if (type != SYS_RES_MEMORY) {
3356 			if (bootverbose)
3357 				device_printf(dev,
3358 				    "child %s requested type %d for rid %#x,"
3359 				    " but the BAR says it is an memio\n",
3360 				    device_get_nameunit(child), type, *rid);
3361 			goto out;
3362 		}
3363 	} else {
3364 		if (type != SYS_RES_IOPORT) {
3365 			if (bootverbose)
3366 				device_printf(dev,
3367 				    "child %s requested type %d for rid %#x,"
3368 				    " but the BAR says it is an ioport\n",
3369 				    device_get_nameunit(child), type, *rid);
3370 			goto out;
3371 		}
3372 	}
3373 	/*
3374 	 * For real BARs, we need to override the size that
3375 	 * the driver requests, because that's what the BAR
3376 	 * actually uses and we would otherwise have a
3377 	 * situation where we might allocate the excess to
3378 	 * another driver, which won't work.
3379 	 */
3380 	mapsize = pci_mapsize(testval);
3381 	count = 1UL << mapsize;
3382 	if (RF_ALIGNMENT(flags) < mapsize)
3383 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3384 
3385 	/*
3386 	 * Allocate enough resource, and then write back the
3387 	 * appropriate bar for that resource.
3388 	 */
3389 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3390 	    start, end, count, flags);
3391 	if (res == NULL) {
3392 		device_printf(child,
3393 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3394 		    count, *rid, type, start, end);
3395 		goto out;
3396 	}
3397 	resource_list_add(rl, type, *rid, start, end, count);
3398 	rle = resource_list_find(rl, type, *rid);
3399 	if (rle == NULL)
3400 		panic("pci_alloc_map: unexpectedly can't find resource.");
3401 	rle->res = res;
3402 	rle->start = rman_get_start(res);
3403 	rle->end = rman_get_end(res);
3404 	rle->count = count;
3405 	if (bootverbose)
3406 		device_printf(child,
3407 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3408 		    count, *rid, type, rman_get_start(res));
3409 	map = rman_get_start(res);
3410 out:;
3411 	pci_write_config(child, *rid, map, 4);
3412 	if (pci_maprange(testval) == 64)
3413 		pci_write_config(child, *rid + 4, map >> 32, 4);
3414 	return (res);
3415 }
3416 
3417 
/*
 * Bus-interface alloc_resource method.  For direct children this
 * performs lazy BAR allocation (via pci_alloc_map), assigns a legacy
 * interrupt on first IRQ allocation when needed, and hands back
 * previously reserved resources, activating them on demand.  Anything
 * else falls through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3494 
/*
 * Bus-interface delete_resource method: remove a resource entry from a
 * direct child's resource list, releasing the underlying resource first.
 * Refuses (with a diagnostic) if the resource is still active or owned
 * by the child.  The config register at 'rid' is also cleared; see the
 * question below about whether that is actually desirable.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3530 
3531 struct resource_list *
3532 pci_get_resource_list (device_t dev, device_t child)
3533 {
3534 	struct pci_devinfo *dinfo = device_get_ivars(child);
3535 
3536 	return (&dinfo->resources);
3537 }
3538 
3539 uint32_t
3540 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3541 {
3542 	struct pci_devinfo *dinfo = device_get_ivars(child);
3543 	pcicfgregs *cfg = &dinfo->cfg;
3544 
3545 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3546 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3547 }
3548 
3549 void
3550 pci_write_config_method(device_t dev, device_t child, int reg,
3551     uint32_t val, int width)
3552 {
3553 	struct pci_devinfo *dinfo = device_get_ivars(child);
3554 	pcicfgregs *cfg = &dinfo->cfg;
3555 
3556 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3557 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3558 }
3559 
3560 int
3561 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3562     size_t buflen)
3563 {
3564 
3565 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3566 	    pci_get_function(child));
3567 	return (0);
3568 }
3569 
3570 int
3571 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3572     size_t buflen)
3573 {
3574 	struct pci_devinfo *dinfo;
3575 	pcicfgregs *cfg;
3576 
3577 	dinfo = device_get_ivars(child);
3578 	cfg = &dinfo->cfg;
3579 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3580 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3581 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3582 	    cfg->progif);
3583 	return (0);
3584 }
3585 
3586 int
3587 pci_assign_interrupt_method(device_t dev, device_t child)
3588 {
3589 	struct pci_devinfo *dinfo = device_get_ivars(child);
3590 	pcicfgregs *cfg = &dinfo->cfg;
3591 
3592 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3593 	    cfg->intpin));
3594 }
3595 
3596 static int
3597 pci_modevent(module_t mod, int what, void *arg)
3598 {
3599 	static struct cdev *pci_cdev;
3600 
3601 	switch (what) {
3602 	case MOD_LOAD:
3603 		STAILQ_INIT(&pci_devq);
3604 		pci_generation = 0;
3605 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3606 		    "pci");
3607 		pci_load_vendor_data();
3608 		break;
3609 
3610 	case MOD_UNLOAD:
3611 		destroy_dev(pci_cdev);
3612 		break;
3613 	}
3614 
3615 	return (0);
3616 }
3617 
/*
 * Restore a type 0 device's saved configuration: power state first, then
 * BARs, command register, interrupt routing, latency/cache settings and
 * MSI/MSI-X state, all from the copies cached by pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3663 
/*
 * Save the writable portion of a type 0 device's configuration header in
 * its pci_devinfo so that pci_cfg_restore() can reinstate it later.  If
 * 'setstate' is set and the pci_do_power_nodriver policy allows it, the
 * device is additionally placed in the D3 power state.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3747