xref: /freebsd/sys/dev/pci/pci.c (revision 30d239bc4c510432e65a84fa1c14ed67a3ab1c92)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static const char	*pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg);
99 #if 0
100 static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 
/*
 * Method table wiring the PCI bus driver into new-bus: generic device
 * lifecycle methods, bus methods for child resource/interrupt management,
 * and the PCI-specific kobj interface (see "pci_if.h").
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* sentinel terminating the method table */
};
166 
/* Declare the pci driver class and register it to attach below pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * In-memory copy of the vendor description data and its size; filled in
 * by pci_load_vendor_data() (declared above) and parsed by
 * pci_describe_device() to produce human-readable device names.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175 
176 
/*
 * Quirk table entry: matches a device by combined device/vendor ID
 * (device in the high 16 bits, vendor in the low 16 bits) and names a
 * workaround to apply.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* type-specific argument (map register offset for
			   PCI_QUIRK_MAP_REG; unused otherwise) */
	int	arg2;	/* second type-specific argument; unused by the
			   current quirk types */
};
185 
/* Table of known-broken devices and the workarounds they need. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
220 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* list of all enumerated PCI functions */
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set during capability parsing when a PCIe root port / PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;
230 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/*
 * Each knob below is exposed both as a loader tunable (TUNABLE_INT) and
 * as a sysctl under hw.pci, so it can be set at boot or at runtime
 * (except honor_msi_blacklist, which is read-only after boot).
 */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_vpd = 1;
TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
    "Enable support for VPD.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
276 
/*
 * Find a device_t by bus/slot/function in domain 0.
 *
 * Convenience wrapper around pci_find_dbsf() for callers that predate
 * PCI domain support; returns NULL when no matching device is known.
 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
285 
286 /* Find a device_t by domain/bus/slot/function */
287 
288 device_t
289 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
290 {
291 	struct pci_devinfo *dinfo;
292 
293 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294 		if ((dinfo->cfg.domain == domain) &&
295 		    (dinfo->cfg.bus == bus) &&
296 		    (dinfo->cfg.slot == slot) &&
297 		    (dinfo->cfg.func == func)) {
298 			return (dinfo->cfg.dev);
299 		}
300 	}
301 
302 	return (NULL);
303 }
304 
305 /* Find a device_t by vendor/device ID */
306 
307 device_t
308 pci_find_device(uint16_t vendor, uint16_t device)
309 {
310 	struct pci_devinfo *dinfo;
311 
312 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
313 		if ((dinfo->cfg.vendor == vendor) &&
314 		    (dinfo->cfg.device == device)) {
315 			return (dinfo->cfg.dev);
316 		}
317 	}
318 
319 	return (NULL);
320 }
321 
322 /* return base address of memory or port map */
323 
324 static uint32_t
325 pci_mapbase(uint32_t mapreg)
326 {
327 
328 	if (PCI_BAR_MEM(mapreg))
329 		return (mapreg & PCIM_BAR_MEM_BASE);
330 	else
331 		return (mapreg & PCIM_BAR_IO_BASE);
332 }
333 
334 /* return map type of memory or port map */
335 
336 static const char *
337 pci_maptype(unsigned mapreg)
338 {
339 
340 	if (PCI_BAR_IO(mapreg))
341 		return ("I/O Port");
342 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
343 		return ("Prefetchable Memory");
344 	return ("Memory");
345 }
346 
347 /* return log2 of map size decoded for memory or port map */
348 
349 static int
350 pci_mapsize(uint32_t testval)
351 {
352 	int ln2size;
353 
354 	testval = pci_mapbase(testval);
355 	ln2size = 0;
356 	if (testval != 0) {
357 		while ((testval & 1) == 0)
358 		{
359 			ln2size++;
360 			testval >>= 1;
361 		}
362 	}
363 	return (ln2size);
364 }
365 
366 /* return log2 of address range supported by map register */
367 
368 static int
369 pci_maprange(unsigned mapreg)
370 {
371 	int ln2range = 0;
372 
373 	if (PCI_BAR_IO(mapreg))
374 		ln2range = 32;
375 	else
376 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
377 		case PCIM_BAR_MEM_32:
378 			ln2range = 32;
379 			break;
380 		case PCIM_BAR_MEM_1MB:
381 			ln2range = 20;
382 			break;
383 		case PCIM_BAR_MEM_64:
384 			ln2range = 64;
385 			break;
386 		}
387 	return (ln2range);
388 }
389 
390 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
391 
392 static void
393 pci_fixancient(pcicfgregs *cfg)
394 {
395 	if (cfg->hdrtype != 0)
396 		return;
397 
398 	/* PCI to PCI bridges use header type 1 */
399 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
400 		cfg->hdrtype = 1;
401 }
402 
/*
 * Extract header type specific config data: the number of BAR map
 * registers and, where the header layout provides them, the subsystem
 * vendor/device IDs.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* plain device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge: subvendor at type-2 offsets */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
426 
/*
 * Read a function's configuration header into a freshly allocated
 * pci_devinfo and link it onto the global device list.
 *
 * 'size' is the allocation size requested by the caller, allowing bus
 * front-ends to allocate a structure larger than pci_devinfo with the
 * devinfo embedded at the start.  Returns NULL when no device responds
 * at the given address (vendor/device register reads as all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* A vendor/device word of all-ones means no function present. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the standard header registers. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list when the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the cached registers into the pciio conf view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
501 
502 static void
503 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
504 {
505 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
506 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
507 #if defined(__i386__) || defined(__amd64__)
508 	uint64_t addr;
509 #endif
510 	uint32_t val;
511 	int	ptr, nextptr, ptrptr;
512 
513 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
514 	case 0:
515 	case 1:
516 		ptrptr = PCIR_CAP_PTR;
517 		break;
518 	case 2:
519 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
520 		break;
521 	default:
522 		return;		/* no extended capabilities support */
523 	}
524 	nextptr = REG(ptrptr, 1);	/* sanity check? */
525 
526 	/*
527 	 * Read capability entries.
528 	 */
529 	while (nextptr != 0) {
530 		/* Sanity check */
531 		if (nextptr > 255) {
532 			printf("illegal PCI extended capability offset %d\n",
533 			    nextptr);
534 			return;
535 		}
536 		/* Find the next entry */
537 		ptr = nextptr;
538 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
539 
540 		/* Process this entry */
541 		switch (REG(ptr + PCICAP_ID, 1)) {
542 		case PCIY_PMG:		/* PCI power management */
543 			if (cfg->pp.pp_cap == 0) {
544 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
545 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
546 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
547 				if ((nextptr - ptr) > PCIR_POWER_DATA)
548 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
549 			}
550 			break;
551 #if defined(__i386__) || defined(__amd64__)
552 		case PCIY_HT:		/* HyperTransport */
553 			/* Determine HT-specific capability type. */
554 			val = REG(ptr + PCIR_HT_COMMAND, 2);
555 			switch (val & PCIM_HTCMD_CAP_MASK) {
556 			case PCIM_HTCAP_MSI_MAPPING:
557 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
558 					/* Sanity check the mapping window. */
559 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
560 					    4);
561 					addr <<= 32;
562 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
563 					    4);
564 					if (addr != MSI_INTEL_ADDR_BASE)
565 						device_printf(pcib,
566 	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
567 						    cfg->domain, cfg->bus,
568 						    cfg->slot, cfg->func,
569 						    (long long)addr);
570 				}
571 
572 				/* Enable MSI -> HT mapping. */
573 				val |= PCIM_HTCMD_MSI_ENABLE;
574 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
575 				break;
576 			}
577 			break;
578 #endif
579 		case PCIY_MSI:		/* PCI MSI */
580 			cfg->msi.msi_location = ptr;
581 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
582 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
583 						     PCIM_MSICTRL_MMC_MASK)>>1);
584 			break;
585 		case PCIY_MSIX:		/* PCI MSI-X */
586 			cfg->msix.msix_location = ptr;
587 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
588 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
589 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
590 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
591 			cfg->msix.msix_table_bar = PCIR_BAR(val &
592 			    PCIM_MSIX_BIR_MASK);
593 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
594 			val = REG(ptr + PCIR_MSIX_PBA, 4);
595 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
596 			    PCIM_MSIX_BIR_MASK);
597 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
598 			break;
599 		case PCIY_VPD:		/* PCI Vital Product Data */
600 			cfg->vpd.vpd_reg = ptr;
601 			break;
602 		case PCIY_SUBVENDOR:
603 			/* Should always be true. */
604 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
605 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
606 				cfg->subvendor = val & 0xffff;
607 				cfg->subdevice = val >> 16;
608 			}
609 			break;
610 		case PCIY_PCIX:		/* PCI-X */
611 			/*
612 			 * Assume we have a PCI-X chipset if we have
613 			 * at least one PCI-PCI bridge with a PCI-X
614 			 * capability.  Note that some systems with
615 			 * PCI-express or HT chipsets might match on
616 			 * this check as well.
617 			 */
618 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
619 				pcix_chipset = 1;
620 			break;
621 		case PCIY_EXPRESS:	/* PCI-express */
622 			/*
623 			 * Assume we have a PCI-express chipset if we have
624 			 * at least one PCI-express root port.
625 			 */
626 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
627 			if ((val & PCIM_EXP_FLAGS_TYPE) ==
628 			    PCIM_EXP_TYPE_ROOT_PORT)
629 				pcie_chipset = 1;
630 			break;
631 		default:
632 			break;
633 		}
634 	}
635 /* REG and WREG use carry through to next functions */
636 }
637 
/*
 * PCI Vital Product Data
 */

/*
 * Read one 32-bit word of VPD at word-aligned offset 'reg'.
 *
 * Writes the address (with the flag bit clear, i.e. a read request) to
 * the VPD address register and spins until the hardware sets bit 15 to
 * indicate the data register holds valid data.  Relies on the REG()/
 * WREG() macros still defined from pci_read_extcap() above.
 *
 * NOTE(review): the completion poll has no timeout; a wedged device
 * would spin here forever.
 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
}
653 
#if 0
/*
 * Write one 32-bit word of VPD at word-aligned offset 'reg' (currently
 * unused, hence compiled out).  The data register is loaded first;
 * setting bit 15 of the address register starts the write, and the
 * hardware clears the bit when the write completes.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
668 
/* Cursor state for the byte-at-a-time VPD reader (see vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config cycles */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* VPD offset of the next fetch */
	uint8_t		cksum;		/* running sum of returned bytes */
};
677 
678 static uint8_t
679 vpd_nextbyte(struct vpd_readstate *vrs)
680 {
681 	uint8_t byte;
682 
683 	if (vrs->bytesinval == 0) {
684 		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
685 		    vrs->off));
686 		vrs->off += 4;
687 		byte = vrs->val & 0xff;
688 		vrs->bytesinval = 3;
689 	} else {
690 		vrs->val = vrs->val >> 8;
691 		byte = vrs->val & 0xff;
692 		vrs->bytesinval--;
693 	}
694 
695 	vrs->cksum += byte;
696 	return (byte);
697 }
698 
/*
 * Parse a device's Vital Product Data into cfg->vpd.
 *
 * Runs a small state machine over the VPD byte stream: state 0 decodes
 * resource tags (small or large form), state 1 collects the identifier
 * string, states 2/3 collect read-only (VPD-R) keyword/value pairs, and
 * states 5/6 collect read/write (VPD-W) pairs.  The "RV" keyword carries
 * the checksum byte; if the running checksum over everything up to and
 * including it is non-zero, the read-only data is discarded.  Sets
 * cfg->vpd.vpd_cached in all cases so the work is done at most once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int end;
	int i;
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;

	/* VPD support can be administratively disabled via hw.pci.enable_vpd. */
	if (!pci_do_vpd) {
		cfg->vpd.vpd_cached = 1;
		return;
	}

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;	/* -1 = not yet seen, 1 = good, 0 = bad */
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource tag: 16-bit length follows. */
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource tag: length in low 3 bits. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* header consumed 3 bytes of the tag */
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" carries the checksum byte; validate once. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes of an uninteresting item */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			/* Remember the VPD offset where this value starts. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/*
		 * read-only data bad, clean up
		 *
		 * NOTE(review): this loop frees vpd_ros[off..1] but never
		 * vpd_ros[0], and vpd_ros[off].value may not have been
		 * allocated yet when the bailout happened in state 2 —
		 * verify against the allocation paths above.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
940 
941 int
942 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
943 {
944 	struct pci_devinfo *dinfo = device_get_ivars(child);
945 	pcicfgregs *cfg = &dinfo->cfg;
946 
947 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
948 		pci_read_vpd(device_get_parent(dev), cfg);
949 
950 	*identptr = cfg->vpd.vpd_ident;
951 
952 	if (*identptr == NULL)
953 		return (ENXIO);
954 
955 	return (0);
956 }
957 
958 int
959 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
960 	const char **vptr)
961 {
962 	struct pci_devinfo *dinfo = device_get_ivars(child);
963 	pcicfgregs *cfg = &dinfo->cfg;
964 	int i;
965 
966 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
967 		pci_read_vpd(device_get_parent(dev), cfg);
968 
969 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
970 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
971 		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
972 			*vptr = cfg->vpd.vpd_ros[i].value;
973 		}
974 
975 	if (i != cfg->vpd.vpd_rocnt)
976 		return (0);
977 
978 	*vptr = NULL;
979 	return (ENXIO);
980 }
981 
982 /*
983  * Return the offset in configuration space of the requested extended
984  * capability entry or 0 if the specified capability was not found.
985  */
986 int
987 pci_find_extcap_method(device_t dev, device_t child, int capability,
988     int *capreg)
989 {
990 	struct pci_devinfo *dinfo = device_get_ivars(child);
991 	pcicfgregs *cfg = &dinfo->cfg;
992 	u_int32_t status;
993 	u_int8_t ptr;
994 
995 	/*
996 	 * Check the CAP_LIST bit of the PCI status register first.
997 	 */
998 	status = pci_read_config(child, PCIR_STATUS, 2);
999 	if (!(status & PCIM_STATUS_CAPPRESENT))
1000 		return (ENXIO);
1001 
1002 	/*
1003 	 * Determine the start pointer of the capabilities list.
1004 	 */
1005 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1006 	case 0:
1007 	case 1:
1008 		ptr = PCIR_CAP_PTR;
1009 		break;
1010 	case 2:
1011 		ptr = PCIR_CAP_PTR_2;
1012 		break;
1013 	default:
1014 		/* XXX: panic? */
1015 		return (ENXIO);		/* no extended capabilities support */
1016 	}
1017 	ptr = pci_read_config(child, ptr, 1);
1018 
1019 	/*
1020 	 * Traverse the capabilities list.
1021 	 */
1022 	while (ptr != 0) {
1023 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1024 			if (capreg != NULL)
1025 				*capreg = ptr;
1026 			return (0);
1027 		}
1028 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1029 	}
1030 
1031 	return (ENOENT);
1032 }
1033 
1034 /*
1035  * Support for MSI-X message interrupts.
1036  */
1037 void
1038 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1039 {
1040 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1041 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1042 	uint32_t offset;
1043 
1044 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1045 	offset = msix->msix_table_offset + index * 16;
1046 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1047 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1048 	bus_write_4(msix->msix_table_res, offset + 8, data);
1049 }
1050 
1051 void
1052 pci_mask_msix(device_t dev, u_int index)
1053 {
1054 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1055 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1056 	uint32_t offset, val;
1057 
1058 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1059 	offset = msix->msix_table_offset + index * 16 + 12;
1060 	val = bus_read_4(msix->msix_table_res, offset);
1061 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1062 		val |= PCIM_MSIX_VCTRL_MASK;
1063 		bus_write_4(msix->msix_table_res, offset, val);
1064 	}
1065 }
1066 
1067 void
1068 pci_unmask_msix(device_t dev, u_int index)
1069 {
1070 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1071 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1072 	uint32_t offset, val;
1073 
1074 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1075 	offset = msix->msix_table_offset + index * 16 + 12;
1076 	val = bus_read_4(msix->msix_table_res, offset);
1077 	if (val & PCIM_MSIX_VCTRL_MASK) {
1078 		val &= ~PCIM_MSIX_VCTRL_MASK;
1079 		bus_write_4(msix->msix_table_res, offset, val);
1080 	}
1081 }
1082 
1083 int
1084 pci_pending_msix(device_t dev, u_int index)
1085 {
1086 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1087 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1088 	uint32_t offset, bit;
1089 
1090 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1091 	offset = msix->msix_pba_offset + (index / 32) * 4;
1092 	bit = 1 << index % 32;
1093 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1094 }
1095 
1096 /*
1097  * Restore MSI-X registers and table during resume.  If MSI-X is
1098  * enabled then walk the virtual table to restore the actual MSI-X
1099  * table.
1100  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is 1-based; msix_vectors is 0-based. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1128 
1129 /*
1130  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1131  * returned in *count.  After this function returns, each message will be
1132  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1133  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have activated the memory BARs holding the MSI-X table and
	 * PBA before asking for messages.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still points at it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate IRQs one at a time until we hit the cap or run dry. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Initially vector i+1 (1-based) maps to table slot i; drivers
	 * may rearrange this later via pci_remap_msix().
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1265 
1266 /*
1267  * By default, pci_alloc_msix() will assign the allocated IRQ
1268  * resources consecutively to the first N messages in the MSI-X table.
1269  * However, device drivers may want to use different layouts if they
1270  * either receive fewer messages than they asked for, or they wish to
1271  * populate the MSI-X table sparsely.  This method allows the driver
1272  * to specify what layout it wants.  It must be called after a
1273  * successful pci_alloc_msix() but before any of the associated
1274  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1275  *
1276  * The 'vectors' array contains 'count' message vectors.  The array
1277  * maps directly to the MSI-X table in that index 0 in the array
1278  * specifies the vector for the first message in the MSI-X table, etc.
1279  * The vector value in each array index can either be 0 to indicate
1280  * that no vector should be assigned to a message slot, or it can be a
1281  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1283  * vector (IRQ) to be used for the corresponding message.
1284  *
1285  * On successful return, each message with a non-zero vector will have
1286  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1287  * 1.  Additionally, if any of the IRQs allocated via the previous
1288  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1289  * will be freed back to the system automatically.
1290  *
1291  * For example, suppose a driver has a MSI-X table with 6 messages and
1292  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1293  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1294  * C.  After the call to pci_alloc_msix(), the device will be setup to
1295  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1297  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1298  * be freed back to the system.  This device will also have valid
1299  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1300  *
1301  * In any case, the SYS_RES_IRQ rid X will always map to the message
1302  * at MSI-X table index X - 1 and will only be valid if a vector is
1303  * assigned to that table entry.
1304  */
1305 int
1306 pci_remap_msix_method(device_t dev, device_t child, int count,
1307     const u_int *vectors)
1308 {
1309 	struct pci_devinfo *dinfo = device_get_ivars(child);
1310 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1311 	struct resource_list_entry *rle;
1312 	int i, irq, j, *used;
1313 
1314 	/*
1315 	 * Have to have at least one message in the table but the
1316 	 * table can't be bigger than the actual MSI-X table in the
1317 	 * device.
1318 	 */
1319 	if (count == 0 || count > msix->msix_msgnum)
1320 		return (EINVAL);
1321 
1322 	/* Sanity check the vectors. */
1323 	for (i = 0; i < count; i++)
1324 		if (vectors[i] > msix->msix_alloc)
1325 			return (EINVAL);
1326 
1327 	/*
1328 	 * Make sure there aren't any holes in the vectors to be used.
1329 	 * It's a big pain to support it, and it doesn't really make
1330 	 * sense anyway.  Also, at least one vector must be used.
1331 	 */
1332 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1333 	    M_ZERO);
1334 	for (i = 0; i < count; i++)
1335 		if (vectors[i] != 0)
1336 			used[vectors[i] - 1] = 1;
1337 	for (i = 0; i < msix->msix_alloc - 1; i++)
1338 		if (used[i] == 0 && used[i + 1] == 1) {
1339 			free(used, M_DEVBUF);
1340 			return (EINVAL);
1341 		}
1342 	if (used[0] != 1) {
1343 		free(used, M_DEVBUF);
1344 		return (EINVAL);
1345 	}
1346 
1347 	/* Make sure none of the resources are allocated. */
1348 	for (i = 0; i < msix->msix_table_len; i++) {
1349 		if (msix->msix_table[i].mte_vector == 0)
1350 			continue;
1351 		if (msix->msix_table[i].mte_handlers > 0)
1352 			return (EBUSY);
1353 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1354 		KASSERT(rle != NULL, ("missing resource"));
1355 		if (rle->res != NULL)
1356 			return (EBUSY);
1357 	}
1358 
1359 	/* Free the existing resource list entries. */
1360 	for (i = 0; i < msix->msix_table_len; i++) {
1361 		if (msix->msix_table[i].mte_vector == 0)
1362 			continue;
1363 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1364 	}
1365 
1366 	/*
1367 	 * Build the new virtual table keeping track of which vectors are
1368 	 * used.
1369 	 */
1370 	free(msix->msix_table, M_DEVBUF);
1371 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1372 	    M_DEVBUF, M_WAITOK | M_ZERO);
1373 	for (i = 0; i < count; i++)
1374 		msix->msix_table[i].mte_vector = vectors[i];
1375 	msix->msix_table_len = count;
1376 
1377 	/* Free any unused IRQs and resize the vectors array if necessary. */
1378 	j = msix->msix_alloc - 1;
1379 	if (used[j] == 0) {
1380 		struct msix_vector *vec;
1381 
1382 		while (used[j] == 0) {
1383 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1384 			    msix->msix_vectors[j].mv_irq);
1385 			j--;
1386 		}
1387 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1388 		    M_WAITOK);
1389 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1390 		    (j + 1));
1391 		free(msix->msix_vectors, M_DEVBUF);
1392 		msix->msix_vectors = vec;
1393 		msix->msix_alloc = j + 1;
1394 	}
1395 	free(used, M_DEVBUF);
1396 
1397 	/* Map the IRQs onto the rids. */
1398 	for (i = 0; i < count; i++) {
1399 		if (vectors[i] == 0)
1400 			continue;
1401 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1402 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1403 		    irq, 1);
1404 	}
1405 
1406 	if (bootverbose) {
1407 		device_printf(child, "Remapped MSI-X IRQs as: ");
1408 		for (i = 0; i < count; i++) {
1409 			if (i != 0)
1410 				printf(", ");
1411 			if (vectors[i] == 0)
1412 				printf("---");
1413 			else
1414 				printf("%d",
1415 				    msix->msix_vectors[vectors[i]].mv_irq);
1416 		}
1417 		printf("\n");
1418 	}
1419 
1420 	return (0);
1421 }
1422 
/*
 * Disable MSI-X for 'child' and release all of its MSI-X messages
 * back to the parent bridge.  Returns ENODEV if no MSI-X messages
 * are allocated and EBUSY if any message still has a handler
 * established or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1469 
1470 /*
1471  * Return the max supported MSI-X messages this device supports.
1472  * Basically, assuming the MD code can alloc messages, this function
1473  * should return the maximum value that pci_alloc_msix() can return.
1474  * Thus, it is subject to the tunables, etc.
1475  */
1476 int
1477 pci_msix_count_method(device_t dev, device_t child)
1478 {
1479 	struct pci_devinfo *dinfo = device_get_ivars(child);
1480 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1481 
1482 	if (pci_do_msix && msix->msix_location != 0)
1483 		return (msix->msix_msgnum);
1484 	return (0);
1485 }
1486 
1487 /*
1488  * Support for MSI message signalled interrupts.
1489  */
1490 void
1491 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1492 {
1493 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1494 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1495 
1496 	/* Write data and address values. */
1497 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1498 	    address & 0xffffffff, 4);
1499 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1500 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1501 		    address >> 32, 4);
1502 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1503 		    data, 2);
1504 	} else
1505 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1506 		    2);
1507 
1508 	/* Enable MSI in the control register. */
1509 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1510 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1511 	    2);
1512 }
1513 
1514 void
1515 pci_disable_msi(device_t dev)
1516 {
1517 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1518 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1519 
1520 	/* Disable MSI in the control register. */
1521 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1522 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1523 	    2);
1524 }
1525 
1526 /*
1527  * Restore MSI registers during resume.  If MSI is enabled then
1528  * restore the data and address registers in addition to the control
1529  * register.
1530  */
1531 static void
1532 pci_resume_msi(device_t dev)
1533 {
1534 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1535 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1536 	uint64_t address;
1537 	uint16_t data;
1538 
1539 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1540 		address = msi->msi_addr;
1541 		data = msi->msi_data;
1542 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1543 		    address & 0xffffffff, 4);
1544 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1545 			pci_write_config(dev, msi->msi_location +
1546 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1547 			pci_write_config(dev, msi->msi_location +
1548 			    PCIR_MSI_DATA_64BIT, data, 2);
1549 		} else
1550 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1551 			    data, 2);
1552 	}
1553 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1554 	    2);
1555 }
1556 
/*
 * Re-request the address/data mapping for 'irq' from the parent
 * bridge and reprogram the device (MSI or MSI-X) accordingly.  Used
 * when the MD interrupt code moves an IRQ (e.g. to another CPU).
 * Returns 0 on success, ENOENT if this device does not own 'irq',
 * or the error from PCIB_MAP_MSI().
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable while rewriting address/data. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using this vector. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is 1-based. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1632 
1633 /*
1634  * Returns true if the specified device is blacklisted because MSI
1635  * doesn't work.
1636  */
1637 int
1638 pci_msi_device_blacklisted(device_t dev)
1639 {
1640 	struct pci_quirk *q;
1641 
1642 	if (!pci_honor_msi_blacklist)
1643 		return (0);
1644 
1645 	for (q = &pci_quirks[0]; q->devid; q++) {
1646 		if (q->devid == pci_get_devid(dev) &&
1647 		    q->type == PCI_QUIRK_DISABLE_MSI)
1648 			return (1);
1649 	}
1650 	return (0);
1651 }
1652 
1653 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1655  * we just check for blacklisted chipsets as represented by the
1656  * host-PCI bridge at device 0:0:0.  In the future, it may become
1657  * necessary to check other system attributes, such as the kenv values
1658  * that give the motherboard manufacturer and model number.
1659  */
1660 static int
1661 pci_msi_blacklisted(void)
1662 {
1663 	device_t dev;
1664 
1665 	if (!pci_honor_msi_blacklist)
1666 		return (0);
1667 
1668 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1669 	if (!(pcie_chipset || pcix_chipset))
1670 		return (1);
1671 
1672 	dev = pci_find_bsf(0, 0, 0);
1673 	if (dev != NULL)
1674 		return (pci_msi_device_blacklisted(dev));
1675 	return (0);
1676 }
1677 
1678 /*
1679  * Attempt to allocate *count MSI messages.  The actual number allocated is
1680  * returned in *count.  After this function returns, each message will be
1681  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1682  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2 (still a power of 2). */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The MME field
	 * encodes the message count as log2 in bits 4-6.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1801 
1802 /* Release the MSI messages associated with this device. */
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers for the bulk release below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1850 
1851 /*
1852  * Return the max supported MSI messages this device supports.
1853  * Basically, assuming the MD code can alloc messages, this function
1854  * should return the maximum value that pci_alloc_msi() can return.
1855  * Thus, it is subject to the tunables, etc.
1856  */
1857 int
1858 pci_msi_count_method(device_t dev, device_t child)
1859 {
1860 	struct pci_devinfo *dinfo = device_get_ivars(child);
1861 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1862 
1863 	if (pci_do_msi && msi->msi_location != 0)
1864 		return (msi->msi_msgnum);
1865 	return (0);
1866 }
1867 
1868 /* free pcicfgregs structure and all depending data structures */
1869 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Free the cached VPD data, if any was read. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global PCI device list before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
1897 
1898 /*
 * PCI power management
1900  */
1901 int
1902 pci_set_powerstate_method(device_t dev, device_t child, int state)
1903 {
1904 	struct pci_devinfo *dinfo = device_get_ivars(child);
1905 	pcicfgregs *cfg = &dinfo->cfg;
1906 	uint16_t status;
1907 	int result, oldstate, highest, delay;
1908 
1909 	if (cfg->pp.pp_cap == 0)
1910 		return (EOPNOTSUPP);
1911 
1912 	/*
1913 	 * Optimize a no state change request away.  While it would be OK to
1914 	 * write to the hardware in theory, some devices have shown odd
1915 	 * behavior when going from D3 -> D3.
1916 	 */
1917 	oldstate = pci_get_powerstate(child);
1918 	if (oldstate == state)
1919 		return (0);
1920 
1921 	/*
1922 	 * The PCI power management specification states that after a state
1923 	 * transition between PCI power states, system software must
1924 	 * guarantee a minimal delay before the function accesses the device.
1925 	 * Compute the worst case delay that we need to guarantee before we
1926 	 * access the device.  Many devices will be responsive much more
1927 	 * quickly than this delay, but there are some that don't respond
1928 	 * instantly to state changes.  Transitions to/from D3 state require
1929 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1930 	 * is done below with DELAY rather than a sleeper function because
1931 	 * this function can be called from contexts where we cannot sleep.
1932 	 */
1933 	highest = (oldstate > state) ? oldstate : state;
1934 	if (highest == PCI_POWERSTATE_D3)
1935 	    delay = 10000;
1936 	else if (highest == PCI_POWERSTATE_D2)
1937 	    delay = 200;
1938 	else
1939 	    delay = 0;
1940 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1941 	    & ~PCIM_PSTAT_DMASK;
1942 	result = 0;
1943 	switch (state) {
1944 	case PCI_POWERSTATE_D0:
1945 		status |= PCIM_PSTAT_D0;
1946 		break;
1947 	case PCI_POWERSTATE_D1:
1948 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1949 			return (EOPNOTSUPP);
1950 		status |= PCIM_PSTAT_D1;
1951 		break;
1952 	case PCI_POWERSTATE_D2:
1953 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1954 			return (EOPNOTSUPP);
1955 		status |= PCIM_PSTAT_D2;
1956 		break;
1957 	case PCI_POWERSTATE_D3:
1958 		status |= PCIM_PSTAT_D3;
1959 		break;
1960 	default:
1961 		return (EINVAL);
1962 	}
1963 
1964 	if (bootverbose)
1965 		printf(
1966 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
1967 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
1968 		    dinfo->cfg.func, oldstate, state);
1969 
1970 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1971 	if (delay)
1972 		DELAY(delay);
1973 	return (0);
1974 }
1975 
1976 int
1977 pci_get_powerstate_method(device_t dev, device_t child)
1978 {
1979 	struct pci_devinfo *dinfo = device_get_ivars(child);
1980 	pcicfgregs *cfg = &dinfo->cfg;
1981 	uint16_t status;
1982 	int result;
1983 
1984 	if (cfg->pp.pp_cap != 0) {
1985 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1986 		switch (status & PCIM_PSTAT_DMASK) {
1987 		case PCIM_PSTAT_D0:
1988 			result = PCI_POWERSTATE_D0;
1989 			break;
1990 		case PCIM_PSTAT_D1:
1991 			result = PCI_POWERSTATE_D1;
1992 			break;
1993 		case PCIM_PSTAT_D2:
1994 			result = PCI_POWERSTATE_D2;
1995 			break;
1996 		case PCIM_PSTAT_D3:
1997 			result = PCI_POWERSTATE_D3;
1998 			break;
1999 		default:
2000 			result = PCI_POWERSTATE_UNKNOWN;
2001 			break;
2002 		}
2003 	} else {
2004 		/* No support, device is always at D0 */
2005 		result = PCI_POWERSTATE_D0;
2006 	}
2007 	return (result);
2008 }
2009 
2010 /*
2011  * Some convenience functions for PCI device drivers.
2012  */
2013 
2014 static __inline void
2015 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2016 {
2017 	uint16_t	command;
2018 
2019 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2020 	command |= bit;
2021 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2022 }
2023 
2024 static __inline void
2025 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2026 {
2027 	uint16_t	command;
2028 
2029 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2030 	command &= ~bit;
2031 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2032 }
2033 
2034 int
2035 pci_enable_busmaster_method(device_t dev, device_t child)
2036 {
2037 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2038 	return (0);
2039 }
2040 
2041 int
2042 pci_disable_busmaster_method(device_t dev, device_t child)
2043 {
2044 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2045 	return (0);
2046 }
2047 
2048 int
2049 pci_enable_io_method(device_t dev, device_t child, int space)
2050 {
2051 	uint16_t command;
2052 	uint16_t bit;
2053 	char *error;
2054 
2055 	bit = 0;
2056 	error = NULL;
2057 
2058 	switch(space) {
2059 	case SYS_RES_IOPORT:
2060 		bit = PCIM_CMD_PORTEN;
2061 		error = "port";
2062 		break;
2063 	case SYS_RES_MEMORY:
2064 		bit = PCIM_CMD_MEMEN;
2065 		error = "memory";
2066 		break;
2067 	default:
2068 		return (EINVAL);
2069 	}
2070 	pci_set_command_bit(dev, child, bit);
2071 	/* Some devices seem to need a brief stall here, what do to? */
2072 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2073 	if (command & bit)
2074 		return (0);
2075 	device_printf(child, "failed to enable %s mapping!\n", error);
2076 	return (ENXIO);
2077 }
2078 
2079 int
2080 pci_disable_io_method(device_t dev, device_t child, int space)
2081 {
2082 	uint16_t command;
2083 	uint16_t bit;
2084 	char *error;
2085 
2086 	bit = 0;
2087 	error = NULL;
2088 
2089 	switch(space) {
2090 	case SYS_RES_IOPORT:
2091 		bit = PCIM_CMD_PORTEN;
2092 		error = "port";
2093 		break;
2094 	case SYS_RES_MEMORY:
2095 		bit = PCIM_CMD_MEMEN;
2096 		error = "memory";
2097 		break;
2098 	default:
2099 		return (EINVAL);
2100 	}
2101 	pci_clear_command_bit(dev, child, bit);
2102 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2103 	if (command & bit) {
2104 		device_printf(child, "failed to disable %s mapping!\n", error);
2105 		return (ENXIO);
2106 	}
2107 	return (0);
2108 }
2109 
2110 /*
2111  * New style pci driver.  Parent device is either a pci-host-bridge or a
2112  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2113  */
2114 
/*
 * Dump a multi-line summary of a device's configuration header when
 * booting verbosely: IDs, location, class, command/status registers,
 * timing parameters, interrupt routing, and any power-management,
 * MSI, or MSI-X capabilities found during config parsing.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Current D-state lives in the PM status register. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2171 
2172 static int
2173 pci_porten(device_t pcib, int b, int s, int f)
2174 {
2175 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2176 		& PCIM_CMD_PORTEN) != 0;
2177 }
2178 
2179 static int
2180 pci_memen(device_t pcib, int b, int s, int f)
2181 {
2182 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2183 		& PCIM_CMD_MEMEN) != 0;
2184 }
2185 
2186 /*
2187  * Add a resource based on a pci map register. Return 1 if the map
2188  * register is a 32bit map register or 2 if it is a 64bit register.
2189  */
2190 static int
2191 pci_add_map(device_t pcib, device_t bus, device_t dev,
2192     int b, int s, int f, int reg, struct resource_list *rl, int force,
2193     int prefetch)
2194 {
2195 	uint32_t map;
2196 	pci_addr_t base;
2197 	pci_addr_t start, end, count;
2198 	uint8_t ln2size;
2199 	uint8_t ln2range;
2200 	uint32_t testval;
2201 	uint16_t cmd;
2202 	int type;
2203 	int barlen;
2204 	struct resource *res;
2205 
2206 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2207 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2208 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2209 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2210 
2211 	if (PCI_BAR_MEM(map))
2212 		type = SYS_RES_MEMORY;
2213 	else
2214 		type = SYS_RES_IOPORT;
2215 	ln2size = pci_mapsize(testval);
2216 	ln2range = pci_maprange(testval);
2217 	base = pci_mapbase(map);
2218 	barlen = ln2range == 64 ? 2 : 1;
2219 
2220 	/*
2221 	 * For I/O registers, if bottom bit is set, and the next bit up
2222 	 * isn't clear, we know we have a BAR that doesn't conform to the
2223 	 * spec, so ignore it.  Also, sanity check the size of the data
2224 	 * areas to the type of memory involved.  Memory must be at least
2225 	 * 16 bytes in size, while I/O ranges must be at least 4.
2226 	 */
2227 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2228 		return (barlen);
2229 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2230 	    (type == SYS_RES_IOPORT && ln2size < 2))
2231 		return (barlen);
2232 
2233 	if (ln2range == 64)
2234 		/* Read the other half of a 64bit map register */
2235 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2236 	if (bootverbose) {
2237 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2238 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2239 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2240 			printf(", port disabled\n");
2241 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2242 			printf(", memory disabled\n");
2243 		else
2244 			printf(", enabled\n");
2245 	}
2246 
2247 	/*
2248 	 * If base is 0, then we have problems.  It is best to ignore
2249 	 * such entries for the moment.  These will be allocated later if
2250 	 * the driver specifically requests them.  However, some
2251 	 * removable busses look better when all resources are allocated,
2252 	 * so allow '0' to be overriden.
2253 	 *
2254 	 * Similarly treat maps whose values is the same as the test value
2255 	 * read back.  These maps have had all f's written to them by the
2256 	 * BIOS in an attempt to disable the resources.
2257 	 */
2258 	if (!force && (base == 0 || map == testval))
2259 		return (barlen);
2260 	if ((u_long)base != base) {
2261 		device_printf(bus,
2262 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2263 		    pci_get_domain(dev), b, s, f, reg);
2264 		return (barlen);
2265 	}
2266 
2267 	/*
2268 	 * This code theoretically does the right thing, but has
2269 	 * undesirable side effects in some cases where peripherals
2270 	 * respond oddly to having these bits enabled.  Let the user
2271 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2272 	 * default).
2273 	 */
2274 	if (pci_enable_io_modes) {
2275 		/* Turn on resources that have been left off by a lazy BIOS */
2276 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2277 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2278 			cmd |= PCIM_CMD_PORTEN;
2279 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2280 		}
2281 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2282 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2283 			cmd |= PCIM_CMD_MEMEN;
2284 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2285 		}
2286 	} else {
2287 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2288 			return (barlen);
2289 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2290 			return (barlen);
2291 	}
2292 
2293 	count = 1 << ln2size;
2294 	if (base == 0 || base == pci_mapbase(testval)) {
2295 		start = 0;	/* Let the parent deside */
2296 		end = ~0ULL;
2297 	} else {
2298 		start = base;
2299 		end = base + (1 << ln2size) - 1;
2300 	}
2301 	resource_list_add(rl, type, reg, start, end, count);
2302 
2303 	/*
2304 	 * Not quite sure what to do on failure of allocating the resource
2305 	 * since I can postulate several right answers.
2306 	 */
2307 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2308 	    prefetch ? RF_PREFETCHABLE : 0);
2309 	if (res == NULL)
2310 		return (barlen);
2311 	start = rman_get_start(res);
2312 	if ((u_long)start != start) {
2313 		/* Wait a minute!  this platform can't do this address. */
2314 		device_printf(bus,
2315 		    "pci%d:%d.%d.%x bar %#x start %#jx, too many bits.",
2316 		    pci_get_domain(dev), b, s, f, reg, (uintmax_t)start);
2317 		resource_list_release(rl, bus, dev, type, reg, res);
2318 		return (barlen);
2319 	}
2320 	pci_write_config(dev, reg, start, 4);
2321 	if (ln2range == 64)
2322 		pci_write_config(dev, reg + 4, start >> 32, 4);
2323 	return (barlen);
2324 }
2325 
2326 /*
2327  * For ATA devices we need to decide early what addressing mode to use.
2328  * Legacy demands that the primary and secondary ATA ports sits on the
2329  * same addresses that old ISA hardware did. This dictates that we use
2330  * those addresses and ignore the BAR's if we cannot set PCI native
2331  * addressing mode.
2332  */
2333 static void
2334 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2335     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2336 {
2337 	int rid, type, progif;
2338 #if 0
2339 	/* if this device supports PCI native addressing use it */
2340 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2341 	if ((progif & 0x8a) == 0x8a) {
2342 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2343 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2344 			printf("Trying ATA native PCI addressing mode\n");
2345 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2346 		}
2347 	}
2348 #endif
2349 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2350 	type = SYS_RES_IOPORT;
2351 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2352 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2353 		    prefetchmask & (1 << 0));
2354 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2355 		    prefetchmask & (1 << 1));
2356 	} else {
2357 		rid = PCIR_BAR(0);
2358 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2359 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2360 		    0);
2361 		rid = PCIR_BAR(1);
2362 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2363 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2364 		    0);
2365 	}
2366 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2367 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2368 		    prefetchmask & (1 << 2));
2369 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2370 		    prefetchmask & (1 << 3));
2371 	} else {
2372 		rid = PCIR_BAR(2);
2373 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2374 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2375 		    0);
2376 		rid = PCIR_BAR(3);
2377 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2378 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2379 		    0);
2380 	}
2381 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2382 	    prefetchmask & (1 << 4));
2383 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2384 	    prefetchmask & (1 << 5));
2385 }
2386 
/*
 * Work out which IRQ a device should use and record it as rid 0 in the
 * device's resource list.  The IRQ comes, in order of preference, from
 * a user tunable, from bus routing (PCI_ASSIGN_INTERRUPT), or from the
 * intline register left by the firmware.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2434 
/*
 * Populate a device's resource list from its BARs (with special-case
 * handling for legacy ATA controllers), apply any quirk-driven extra
 * map registers, and assign an interrupt if the device has an intpin.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2487 
/*
 * Scan every slot/function on the given bus, read each function's
 * config space, and add a child device for each function found.
 * Functions 1-7 are only probed when the header type of function 0
 * marks the device as multi-function.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;		/* REG() below reads function 0's header */
		DELAY(1);	/* brief settle before touching the slot */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2520 
/*
 * Attach one discovered PCI function as a new-bus child: create the
 * device, hook up its ivars, snapshot its config space, restore it
 * (powering the device to D0 if needed), print verbose info, and
 * populate its resource list.  The save/restore ordering matters:
 * save must happen before restore so a valid snapshot exists.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2532 
/*
 * Generic PCI bus probe.  Always matches, but at low priority so that
 * more specific subclasses (e.g. ACPI-aware PCI buses) win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2542 
/*
 * Attach the PCI bus: learn our domain and bus number from the parent
 * bridge, enumerate child devices, and attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2564 
/*
 * Suspend the PCI bus: save each child's config space, suspend the
 * children, then (if power management on resume is enabled and ACPI
 * is present) drop attached type-0 children into D3 or the D-state
 * ACPI recommends for this sleep state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		/* Children stay powered; free the list and bail. */
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2612 
/*
 * Resume the PCI bus: power each attached type-0 child back to D0
 * (when ACPI power management is in use), restore every child's saved
 * config space, then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2647 
2648 static void
2649 pci_load_vendor_data(void)
2650 {
2651 	caddr_t vendordata, info;
2652 
2653 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2654 		info = preload_search_info(vendordata, MODINFO_ADDR);
2655 		pci_vendordata = *(char **)info;
2656 		info = preload_search_info(vendordata, MODINFO_SIZE);
2657 		pci_vendordata_size = *(size_t *)info;
2658 		/* terminate the database */
2659 		pci_vendordata[pci_vendordata_size] = '\n';
2660 	}
2661 }
2662 
/*
 * Called when a new PCI driver is registered: run the driver's identify
 * routine, then reprobe every unclaimed child.  Config space is
 * restored (device powered up) before the probe, and saved again --
 * possibly powering the device back down -- if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Skip children that already have a driver. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2692 
/*
 * Bus method to set up an interrupt handler for a child.  After the
 * generic setup succeeds, direct children using MSI or MSI-X (rid > 0)
 * get their message address/data programmed the first time a handler
 * is attached, and per-message handler counts are maintained.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
	 * us the address and data register values.  If we fail for some
	 * reason, teardown the interrupt handler.
	 */
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map MSI on the first handler attach. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			/* Lazily map the vector on first use. */
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask on the first handler only. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}
	bad:
		/* Fall through here with error == 0 on success. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2769 
2770 int
2771 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2772     void *cookie)
2773 {
2774 	struct msix_table_entry *mte;
2775 	struct resource_list_entry *rle;
2776 	struct pci_devinfo *dinfo;
2777 	int error, rid;
2778 
2779 	/*
2780 	 * If this is a direct child, check to see if the interrupt is
2781 	 * MSI or MSI-X.  If so, decrement the appropriate handlers
2782 	 * count and mask the MSI-X message, or disable MSI messages
2783 	 * if the count drops to 0.
2784 	 */
2785 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2786 		return (EINVAL);
2787 	rid = rman_get_rid(irq);
2788 	if (device_get_parent(child) == dev && rid > 0) {
2789 		dinfo = device_get_ivars(child);
2790 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2791 		if (rle->res != irq)
2792 			return (EINVAL);
2793 		if (dinfo->cfg.msi.msi_alloc > 0) {
2794 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2795 			    ("MSI-X index too high"));
2796 			if (dinfo->cfg.msi.msi_handlers == 0)
2797 				return (EINVAL);
2798 			dinfo->cfg.msi.msi_handlers--;
2799 			if (dinfo->cfg.msi.msi_handlers == 0)
2800 				pci_disable_msi(child);
2801 		} else {
2802 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2803 			    ("No MSI or MSI-X interrupts allocated"));
2804 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2805 			    ("MSI-X index too high"));
2806 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2807 			if (mte->mte_handlers == 0)
2808 				return (EINVAL);
2809 			mte->mte_handlers--;
2810 			if (mte->mte_handlers == 0)
2811 				pci_mask_msix(child, rid - 1);
2812 		}
2813 	}
2814 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2815 	if (device_get_parent(child) == dev && rid > 0)
2816 		KASSERT(error == 0,
2817 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2818 	return (error);
2819 }
2820 
/*
 * Bus method to print the one-line attachment announcement for a
 * child: header, allocated port/memory/IRQ resources, flags, and the
 * slot.function location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
2846 
/*
 * Table mapping PCI class/subclass codes to human-readable names, used
 * when announcing devices no driver claimed.  A subclass of -1 is the
 * catch-all description for the class; the table is terminated by an
 * all-zero entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2933 
2934 void
2935 pci_probe_nomatch(device_t dev, device_t child)
2936 {
2937 	int	i;
2938 	char	*cp, *scp, *device;
2939 
2940 	/*
2941 	 * Look for a listing for this device in a loaded device database.
2942 	 */
2943 	if ((device = pci_describe_device(child)) != NULL) {
2944 		device_printf(dev, "<%s>", device);
2945 		free(device, M_DEVBUF);
2946 	} else {
2947 		/*
2948 		 * Scan the class/subclass descriptions for a general
2949 		 * description.
2950 		 */
2951 		cp = "unknown";
2952 		scp = NULL;
2953 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2954 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2955 				if (pci_nomatch_tab[i].subclass == -1) {
2956 					cp = pci_nomatch_tab[i].desc;
2957 				} else if (pci_nomatch_tab[i].subclass ==
2958 				    pci_get_subclass(child)) {
2959 					scp = pci_nomatch_tab[i].desc;
2960 				}
2961 			}
2962 		}
2963 		device_printf(dev, "<%s%s%s>",
2964 		    cp ? cp : "",
2965 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2966 		    scp ? scp : "");
2967 	}
2968 	printf(" at device %d.%d (no driver attached)\n",
2969 	    pci_get_slot(child), pci_get_function(child));
2970 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
2971 	return;
2972 }
2973 
2974 /*
2975  * Parse the PCI device database, if loaded, and return a pointer to a
2976  * description of the device.
2977  *
2978  * The database is flat text formatted as follows:
2979  *
2980  * Any line not in a valid format is ignored.
2981  * Lines are terminated with newline '\n' characters.
2982  *
2983  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2984  * the vendor name.
2985  *
2986  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2987  * - devices cannot be listed without a corresponding VENDOR line.
2988  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2989  * another TAB, then the device name.
2990  */
2991 
2992 /*
2993  * Assuming (ptr) points to the beginning of a line in the database,
2994  * return the vendor or device and description of the next entry.
2995  * The value of (vendor) or (device) inappropriate for the entry type
2996  * is set to -1.  Returns nonzero at the end of the database.
2997  *
 * Note that this is not fully robust in the face of corrupt data;
 * we attempt to safeguard against it by appending a newline to the
 * end of the database when we initialise it.
3001  */
3002 static int
3003 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3004 {
3005 	char	*cp = *ptr;
3006 	int	left;
3007 
3008 	*device = -1;
3009 	*vendor = -1;
3010 	**desc = '\0';
3011 	for (;;) {
3012 		left = pci_vendordata_size - (cp - pci_vendordata);
3013 		if (left <= 0) {
3014 			*ptr = cp;
3015 			return(1);
3016 		}
3017 
3018 		/* vendor entry? */
3019 		if (*cp != '\t' &&
3020 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3021 			break;
3022 		/* device entry? */
3023 		if (*cp == '\t' &&
3024 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3025 			break;
3026 
3027 		/* skip to next line */
3028 		while (*cp != '\n' && left > 0) {
3029 			cp++;
3030 			left--;
3031 		}
3032 		if (*cp == '\n') {
3033 			cp++;
3034 			left--;
3035 		}
3036 	}
3037 	/* skip to next line */
3038 	while (*cp != '\n' && left > 0) {
3039 		cp++;
3040 		left--;
3041 	}
3042 	if (*cp == '\n' && left > 0)
3043 		cp++;
3044 	*ptr = cp;
3045 	return(0);
3046 }
3047 
3048 static char *
3049 pci_describe_device(device_t dev)
3050 {
3051 	int	vendor, device;
3052 	char	*desc, *vp, *dp, *line;
3053 
3054 	desc = vp = dp = NULL;
3055 
3056 	/*
3057 	 * If we have no vendor data, we can't do anything.
3058 	 */
3059 	if (pci_vendordata == NULL)
3060 		goto out;
3061 
3062 	/*
3063 	 * Scan the vendor data looking for this device
3064 	 */
3065 	line = pci_vendordata;
3066 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3067 		goto out;
3068 	for (;;) {
3069 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3070 			goto out;
3071 		if (vendor == pci_get_vendor(dev))
3072 			break;
3073 	}
3074 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3075 		goto out;
3076 	for (;;) {
3077 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3078 			*dp = 0;
3079 			break;
3080 		}
3081 		if (vendor != -1) {
3082 			*dp = 0;
3083 			break;
3084 		}
3085 		if (device == pci_get_device(dev))
3086 			break;
3087 	}
3088 	if (dp[0] == '\0')
3089 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3090 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3091 	    NULL)
3092 		sprintf(desc, "%s, %s", vp, dp);
3093  out:
3094 	if (vp != NULL)
3095 		free(vp, M_DEVBUF);
3096 	if (dp != NULL)
3097 		free(dp, M_DEVBUF);
3098 	return(desc);
3099 }
3100 
/*
 * Read one of the child's PCI instance variables.  Values are served
 * from the configuration registers cached in the child's pci_devinfo
 * rather than re-read from the hardware.
 *
 * Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR (not provided by
 * the PCI bus itself) and ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	/* Identification registers. */
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined ID: device in the high word, vendor in the low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	/* Interrupt routing state. */
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	/* Bus address of the function. */
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	/* Miscellaneous cached header registers. */
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3183 
3184 int
3185 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3186 {
3187 	struct pci_devinfo *dinfo;
3188 
3189 	dinfo = device_get_ivars(child);
3190 
3191 	switch (which) {
3192 	case PCI_IVAR_INTPIN:
3193 		dinfo->cfg.intpin = value;
3194 		return (0);
3195 	case PCI_IVAR_ETHADDR:
3196 	case PCI_IVAR_SUBVENDOR:
3197 	case PCI_IVAR_SUBDEVICE:
3198 	case PCI_IVAR_VENDOR:
3199 	case PCI_IVAR_DEVICE:
3200 	case PCI_IVAR_DEVID:
3201 	case PCI_IVAR_CLASS:
3202 	case PCI_IVAR_SUBCLASS:
3203 	case PCI_IVAR_PROGIF:
3204 	case PCI_IVAR_REVID:
3205 	case PCI_IVAR_IRQ:
3206 	case PCI_IVAR_DOMAIN:
3207 	case PCI_IVAR_BUS:
3208 	case PCI_IVAR_SLOT:
3209 	case PCI_IVAR_FUNCTION:
3210 		return (EINVAL);	/* disallow for now */
3211 
3212 	default:
3213 		return (ENOENT);
3214 	}
3215 }
3216 
3217 
3218 #include "opt_ddb.h"
3219 #ifdef DDB
3220 #include <ddb/ddb.h>
3221 #include <sys/cons.h>
3222 
3223 /*
3224  * List resources based on pci map registers, used for within ddb
3225  */
3226 
/*
 * ddb "show pciregs" command: walk the global PCI device queue and
 * print one summary line per device (driver name/unit, bus address,
 * class, subsystem, vendor/device ID, revision and header type).
 * The walk stops early if the ddb pager is quit.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Devices without an attached driver print as "none<N>",
		 * with N drawn from a running count (none_count++ below).
		 */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3266 #endif /* DDB */
3267 
/*
 * Lazily size and allocate the resource backing a BAR that has no
 * entry in the child's resource list yet.
 *
 * The BAR is probed by writing all-ones and reading the value back;
 * a read-back base of zero means the BAR is unimplemented and NULL is
 * returned.  The caller's count and alignment are overridden by the
 * decoded BAR size.  On success the resource is recorded in the
 * child's resource list and the BAR is programmed with the assigned
 * base; on every exit path the BAR register is rewritten (with either
 * the original or the newly assigned value).
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The BAR's memory/ioport type must agree with the request. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	/* Program the BAR with the original or newly-assigned base. */
out:;
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3364 
3365 
/*
 * Allocate a resource on behalf of child.  For direct children this
 * performs lazy allocation: a legacy interrupt may be routed on first
 * use (unless MSI/MSI-X messages have already been allocated), I/O
 * decoding is enabled for BAR rids, BARs without a resource list
 * entry are sized and allocated via pci_alloc_map(), and previously
 * reserved entries are returned directly (activated here when
 * RF_ACTIVE is requested).  Everything else falls through to
 * resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3442 
3443 void
3444 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3445 {
3446 	struct pci_devinfo *dinfo;
3447 	struct resource_list *rl;
3448 	struct resource_list_entry *rle;
3449 
3450 	if (device_get_parent(child) != dev)
3451 		return;
3452 
3453 	dinfo = device_get_ivars(child);
3454 	rl = &dinfo->resources;
3455 	rle = resource_list_find(rl, type, rid);
3456 	if (rle) {
3457 		if (rle->res) {
3458 			if (rman_get_device(rle->res) != dev ||
3459 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3460 				device_printf(dev, "delete_resource: "
3461 				    "Resource still owned by child, oops. "
3462 				    "(type=%d, rid=%d, addr=%lx)\n",
3463 				    rle->type, rle->rid,
3464 				    rman_get_start(rle->res));
3465 				return;
3466 			}
3467 			bus_release_resource(dev, type, rid, rle->res);
3468 		}
3469 		resource_list_delete(rl, type, rid);
3470 	}
3471 	/*
3472 	 * Why do we turn off the PCI configuration BAR when we delete a
3473 	 * resource? -- imp
3474 	 */
3475 	pci_write_config(child, rid, 0, 4);
3476 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3477 }
3478 
3479 struct resource_list *
3480 pci_get_resource_list (device_t dev, device_t child)
3481 {
3482 	struct pci_devinfo *dinfo = device_get_ivars(child);
3483 
3484 	return (&dinfo->resources);
3485 }
3486 
3487 uint32_t
3488 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3489 {
3490 	struct pci_devinfo *dinfo = device_get_ivars(child);
3491 	pcicfgregs *cfg = &dinfo->cfg;
3492 
3493 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3494 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3495 }
3496 
3497 void
3498 pci_write_config_method(device_t dev, device_t child, int reg,
3499     uint32_t val, int width)
3500 {
3501 	struct pci_devinfo *dinfo = device_get_ivars(child);
3502 	pcicfgregs *cfg = &dinfo->cfg;
3503 
3504 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3505 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3506 }
3507 
3508 int
3509 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3510     size_t buflen)
3511 {
3512 
3513 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3514 	    pci_get_function(child));
3515 	return (0);
3516 }
3517 
3518 int
3519 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3520     size_t buflen)
3521 {
3522 	struct pci_devinfo *dinfo;
3523 	pcicfgregs *cfg;
3524 
3525 	dinfo = device_get_ivars(child);
3526 	cfg = &dinfo->cfg;
3527 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3528 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3529 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3530 	    cfg->progif);
3531 	return (0);
3532 }
3533 
3534 int
3535 pci_assign_interrupt_method(device_t dev, device_t child)
3536 {
3537 	struct pci_devinfo *dinfo = device_get_ivars(child);
3538 	pcicfgregs *cfg = &dinfo->cfg;
3539 
3540 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3541 	    cfg->intpin));
3542 }
3543 
3544 static int
3545 pci_modevent(module_t mod, int what, void *arg)
3546 {
3547 	static struct cdev *pci_cdev;
3548 
3549 	switch (what) {
3550 	case MOD_LOAD:
3551 		STAILQ_INIT(&pci_devq);
3552 		pci_generation = 0;
3553 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3554 		    "pci");
3555 		pci_load_vendor_data();
3556 		break;
3557 
3558 	case MOD_UNLOAD:
3559 		destroy_dev(pci_cdev);
3560 		break;
3561 	}
3562 
3563 	return (0);
3564 }
3565 
/*
 * Write the configuration registers cached in dinfo back to the
 * device.  The device is returned to power state D0 first (since
 * moving out of D3 resets the BARs and other registers), then the
 * BARs, command register, interrupt/latency registers and MSI/MSI-X
 * state are restored.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Restore the BARs and the rest of the cached type 0 header. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3611 
/*
 * Snapshot the (header type 0) configuration registers of a device
 * into dinfo so they can later be restored by pci_cfg_restore().  If
 * setstate is non-zero, the device may additionally be powered down
 * to D3, subject to the pci_do_power_nodriver policy and the device's
 * class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3695