xref: /freebsd/sys/dev/pci/pci.c (revision b28624fde638caadd4a89f50c9b7e7da0f98c4d2)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static const char	*pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg);
99 #if 0
100 static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static void		pci_disable_msi(device_t dev);
105 static void		pci_enable_msi(device_t dev, uint64_t address,
106 			    uint16_t data);
107 static void		pci_enable_msix(device_t dev, u_int index,
108 			    uint64_t address, uint32_t data);
109 static void		pci_mask_msix(device_t dev, u_int index);
110 static void		pci_unmask_msix(device_t dev, u_int index);
111 static int		pci_msi_blacklisted(void);
112 static void		pci_resume_msi(device_t dev);
113 static void		pci_resume_msix(device_t dev);
114 
/*
 * Method table for the pci driver class.  Each DEVMETHOD() entry pairs an
 * interface method name with the function that implements it; the table is
 * handed to DEFINE_CLASS_0() below and is terminated by the { 0, 0 } entry.
 * Entries named bus_generic_* are not defined in this file
 * (NOTE(review): presumably the generic newbus helpers -- confirm).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: resource management */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* End-of-table marker. */
	{ 0, 0 }
};
166 
/* Define the "pci" driver class (pci_driver) from the pci_methods table. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
/* Register pci_driver under pcib, with pci_modevent as the event handler. */
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Vendor description data buffer and its size.
 * NOTE(review): presumably populated by pci_load_vendor_data() -- confirm.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175 
176 
/* Values for pci_quirk.type. */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */

/* One device-specific workaround, keyed by the combined device/vendor ID. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* values above */
	int	arg1;
	int	arg2;
};

/* Quirk table; the list ends at the first entry whose devid is 0. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ .devid = 0x71138086, .type = PCI_QUIRK_MAP_REG,
	  .arg1 = 0x90, .arg2 = 0 },
	{ .devid = 0x719b8086, .type = PCI_QUIRK_MAP_REG,
	  .arg1 = 0x90, .arg2 = 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ .devid = 0x02001166, .type = PCI_QUIRK_MAP_REG,
	  .arg1 = 0x90, .arg2 = 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ .devid = 0x00141166, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x00171166, .type = PCI_QUIRK_DISABLE_MSI },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ .devid = 0x25408086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x254c8086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25508086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25608086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25708086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25788086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x35808086, .type = PCI_QUIRK_DISABLE_MSI },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ .devid = 0x74501022, .type = PCI_QUIRK_DISABLE_MSI },

	/* terminator */
	{ .devid = 0 }
};
220 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of device entries; pci_read_device() appends to it. */
struct devlist pci_devq;
/* Bumped by pci_read_device() each time an entry is added to pci_devq. */
uint32_t pci_generation;
/* Count of entries pci_read_device() has added to pci_devq. */
uint32_t pci_numdevs = 0;
/*
 * Set to 1 by pci_read_extcap() when it sees a PCI-express root port
 * (pcie_chipset) or a PCI-PCI bridge with a PCI-X capability
 * (pcix_chipset).
 */
static int pcie_chipset, pcix_chipset;
230 
/*
 * sysctl vars
 *
 * Each knob below is a file-local int with a loader tunable and a
 * hw.pci.* sysctl of the same name.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes (default 1, read/write). */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* hw.pci.do_power_nodriver (default 0, read/write). */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* hw.pci.do_power_resume (default 1, read/write). */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/*
 * hw.pci.enable_vpd (default 1, read/write).  When zero, pci_read_vpd()
 * marks the VPD as cached without reading anything from the device.
 */
static int pci_do_vpd = 1;
TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
    "Enable support for VPD.");

/* hw.pci.enable_msi (default 1, read/write). */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix (default 1, read/write). */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/*
 * hw.pci.honor_msi_blacklist (default 1).  Exposed read-only through
 * sysctl (CTLFLAG_RD); it can still be set as a loader tunable.
 */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
276 
277 /* Find a device_t by bus/slot/function */
278 
279 device_t
280 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
281 {
282 	struct pci_devinfo *dinfo;
283 
284 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
285 		if ((dinfo->cfg.bus == bus) &&
286 		    (dinfo->cfg.slot == slot) &&
287 		    (dinfo->cfg.func == func)) {
288 			return (dinfo->cfg.dev);
289 		}
290 	}
291 
292 	return (NULL);
293 }
294 
295 /* Find a device_t by vendor/device ID */
296 
297 device_t
298 pci_find_device(uint16_t vendor, uint16_t device)
299 {
300 	struct pci_devinfo *dinfo;
301 
302 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
303 		if ((dinfo->cfg.vendor == vendor) &&
304 		    (dinfo->cfg.device == device)) {
305 			return (dinfo->cfg.dev);
306 		}
307 	}
308 
309 	return (NULL);
310 }
311 
312 /* return base address of memory or port map */
313 
314 static uint32_t
315 pci_mapbase(uint32_t mapreg)
316 {
317 
318 	if (PCI_BAR_MEM(mapreg))
319 		return (mapreg & PCIM_BAR_MEM_BASE);
320 	else
321 		return (mapreg & PCIM_BAR_IO_BASE);
322 }
323 
324 /* return map type of memory or port map */
325 
326 static const char *
327 pci_maptype(unsigned mapreg)
328 {
329 
330 	if (PCI_BAR_IO(mapreg))
331 		return ("I/O Port");
332 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
333 		return ("Prefetchable Memory");
334 	return ("Memory");
335 }
336 
337 /* return log2 of map size decoded for memory or port map */
338 
339 static int
340 pci_mapsize(uint32_t testval)
341 {
342 	int ln2size;
343 
344 	testval = pci_mapbase(testval);
345 	ln2size = 0;
346 	if (testval != 0) {
347 		while ((testval & 1) == 0)
348 		{
349 			ln2size++;
350 			testval >>= 1;
351 		}
352 	}
353 	return (ln2size);
354 }
355 
356 /* return log2 of address range supported by map register */
357 
358 static int
359 pci_maprange(unsigned mapreg)
360 {
361 	int ln2range = 0;
362 
363 	if (PCI_BAR_IO(mapreg))
364 		ln2range = 32;
365 	else
366 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
367 		case PCIM_BAR_MEM_32:
368 			ln2range = 32;
369 			break;
370 		case PCIM_BAR_MEM_1MB:
371 			ln2range = 20;
372 			break;
373 		case PCIM_BAR_MEM_64:
374 			ln2range = 64;
375 			break;
376 		}
377 	return (ln2range);
378 }
379 
380 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
381 
382 static void
383 pci_fixancient(pcicfgregs *cfg)
384 {
385 	if (cfg->hdrtype != 0)
386 		return;
387 
388 	/* PCI to PCI bridges use header type 1 */
389 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
390 		cfg->hdrtype = 1;
391 }
392 
393 /* extract header type specific config data */
394 
395 static void
396 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
397 {
398 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
399 	switch (cfg->hdrtype) {
400 	case 0:
401 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
402 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
403 		cfg->nummaps	    = PCI_MAXMAPS_0;
404 		break;
405 	case 1:
406 		cfg->nummaps	    = PCI_MAXMAPS_1;
407 		break;
408 	case 2:
409 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
410 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
411 		cfg->nummaps	    = PCI_MAXMAPS_2;
412 		break;
413 	}
414 #undef REG
415 }
416 
417 /* read configuration header into pcicfgregs structure */
418 struct pci_devinfo *
419 pci_read_device(device_t pcib, int b, int s, int f, size_t size)
420 {
421 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
422 	pcicfgregs *cfg = NULL;
423 	struct pci_devinfo *devlist_entry;
424 	struct devlist *devlist_head;
425 
426 	devlist_head = &pci_devq;
427 
428 	devlist_entry = NULL;
429 
430 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
431 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
432 		if (devlist_entry == NULL)
433 			return (NULL);
434 
435 		cfg = &devlist_entry->cfg;
436 
437 		cfg->bus		= b;
438 		cfg->slot		= s;
439 		cfg->func		= f;
440 		cfg->vendor		= REG(PCIR_VENDOR, 2);
441 		cfg->device		= REG(PCIR_DEVICE, 2);
442 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
443 		cfg->statreg		= REG(PCIR_STATUS, 2);
444 		cfg->baseclass		= REG(PCIR_CLASS, 1);
445 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
446 		cfg->progif		= REG(PCIR_PROGIF, 1);
447 		cfg->revid		= REG(PCIR_REVID, 1);
448 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
449 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
450 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
451 		cfg->intpin		= REG(PCIR_INTPIN, 1);
452 		cfg->intline		= REG(PCIR_INTLINE, 1);
453 
454 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
455 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
456 
457 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
458 		cfg->hdrtype		&= ~PCIM_MFDEV;
459 
460 		pci_fixancient(cfg);
461 		pci_hdrtypedata(pcib, b, s, f, cfg);
462 
463 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
464 			pci_read_extcap(pcib, cfg);
465 
466 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
467 
468 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
469 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
470 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
471 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
472 
473 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
474 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
475 		devlist_entry->conf.pc_vendor = cfg->vendor;
476 		devlist_entry->conf.pc_device = cfg->device;
477 
478 		devlist_entry->conf.pc_class = cfg->baseclass;
479 		devlist_entry->conf.pc_subclass = cfg->subclass;
480 		devlist_entry->conf.pc_progif = cfg->progif;
481 		devlist_entry->conf.pc_revid = cfg->revid;
482 
483 		pci_numdevs++;
484 		pci_generation++;
485 	}
486 	return (devlist_entry);
487 #undef REG
488 }
489 
490 static void
491 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
492 {
493 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
494 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
495 #if defined(__i386__) || defined(__amd64__)
496 	uint64_t addr;
497 #endif
498 	uint32_t val;
499 	int	ptr, nextptr, ptrptr;
500 
501 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
502 	case 0:
503 	case 1:
504 		ptrptr = PCIR_CAP_PTR;
505 		break;
506 	case 2:
507 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
508 		break;
509 	default:
510 		return;		/* no extended capabilities support */
511 	}
512 	nextptr = REG(ptrptr, 1);	/* sanity check? */
513 
514 	/*
515 	 * Read capability entries.
516 	 */
517 	while (nextptr != 0) {
518 		/* Sanity check */
519 		if (nextptr > 255) {
520 			printf("illegal PCI extended capability offset %d\n",
521 			    nextptr);
522 			return;
523 		}
524 		/* Find the next entry */
525 		ptr = nextptr;
526 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
527 
528 		/* Process this entry */
529 		switch (REG(ptr + PCICAP_ID, 1)) {
530 		case PCIY_PMG:		/* PCI power management */
531 			if (cfg->pp.pp_cap == 0) {
532 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
533 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
534 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
535 				if ((nextptr - ptr) > PCIR_POWER_DATA)
536 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
537 			}
538 			break;
539 #if defined(__i386__) || defined(__amd64__)
540 		case PCIY_HT:		/* HyperTransport */
541 			/* Determine HT-specific capability type. */
542 			val = REG(ptr + PCIR_HT_COMMAND, 2);
543 			switch (val & PCIM_HTCMD_CAP_MASK) {
544 			case PCIM_HTCAP_MSI_MAPPING:
545 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
546 					/* Sanity check the mapping window. */
547 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
548 					    4);
549 					addr <<= 32;
550 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
551 					    4);
552 					if (addr != MSI_INTEL_ADDR_BASE)
553 						device_printf(pcib,
554 		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
555 						    cfg->bus, cfg->slot,
556 						    cfg->func, (long long)addr);
557 				}
558 
559 				/* Enable MSI -> HT mapping. */
560 				val |= PCIM_HTCMD_MSI_ENABLE;
561 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
562 				break;
563 			}
564 			break;
565 #endif
566 		case PCIY_MSI:		/* PCI MSI */
567 			cfg->msi.msi_location = ptr;
568 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
569 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
570 						     PCIM_MSICTRL_MMC_MASK)>>1);
571 			break;
572 		case PCIY_MSIX:		/* PCI MSI-X */
573 			cfg->msix.msix_location = ptr;
574 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
575 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
576 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
577 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
578 			cfg->msix.msix_table_bar = PCIR_BAR(val &
579 			    PCIM_MSIX_BIR_MASK);
580 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
581 			val = REG(ptr + PCIR_MSIX_PBA, 4);
582 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
583 			    PCIM_MSIX_BIR_MASK);
584 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
585 			break;
586 		case PCIY_VPD:		/* PCI Vital Product Data */
587 			cfg->vpd.vpd_reg = ptr;
588 			break;
589 		case PCIY_SUBVENDOR:
590 			/* Should always be true. */
591 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
592 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
593 				cfg->subvendor = val & 0xffff;
594 				cfg->subdevice = val >> 16;
595 			}
596 			break;
597 		case PCIY_PCIX:		/* PCI-X */
598 			/*
599 			 * Assume we have a PCI-X chipset if we have
600 			 * at least one PCI-PCI bridge with a PCI-X
601 			 * capability.  Note that some systems with
602 			 * PCI-express or HT chipsets might match on
603 			 * this check as well.
604 			 */
605 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
606 				pcix_chipset = 1;
607 			break;
608 		case PCIY_EXPRESS:	/* PCI-express */
609 			/*
610 			 * Assume we have a PCI-express chipset if we have
611 			 * at least one PCI-express root port.
612 			 */
613 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
614 			if ((val & PCIM_EXP_FLAGS_TYPE) ==
615 			    PCIM_EXP_TYPE_ROOT_PORT)
616 				pcie_chipset = 1;
617 			break;
618 		default:
619 			break;
620 		}
621 	}
622 /* REG and WREG use carry through to next functions */
623 }
624 
625 /*
626  * PCI Vital Product Data
627  */
628 static uint32_t
629 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
630 {
631 
632 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
633 
634 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
635 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
636 		DELAY(1);	/* limit looping */
637 
638 	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
639 }
640 
#if 0
/*
 * (Compiled out.)  Write one 32-bit word of VPD at byte offset `reg'
 * (must be a multiple of 4): the data register is loaded first, then the
 * offset is written with bit 15 set, and the address register is polled
 * until bit 15 reads back clear.  The poll has no upper bound.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int addr_reg;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	addr_reg = cfg->vpd.vpd_reg + PCIR_VPD_ADDR;
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(addr_reg, reg | 0x8000, 2);
	while ((REG(addr_reg, 2) & 0x8000) != 0)
		DELAY(1);
}
#endif
655 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD contents one byte at a
 * time while fetching them from the device one 32-bit word at a time.
 */
struct vpd_readstate {
	device_t	pcib;		/* passed to pci_read_vpd_reg() */
	pcicfgregs	*cfg;		/* passed to pci_read_vpd_reg() */
	uint32_t	val;		/* last word fetched, shifted right as
					   bytes are consumed */
	int		bytesinval;	/* bytes still unread in val */
	int		off;		/* VPD offset of the next word to
					   fetch */
	uint8_t		cksum;		/* running 8-bit sum of every byte
					   returned so far */
};
664 
665 static uint8_t
666 vpd_nextbyte(struct vpd_readstate *vrs)
667 {
668 	uint8_t byte;
669 
670 	if (vrs->bytesinval == 0) {
671 		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
672 		    vrs->off));
673 		vrs->off += 4;
674 		byte = vrs->val & 0xff;
675 		vrs->bytesinval = 3;
676 	} else {
677 		vrs->val = vrs->val >> 8;
678 		byte = vrs->val & 0xff;
679 		vrs->bytesinval--;
680 	}
681 
682 	vrs->cksum += byte;
683 	return (byte);
684 }
685 
686 static void
687 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
688 {
689 	struct vpd_readstate vrs;
690 	int state;
691 	int name;
692 	int remain;
693 	int end;
694 	int i;
695 	uint8_t byte;
696 	int alloc, off;		/* alloc/off for RO/W arrays */
697 	int cksumvalid;
698 	int dflen;
699 
700 	if (!pci_do_vpd) {
701 		cfg->vpd.vpd_cached = 1;
702 		return;
703 	}
704 
705 	/* init vpd reader */
706 	vrs.bytesinval = 0;
707 	vrs.off = 0;
708 	vrs.pcib = pcib;
709 	vrs.cfg = cfg;
710 	vrs.cksum = 0;
711 
712 	state = 0;
713 	name = remain = i = 0;	/* shut up stupid gcc */
714 	alloc = off = 0;	/* shut up stupid gcc */
715 	dflen = 0;		/* shut up stupid gcc */
716 	end = 0;
717 	cksumvalid = -1;
718 	for (; !end;) {
719 		byte = vpd_nextbyte(&vrs);
720 #if 0
721 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
722 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
723 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
724 #endif
725 		switch (state) {
726 		case 0:		/* item name */
727 			if (byte & 0x80) {
728 				remain = vpd_nextbyte(&vrs);
729 				remain |= vpd_nextbyte(&vrs) << 8;
730 				if (remain > (0x7f*4 - vrs.off)) {
731 					end = 1;
732 					printf(
733 			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
734 					    cfg->bus, cfg->slot, cfg->func,
735 					    remain);
736 				}
737 				name = byte & 0x7f;
738 			} else {
739 				remain = byte & 0x7;
740 				name = (byte >> 3) & 0xf;
741 			}
742 			switch (name) {
743 			case 0x2:	/* String */
744 				cfg->vpd.vpd_ident = malloc(remain + 1,
745 				    M_DEVBUF, M_WAITOK);
746 				i = 0;
747 				state = 1;
748 				break;
749 			case 0xf:	/* End */
750 				end = 1;
751 				state = -1;
752 				break;
753 			case 0x10:	/* VPD-R */
754 				alloc = 8;
755 				off = 0;
756 				cfg->vpd.vpd_ros = malloc(alloc *
757 				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
758 				    M_WAITOK);
759 				state = 2;
760 				break;
761 			case 0x11:	/* VPD-W */
762 				alloc = 8;
763 				off = 0;
764 				cfg->vpd.vpd_w = malloc(alloc *
765 				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
766 				    M_WAITOK);
767 				state = 5;
768 				break;
769 			default:	/* Invalid data, abort */
770 				end = 1;
771 				continue;
772 			}
773 			break;
774 
775 		case 1:	/* Identifier String */
776 			cfg->vpd.vpd_ident[i++] = byte;
777 			remain--;
778 			if (remain == 0)  {
779 				cfg->vpd.vpd_ident[i] = '\0';
780 				state = 0;
781 			}
782 			break;
783 
784 		case 2:	/* VPD-R Keyword Header */
785 			if (off == alloc) {
786 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
787 				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
788 				    M_DEVBUF, M_WAITOK);
789 			}
790 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
791 			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
792 			dflen = vpd_nextbyte(&vrs);
793 			if (dflen == 0 &&
794 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
795 			    2) == 0) {
796 				/*
797 				 * if this happens, we can't trust the rest
798 				 * of the VPD.
799 				 */
800 				printf("pci%d:%d:%d: bad keyword length: %d\n",
801 				    cfg->bus, cfg->slot, cfg->func, dflen);
802 				cksumvalid = 0;
803 				end = 1;
804 				break;
805 			} else if (dflen == 0) {
806 				cfg->vpd.vpd_ros[off].value = malloc(1 *
807 				    sizeof *cfg->vpd.vpd_ros[off].value,
808 				    M_DEVBUF, M_WAITOK);
809 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
810 			} else
811 				cfg->vpd.vpd_ros[off].value = malloc(
812 				    (dflen + 1) *
813 				    sizeof *cfg->vpd.vpd_ros[off].value,
814 				    M_DEVBUF, M_WAITOK);
815 			remain -= 3;
816 			i = 0;
817 			/* keep in sync w/ state 3's transistions */
818 			if (dflen == 0 && remain == 0)
819 				state = 0;
820 			else if (dflen == 0)
821 				state = 2;
822 			else
823 				state = 3;
824 			break;
825 
826 		case 3:	/* VPD-R Keyword Value */
827 			cfg->vpd.vpd_ros[off].value[i++] = byte;
828 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
829 			    "RV", 2) == 0 && cksumvalid == -1) {
830 				if (vrs.cksum == 0)
831 					cksumvalid = 1;
832 				else {
833 					printf(
834 				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
835 					    cfg->bus, cfg->slot, cfg->func,
836 					    vrs.cksum);
837 					cksumvalid = 0;
838 					end = 1;
839 					break;
840 				}
841 			}
842 			dflen--;
843 			remain--;
844 			/* keep in sync w/ state 2's transistions */
845 			if (dflen == 0)
846 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
847 			if (dflen == 0 && remain == 0) {
848 				cfg->vpd.vpd_rocnt = off;
849 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
850 				    off * sizeof *cfg->vpd.vpd_ros,
851 				    M_DEVBUF, M_WAITOK);
852 				state = 0;
853 			} else if (dflen == 0)
854 				state = 2;
855 			break;
856 
857 		case 4:
858 			remain--;
859 			if (remain == 0)
860 				state = 0;
861 			break;
862 
863 		case 5:	/* VPD-W Keyword Header */
864 			if (off == alloc) {
865 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
866 				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
867 				    M_DEVBUF, M_WAITOK);
868 			}
869 			cfg->vpd.vpd_w[off].keyword[0] = byte;
870 			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
871 			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
872 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
873 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
874 			    sizeof *cfg->vpd.vpd_w[off].value,
875 			    M_DEVBUF, M_WAITOK);
876 			remain -= 3;
877 			i = 0;
878 			/* keep in sync w/ state 6's transistions */
879 			if (dflen == 0 && remain == 0)
880 				state = 0;
881 			else if (dflen == 0)
882 				state = 5;
883 			else
884 				state = 6;
885 			break;
886 
887 		case 6:	/* VPD-W Keyword Value */
888 			cfg->vpd.vpd_w[off].value[i++] = byte;
889 			dflen--;
890 			remain--;
891 			/* keep in sync w/ state 5's transistions */
892 			if (dflen == 0)
893 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
894 			if (dflen == 0 && remain == 0) {
895 				cfg->vpd.vpd_wcnt = off;
896 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
897 				    off * sizeof *cfg->vpd.vpd_w,
898 				    M_DEVBUF, M_WAITOK);
899 				state = 0;
900 			} else if (dflen == 0)
901 				state = 5;
902 			break;
903 
904 		default:
905 			printf("pci%d:%d:%d: invalid state: %d\n",
906 			    cfg->bus, cfg->slot, cfg->func, state);
907 			end = 1;
908 			break;
909 		}
910 	}
911 
912 	if (cksumvalid == 0) {
913 		/* read-only data bad, clean up */
914 		for (; off; off--)
915 			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
916 
917 		free(cfg->vpd.vpd_ros, M_DEVBUF);
918 		cfg->vpd.vpd_ros = NULL;
919 	}
920 	cfg->vpd.vpd_cached = 1;
921 #undef REG
922 #undef WREG
923 }
924 
925 int
926 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
927 {
928 	struct pci_devinfo *dinfo = device_get_ivars(child);
929 	pcicfgregs *cfg = &dinfo->cfg;
930 
931 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
932 		pci_read_vpd(device_get_parent(dev), cfg);
933 
934 	*identptr = cfg->vpd.vpd_ident;
935 
936 	if (*identptr == NULL)
937 		return (ENXIO);
938 
939 	return (0);
940 }
941 
942 int
943 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
944 	const char **vptr)
945 {
946 	struct pci_devinfo *dinfo = device_get_ivars(child);
947 	pcicfgregs *cfg = &dinfo->cfg;
948 	int i;
949 
950 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
951 		pci_read_vpd(device_get_parent(dev), cfg);
952 
953 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
954 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
955 		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
956 			*vptr = cfg->vpd.vpd_ros[i].value;
957 		}
958 
959 	if (i != cfg->vpd.vpd_rocnt)
960 		return (0);
961 
962 	*vptr = NULL;
963 	return (ENXIO);
964 }
965 
966 /*
967  * Return the offset in configuration space of the requested extended
968  * capability entry or 0 if the specified capability was not found.
969  */
970 int
971 pci_find_extcap_method(device_t dev, device_t child, int capability,
972     int *capreg)
973 {
974 	struct pci_devinfo *dinfo = device_get_ivars(child);
975 	pcicfgregs *cfg = &dinfo->cfg;
976 	u_int32_t status;
977 	u_int8_t ptr;
978 
979 	/*
980 	 * Check the CAP_LIST bit of the PCI status register first.
981 	 */
982 	status = pci_read_config(child, PCIR_STATUS, 2);
983 	if (!(status & PCIM_STATUS_CAPPRESENT))
984 		return (ENXIO);
985 
986 	/*
987 	 * Determine the start pointer of the capabilities list.
988 	 */
989 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
990 	case 0:
991 	case 1:
992 		ptr = PCIR_CAP_PTR;
993 		break;
994 	case 2:
995 		ptr = PCIR_CAP_PTR_2;
996 		break;
997 	default:
998 		/* XXX: panic? */
999 		return (ENXIO);		/* no extended capabilities support */
1000 	}
1001 	ptr = pci_read_config(child, ptr, 1);
1002 
1003 	/*
1004 	 * Traverse the capabilities list.
1005 	 */
1006 	while (ptr != 0) {
1007 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1008 			if (capreg != NULL)
1009 				*capreg = ptr;
1010 			return (0);
1011 		}
1012 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1013 	}
1014 
1015 	return (ENOENT);
1016 }
1017 
1018 /*
1019  * Support for MSI-X message interrupts.
1020  */
1021 void
1022 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1023 {
1024 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1025 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1026 	uint32_t offset;
1027 
1028 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1029 	offset = msix->msix_table_offset + index * 16;
1030 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1031 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1032 	bus_write_4(msix->msix_table_res, offset + 8, data);
1033 }
1034 
1035 void
1036 pci_mask_msix(device_t dev, u_int index)
1037 {
1038 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1039 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1040 	uint32_t offset, val;
1041 
1042 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1043 	offset = msix->msix_table_offset + index * 16 + 12;
1044 	val = bus_read_4(msix->msix_table_res, offset);
1045 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1046 		val |= PCIM_MSIX_VCTRL_MASK;
1047 		bus_write_4(msix->msix_table_res, offset, val);
1048 	}
1049 }
1050 
1051 void
1052 pci_unmask_msix(device_t dev, u_int index)
1053 {
1054 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1055 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1056 	uint32_t offset, val;
1057 
1058 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1059 	offset = msix->msix_table_offset + index * 16 + 12;
1060 	val = bus_read_4(msix->msix_table_res, offset);
1061 	if (val & PCIM_MSIX_VCTRL_MASK) {
1062 		val &= ~PCIM_MSIX_VCTRL_MASK;
1063 		bus_write_4(msix->msix_table_res, offset, val);
1064 	}
1065 }
1066 
1067 int
1068 pci_pending_msix(device_t dev, u_int index)
1069 {
1070 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1071 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1072 	uint32_t offset, bit;
1073 
1074 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1075 	offset = msix->msix_pba_offset + (index / 32) * 4;
1076 	bit = 1 << index % 32;
1077 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1078 }
1079 
1080 /*
1081  * Restore MSI-X registers and table during resume.  If MSI-X is
1082  * enabled then walk the virtual table to restore the actual MSI-X
1083  * table.
1084  */
1085 static void
1086 pci_resume_msix(device_t dev)
1087 {
1088 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1089 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1090 	struct msix_table_entry *mte;
1091 	struct msix_vector *mv;
1092 	int i;
1093 
1094 	if (msix->msix_alloc > 0) {
1095 		/* First, mask all vectors. */
1096 		for (i = 0; i < msix->msix_msgnum; i++)
1097 			pci_mask_msix(dev, i);
1098 
1099 		/* Second, program any messages with at least one handler. */
1100 		for (i = 0; i < msix->msix_table_len; i++) {
1101 			mte = &msix->msix_table[i];
1102 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1103 				continue;
1104 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1105 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1106 			pci_unmask_msix(dev, i);
1107 		}
1108 	}
1109 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1110 	    msix->msix_ctrl, 2);
1111 }
1112 
1113 /*
1114  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1115  * returned in *count.  After this function returns, each message will be
1116  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1117  */
1118 int
1119 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1120 {
1121 	struct pci_devinfo *dinfo = device_get_ivars(child);
1122 	pcicfgregs *cfg = &dinfo->cfg;
1123 	struct resource_list_entry *rle;
1124 	int actual, error, i, irq, max;
1125 
1126 	/* Don't let count == 0 get us into trouble. */
1127 	if (*count == 0)
1128 		return (EINVAL);
1129 
1130 	/* If rid 0 is allocated, then fail. */
1131 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1132 	if (rle != NULL && rle->res != NULL)
1133 		return (ENXIO);
1134 
1135 	/* Already have allocated messages? */
1136 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1137 		return (ENXIO);
1138 
1139 	/* If MSI is blacklisted for this system, fail. */
1140 	if (pci_msi_blacklisted())
1141 		return (ENXIO);
1142 
1143 	/* MSI-X capability present? */
1144 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1145 		return (ENODEV);
1146 
1147 	/* Make sure the appropriate BARs are mapped. */
1148 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1149 	    cfg->msix.msix_table_bar);
1150 	if (rle == NULL || rle->res == NULL ||
1151 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1152 		return (ENXIO);
1153 	cfg->msix.msix_table_res = rle->res;
1154 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1155 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1156 		    cfg->msix.msix_pba_bar);
1157 		if (rle == NULL || rle->res == NULL ||
1158 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1159 			return (ENXIO);
1160 	}
1161 	cfg->msix.msix_pba_res = rle->res;
1162 
1163 	if (bootverbose)
1164 		device_printf(child,
1165 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1166 		    *count, cfg->msix.msix_msgnum);
1167 	max = min(*count, cfg->msix.msix_msgnum);
1168 	for (i = 0; i < max; i++) {
1169 		/* Allocate a message. */
1170 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1171 		if (error)
1172 			break;
1173 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1174 		    irq, 1);
1175 	}
1176 	actual = i;
1177 
1178 	if (bootverbose) {
1179 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1180 		if (actual == 1)
1181 			device_printf(child, "using IRQ %lu for MSI-X\n",
1182 			    rle->start);
1183 		else {
1184 			int run;
1185 
1186 			/*
1187 			 * Be fancy and try to print contiguous runs of
1188 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1189 			 * 'run' is true if we are in a range.
1190 			 */
1191 			device_printf(child, "using IRQs %lu", rle->start);
1192 			irq = rle->start;
1193 			run = 0;
1194 			for (i = 1; i < actual; i++) {
1195 				rle = resource_list_find(&dinfo->resources,
1196 				    SYS_RES_IRQ, i + 1);
1197 
1198 				/* Still in a run? */
1199 				if (rle->start == irq + 1) {
1200 					run = 1;
1201 					irq++;
1202 					continue;
1203 				}
1204 
1205 				/* Finish previous range. */
1206 				if (run) {
1207 					printf("-%d", irq);
1208 					run = 0;
1209 				}
1210 
1211 				/* Start new range. */
1212 				printf(",%lu", rle->start);
1213 				irq = rle->start;
1214 			}
1215 
1216 			/* Unfinished range? */
1217 			if (run)
1218 				printf("-%d", irq);
1219 			printf(" for MSI-X\n");
1220 		}
1221 	}
1222 
1223 	/* Mask all vectors. */
1224 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1225 		pci_mask_msix(child, i);
1226 
1227 	/* Allocate and initialize vector data and virtual table. */
1228 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1229 	    M_DEVBUF, M_WAITOK | M_ZERO);
1230 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1231 	    M_DEVBUF, M_WAITOK | M_ZERO);
1232 	for (i = 0; i < actual; i++) {
1233 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1234 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1235 		cfg->msix.msix_table[i].mte_vector = i + 1;
1236 	}
1237 
1238 	/* Update control register to enable MSI-X. */
1239 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1240 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1241 	    cfg->msix.msix_ctrl, 2);
1242 
1243 	/* Update counts of alloc'd messages. */
1244 	cfg->msix.msix_alloc = actual;
1245 	cfg->msix.msix_table_len = actual;
1246 	*count = actual;
1247 	return (0);
1248 }
1249 
1250 /*
1251  * By default, pci_alloc_msix() will assign the allocated IRQ
1252  * resources consecutively to the first N messages in the MSI-X table.
1253  * However, device drivers may want to use different layouts if they
1254  * either receive fewer messages than they asked for, or they wish to
1255  * populate the MSI-X table sparsely.  This method allows the driver
1256  * to specify what layout it wants.  It must be called after a
1257  * successful pci_alloc_msix() but before any of the associated
1258  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1259  *
1260  * The 'vectors' array contains 'count' message vectors.  The array
1261  * maps directly to the MSI-X table in that index 0 in the array
1262  * specifies the vector for the first message in the MSI-X table, etc.
1263  * The vector value in each array index can either be 0 to indicate
1264  * that no vector should be assigned to a message slot, or it can be a
1265  * number from 1 to N (where N is the count returned from a
1266  * succcessful call to pci_alloc_msix()) to indicate which message
1267  * vector (IRQ) to be used for the corresponding message.
1268  *
1269  * On successful return, each message with a non-zero vector will have
1270  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1271  * 1.  Additionally, if any of the IRQs allocated via the previous
1272  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1273  * will be freed back to the system automatically.
1274  *
1275  * For example, suppose a driver has a MSI-X table with 6 messages and
1276  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1277  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1278  * C.  After the call to pci_alloc_msix(), the device will be setup to
1279  * have an MSI-X table of ABC--- (where - means no vector assigned).
1280  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1281  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1282  * be freed back to the system.  This device will also have valid
1283  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1284  *
1285  * In any case, the SYS_RES_IRQ rid X will always map to the message
1286  * at MSI-X table index X - 1 and will only be valid if a vector is
1287  * assigned to that table entry.
1288  */
1289 int
1290 pci_remap_msix_method(device_t dev, device_t child, int count,
1291     const u_int *vectors)
1292 {
1293 	struct pci_devinfo *dinfo = device_get_ivars(child);
1294 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1295 	struct resource_list_entry *rle;
1296 	int i, irq, j, *used;
1297 
1298 	/*
1299 	 * Have to have at least one message in the table but the
1300 	 * table can't be bigger than the actual MSI-X table in the
1301 	 * device.
1302 	 */
1303 	if (count == 0 || count > msix->msix_msgnum)
1304 		return (EINVAL);
1305 
1306 	/* Sanity check the vectors. */
1307 	for (i = 0; i < count; i++)
1308 		if (vectors[i] > msix->msix_alloc)
1309 			return (EINVAL);
1310 
1311 	/*
1312 	 * Make sure there aren't any holes in the vectors to be used.
1313 	 * It's a big pain to support it, and it doesn't really make
1314 	 * sense anyway.  Also, at least one vector must be used.
1315 	 */
1316 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1317 	    M_ZERO);
1318 	for (i = 0; i < count; i++)
1319 		if (vectors[i] != 0)
1320 			used[vectors[i] - 1] = 1;
1321 	for (i = 0; i < msix->msix_alloc - 1; i++)
1322 		if (used[i] == 0 && used[i + 1] == 1) {
1323 			free(used, M_DEVBUF);
1324 			return (EINVAL);
1325 		}
1326 	if (used[0] != 1) {
1327 		free(used, M_DEVBUF);
1328 		return (EINVAL);
1329 	}
1330 
1331 	/* Make sure none of the resources are allocated. */
1332 	for (i = 0; i < msix->msix_table_len; i++) {
1333 		if (msix->msix_table[i].mte_vector == 0)
1334 			continue;
1335 		if (msix->msix_table[i].mte_handlers > 0)
1336 			return (EBUSY);
1337 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1338 		KASSERT(rle != NULL, ("missing resource"));
1339 		if (rle->res != NULL)
1340 			return (EBUSY);
1341 	}
1342 
1343 	/* Free the existing resource list entries. */
1344 	for (i = 0; i < msix->msix_table_len; i++) {
1345 		if (msix->msix_table[i].mte_vector == 0)
1346 			continue;
1347 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1348 	}
1349 
1350 	/*
1351 	 * Build the new virtual table keeping track of which vectors are
1352 	 * used.
1353 	 */
1354 	free(msix->msix_table, M_DEVBUF);
1355 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1356 	    M_DEVBUF, M_WAITOK | M_ZERO);
1357 	for (i = 0; i < count; i++)
1358 		msix->msix_table[i].mte_vector = vectors[i];
1359 	msix->msix_table_len = count;
1360 
1361 	/* Free any unused IRQs and resize the vectors array if necessary. */
1362 	j = msix->msix_alloc - 1;
1363 	if (used[j] == 0) {
1364 		struct msix_vector *vec;
1365 
1366 		while (used[j] == 0) {
1367 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1368 			    msix->msix_vectors[j].mv_irq);
1369 			j--;
1370 		}
1371 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1372 		    M_WAITOK);
1373 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1374 		    (j + 1));
1375 		free(msix->msix_vectors, M_DEVBUF);
1376 		msix->msix_vectors = vec;
1377 		msix->msix_alloc = j + 1;
1378 	}
1379 	free(used, M_DEVBUF);
1380 
1381 	/* Map the IRQs onto the rids. */
1382 	for (i = 0; i < count; i++) {
1383 		if (vectors[i] == 0)
1384 			continue;
1385 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1386 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1387 		    irq, 1);
1388 	}
1389 
1390 	if (bootverbose) {
1391 		device_printf(child, "Remapped MSI-X IRQs as: ");
1392 		for (i = 0; i < count; i++) {
1393 			if (i != 0)
1394 				printf(", ");
1395 			if (vectors[i] == 0)
1396 				printf("---");
1397 			else
1398 				printf("%d",
1399 				    msix->msix_vectors[vectors[i]].mv_irq);
1400 		}
1401 		printf("\n");
1402 	}
1403 
1404 	return (0);
1405 }
1406 
1407 static int
1408 pci_release_msix(device_t dev, device_t child)
1409 {
1410 	struct pci_devinfo *dinfo = device_get_ivars(child);
1411 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1412 	struct resource_list_entry *rle;
1413 	int i;
1414 
1415 	/* Do we have any messages to release? */
1416 	if (msix->msix_alloc == 0)
1417 		return (ENODEV);
1418 
1419 	/* Make sure none of the resources are allocated. */
1420 	for (i = 0; i < msix->msix_table_len; i++) {
1421 		if (msix->msix_table[i].mte_vector == 0)
1422 			continue;
1423 		if (msix->msix_table[i].mte_handlers > 0)
1424 			return (EBUSY);
1425 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1426 		KASSERT(rle != NULL, ("missing resource"));
1427 		if (rle->res != NULL)
1428 			return (EBUSY);
1429 	}
1430 
1431 	/* Update control register to disable MSI-X. */
1432 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1433 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1434 	    msix->msix_ctrl, 2);
1435 
1436 	/* Free the resource list entries. */
1437 	for (i = 0; i < msix->msix_table_len; i++) {
1438 		if (msix->msix_table[i].mte_vector == 0)
1439 			continue;
1440 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1441 	}
1442 	free(msix->msix_table, M_DEVBUF);
1443 	msix->msix_table_len = 0;
1444 
1445 	/* Release the IRQs. */
1446 	for (i = 0; i < msix->msix_alloc; i++)
1447 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1448 		    msix->msix_vectors[i].mv_irq);
1449 	free(msix->msix_vectors, M_DEVBUF);
1450 	msix->msix_alloc = 0;
1451 	return (0);
1452 }
1453 
1454 /*
1455  * Return the max supported MSI-X messages this device supports.
1456  * Basically, assuming the MD code can alloc messages, this function
1457  * should return the maximum value that pci_alloc_msix() can return.
1458  * Thus, it is subject to the tunables, etc.
1459  */
1460 int
1461 pci_msix_count_method(device_t dev, device_t child)
1462 {
1463 	struct pci_devinfo *dinfo = device_get_ivars(child);
1464 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465 
1466 	if (pci_do_msix && msix->msix_location != 0)
1467 		return (msix->msix_msgnum);
1468 	return (0);
1469 }
1470 
1471 /*
1472  * Support for MSI message signalled interrupts.
1473  */
1474 void
1475 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1476 {
1477 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1478 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1479 
1480 	/* Write data and address values. */
1481 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1482 	    address & 0xffffffff, 4);
1483 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1484 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1485 		    address >> 32, 4);
1486 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1487 		    data, 2);
1488 	} else
1489 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1490 		    2);
1491 
1492 	/* Enable MSI in the control register. */
1493 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1494 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1495 	    2);
1496 }
1497 
1498 void
1499 pci_disable_msi(device_t dev)
1500 {
1501 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1502 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1503 
1504 	/* Disable MSI in the control register. */
1505 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1506 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1507 	    2);
1508 }
1509 
1510 /*
1511  * Restore MSI registers during resume.  If MSI is enabled then
1512  * restore the data and address registers in addition to the control
1513  * register.
1514  */
1515 static void
1516 pci_resume_msi(device_t dev)
1517 {
1518 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1519 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1520 	uint64_t address;
1521 	uint16_t data;
1522 
1523 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1524 		address = msi->msi_addr;
1525 		data = msi->msi_data;
1526 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1527 		    address & 0xffffffff, 4);
1528 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1529 			pci_write_config(dev, msi->msi_location +
1530 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1531 			pci_write_config(dev, msi->msi_location +
1532 			    PCIR_MSI_DATA_64BIT, data, 2);
1533 		} else
1534 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1535 			    data, 2);
1536 	}
1537 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1538 	    2);
1539 }
1540 
1541 int
1542 pci_remap_msi_irq(device_t dev, u_int irq)
1543 {
1544 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1545 	pcicfgregs *cfg = &dinfo->cfg;
1546 	struct resource_list_entry *rle;
1547 	struct msix_table_entry *mte;
1548 	struct msix_vector *mv;
1549 	device_t bus;
1550 	uint64_t addr;
1551 	uint32_t data;
1552 	int error, i, j;
1553 
1554 	bus = device_get_parent(dev);
1555 
1556 	/*
1557 	 * Handle MSI first.  We try to find this IRQ among our list
1558 	 * of MSI IRQs.  If we find it, we request updated address and
1559 	 * data registers and apply the results.
1560 	 */
1561 	if (cfg->msi.msi_alloc > 0) {
1562 
1563 		/* If we don't have any active handlers, nothing to do. */
1564 		if (cfg->msi.msi_handlers == 0)
1565 			return (0);
1566 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1567 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1568 			    i + 1);
1569 			if (rle->start == irq) {
1570 				error = PCIB_MAP_MSI(device_get_parent(bus),
1571 				    dev, irq, &addr, &data);
1572 				if (error)
1573 					return (error);
1574 				pci_disable_msi(dev);
1575 				dinfo->cfg.msi.msi_addr = addr;
1576 				dinfo->cfg.msi.msi_data = data;
1577 				pci_enable_msi(dev, addr, data);
1578 				return (0);
1579 			}
1580 		}
1581 		return (ENOENT);
1582 	}
1583 
1584 	/*
1585 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1586 	 * we request the updated mapping info.  If that works, we go
1587 	 * through all the slots that use this IRQ and update them.
1588 	 */
1589 	if (cfg->msix.msix_alloc > 0) {
1590 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1591 			mv = &cfg->msix.msix_vectors[i];
1592 			if (mv->mv_irq == irq) {
1593 				error = PCIB_MAP_MSI(device_get_parent(bus),
1594 				    dev, irq, &addr, &data);
1595 				if (error)
1596 					return (error);
1597 				mv->mv_address = addr;
1598 				mv->mv_data = data;
1599 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1600 					mte = &cfg->msix.msix_table[j];
1601 					if (mte->mte_vector != i + 1)
1602 						continue;
1603 					if (mte->mte_handlers == 0)
1604 						continue;
1605 					pci_mask_msix(dev, j);
1606 					pci_enable_msix(dev, j, addr, data);
1607 					pci_unmask_msix(dev, j);
1608 				}
1609 			}
1610 		}
1611 		return (ENOENT);
1612 	}
1613 
1614 	return (ENOENT);
1615 }
1616 
1617 /*
1618  * Returns true if the specified device is blacklisted because MSI
1619  * doesn't work.
1620  */
1621 int
1622 pci_msi_device_blacklisted(device_t dev)
1623 {
1624 	struct pci_quirk *q;
1625 
1626 	if (!pci_honor_msi_blacklist)
1627 		return (0);
1628 
1629 	for (q = &pci_quirks[0]; q->devid; q++) {
1630 		if (q->devid == pci_get_devid(dev) &&
1631 		    q->type == PCI_QUIRK_DISABLE_MSI)
1632 			return (1);
1633 	}
1634 	return (0);
1635 }
1636 
1637 /*
1638  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1639  * we just check for blacklisted chipsets as represented by the
1640  * host-PCI bridge at device 0:0:0.  In the future, it may become
1641  * necessary to check other system attributes, such as the kenv values
1642  * that give the motherboard manufacturer and model number.
1643  */
1644 static int
1645 pci_msi_blacklisted(void)
1646 {
1647 	device_t dev;
1648 
1649 	if (!pci_honor_msi_blacklist)
1650 		return (0);
1651 
1652 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1653 	if (!(pcie_chipset || pcix_chipset))
1654 		return (1);
1655 
1656 	dev = pci_find_bsf(0, 0, 0);
1657 	if (dev != NULL)
1658 		return (pci_msi_device_blacklisted(dev));
1659 	return (0);
1660 }
1661 
1662 /*
1663  * Attempt to allocate *count MSI messages.  The actual number allocated is
1664  * returned in *count.  After this function returns, each message will be
1665  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1666  */
1667 int
1668 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1669 {
1670 	struct pci_devinfo *dinfo = device_get_ivars(child);
1671 	pcicfgregs *cfg = &dinfo->cfg;
1672 	struct resource_list_entry *rle;
1673 	int actual, error, i, irqs[32];
1674 	uint16_t ctrl;
1675 
1676 	/* Don't let count == 0 get us into trouble. */
1677 	if (*count == 0)
1678 		return (EINVAL);
1679 
1680 	/* If rid 0 is allocated, then fail. */
1681 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1682 	if (rle != NULL && rle->res != NULL)
1683 		return (ENXIO);
1684 
1685 	/* Already have allocated messages? */
1686 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1687 		return (ENXIO);
1688 
1689 	/* If MSI is blacklisted for this system, fail. */
1690 	if (pci_msi_blacklisted())
1691 		return (ENXIO);
1692 
1693 	/* MSI capability present? */
1694 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1695 		return (ENODEV);
1696 
1697 	if (bootverbose)
1698 		device_printf(child,
1699 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1700 		    *count, cfg->msi.msi_msgnum);
1701 
1702 	/* Don't ask for more than the device supports. */
1703 	actual = min(*count, cfg->msi.msi_msgnum);
1704 
1705 	/* Don't ask for more than 32 messages. */
1706 	actual = min(actual, 32);
1707 
1708 	/* MSI requires power of 2 number of messages. */
1709 	if (!powerof2(actual))
1710 		return (EINVAL);
1711 
1712 	for (;;) {
1713 		/* Try to allocate N messages. */
1714 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1715 		    cfg->msi.msi_msgnum, irqs);
1716 		if (error == 0)
1717 			break;
1718 		if (actual == 1)
1719 			return (error);
1720 
1721 		/* Try N / 2. */
1722 		actual >>= 1;
1723 	}
1724 
1725 	/*
1726 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1727 	 * resources in the irqs[] array, so add new resources
1728 	 * starting at rid 1.
1729 	 */
1730 	for (i = 0; i < actual; i++)
1731 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1732 		    irqs[i], irqs[i], 1);
1733 
1734 	if (bootverbose) {
1735 		if (actual == 1)
1736 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1737 		else {
1738 			int run;
1739 
1740 			/*
1741 			 * Be fancy and try to print contiguous runs
1742 			 * of IRQ values as ranges.  'run' is true if
1743 			 * we are in a range.
1744 			 */
1745 			device_printf(child, "using IRQs %d", irqs[0]);
1746 			run = 0;
1747 			for (i = 1; i < actual; i++) {
1748 
1749 				/* Still in a run? */
1750 				if (irqs[i] == irqs[i - 1] + 1) {
1751 					run = 1;
1752 					continue;
1753 				}
1754 
1755 				/* Finish previous range. */
1756 				if (run) {
1757 					printf("-%d", irqs[i - 1]);
1758 					run = 0;
1759 				}
1760 
1761 				/* Start new range. */
1762 				printf(",%d", irqs[i]);
1763 			}
1764 
1765 			/* Unfinished range? */
1766 			if (run)
1767 				printf("-%d", irqs[actual - 1]);
1768 			printf(" for MSI\n");
1769 		}
1770 	}
1771 
1772 	/* Update control register with actual count. */
1773 	ctrl = cfg->msi.msi_ctrl;
1774 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1775 	ctrl |= (ffs(actual) - 1) << 4;
1776 	cfg->msi.msi_ctrl = ctrl;
1777 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1778 
1779 	/* Update counts of alloc'd messages. */
1780 	cfg->msi.msi_alloc = actual;
1781 	cfg->msi.msi_handlers = 0;
1782 	*count = actual;
1783 	return (0);
1784 }
1785 
1786 /* Release the MSI messages associated with this device. */
1787 int
1788 pci_release_msi_method(device_t dev, device_t child)
1789 {
1790 	struct pci_devinfo *dinfo = device_get_ivars(child);
1791 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1792 	struct resource_list_entry *rle;
1793 	int error, i, irqs[32];
1794 
1795 	/* Try MSI-X first. */
1796 	error = pci_release_msix(dev, child);
1797 	if (error != ENODEV)
1798 		return (error);
1799 
1800 	/* Do we have any messages to release? */
1801 	if (msi->msi_alloc == 0)
1802 		return (ENODEV);
1803 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
1804 
1805 	/* Make sure none of the resources are allocated. */
1806 	if (msi->msi_handlers > 0)
1807 		return (EBUSY);
1808 	for (i = 0; i < msi->msi_alloc; i++) {
1809 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1810 		KASSERT(rle != NULL, ("missing MSI resource"));
1811 		if (rle->res != NULL)
1812 			return (EBUSY);
1813 		irqs[i] = rle->start;
1814 	}
1815 
1816 	/* Update control register with 0 count. */
1817 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
1818 	    ("%s: MSI still enabled", __func__));
1819 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
1820 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
1821 	    msi->msi_ctrl, 2);
1822 
1823 	/* Release the messages. */
1824 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
1825 	for (i = 0; i < msi->msi_alloc; i++)
1826 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1827 
1828 	/* Update alloc count. */
1829 	msi->msi_alloc = 0;
1830 	msi->msi_addr = 0;
1831 	msi->msi_data = 0;
1832 	return (0);
1833 }
1834 
1835 /*
1836  * Return the max supported MSI messages this device supports.
1837  * Basically, assuming the MD code can alloc messages, this function
1838  * should return the maximum value that pci_alloc_msi() can return.
1839  * Thus, it is subject to the tunables, etc.
1840  */
1841 int
1842 pci_msi_count_method(device_t dev, device_t child)
1843 {
1844 	struct pci_devinfo *dinfo = device_get_ivars(child);
1845 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1846 
1847 	if (pci_do_msi && msi->msi_location != 0)
1848 		return (msi->msi_msgnum);
1849 	return (0);
1850 }
1851 
1852 /* free pcicfgregs structure and all depending data structures */
1853 
1854 int
1855 pci_freecfg(struct pci_devinfo *dinfo)
1856 {
1857 	struct devlist *devlist_head;
1858 	int i;
1859 
1860 	devlist_head = &pci_devq;
1861 
1862 	if (dinfo->cfg.vpd.vpd_reg) {
1863 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1864 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1865 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1866 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1867 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1868 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1869 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1870 	}
1871 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1872 	free(dinfo, M_DEVBUF);
1873 
1874 	/* increment the generation count */
1875 	pci_generation++;
1876 
1877 	/* we're losing one device */
1878 	pci_numdevs--;
1879 	return (0);
1880 }
1881 
1882 /*
1883  * PCI power manangement
1884  */
1885 int
1886 pci_set_powerstate_method(device_t dev, device_t child, int state)
1887 {
1888 	struct pci_devinfo *dinfo = device_get_ivars(child);
1889 	pcicfgregs *cfg = &dinfo->cfg;
1890 	uint16_t status;
1891 	int result, oldstate, highest, delay;
1892 
1893 	if (cfg->pp.pp_cap == 0)
1894 		return (EOPNOTSUPP);
1895 
1896 	/*
1897 	 * Optimize a no state change request away.  While it would be OK to
1898 	 * write to the hardware in theory, some devices have shown odd
1899 	 * behavior when going from D3 -> D3.
1900 	 */
1901 	oldstate = pci_get_powerstate(child);
1902 	if (oldstate == state)
1903 		return (0);
1904 
1905 	/*
1906 	 * The PCI power management specification states that after a state
1907 	 * transition between PCI power states, system software must
1908 	 * guarantee a minimal delay before the function accesses the device.
1909 	 * Compute the worst case delay that we need to guarantee before we
1910 	 * access the device.  Many devices will be responsive much more
1911 	 * quickly than this delay, but there are some that don't respond
1912 	 * instantly to state changes.  Transitions to/from D3 state require
1913 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1914 	 * is done below with DELAY rather than a sleeper function because
1915 	 * this function can be called from contexts where we cannot sleep.
1916 	 */
1917 	highest = (oldstate > state) ? oldstate : state;
1918 	if (highest == PCI_POWERSTATE_D3)
1919 	    delay = 10000;
1920 	else if (highest == PCI_POWERSTATE_D2)
1921 	    delay = 200;
1922 	else
1923 	    delay = 0;
1924 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1925 	    & ~PCIM_PSTAT_DMASK;
1926 	result = 0;
1927 	switch (state) {
1928 	case PCI_POWERSTATE_D0:
1929 		status |= PCIM_PSTAT_D0;
1930 		break;
1931 	case PCI_POWERSTATE_D1:
1932 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1933 			return (EOPNOTSUPP);
1934 		status |= PCIM_PSTAT_D1;
1935 		break;
1936 	case PCI_POWERSTATE_D2:
1937 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1938 			return (EOPNOTSUPP);
1939 		status |= PCIM_PSTAT_D2;
1940 		break;
1941 	case PCI_POWERSTATE_D3:
1942 		status |= PCIM_PSTAT_D3;
1943 		break;
1944 	default:
1945 		return (EINVAL);
1946 	}
1947 
1948 	if (bootverbose)
1949 		printf(
1950 		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1951 		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1952 		    oldstate, state);
1953 
1954 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1955 	if (delay)
1956 		DELAY(delay);
1957 	return (0);
1958 }
1959 
1960 int
1961 pci_get_powerstate_method(device_t dev, device_t child)
1962 {
1963 	struct pci_devinfo *dinfo = device_get_ivars(child);
1964 	pcicfgregs *cfg = &dinfo->cfg;
1965 	uint16_t status;
1966 	int result;
1967 
1968 	if (cfg->pp.pp_cap != 0) {
1969 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1970 		switch (status & PCIM_PSTAT_DMASK) {
1971 		case PCIM_PSTAT_D0:
1972 			result = PCI_POWERSTATE_D0;
1973 			break;
1974 		case PCIM_PSTAT_D1:
1975 			result = PCI_POWERSTATE_D1;
1976 			break;
1977 		case PCIM_PSTAT_D2:
1978 			result = PCI_POWERSTATE_D2;
1979 			break;
1980 		case PCIM_PSTAT_D3:
1981 			result = PCI_POWERSTATE_D3;
1982 			break;
1983 		default:
1984 			result = PCI_POWERSTATE_UNKNOWN;
1985 			break;
1986 		}
1987 	} else {
1988 		/* No support, device is always at D0 */
1989 		result = PCI_POWERSTATE_D0;
1990 	}
1991 	return (result);
1992 }
1993 
1994 /*
1995  * Some convenience functions for PCI device drivers.
1996  */
1997 
1998 static __inline void
1999 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2000 {
2001 	uint16_t	command;
2002 
2003 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2004 	command |= bit;
2005 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2006 }
2007 
2008 static __inline void
2009 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2010 {
2011 	uint16_t	command;
2012 
2013 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2014 	command &= ~bit;
2015 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2016 }
2017 
2018 int
2019 pci_enable_busmaster_method(device_t dev, device_t child)
2020 {
2021 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2022 	return (0);
2023 }
2024 
2025 int
2026 pci_disable_busmaster_method(device_t dev, device_t child)
2027 {
2028 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2029 	return (0);
2030 }
2031 
2032 int
2033 pci_enable_io_method(device_t dev, device_t child, int space)
2034 {
2035 	uint16_t command;
2036 	uint16_t bit;
2037 	char *error;
2038 
2039 	bit = 0;
2040 	error = NULL;
2041 
2042 	switch(space) {
2043 	case SYS_RES_IOPORT:
2044 		bit = PCIM_CMD_PORTEN;
2045 		error = "port";
2046 		break;
2047 	case SYS_RES_MEMORY:
2048 		bit = PCIM_CMD_MEMEN;
2049 		error = "memory";
2050 		break;
2051 	default:
2052 		return (EINVAL);
2053 	}
2054 	pci_set_command_bit(dev, child, bit);
2055 	/* Some devices seem to need a brief stall here, what do to? */
2056 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2057 	if (command & bit)
2058 		return (0);
2059 	device_printf(child, "failed to enable %s mapping!\n", error);
2060 	return (ENXIO);
2061 }
2062 
2063 int
2064 pci_disable_io_method(device_t dev, device_t child, int space)
2065 {
2066 	uint16_t command;
2067 	uint16_t bit;
2068 	char *error;
2069 
2070 	bit = 0;
2071 	error = NULL;
2072 
2073 	switch(space) {
2074 	case SYS_RES_IOPORT:
2075 		bit = PCIM_CMD_PORTEN;
2076 		error = "port";
2077 		break;
2078 	case SYS_RES_MEMORY:
2079 		bit = PCIM_CMD_MEMEN;
2080 		error = "memory";
2081 		break;
2082 	default:
2083 		return (EINVAL);
2084 	}
2085 	pci_clear_command_bit(dev, child, bit);
2086 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2087 	if (command & bit) {
2088 		device_printf(child, "failed to disable %s mapping!\n", error);
2089 		return (ENXIO);
2090 	}
2091 	return (0);
2092 }
2093 
2094 /*
2095  * New style pci driver.  Parent device is either a pci-host-bridge or a
2096  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2097  */
2098 
2099 void
2100 pci_print_verbose(struct pci_devinfo *dinfo)
2101 {
2102 
2103 	if (bootverbose) {
2104 		pcicfgregs *cfg = &dinfo->cfg;
2105 
2106 		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2107 		    cfg->vendor, cfg->device, cfg->revid);
2108 		printf("\tbus=%d, slot=%d, func=%d\n",
2109 		    cfg->bus, cfg->slot, cfg->func);
2110 		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2111 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2112 		    cfg->mfdev);
2113 		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2114 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2115 		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2116 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2117 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2118 		if (cfg->intpin > 0)
2119 			printf("\tintpin=%c, irq=%d\n",
2120 			    cfg->intpin +'a' -1, cfg->intline);
2121 		if (cfg->pp.pp_cap) {
2122 			uint16_t status;
2123 
2124 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2125 			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2126 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2127 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2128 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2129 			    status & PCIM_PSTAT_DMASK);
2130 		}
2131 		if (cfg->msi.msi_location) {
2132 			int ctrl;
2133 
2134 			ctrl = cfg->msi.msi_ctrl;
2135 			printf("\tMSI supports %d message%s%s%s\n",
2136 			    cfg->msi.msi_msgnum,
2137 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2138 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2139 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2140 		}
2141 		if (cfg->msix.msix_location) {
2142 			printf("\tMSI-X supports %d message%s ",
2143 			    cfg->msix.msix_msgnum,
2144 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2145 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2146 				printf("in map 0x%x\n",
2147 				    cfg->msix.msix_table_bar);
2148 			else
2149 				printf("in maps 0x%x and 0x%x\n",
2150 				    cfg->msix.msix_table_bar,
2151 				    cfg->msix.msix_pba_bar);
2152 		}
2153 	}
2154 }
2155 
2156 static int
2157 pci_porten(device_t pcib, int b, int s, int f)
2158 {
2159 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2160 		& PCIM_CMD_PORTEN) != 0;
2161 }
2162 
2163 static int
2164 pci_memen(device_t pcib, int b, int s, int f)
2165 {
2166 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2167 		& PCIM_CMD_MEMEN) != 0;
2168 }
2169 
2170 /*
2171  * Add a resource based on a pci map register. Return 1 if the map
2172  * register is a 32bit map register or 2 if it is a 64bit register.
2173  */
2174 static int
2175 pci_add_map(device_t pcib, device_t bus, device_t dev,
2176     int b, int s, int f, int reg, struct resource_list *rl, int force,
2177     int prefetch)
2178 {
2179 	uint32_t map;
2180 	pci_addr_t base;
2181 	pci_addr_t start, end, count;
2182 	uint8_t ln2size;
2183 	uint8_t ln2range;
2184 	uint32_t testval;
2185 	uint16_t cmd;
2186 	int type;
2187 	int barlen;
2188 	struct resource *res;
2189 
2190 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2191 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2192 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2193 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2194 
2195 	if (PCI_BAR_MEM(map))
2196 		type = SYS_RES_MEMORY;
2197 	else
2198 		type = SYS_RES_IOPORT;
2199 	ln2size = pci_mapsize(testval);
2200 	ln2range = pci_maprange(testval);
2201 	base = pci_mapbase(map);
2202 	barlen = ln2range == 64 ? 2 : 1;
2203 
2204 	/*
2205 	 * For I/O registers, if bottom bit is set, and the next bit up
2206 	 * isn't clear, we know we have a BAR that doesn't conform to the
2207 	 * spec, so ignore it.  Also, sanity check the size of the data
2208 	 * areas to the type of memory involved.  Memory must be at least
2209 	 * 16 bytes in size, while I/O ranges must be at least 4.
2210 	 */
2211 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2212 		return (barlen);
2213 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2214 	    (type == SYS_RES_IOPORT && ln2size < 2))
2215 		return (barlen);
2216 
2217 	if (ln2range == 64)
2218 		/* Read the other half of a 64bit map register */
2219 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2220 	if (bootverbose) {
2221 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2222 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2223 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2224 			printf(", port disabled\n");
2225 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2226 			printf(", memory disabled\n");
2227 		else
2228 			printf(", enabled\n");
2229 	}
2230 
2231 	/*
2232 	 * If base is 0, then we have problems.  It is best to ignore
2233 	 * such entries for the moment.  These will be allocated later if
2234 	 * the driver specifically requests them.  However, some
2235 	 * removable busses look better when all resources are allocated,
2236 	 * so allow '0' to be overriden.
2237 	 *
2238 	 * Similarly treat maps whose values is the same as the test value
2239 	 * read back.  These maps have had all f's written to them by the
2240 	 * BIOS in an attempt to disable the resources.
2241 	 */
2242 	if (!force && (base == 0 || map == testval))
2243 		return (barlen);
2244 	if ((u_long)base != base) {
2245 		device_printf(bus,
2246 		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2247 		return (barlen);
2248 	}
2249 
2250 	/*
2251 	 * This code theoretically does the right thing, but has
2252 	 * undesirable side effects in some cases where peripherals
2253 	 * respond oddly to having these bits enabled.  Let the user
2254 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2255 	 * default).
2256 	 */
2257 	if (pci_enable_io_modes) {
2258 		/* Turn on resources that have been left off by a lazy BIOS */
2259 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2260 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2261 			cmd |= PCIM_CMD_PORTEN;
2262 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2263 		}
2264 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2265 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2266 			cmd |= PCIM_CMD_MEMEN;
2267 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2268 		}
2269 	} else {
2270 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2271 			return (barlen);
2272 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2273 			return (barlen);
2274 	}
2275 
2276 	count = 1 << ln2size;
2277 	if (base == 0 || base == pci_mapbase(testval)) {
2278 		start = 0;	/* Let the parent deside */
2279 		end = ~0ULL;
2280 	} else {
2281 		start = base;
2282 		end = base + (1 << ln2size) - 1;
2283 	}
2284 	resource_list_add(rl, type, reg, start, end, count);
2285 
2286 	/*
2287 	 * Not quite sure what to do on failure of allocating the resource
2288 	 * since I can postulate several right answers.
2289 	 */
2290 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2291 	    prefetch ? RF_PREFETCHABLE : 0);
2292 	if (res == NULL)
2293 		return (barlen);
2294 	start = rman_get_start(res);
2295 	if ((u_long)start != start) {
2296 		/* Wait a minute!  this platform can't do this address. */
2297 		device_printf(bus,
2298 		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2299 		    b, s, f, reg, (uintmax_t)start);
2300 		resource_list_release(rl, bus, dev, type, reg, res);
2301 		return (barlen);
2302 	}
2303 	pci_write_config(dev, reg, start, 4);
2304 	if (ln2range == 64)
2305 		pci_write_config(dev, reg + 4, start >> 32, 4);
2306 	return (barlen);
2307 }
2308 
2309 /*
2310  * For ATA devices we need to decide early what addressing mode to use.
2311  * Legacy demands that the primary and secondary ATA ports sits on the
2312  * same addresses that old ISA hardware did. This dictates that we use
2313  * those addresses and ignore the BAR's if we cannot set PCI native
2314  * addressing mode.
2315  */
2316 static void
2317 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2318     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2319 {
2320 	int rid, type, progif;
2321 #if 0
2322 	/* if this device supports PCI native addressing use it */
2323 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2324 	if ((progif & 0x8a) == 0x8a) {
2325 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2326 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2327 			printf("Trying ATA native PCI addressing mode\n");
2328 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2329 		}
2330 	}
2331 #endif
2332 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2333 	type = SYS_RES_IOPORT;
2334 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2335 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2336 		    prefetchmask & (1 << 0));
2337 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2338 		    prefetchmask & (1 << 1));
2339 	} else {
2340 		rid = PCIR_BAR(0);
2341 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2342 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2343 		    0);
2344 		rid = PCIR_BAR(1);
2345 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2346 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2347 		    0);
2348 	}
2349 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2350 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2351 		    prefetchmask & (1 << 2));
2352 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2353 		    prefetchmask & (1 << 3));
2354 	} else {
2355 		rid = PCIR_BAR(2);
2356 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2357 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2358 		    0);
2359 		rid = PCIR_BAR(3);
2360 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2361 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2362 		    0);
2363 	}
2364 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2365 	    prefetchmask & (1 << 4));
2366 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2367 	    prefetchmask & (1 << 5));
2368 }
2369 
2370 static void
2371 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2372 {
2373 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2374 	pcicfgregs *cfg = &dinfo->cfg;
2375 	char tunable_name[64];
2376 	int irq;
2377 
2378 	/* Has to have an intpin to have an interrupt. */
2379 	if (cfg->intpin == 0)
2380 		return;
2381 
2382 	/* Let the user override the IRQ with a tunable. */
2383 	irq = PCI_INVALID_IRQ;
2384 	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2385 	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2386 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2387 		irq = PCI_INVALID_IRQ;
2388 
2389 	/*
2390 	 * If we didn't get an IRQ via the tunable, then we either use the
2391 	 * IRQ value in the intline register or we ask the bus to route an
2392 	 * interrupt for us.  If force_route is true, then we only use the
2393 	 * value in the intline register if the bus was unable to assign an
2394 	 * IRQ.
2395 	 */
2396 	if (!PCI_INTERRUPT_VALID(irq)) {
2397 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2398 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2399 		if (!PCI_INTERRUPT_VALID(irq))
2400 			irq = cfg->intline;
2401 	}
2402 
2403 	/* If after all that we don't have an IRQ, just bail. */
2404 	if (!PCI_INTERRUPT_VALID(irq))
2405 		return;
2406 
2407 	/* Update the config register if it changed. */
2408 	if (irq != cfg->intline) {
2409 		cfg->intline = irq;
2410 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2411 	}
2412 
2413 	/* Add this IRQ as rid 0 interrupt resource. */
2414 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2415 }
2416 
2417 void
2418 pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
2419 {
2420 	device_t pcib;
2421 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2422 	pcicfgregs *cfg = &dinfo->cfg;
2423 	struct resource_list *rl = &dinfo->resources;
2424 	struct pci_quirk *q;
2425 	int b, i, f, s;
2426 
2427 	pcib = device_get_parent(bus);
2428 
2429 	b = cfg->bus;
2430 	s = cfg->slot;
2431 	f = cfg->func;
2432 
2433 	/* ATA devices needs special map treatment */
2434 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2435 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2436 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2437 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2438 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2439 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2440 	else
2441 		for (i = 0; i < cfg->nummaps;)
2442 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2443 			    rl, force, prefetchmask & (1 << i));
2444 
2445 	/*
2446 	 * Add additional, quirked resources.
2447 	 */
2448 	for (q = &pci_quirks[0]; q->devid; q++) {
2449 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2450 		    && q->type == PCI_QUIRK_MAP_REG)
2451 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2452 			  force, 0);
2453 	}
2454 
2455 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2456 #ifdef __PCI_REROUTE_INTERRUPT
2457 		/*
2458 		 * Try to re-route interrupts. Sometimes the BIOS or
2459 		 * firmware may leave bogus values in these registers.
2460 		 * If the re-route fails, then just stick with what we
2461 		 * have.
2462 		 */
2463 		pci_assign_interrupt(bus, dev, 1);
2464 #else
2465 		pci_assign_interrupt(bus, dev, 0);
2466 #endif
2467 	}
2468 }
2469 
2470 void
2471 pci_add_children(device_t dev, int busno, size_t dinfo_size)
2472 {
2473 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2474 	device_t pcib = device_get_parent(dev);
2475 	struct pci_devinfo *dinfo;
2476 	int maxslots;
2477 	int s, f, pcifunchigh;
2478 	uint8_t hdrtype;
2479 
2480 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2481 	    ("dinfo_size too small"));
2482 	maxslots = PCIB_MAXSLOTS(pcib);
2483 	for (s = 0; s <= maxslots; s++) {
2484 		pcifunchigh = 0;
2485 		f = 0;
2486 		DELAY(1);
2487 		hdrtype = REG(PCIR_HDRTYPE, 1);
2488 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2489 			continue;
2490 		if (hdrtype & PCIM_MFDEV)
2491 			pcifunchigh = PCI_FUNCMAX;
2492 		for (f = 0; f <= pcifunchigh; f++) {
2493 			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2494 			if (dinfo != NULL) {
2495 				pci_add_child(dev, dinfo);
2496 			}
2497 		}
2498 	}
2499 #undef REG
2500 }
2501 
2502 void
2503 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2504 {
2505 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2506 	device_set_ivars(dinfo->cfg.dev, dinfo);
2507 	resource_list_init(&dinfo->resources);
2508 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2509 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2510 	pci_print_verbose(dinfo);
2511 	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
2512 }
2513 
2514 static int
2515 pci_probe(device_t dev)
2516 {
2517 
2518 	device_set_desc(dev, "PCI bus");
2519 
2520 	/* Allow other subclasses to override this driver. */
2521 	return (-1000);
2522 }
2523 
2524 static int
2525 pci_attach(device_t dev)
2526 {
2527 	int busno;
2528 
2529 	/*
2530 	 * Since there can be multiple independantly numbered PCI
2531 	 * busses on systems with multiple PCI domains, we can't use
2532 	 * the unit number to decide which bus we are probing. We ask
2533 	 * the parent pcib what our bus number is.
2534 	 */
2535 	busno = pcib_get_bus(dev);
2536 	if (bootverbose)
2537 		device_printf(dev, "physical bus=%d\n", busno);
2538 
2539 	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2540 
2541 	return (bus_generic_attach(dev));
2542 }
2543 
2544 int
2545 pci_suspend(device_t dev)
2546 {
2547 	int dstate, error, i, numdevs;
2548 	device_t acpi_dev, child, *devlist;
2549 	struct pci_devinfo *dinfo;
2550 
2551 	/*
2552 	 * Save the PCI configuration space for each child and set the
2553 	 * device in the appropriate power state for this sleep state.
2554 	 */
2555 	acpi_dev = NULL;
2556 	if (pci_do_power_resume)
2557 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2558 	device_get_children(dev, &devlist, &numdevs);
2559 	for (i = 0; i < numdevs; i++) {
2560 		child = devlist[i];
2561 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2562 		pci_cfg_save(child, dinfo, 0);
2563 	}
2564 
2565 	/* Suspend devices before potentially powering them down. */
2566 	error = bus_generic_suspend(dev);
2567 	if (error) {
2568 		free(devlist, M_TEMP);
2569 		return (error);
2570 	}
2571 
2572 	/*
2573 	 * Always set the device to D3.  If ACPI suggests a different
2574 	 * power state, use it instead.  If ACPI is not present, the
2575 	 * firmware is responsible for managing device power.  Skip
2576 	 * children who aren't attached since they are powered down
2577 	 * separately.  Only manage type 0 devices for now.
2578 	 */
2579 	for (i = 0; acpi_dev && i < numdevs; i++) {
2580 		child = devlist[i];
2581 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2582 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2583 			dstate = PCI_POWERSTATE_D3;
2584 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2585 			pci_set_powerstate(child, dstate);
2586 		}
2587 	}
2588 	free(devlist, M_TEMP);
2589 	return (0);
2590 }
2591 
2592 int
2593 pci_resume(device_t dev)
2594 {
2595 	int i, numdevs;
2596 	device_t acpi_dev, child, *devlist;
2597 	struct pci_devinfo *dinfo;
2598 
2599 	/*
2600 	 * Set each child to D0 and restore its PCI configuration space.
2601 	 */
2602 	acpi_dev = NULL;
2603 	if (pci_do_power_resume)
2604 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2605 	device_get_children(dev, &devlist, &numdevs);
2606 	for (i = 0; i < numdevs; i++) {
2607 		/*
2608 		 * Notify ACPI we're going to D0 but ignore the result.  If
2609 		 * ACPI is not present, the firmware is responsible for
2610 		 * managing device power.  Only manage type 0 devices for now.
2611 		 */
2612 		child = devlist[i];
2613 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2614 		if (acpi_dev && device_is_attached(child) &&
2615 		    dinfo->cfg.hdrtype == 0) {
2616 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2617 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2618 		}
2619 
2620 		/* Now the device is powered up, restore its config space. */
2621 		pci_cfg_restore(child, dinfo);
2622 	}
2623 	free(devlist, M_TEMP);
2624 	return (bus_generic_resume(dev));
2625 }
2626 
2627 static void
2628 pci_load_vendor_data(void)
2629 {
2630 	caddr_t vendordata, info;
2631 
2632 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2633 		info = preload_search_info(vendordata, MODINFO_ADDR);
2634 		pci_vendordata = *(char **)info;
2635 		info = preload_search_info(vendordata, MODINFO_SIZE);
2636 		pci_vendordata_size = *(size_t *)info;
2637 		/* terminate the database */
2638 		pci_vendordata[pci_vendordata_size] = '\n';
2639 	}
2640 }
2641 
2642 void
2643 pci_driver_added(device_t dev, driver_t *driver)
2644 {
2645 	int numdevs;
2646 	device_t *devlist;
2647 	device_t child;
2648 	struct pci_devinfo *dinfo;
2649 	int i;
2650 
2651 	if (bootverbose)
2652 		device_printf(dev, "driver added\n");
2653 	DEVICE_IDENTIFY(driver, dev);
2654 	device_get_children(dev, &devlist, &numdevs);
2655 	for (i = 0; i < numdevs; i++) {
2656 		child = devlist[i];
2657 		if (device_get_state(child) != DS_NOTPRESENT)
2658 			continue;
2659 		dinfo = device_get_ivars(child);
2660 		pci_print_verbose(dinfo);
2661 		if (bootverbose)
2662 			printf("pci%d:%d:%d: reprobing on driver added\n",
2663 			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2664 		pci_cfg_restore(child, dinfo);
2665 		if (device_probe_and_attach(child) != 0)
2666 			pci_cfg_save(child, dinfo, 1);
2667 	}
2668 	free(devlist, M_TEMP);
2669 }
2670 
2671 int
2672 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
2673     driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
2674 {
2675 	struct pci_devinfo *dinfo;
2676 	struct msix_table_entry *mte;
2677 	struct msix_vector *mv;
2678 	uint64_t addr;
2679 	uint32_t data;
2680 	void *cookie;
2681 	int error, rid;
2682 
2683 	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
2684 	    arg, &cookie);
2685 	if (error)
2686 		return (error);
2687 
2688 	/*
2689 	 * If this is a direct child, check to see if the interrupt is
2690 	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
2691 	 * us the address and data register values.  If we fail for some
2692 	 * reason, teardown the interrupt handler.
2693 	 */
2694 	rid = rman_get_rid(irq);
2695 	if (device_get_parent(child) == dev && rid > 0) {
2696 		dinfo = device_get_ivars(child);
2697 		if (dinfo->cfg.msi.msi_alloc > 0) {
2698 			if (dinfo->cfg.msi.msi_addr == 0) {
2699 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
2700 			    ("MSI has handlers, but vectors not mapped"));
2701 				error = PCIB_MAP_MSI(device_get_parent(dev),
2702 				    child, rman_get_start(irq), &addr, &data);
2703 				if (error)
2704 					goto bad;
2705 				dinfo->cfg.msi.msi_addr = addr;
2706 				dinfo->cfg.msi.msi_data = data;
2707 				pci_enable_msi(child, addr, data);
2708 			}
2709 			dinfo->cfg.msi.msi_handlers++;
2710 		} else {
2711 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2712 			    ("No MSI or MSI-X interrupts allocated"));
2713 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2714 			    ("MSI-X index too high"));
2715 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2716 			KASSERT(mte->mte_vector != 0, ("no message vector"));
2717 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
2718 			KASSERT(mv->mv_irq == rman_get_start(irq),
2719 			    ("IRQ mismatch"));
2720 			if (mv->mv_address == 0) {
2721 				KASSERT(mte->mte_handlers == 0,
2722 		    ("MSI-X table entry has handlers, but vector not mapped"));
2723 				error = PCIB_MAP_MSI(device_get_parent(dev),
2724 				    child, rman_get_start(irq), &addr, &data);
2725 				if (error)
2726 					goto bad;
2727 				mv->mv_address = addr;
2728 				mv->mv_data = data;
2729 			}
2730 			if (mte->mte_handlers == 0) {
2731 				pci_enable_msix(child, rid - 1, mv->mv_address,
2732 				    mv->mv_data);
2733 				pci_unmask_msix(child, rid - 1);
2734 			}
2735 			mte->mte_handlers++;
2736 		}
2737 	bad:
2738 		if (error) {
2739 			(void)bus_generic_teardown_intr(dev, child, irq,
2740 			    cookie);
2741 			return (error);
2742 		}
2743 	}
2744 	*cookiep = cookie;
2745 	return (0);
2746 }
2747 
2748 int
2749 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
2750     void *cookie)
2751 {
2752 	struct msix_table_entry *mte;
2753 	struct resource_list_entry *rle;
2754 	struct pci_devinfo *dinfo;
2755 	int error, rid;
2756 
2757 	/*
2758 	 * If this is a direct child, check to see if the interrupt is
2759 	 * MSI or MSI-X.  If so, decrement the appropriate handlers
2760 	 * count and mask the MSI-X message, or disable MSI messages
2761 	 * if the count drops to 0.
2762 	 */
2763 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
2764 		return (EINVAL);
2765 	rid = rman_get_rid(irq);
2766 	if (device_get_parent(child) == dev && rid > 0) {
2767 		dinfo = device_get_ivars(child);
2768 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
2769 		if (rle->res != irq)
2770 			return (EINVAL);
2771 		if (dinfo->cfg.msi.msi_alloc > 0) {
2772 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
2773 			    ("MSI-X index too high"));
2774 			if (dinfo->cfg.msi.msi_handlers == 0)
2775 				return (EINVAL);
2776 			dinfo->cfg.msi.msi_handlers--;
2777 			if (dinfo->cfg.msi.msi_handlers == 0)
2778 				pci_disable_msi(child);
2779 		} else {
2780 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
2781 			    ("No MSI or MSI-X interrupts allocated"));
2782 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
2783 			    ("MSI-X index too high"));
2784 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
2785 			if (mte->mte_handlers == 0)
2786 				return (EINVAL);
2787 			mte->mte_handlers--;
2788 			if (mte->mte_handlers == 0)
2789 				pci_mask_msix(child, rid - 1);
2790 		}
2791 	}
2792 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
2793 	if (device_get_parent(child) == dev && rid > 0)
2794 		KASSERT(error == 0,
2795 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
2796 	return (error);
2797 }
2798 
2799 int
2800 pci_print_child(device_t dev, device_t child)
2801 {
2802 	struct pci_devinfo *dinfo;
2803 	struct resource_list *rl;
2804 	int retval = 0;
2805 
2806 	dinfo = device_get_ivars(child);
2807 	rl = &dinfo->resources;
2808 
2809 	retval += bus_print_child_header(dev, child);
2810 
2811 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2812 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2813 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2814 	if (device_get_flags(dev))
2815 		retval += printf(" flags %#x", device_get_flags(dev));
2816 
2817 	retval += printf(" at device %d.%d", pci_get_slot(child),
2818 	    pci_get_function(child));
2819 
2820 	retval += bus_print_child_footer(dev, child);
2821 
2822 	return (retval);
2823 }
2824 
/*
 * Generic class/subclass descriptions used by pci_probe_nomatch() for
 * devices that no driver claimed.  An entry with subclass -1 describes
 * the whole class; other entries describe one specific subclass of it.
 * The table is terminated by an entry whose desc is NULL.
 */
static struct
{
	int	class;		/* PCIC_* base class code */
	int	subclass;	/* PCIS_* subclass code, or -1 for any */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	/* Terminator: desc == NULL ends the scan. */
	{0, 0,		NULL}
};
2911 
2912 void
2913 pci_probe_nomatch(device_t dev, device_t child)
2914 {
2915 	int	i;
2916 	char	*cp, *scp, *device;
2917 
2918 	/*
2919 	 * Look for a listing for this device in a loaded device database.
2920 	 */
2921 	if ((device = pci_describe_device(child)) != NULL) {
2922 		device_printf(dev, "<%s>", device);
2923 		free(device, M_DEVBUF);
2924 	} else {
2925 		/*
2926 		 * Scan the class/subclass descriptions for a general
2927 		 * description.
2928 		 */
2929 		cp = "unknown";
2930 		scp = NULL;
2931 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2932 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2933 				if (pci_nomatch_tab[i].subclass == -1) {
2934 					cp = pci_nomatch_tab[i].desc;
2935 				} else if (pci_nomatch_tab[i].subclass ==
2936 				    pci_get_subclass(child)) {
2937 					scp = pci_nomatch_tab[i].desc;
2938 				}
2939 			}
2940 		}
2941 		device_printf(dev, "<%s%s%s>",
2942 		    cp ? cp : "",
2943 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2944 		    scp ? scp : "");
2945 	}
2946 	printf(" at device %d.%d (no driver attached)\n",
2947 	    pci_get_slot(child), pci_get_function(child));
2948 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
2949 	return;
2950 }
2951 
2952 /*
2953  * Parse the PCI device database, if loaded, and return a pointer to a
2954  * description of the device.
2955  *
2956  * The database is flat text formatted as follows:
2957  *
2958  * Any line not in a valid format is ignored.
2959  * Lines are terminated with newline '\n' characters.
2960  *
2961  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2962  * the vendor name.
2963  *
2964  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2965  * - devices cannot be listed without a corresponding VENDOR line.
2966  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2967  * another TAB, then the device name.
2968  */
2969 
2970 /*
2971  * Assuming (ptr) points to the beginning of a line in the database,
2972  * return the vendor or device and description of the next entry.
2973  * The value of (vendor) or (device) inappropriate for the entry type
2974  * is set to -1.  Returns nonzero at the end of the database.
2975  *
2976  * Note that this is slightly unrobust in the face of corrupt data;
2977  * we attempt to safeguard against this by spamming the end of the
2978  * database with a newline when we initialise.
2979  */
2980 static int
2981 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2982 {
2983 	char	*cp = *ptr;
2984 	int	left;
2985 
2986 	*device = -1;
2987 	*vendor = -1;
2988 	**desc = '\0';
2989 	for (;;) {
2990 		left = pci_vendordata_size - (cp - pci_vendordata);
2991 		if (left <= 0) {
2992 			*ptr = cp;
2993 			return(1);
2994 		}
2995 
2996 		/* vendor entry? */
2997 		if (*cp != '\t' &&
2998 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2999 			break;
3000 		/* device entry? */
3001 		if (*cp == '\t' &&
3002 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3003 			break;
3004 
3005 		/* skip to next line */
3006 		while (*cp != '\n' && left > 0) {
3007 			cp++;
3008 			left--;
3009 		}
3010 		if (*cp == '\n') {
3011 			cp++;
3012 			left--;
3013 		}
3014 	}
3015 	/* skip to next line */
3016 	while (*cp != '\n' && left > 0) {
3017 		cp++;
3018 		left--;
3019 	}
3020 	if (*cp == '\n' && left > 0)
3021 		cp++;
3022 	*ptr = cp;
3023 	return(0);
3024 }
3025 
3026 static char *
3027 pci_describe_device(device_t dev)
3028 {
3029 	int	vendor, device;
3030 	char	*desc, *vp, *dp, *line;
3031 
3032 	desc = vp = dp = NULL;
3033 
3034 	/*
3035 	 * If we have no vendor data, we can't do anything.
3036 	 */
3037 	if (pci_vendordata == NULL)
3038 		goto out;
3039 
3040 	/*
3041 	 * Scan the vendor data looking for this device
3042 	 */
3043 	line = pci_vendordata;
3044 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3045 		goto out;
3046 	for (;;) {
3047 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3048 			goto out;
3049 		if (vendor == pci_get_vendor(dev))
3050 			break;
3051 	}
3052 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3053 		goto out;
3054 	for (;;) {
3055 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3056 			*dp = 0;
3057 			break;
3058 		}
3059 		if (vendor != -1) {
3060 			*dp = 0;
3061 			break;
3062 		}
3063 		if (device == pci_get_device(dev))
3064 			break;
3065 	}
3066 	if (dp[0] == '\0')
3067 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3068 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3069 	    NULL)
3070 		sprintf(desc, "%s, %s", vp, dp);
3071  out:
3072 	if (vp != NULL)
3073 		free(vp, M_DEVBUF);
3074 	if (dp != NULL)
3075 		free(dp, M_DEVBUF);
3076 	return(desc);
3077 }
3078 
3079 int
3080 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3081 {
3082 	struct pci_devinfo *dinfo;
3083 	pcicfgregs *cfg;
3084 
3085 	dinfo = device_get_ivars(child);
3086 	cfg = &dinfo->cfg;
3087 
3088 	switch (which) {
3089 	case PCI_IVAR_ETHADDR:
3090 		/*
3091 		 * The generic accessor doesn't deal with failure, so
3092 		 * we set the return value, then return an error.
3093 		 */
3094 		*((uint8_t **) result) = NULL;
3095 		return (EINVAL);
3096 	case PCI_IVAR_SUBVENDOR:
3097 		*result = cfg->subvendor;
3098 		break;
3099 	case PCI_IVAR_SUBDEVICE:
3100 		*result = cfg->subdevice;
3101 		break;
3102 	case PCI_IVAR_VENDOR:
3103 		*result = cfg->vendor;
3104 		break;
3105 	case PCI_IVAR_DEVICE:
3106 		*result = cfg->device;
3107 		break;
3108 	case PCI_IVAR_DEVID:
3109 		*result = (cfg->device << 16) | cfg->vendor;
3110 		break;
3111 	case PCI_IVAR_CLASS:
3112 		*result = cfg->baseclass;
3113 		break;
3114 	case PCI_IVAR_SUBCLASS:
3115 		*result = cfg->subclass;
3116 		break;
3117 	case PCI_IVAR_PROGIF:
3118 		*result = cfg->progif;
3119 		break;
3120 	case PCI_IVAR_REVID:
3121 		*result = cfg->revid;
3122 		break;
3123 	case PCI_IVAR_INTPIN:
3124 		*result = cfg->intpin;
3125 		break;
3126 	case PCI_IVAR_IRQ:
3127 		*result = cfg->intline;
3128 		break;
3129 	case PCI_IVAR_BUS:
3130 		*result = cfg->bus;
3131 		break;
3132 	case PCI_IVAR_SLOT:
3133 		*result = cfg->slot;
3134 		break;
3135 	case PCI_IVAR_FUNCTION:
3136 		*result = cfg->func;
3137 		break;
3138 	case PCI_IVAR_CMDREG:
3139 		*result = cfg->cmdreg;
3140 		break;
3141 	case PCI_IVAR_CACHELNSZ:
3142 		*result = cfg->cachelnsz;
3143 		break;
3144 	case PCI_IVAR_MINGNT:
3145 		*result = cfg->mingnt;
3146 		break;
3147 	case PCI_IVAR_MAXLAT:
3148 		*result = cfg->maxlat;
3149 		break;
3150 	case PCI_IVAR_LATTIMER:
3151 		*result = cfg->lattimer;
3152 		break;
3153 	default:
3154 		return (ENOENT);
3155 	}
3156 	return (0);
3157 }
3158 
3159 int
3160 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3161 {
3162 	struct pci_devinfo *dinfo;
3163 
3164 	dinfo = device_get_ivars(child);
3165 
3166 	switch (which) {
3167 	case PCI_IVAR_INTPIN:
3168 		dinfo->cfg.intpin = value;
3169 		return (0);
3170 	case PCI_IVAR_ETHADDR:
3171 	case PCI_IVAR_SUBVENDOR:
3172 	case PCI_IVAR_SUBDEVICE:
3173 	case PCI_IVAR_VENDOR:
3174 	case PCI_IVAR_DEVICE:
3175 	case PCI_IVAR_DEVID:
3176 	case PCI_IVAR_CLASS:
3177 	case PCI_IVAR_SUBCLASS:
3178 	case PCI_IVAR_PROGIF:
3179 	case PCI_IVAR_REVID:
3180 	case PCI_IVAR_IRQ:
3181 	case PCI_IVAR_BUS:
3182 	case PCI_IVAR_SLOT:
3183 	case PCI_IVAR_FUNCTION:
3184 		return (EINVAL);	/* disallow for now */
3185 
3186 	default:
3187 		return (ENOENT);
3188 	}
3189 }
3190 
3191 
3192 #include "opt_ddb.h"
3193 #ifdef DDB
3194 #include <ddb/ddb.h>
3195 #include <sys/cons.h>
3196 
3197 /*
3198  * List resources based on pci map registers, used for within ddb
3199  */
3200 
3201 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3202 {
3203 	struct pci_devinfo *dinfo;
3204 	struct devlist *devlist_head;
3205 	struct pci_conf *p;
3206 	const char *name;
3207 	int i, error, none_count;
3208 
3209 	none_count = 0;
3210 	/* get the head of the device queue */
3211 	devlist_head = &pci_devq;
3212 
3213 	/*
3214 	 * Go through the list of devices and print out devices
3215 	 */
3216 	for (error = 0, i = 0,
3217 	     dinfo = STAILQ_FIRST(devlist_head);
3218 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3219 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3220 
3221 		/* Populate pd_name and pd_unit */
3222 		name = NULL;
3223 		if (dinfo->cfg.dev)
3224 			name = device_get_name(dinfo->cfg.dev);
3225 
3226 		p = &dinfo->conf;
3227 		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
3228 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3229 			(name && *name) ? name : "none",
3230 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3231 			none_count++,
3232 			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3233 			p->pc_sel.pc_func, (p->pc_class << 16) |
3234 			(p->pc_subclass << 8) | p->pc_progif,
3235 			(p->pc_subdevice << 16) | p->pc_subvendor,
3236 			(p->pc_device << 16) | p->pc_vendor,
3237 			p->pc_revid, p->pc_hdr);
3238 	}
3239 }
3240 #endif /* DDB */
3241 
3242 static struct resource *
3243 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3244     u_long start, u_long end, u_long count, u_int flags)
3245 {
3246 	struct pci_devinfo *dinfo = device_get_ivars(child);
3247 	struct resource_list *rl = &dinfo->resources;
3248 	struct resource_list_entry *rle;
3249 	struct resource *res;
3250 	pci_addr_t map, testval;
3251 	int mapsize;
3252 
3253 	/*
3254 	 * Weed out the bogons, and figure out how large the BAR/map
3255 	 * is.  Bars that read back 0 here are bogus and unimplemented.
3256 	 * Note: atapci in legacy mode are special and handled elsewhere
3257 	 * in the code.  If you have a atapci device in legacy mode and
3258 	 * it fails here, that other code is broken.
3259 	 */
3260 	res = NULL;
3261 	map = pci_read_config(child, *rid, 4);
3262 	pci_write_config(child, *rid, 0xffffffff, 4);
3263 	testval = pci_read_config(child, *rid, 4);
3264 	if (pci_maprange(testval) == 64)
3265 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3266 	if (pci_mapbase(testval) == 0)
3267 		goto out;
3268 
3269 	/*
3270 	 * Restore the original value of the BAR.  We may have reprogrammed
3271 	 * the BAR of the low-level console device and when booting verbose,
3272 	 * we need the console device addressable.
3273 	 */
3274 	pci_write_config(child, *rid, map, 4);
3275 
3276 	if (PCI_BAR_MEM(testval)) {
3277 		if (type != SYS_RES_MEMORY) {
3278 			if (bootverbose)
3279 				device_printf(dev,
3280 				    "child %s requested type %d for rid %#x,"
3281 				    " but the BAR says it is an memio\n",
3282 				    device_get_nameunit(child), type, *rid);
3283 			goto out;
3284 		}
3285 	} else {
3286 		if (type != SYS_RES_IOPORT) {
3287 			if (bootverbose)
3288 				device_printf(dev,
3289 				    "child %s requested type %d for rid %#x,"
3290 				    " but the BAR says it is an ioport\n",
3291 				    device_get_nameunit(child), type, *rid);
3292 			goto out;
3293 		}
3294 	}
3295 	/*
3296 	 * For real BARs, we need to override the size that
3297 	 * the driver requests, because that's what the BAR
3298 	 * actually uses and we would otherwise have a
3299 	 * situation where we might allocate the excess to
3300 	 * another driver, which won't work.
3301 	 */
3302 	mapsize = pci_mapsize(testval);
3303 	count = 1UL << mapsize;
3304 	if (RF_ALIGNMENT(flags) < mapsize)
3305 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3306 
3307 	/*
3308 	 * Allocate enough resource, and then write back the
3309 	 * appropriate bar for that resource.
3310 	 */
3311 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3312 	    start, end, count, flags);
3313 	if (res == NULL) {
3314 		device_printf(child,
3315 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3316 		    count, *rid, type, start, end);
3317 		goto out;
3318 	}
3319 	resource_list_add(rl, type, *rid, start, end, count);
3320 	rle = resource_list_find(rl, type, *rid);
3321 	if (rle == NULL)
3322 		panic("pci_alloc_map: unexpectedly can't find resource.");
3323 	rle->res = res;
3324 	rle->start = rman_get_start(res);
3325 	rle->end = rman_get_end(res);
3326 	rle->count = count;
3327 	if (bootverbose)
3328 		device_printf(child,
3329 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3330 		    count, *rid, type, rman_get_start(res));
3331 	map = rman_get_start(res);
3332 out:;
3333 	pci_write_config(child, *rid, map, 4);
3334 	if (pci_maprange(testval) == 64)
3335 		pci_write_config(child, *rid + 4, map >> 32, 4);
3336 	return (res);
3337 }
3338 
3339 
3340 struct resource *
3341 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3342 		   u_long start, u_long end, u_long count, u_int flags)
3343 {
3344 	struct pci_devinfo *dinfo = device_get_ivars(child);
3345 	struct resource_list *rl = &dinfo->resources;
3346 	struct resource_list_entry *rle;
3347 	pcicfgregs *cfg = &dinfo->cfg;
3348 
3349 	/*
3350 	 * Perform lazy resource allocation
3351 	 */
3352 	if (device_get_parent(child) == dev) {
3353 		switch (type) {
3354 		case SYS_RES_IRQ:
3355 			/*
3356 			 * Can't alloc legacy interrupt once MSI messages
3357 			 * have been allocated.
3358 			 */
3359 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3360 			    cfg->msix.msix_alloc > 0))
3361 				return (NULL);
3362 			/*
3363 			 * If the child device doesn't have an
3364 			 * interrupt routed and is deserving of an
3365 			 * interrupt, try to assign it one.
3366 			 */
3367 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3368 			    (cfg->intpin != 0))
3369 				pci_assign_interrupt(dev, child, 0);
3370 			break;
3371 		case SYS_RES_IOPORT:
3372 		case SYS_RES_MEMORY:
3373 			if (*rid < PCIR_BAR(cfg->nummaps)) {
3374 				/*
3375 				 * Enable the I/O mode.  We should
3376 				 * also be assigning resources too
3377 				 * when none are present.  The
3378 				 * resource_list_alloc kind of sorta does
3379 				 * this...
3380 				 */
3381 				if (PCI_ENABLE_IO(dev, child, type))
3382 					return (NULL);
3383 			}
3384 			rle = resource_list_find(rl, type, *rid);
3385 			if (rle == NULL)
3386 				return (pci_alloc_map(dev, child, type, rid,
3387 				    start, end, count, flags));
3388 			break;
3389 		}
3390 		/*
3391 		 * If we've already allocated the resource, then
3392 		 * return it now.  But first we may need to activate
3393 		 * it, since we don't allocate the resource as active
3394 		 * above.  Normally this would be done down in the
3395 		 * nexus, but since we short-circuit that path we have
3396 		 * to do its job here.  Not sure if we should free the
3397 		 * resource if it fails to activate.
3398 		 */
3399 		rle = resource_list_find(rl, type, *rid);
3400 		if (rle != NULL && rle->res != NULL) {
3401 			if (bootverbose)
3402 				device_printf(child,
3403 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3404 				    rman_get_size(rle->res), *rid, type,
3405 				    rman_get_start(rle->res));
3406 			if ((flags & RF_ACTIVE) &&
3407 			    bus_generic_activate_resource(dev, child, type,
3408 			    *rid, rle->res) != 0)
3409 				return (NULL);
3410 			return (rle->res);
3411 		}
3412 	}
3413 	return (resource_list_alloc(rl, dev, child, type, rid,
3414 	    start, end, count, flags));
3415 }
3416 
3417 void
3418 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3419 {
3420 	struct pci_devinfo *dinfo;
3421 	struct resource_list *rl;
3422 	struct resource_list_entry *rle;
3423 
3424 	if (device_get_parent(child) != dev)
3425 		return;
3426 
3427 	dinfo = device_get_ivars(child);
3428 	rl = &dinfo->resources;
3429 	rle = resource_list_find(rl, type, rid);
3430 	if (rle) {
3431 		if (rle->res) {
3432 			if (rman_get_device(rle->res) != dev ||
3433 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3434 				device_printf(dev, "delete_resource: "
3435 				    "Resource still owned by child, oops. "
3436 				    "(type=%d, rid=%d, addr=%lx)\n",
3437 				    rle->type, rle->rid,
3438 				    rman_get_start(rle->res));
3439 				return;
3440 			}
3441 			bus_release_resource(dev, type, rid, rle->res);
3442 		}
3443 		resource_list_delete(rl, type, rid);
3444 	}
3445 	/*
3446 	 * Why do we turn off the PCI configuration BAR when we delete a
3447 	 * resource? -- imp
3448 	 */
3449 	pci_write_config(child, rid, 0, 4);
3450 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3451 }
3452 
3453 struct resource_list *
3454 pci_get_resource_list (device_t dev, device_t child)
3455 {
3456 	struct pci_devinfo *dinfo = device_get_ivars(child);
3457 
3458 	return (&dinfo->resources);
3459 }
3460 
3461 uint32_t
3462 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3463 {
3464 	struct pci_devinfo *dinfo = device_get_ivars(child);
3465 	pcicfgregs *cfg = &dinfo->cfg;
3466 
3467 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3468 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3469 }
3470 
3471 void
3472 pci_write_config_method(device_t dev, device_t child, int reg,
3473     uint32_t val, int width)
3474 {
3475 	struct pci_devinfo *dinfo = device_get_ivars(child);
3476 	pcicfgregs *cfg = &dinfo->cfg;
3477 
3478 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3479 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3480 }
3481 
3482 int
3483 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3484     size_t buflen)
3485 {
3486 
3487 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3488 	    pci_get_function(child));
3489 	return (0);
3490 }
3491 
3492 int
3493 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3494     size_t buflen)
3495 {
3496 	struct pci_devinfo *dinfo;
3497 	pcicfgregs *cfg;
3498 
3499 	dinfo = device_get_ivars(child);
3500 	cfg = &dinfo->cfg;
3501 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3502 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3503 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3504 	    cfg->progif);
3505 	return (0);
3506 }
3507 
3508 int
3509 pci_assign_interrupt_method(device_t dev, device_t child)
3510 {
3511 	struct pci_devinfo *dinfo = device_get_ivars(child);
3512 	pcicfgregs *cfg = &dinfo->cfg;
3513 
3514 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3515 	    cfg->intpin));
3516 }
3517 
3518 static int
3519 pci_modevent(module_t mod, int what, void *arg)
3520 {
3521 	static struct cdev *pci_cdev;
3522 
3523 	switch (what) {
3524 	case MOD_LOAD:
3525 		STAILQ_INIT(&pci_devq);
3526 		pci_generation = 0;
3527 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3528 		    "pci");
3529 		pci_load_vendor_data();
3530 		break;
3531 
3532 	case MOD_UNLOAD:
3533 		destroy_dev(pci_cdev);
3534 		break;
3535 	}
3536 
3537 	return (0);
3538 }
3539 
3540 void
3541 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3542 {
3543 	int i;
3544 
3545 	/*
3546 	 * Only do header type 0 devices.  Type 1 devices are bridges,
3547 	 * which we know need special treatment.  Type 2 devices are
3548 	 * cardbus bridges which also require special treatment.
3549 	 * Other types are unknown, and we err on the side of safety
3550 	 * by ignoring them.
3551 	 */
3552 	if (dinfo->cfg.hdrtype != 0)
3553 		return;
3554 
3555 	/*
3556 	 * Restore the device to full power mode.  We must do this
3557 	 * before we restore the registers because moving from D3 to
3558 	 * D0 will cause the chip's BARs and some other registers to
3559 	 * be reset to some unknown power on reset values.  Cut down
3560 	 * the noise on boot by doing nothing if we are already in
3561 	 * state D0.
3562 	 */
3563 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3564 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3565 	}
3566 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3567 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3568 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3569 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3570 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3571 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3572 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3573 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3574 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3575 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3576 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3577 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3578 
3579 	/* Restore MSI and MSI-X configurations if they are present. */
3580 	if (dinfo->cfg.msi.msi_location != 0)
3581 		pci_resume_msi(dev);
3582 	if (dinfo->cfg.msix.msix_location != 0)
3583 		pci_resume_msix(dev);
3584 }
3585 
3586 void
3587 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3588 {
3589 	int i;
3590 	uint32_t cls;
3591 	int ps;
3592 
3593 	/*
3594 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3595 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3596 	 * which also require special treatment.  Other types are unknown, and
3597 	 * we err on the side of safety by ignoring them.  Powering down
3598 	 * bridges should not be undertaken lightly.
3599 	 */
3600 	if (dinfo->cfg.hdrtype != 0)
3601 		return;
3602 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3603 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3604 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3605 
3606 	/*
3607 	 * Some drivers apparently write to these registers w/o updating our
3608 	 * cached copy.  No harm happens if we update the copy, so do so here
3609 	 * so we can restore them.  The COMMAND register is modified by the
3610 	 * bus w/o updating the cache.  This should represent the normally
3611 	 * writable portion of the 'defined' part of type 0 headers.  In
3612 	 * theory we also need to save/restore the PCI capability structures
3613 	 * we know about, but apart from power we don't know any that are
3614 	 * writable.
3615 	 */
3616 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3617 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3618 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3619 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3620 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3621 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3622 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3623 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3624 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3625 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3626 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3627 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3628 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3629 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3630 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3631 
3632 	/*
3633 	 * don't set the state for display devices, base peripherals and
3634 	 * memory devices since bad things happen when they are powered down.
3635 	 * We should (a) have drivers that can easily detach and (b) use
3636 	 * generic drivers for these devices so that some device actually
3637 	 * attaches.  We need to make sure that when we implement (a) we don't
3638 	 * power the device down on a reattach.
3639 	 */
3640 	cls = pci_get_class(dev);
3641 	if (!setstate)
3642 		return;
3643 	switch (pci_do_power_nodriver)
3644 	{
3645 		case 0:		/* NO powerdown at all */
3646 			return;
3647 		case 1:		/* Conservative about what to power down */
3648 			if (cls == PCIC_STORAGE)
3649 				return;
3650 			/*FALLTHROUGH*/
3651 		case 2:		/* Agressive about what to power down */
3652 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
3653 			    cls == PCIC_BASEPERIPH)
3654 				return;
3655 			/*FALLTHROUGH*/
3656 		case 3:		/* Power down everything */
3657 			break;
3658 	}
3659 	/*
3660 	 * PCI spec says we can only go into D3 state from D0 state.
3661 	 * Transition from D[12] into D0 before going to D3 state.
3662 	 */
3663 	ps = pci_get_powerstate(dev);
3664 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
3665 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3666 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
3667 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
3668 }
3669