xref: /freebsd/sys/dev/pci/pci.c (revision 2b743a9e9ddc6736208dc8ca1ce06ce64ad20a19)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static int		pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_load_vendor_data(void);
90 static int		pci_describe_parse_line(char **ptr, int *vendor,
91 			    int *device, char **desc);
92 static char		*pci_describe_device(device_t dev);
93 static int		pci_modevent(module_t mod, int what, void *arg);
94 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
95 			    pcicfgregs *cfg);
96 static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
97 static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
98 			    int reg);
99 #if 0
100 static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t data);
102 #endif
103 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
104 static int		pci_msi_blacklisted(void);
105 
106 static device_method_t pci_methods[] = {
107 	/* Device interface */
108 	DEVMETHOD(device_probe,		pci_probe),
109 	DEVMETHOD(device_attach,	pci_attach),
110 	DEVMETHOD(device_detach,	bus_generic_detach),
111 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
112 	DEVMETHOD(device_suspend,	pci_suspend),
113 	DEVMETHOD(device_resume,	pci_resume),
114 
115 	/* Bus interface */
116 	DEVMETHOD(bus_print_child,	pci_print_child),
117 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
118 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
119 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
120 	DEVMETHOD(bus_driver_added,	pci_driver_added),
121 	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
122 	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
123 
124 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
125 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
126 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
127 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
128 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
129 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
130 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
131 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
132 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
133 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
134 
135 	/* PCI interface */
136 	DEVMETHOD(pci_read_config,	pci_read_config_method),
137 	DEVMETHOD(pci_write_config,	pci_write_config_method),
138 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
139 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
140 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
141 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
142 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
143 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
144 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
145 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
146 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
147 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
148 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
149 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
150 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
151 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
152 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
153 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
154 
155 	{ 0, 0 }
156 };
157 
158 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
159 
160 static devclass_t pci_devclass;
161 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
162 MODULE_VERSION(pci, 1);
163 
164 static char	*pci_vendordata;
165 static size_t	pci_vendordata_size;
166 
167 
/*
 * Per-device quirk table.  Entries are keyed on the 32-bit devid config
 * word (device id in the high 16 bits, vendor id in the low 16 bits,
 * e.g. 0x71138086 = Intel 82371AB) and terminated by a zero entry.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
211 
212 /* map register information */
213 #define	PCI_MAPMEM	0x01	/* memory map */
214 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
215 #define	PCI_MAPPORT	0x04	/* port map */
216 
/* Global list of every discovered PCI device, plus bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped each time pci_devq changes */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set by pci_read_extcap() when a PCIe root port / PCI-X bridge is seen. */
static int pcie_chipset, pcix_chipset;
221 
222 /* sysctl vars */
223 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
224 
225 static int pci_enable_io_modes = 1;
226 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
227 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
228     &pci_enable_io_modes, 1,
229     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
230 enable these bits correctly.  We'd like to do this all the time, but there\n\
231 are some peripherals that this causes problems with.");
232 
233 static int pci_do_power_nodriver = 0;
234 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
235 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
236     &pci_do_power_nodriver, 0,
237   "Place a function into D3 state when no driver attaches to it.  0 means\n\
238 disable.  1 means conservatively place devices into D3 state.  2 means\n\
239 agressively place devices into D3 state.  3 means put absolutely everything\n\
240 in D3 state.");
241 
242 static int pci_do_power_resume = 1;
243 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
244 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
245     &pci_do_power_resume, 1,
246   "Transition from D3 -> D0 on resume.");
247 
248 static int pci_do_vpd = 1;
249 TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
250 SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
251     "Enable support for VPD.");
252 
253 static int pci_do_msi = 1;
254 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
255 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
256     "Enable support for MSI interrupts");
257 
258 static int pci_do_msix = 1;
259 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
260 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
261     "Enable support for MSI-X interrupts");
262 
263 static int pci_honor_msi_blacklist = 1;
264 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
265 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
266     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267 
268 /* Find a device_t by bus/slot/function */
269 
270 device_t
271 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272 {
273 	struct pci_devinfo *dinfo;
274 
275 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276 		if ((dinfo->cfg.bus == bus) &&
277 		    (dinfo->cfg.slot == slot) &&
278 		    (dinfo->cfg.func == func)) {
279 			return (dinfo->cfg.dev);
280 		}
281 	}
282 
283 	return (NULL);
284 }
285 
286 /* Find a device_t by vendor/device ID */
287 
288 device_t
289 pci_find_device(uint16_t vendor, uint16_t device)
290 {
291 	struct pci_devinfo *dinfo;
292 
293 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294 		if ((dinfo->cfg.vendor == vendor) &&
295 		    (dinfo->cfg.device == device)) {
296 			return (dinfo->cfg.dev);
297 		}
298 	}
299 
300 	return (NULL);
301 }
302 
303 /* return base address of memory or port map */
304 
static uint32_t
pci_mapbase(uint32_t mapreg)
{
	uint32_t mask;

	/* I/O BARs (bit 0 set) decode 2 low bits; memory BARs decode 4. */
	mask = (mapreg & 0x01) ? 0x03 : 0x0f;
	return (mapreg & ~mask);
}
313 
314 /* return map type of memory or port map */
315 
316 static int
317 pci_maptype(unsigned mapreg)
318 {
319 	static uint8_t maptype[0x10] = {
320 		PCI_MAPMEM,		PCI_MAPPORT,
321 		PCI_MAPMEM,		0,
322 		PCI_MAPMEM,		PCI_MAPPORT,
323 		0,			0,
324 		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
325 		PCI_MAPMEM|PCI_MAPMEMP, 0,
326 		PCI_MAPMEM|PCI_MAPMEMP,	PCI_MAPPORT,
327 		0,			0,
328 	};
329 
330 	return maptype[mapreg & 0x0f];
331 }
332 
333 /* return log2 of map size decoded for memory or port map */
334 
static int
pci_mapsize(uint32_t testval)
{
	uint32_t base;
	int ln2size;

	/*
	 * After writing all-ones to a BAR, the read-back value has its
	 * writable bits set; the lowest set bit of the decoded base
	 * gives the region size as a power of two.
	 */
	base = pci_mapbase(testval);
	ln2size = 0;
	while (base != 0 && (base & 1) == 0) {
		ln2size++;
		base >>= 1;
	}
	return (ln2size);
}
351 
352 /* return log2 of address range supported by map register */
353 
static int
pci_maprange(unsigned mapreg)
{
	/* Decode the BAR's low type bits into an address-range width. */
	switch (mapreg & 0x07) {
	case 0x02:
		return (20);	/* memory BAR, below-1MB type */
	case 0x04:
		return (64);	/* memory BAR, 64-bit type */
	case 0x00:
	case 0x01:
	case 0x05:
		return (32);	/* 32-bit memory or I/O BAR */
	default:
		return (0);	/* reserved encoding */
	}
}
373 
374 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
375 
376 static void
377 pci_fixancient(pcicfgregs *cfg)
378 {
379 	if (cfg->hdrtype != 0)
380 		return;
381 
382 	/* PCI to PCI bridges use header type 1 */
383 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
384 		cfg->hdrtype = 1;
385 }
386 
387 /* extract header type specific config data */
388 
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	/*
	 * The location of the subvendor/subdevice registers and the
	 * number of BARs both depend on the header type.
	 */
	switch (cfg->hdrtype) {
	case 0:		/* standard device header */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers here */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
410 
411 /* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones from the vendor/device word means no device present. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		/*
		 * 'size' lets the caller allocate a larger structure
		 * that embeds pci_devinfo at its start.
		 */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard configuration header fields. */
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device has one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio(4) conf record. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
483 
484 static void
485 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
486 {
487 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
488 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
489 #if defined(__i386__) || defined(__amd64__)
490 	uint64_t addr;
491 #endif
492 	uint32_t val;
493 	int	ptr, nextptr, ptrptr;
494 
495 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
496 	case 0:
497 	case 1:
498 		ptrptr = PCIR_CAP_PTR;
499 		break;
500 	case 2:
501 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
502 		break;
503 	default:
504 		return;		/* no extended capabilities support */
505 	}
506 	nextptr = REG(ptrptr, 1);	/* sanity check? */
507 
508 	/*
509 	 * Read capability entries.
510 	 */
511 	while (nextptr != 0) {
512 		/* Sanity check */
513 		if (nextptr > 255) {
514 			printf("illegal PCI extended capability offset %d\n",
515 			    nextptr);
516 			return;
517 		}
518 		/* Find the next entry */
519 		ptr = nextptr;
520 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
521 
522 		/* Process this entry */
523 		switch (REG(ptr + PCICAP_ID, 1)) {
524 		case PCIY_PMG:		/* PCI power management */
525 			if (cfg->pp.pp_cap == 0) {
526 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
527 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
528 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
529 				if ((nextptr - ptr) > PCIR_POWER_DATA)
530 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
531 			}
532 			break;
533 #if defined(__i386__) || defined(__amd64__)
534 		case PCIY_HT:		/* HyperTransport */
535 			/* Determine HT-specific capability type. */
536 			val = REG(ptr + PCIR_HT_COMMAND, 2);
537 			switch (val & PCIM_HTCMD_CAP_MASK) {
538 			case PCIM_HTCAP_MSI_MAPPING:
539 				/* Sanity check the mapping window. */
540 				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
541 				addr <<= 32;
542 				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
543 				if (addr != MSI_INTEL_ADDR_BASE)
544 					device_printf(pcib,
545 		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
546 					    cfg->bus, cfg->slot, cfg->func,
547 					    (long long)addr);
548 
549 				/* Enable MSI -> HT mapping. */
550 				val |= PCIM_HTCMD_MSI_ENABLE;
551 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
552 				break;
553 			}
554 			break;
555 #endif
556 		case PCIY_MSI:		/* PCI MSI */
557 			cfg->msi.msi_location = ptr;
558 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
559 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
560 						     PCIM_MSICTRL_MMC_MASK)>>1);
561 			break;
562 		case PCIY_MSIX:		/* PCI MSI-X */
563 			cfg->msix.msix_location = ptr;
564 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
565 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
566 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
567 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
568 			cfg->msix.msix_table_bar = PCIR_BAR(val &
569 			    PCIM_MSIX_BIR_MASK);
570 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
571 			val = REG(ptr + PCIR_MSIX_PBA, 4);
572 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
573 			    PCIM_MSIX_BIR_MASK);
574 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
575 			break;
576 		case PCIY_VPD:		/* PCI Vital Product Data */
577 			if (pci_do_vpd) {
578 				cfg->vpd.vpd_reg = ptr;
579 				pci_read_vpd(pcib, cfg);
580 			}
581 			break;
582 		case PCIY_SUBVENDOR:
583 			/* Should always be true. */
584 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
585 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
586 				cfg->subvendor = val & 0xffff;
587 				cfg->subdevice = val >> 16;
588 			}
589 			break;
590 		case PCIY_PCIX:		/* PCI-X */
591 			/*
592 			 * Assume we have a PCI-X chipset if we have
593 			 * at least one PCI-PCI bridge with a PCI-X
594 			 * capability.  Note that some systems with
595 			 * PCI-express or HT chipsets might match on
596 			 * this check as well.
597 			 */
598 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
599 				pcix_chipset = 1;
600 			break;
601 		case PCIY_EXPRESS:	/* PCI-express */
602 			/*
603 			 * Assume we have a PCI-express chipset if we have
604 			 * at least one PCI-express root port.
605 			 */
606 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
607 			if ((val & PCIM_EXP_FLAGS_TYPE) ==
608 			    PCIM_EXP_TYPE_ROOT_PORT)
609 				pcie_chipset = 1;
610 			break;
611 		default:
612 			break;
613 		}
614 	}
615 /* REG and WREG use carry through to next functions */
616 }
617 
618 /*
619  * PCI Vital Product Data
620  */
/*
 * Read one 32-bit word from the device's VPD storage at byte offset
 * 'reg'.  Uses the REG/WREG macros still in effect from
 * pci_read_extcap() above: write the address to the VPD address
 * register, then poll bit 15, which the device sets when the datum is
 * available in the VPD data register.
 */
static uint32_t
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
{

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + 2, reg, 2);
	/* NOTE(review): poll has no timeout; a wedged device spins forever. */
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) != 0x8000)
		DELAY(1);	/* limit looping */

	return REG(cfg->vpd.vpd_reg + 4, 4);
}
633 
#if 0
/*
 * Inverse of pci_read_vpd_reg(): store 'data' at VPD offset 'reg'.
 * The flag bit is written as 1 and polled until the device clears it
 * to signal completion.  Currently compiled out (nothing writes VPD).
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + 4, data, 4);
	WREG(cfg->vpd.vpd_reg + 2, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + 2, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
648 
/* Cursor state for sequential byte-wise reads of a device's VPD data. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config accesses */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* VPD offset of the next fetch */
	uint8_t		cksum;		/* running byte sum for "RV" check */
};
657 
658 static uint8_t
659 vpd_nextbyte(struct vpd_readstate *vrs)
660 {
661 	uint8_t byte;
662 
663 	if (vrs->bytesinval == 0) {
664 		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
665 		    vrs->off));
666 		vrs->off += 4;
667 		byte = vrs->val & 0xff;
668 		vrs->bytesinval = 3;
669 	} else {
670 		vrs->val = vrs->val >> 8;
671 		byte = vrs->val & 0xff;
672 		vrs->bytesinval--;
673 	}
674 
675 	vrs->cksum += byte;
676 	return byte;
677 }
678 
/*
 * Parse the device's Vital Product Data into cfg->vpd.  A byte-at-a-time
 * state machine walks the VPD resource list: state 0 decodes resource
 * tag headers, state 1 collects the identifier string, states 2/3 parse
 * read-only (VPD-R) keyword headers and values, and states 5/6 do the
 * same for the read/write (VPD-W) section.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;		/* current parser state, see above */
	int name;		/* tag name of the current resource */
	int remain;		/* bytes left in the current resource */
	int end;		/* set to terminate the parse loop */
	int i;			/* write index into the current value */
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 unknown, 1 good, 0 bad "RV" checksum */
	int dflen;		/* data length of the current keyword */

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->bus, cfg->slot, cfg->func,
					    remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf("pci%d:%d:%d: bad keyword length: %d\n",
				    cfg->bus, cfg->slot, cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			/* 3 header bytes (keyword + length) consumed. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* "RV" holds the checksum byte: total sum must be 0. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->bus, cfg->slot, cfg->func,
					    vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * NOTE(review): nothing ever assigns state = 4;
			 * this skip state appears unreachable.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			/* Remember where this value lives in VPD space. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d: invalid state: %d\n",
			    cfg->bus, cfg->slot, cfg->func, state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/*
		 * read-only data bad, clean up
		 *
		 * NOTE(review): this loop frees indices off..1 but never
		 * index 0, and when the failure came from state 2 the
		 * entry at index 'off' has no value allocated yet, so
		 * vpd_ros[off].value is uninitialized memory.  Looks like
		 * it should iterate 0..off-1 (plus the partial entry from
		 * a state-3 failure) — verify against later revisions.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
#undef REG
#undef WREG
}
911 
912 int
913 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
914 {
915 	struct pci_devinfo *dinfo = device_get_ivars(child);
916 	pcicfgregs *cfg = &dinfo->cfg;
917 
918 	*identptr = cfg->vpd.vpd_ident;
919 
920 	if (*identptr == NULL)
921 		return ENXIO;
922 
923 	return 0;
924 }
925 
926 int
927 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
928 	const char **vptr)
929 {
930 	struct pci_devinfo *dinfo = device_get_ivars(child);
931 	pcicfgregs *cfg = &dinfo->cfg;
932 	int i;
933 
934 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
935 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
936 		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
937 			*vptr = cfg->vpd.vpd_ros[i].value;
938 		}
939 
940 	if (i != cfg->vpd.vpd_rocnt)
941 		return 0;
942 
943 	*vptr = NULL;
944 	return ENXIO;
945 }
946 
947 /*
948  * Return the offset in configuration space of the requested extended
949  * capability entry or 0 if the specified capability was not found.
950  */
951 int
952 pci_find_extcap_method(device_t dev, device_t child, int capability,
953     int *capreg)
954 {
955 	struct pci_devinfo *dinfo = device_get_ivars(child);
956 	pcicfgregs *cfg = &dinfo->cfg;
957 	u_int32_t status;
958 	u_int8_t ptr;
959 
960 	/*
961 	 * Check the CAP_LIST bit of the PCI status register first.
962 	 */
963 	status = pci_read_config(child, PCIR_STATUS, 2);
964 	if (!(status & PCIM_STATUS_CAPPRESENT))
965 		return (ENXIO);
966 
967 	/*
968 	 * Determine the start pointer of the capabilities list.
969 	 */
970 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
971 	case 0:
972 	case 1:
973 		ptr = PCIR_CAP_PTR;
974 		break;
975 	case 2:
976 		ptr = PCIR_CAP_PTR_2;
977 		break;
978 	default:
979 		/* XXX: panic? */
980 		return (ENXIO);		/* no extended capabilities support */
981 	}
982 	ptr = pci_read_config(child, ptr, 1);
983 
984 	/*
985 	 * Traverse the capabilities list.
986 	 */
987 	while (ptr != 0) {
988 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
989 			if (capreg != NULL)
990 				*capreg = ptr;
991 			return (0);
992 		}
993 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
994 	}
995 
996 	return (ENOENT);
997 }
998 
999 /*
1000  * Support for MSI-X message interrupts.
1001  */
1002 void
1003 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1004 {
1005 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1006 	pcicfgregs *cfg = &dinfo->cfg;
1007 	uint32_t offset;
1008 
1009 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1010 	offset = cfg->msix.msix_table_offset + index * 16;
1011 	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
1012 	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
1013 	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
1014 }
1015 
1016 void
1017 pci_mask_msix(device_t dev, u_int index)
1018 {
1019 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1020 	pcicfgregs *cfg = &dinfo->cfg;
1021 	uint32_t offset, val;
1022 
1023 	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
1024 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1025 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1026 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1027 		val |= PCIM_MSIX_VCTRL_MASK;
1028 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1029 	}
1030 }
1031 
1032 void
1033 pci_unmask_msix(device_t dev, u_int index)
1034 {
1035 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1036 	pcicfgregs *cfg = &dinfo->cfg;
1037 	uint32_t offset, val;
1038 
1039 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1040 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1041 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1042 	if (val & PCIM_MSIX_VCTRL_MASK) {
1043 		val &= ~PCIM_MSIX_VCTRL_MASK;
1044 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1045 	}
1046 }
1047 
1048 int
1049 pci_pending_msix(device_t dev, u_int index)
1050 {
1051 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1052 	pcicfgregs *cfg = &dinfo->cfg;
1053 	uint32_t offset, bit;
1054 
1055 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1056 	offset = cfg->msix.msix_pba_offset + (index / 4) * 4;
1057 	bit = 1 << index % 32;
1058 	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1059 }
1060 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 *
 * 'dev' is the pci bus; 'child' is the device requesting the messages.
 * Returns 0 on success; EINVAL, ENXIO, or ENODEV on the various
 * precondition failures noted below.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BAR(s) that
	 * hold the MSI-X table and the pending bit array.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* Note: 'rle' still points at the table BAR if table == PBA BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate messages one at a time until the bridge runs out. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
		    &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages we really obtained. */
	actual = i;

	if (bootverbose) {
		/*
		 * NOTE(review): if the very first PCIB_ALLOC_MSIX failed,
		 * actual == 0 and rid 1 was never added, so 'rle' below is
		 * NULL and rle->start would fault; MSI-X is also enabled
		 * with zero messages in that case — confirm callers/bridges
		 * guarantee at least one success.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	*count = actual;
	return (0);
}
1186 
1187 /*
1188  * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1189  * the first N messages in the MSI-X table.  However, device drivers may
1190  * want to use different layouts in the case that they do not allocate a
1191  * full table.  This method allows the driver to specify what layout it
1192  * wants.  It must be called after a successful pci_alloc_msix() but
1193  * before any of the associated SYS_RES_IRQ resources are allocated via
1194  * bus_alloc_resource().  The 'indices' array contains N (where N equals
1195  * the 'count' returned from pci_alloc_msix()) message indices.  The
1196  * indices are 1-based (meaning the first message is at index 1).  On
1197  * successful return, each of the messages in the 'indices' array will
1198  * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1199  * if indices contains { 2, 4 }, then upon successful return, the 'child'
1200  * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1201  */
int
pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, error, i, j, *irqs;

	/* Sanity check the indices: 1-based and within the MSI-X table. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
			return (EINVAL);

	/* Check for duplicates. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
			if (indices[i] == indices[j])
				return (EINVAL);

	/*
	 * Make sure none of the resources are allocated.  The rids may
	 * be sparse (from a previous remap), so scan upward until
	 * msix_alloc entries have been accounted for.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Save the IRQ values and free the existing resources. */
	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		irqs[count] = rle->start;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Map the IRQ values to the new message indices and rids. */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
		    irqs[i], irqs[i], 1);

		/*
		 * The indices in the backend code (PCIB_* methods and the
		 * MI helper routines for MD code such as pci_enable_msix())
		 * are all zero-based.  However, the indices passed to this
		 * function are 1-based so that they correspond 1:1 with the
		 * SYS_RES_IRQ resource IDs.
		 */
		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
		    indices[i] - 1, irqs[i]);
		KASSERT(error == 0, ("Failed to remap MSI-X message"));
	}
	if (bootverbose) {
		if (cfg->msix.msix_alloc == 1)
			device_printf(child,
			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
		else {
			device_printf(child, "Remapped MSI-X IRQs to indices");
			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
				printf(" %d,", indices[i]);
			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
		}
	}
	free(irqs, M_TEMP);

	return (0);
}
1273 
/*
 * Disable MSI-X for 'child' and hand the messages back to the parent
 * bridge.  Returns ENODEV if no MSI-X messages are allocated and EBUSY
 * if any of the backing SYS_RES_IRQ resources are still held by the
 * driver.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  Rids may be
	 * sparse after pci_remap_msix_method(), so scan upward until
	 * msix_alloc entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1316 
1317 /*
1318  * Return the max supported MSI-X messages this device supports.
1319  * Basically, assuming the MD code can alloc messages, this function
1320  * should return the maximum value that pci_alloc_msix() can return.
1321  * Thus, it is subject to the tunables, etc.
1322  */
1323 int
1324 pci_msix_count_method(device_t dev, device_t child)
1325 {
1326 	struct pci_devinfo *dinfo = device_get_ivars(child);
1327 	pcicfgregs *cfg = &dinfo->cfg;
1328 
1329 	if (pci_do_msix && cfg->msix.msix_location != 0)
1330 		return (cfg->msix.msix_msgnum);
1331 	return (0);
1332 }
1333 
1334 /*
1335  * Support for MSI message signalled interrupts.
1336  */
1337 void
1338 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1339 {
1340 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1341 	pcicfgregs *cfg = &dinfo->cfg;
1342 
1343 	/* Write data and address values. */
1344 	cfg->msi.msi_addr = address;
1345 	cfg->msi.msi_data = data;
1346 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1347 	    address & 0xffffffff, 4);
1348 	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1349 		pci_write_config(dev, cfg->msi.msi_location +
1350 		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1351 		pci_write_config(dev, cfg->msi.msi_location +
1352 		    PCIR_MSI_DATA_64BIT, data, 2);
1353 	} else
1354 		pci_write_config(dev, cfg->msi.msi_location +
1355 		    PCIR_MSI_DATA, data, 2);
1356 
1357 	/* Enable MSI in the control register. */
1358 	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1359 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1360 	    cfg->msi.msi_ctrl, 2);
1361 }
1362 
1363 /*
1364  * Restore MSI registers during resume.  If MSI is enabled then
1365  * restore the data and address registers in addition to the control
1366  * register.
1367  */
1368 static void
1369 pci_resume_msi(device_t dev)
1370 {
1371 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1372 	pcicfgregs *cfg = &dinfo->cfg;
1373 	uint64_t address;
1374 	uint16_t data;
1375 
1376 	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1377 		address = cfg->msi.msi_addr;
1378 		data = cfg->msi.msi_data;
1379 		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1380 		    address & 0xffffffff, 4);
1381 		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1382 			pci_write_config(dev, cfg->msi.msi_location +
1383 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1384 			pci_write_config(dev, cfg->msi.msi_location +
1385 			    PCIR_MSI_DATA_64BIT, data, 2);
1386 		} else
1387 			pci_write_config(dev, cfg->msi.msi_location +
1388 			    PCIR_MSI_DATA, data, 2);
1389 	}
1390 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1391 	    cfg->msi.msi_ctrl, 2);
1392 }
1393 
1394 /*
1395  * Returns true if the specified device is blacklisted because MSI
1396  * doesn't work.
1397  */
1398 int
1399 pci_msi_device_blacklisted(device_t dev)
1400 {
1401 	struct pci_quirk *q;
1402 
1403 	if (!pci_honor_msi_blacklist)
1404 		return (0);
1405 
1406 	for (q = &pci_quirks[0]; q->devid; q++) {
1407 		if (q->devid == pci_get_devid(dev) &&
1408 		    q->type == PCI_QUIRK_DISABLE_MSI)
1409 			return (1);
1410 	}
1411 	return (0);
1412 }
1413 
/*
 * Determine if MSI is blacklisted globally on this system.  Currently,
 * we just check for blacklisted chipsets as represented by the
 * host-PCI bridge at device 0:0:0.  In the future, it may become
 * necessary to check other system attributes, such as the kenv values
 * that give the motherboard manufacturer and model number.
 */
1421 static int
1422 pci_msi_blacklisted(void)
1423 {
1424 	device_t dev;
1425 
1426 	if (!pci_honor_msi_blacklist)
1427 		return (0);
1428 
1429 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1430 	if (!(pcie_chipset || pcix_chipset))
1431 		return (1);
1432 
1433 	dev = pci_find_bsf(0, 0, 0);
1434 	if (dev != NULL)
1435 		return (pci_msi_device_blacklisted(dev));
1436 	return (0);
1437 }
1438 
1439 /*
1440  * Attempt to allocate *count MSI messages.  The actual number allocated is
1441  * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1443  */
1444 int
1445 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1446 {
1447 	struct pci_devinfo *dinfo = device_get_ivars(child);
1448 	pcicfgregs *cfg = &dinfo->cfg;
1449 	struct resource_list_entry *rle;
1450 	int actual, error, i, irqs[32];
1451 	uint16_t ctrl;
1452 
1453 	/* Don't let count == 0 get us into trouble. */
1454 	if (*count == 0)
1455 		return (EINVAL);
1456 
1457 	/* If rid 0 is allocated, then fail. */
1458 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1459 	if (rle != NULL && rle->res != NULL)
1460 		return (ENXIO);
1461 
1462 	/* Already have allocated messages? */
1463 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1464 		return (ENXIO);
1465 
1466 	/* If MSI is blacklisted for this system, fail. */
1467 	if (pci_msi_blacklisted())
1468 		return (ENXIO);
1469 
1470 	/* MSI capability present? */
1471 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1472 		return (ENODEV);
1473 
1474 	if (bootverbose)
1475 		device_printf(child,
1476 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1477 		    *count, cfg->msi.msi_msgnum);
1478 
1479 	/* Don't ask for more than the device supports. */
1480 	actual = min(*count, cfg->msi.msi_msgnum);
1481 
1482 	/* Don't ask for more than 32 messages. */
1483 	actual = min(actual, 32);
1484 
1485 	/* MSI requires power of 2 number of messages. */
1486 	if (!powerof2(actual))
1487 		return (EINVAL);
1488 
1489 	for (;;) {
1490 		/* Try to allocate N messages. */
1491 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1492 		    cfg->msi.msi_msgnum, irqs);
1493 		if (error == 0)
1494 			break;
1495 		if (actual == 1)
1496 			return (error);
1497 
1498 		/* Try N / 2. */
1499 		actual >>= 1;
1500 	}
1501 
1502 	/*
1503 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1504 	 * resources in the irqs[] array, so add new resources
1505 	 * starting at rid 1.
1506 	 */
1507 	for (i = 0; i < actual; i++)
1508 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1509 		    irqs[i], irqs[i], 1);
1510 
1511 	if (bootverbose) {
1512 		if (actual == 1)
1513 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1514 		else {
1515 			int run;
1516 
1517 			/*
1518 			 * Be fancy and try to print contiguous runs
1519 			 * of IRQ values as ranges.  'run' is true if
1520 			 * we are in a range.
1521 			 */
1522 			device_printf(child, "using IRQs %d", irqs[0]);
1523 			run = 0;
1524 			for (i = 1; i < actual; i++) {
1525 
1526 				/* Still in a run? */
1527 				if (irqs[i] == irqs[i - 1] + 1) {
1528 					run = 1;
1529 					continue;
1530 				}
1531 
1532 				/* Finish previous range. */
1533 				if (run) {
1534 					printf("-%d", irqs[i - 1]);
1535 					run = 0;
1536 				}
1537 
1538 				/* Start new range. */
1539 				printf(",%d", irqs[i]);
1540 			}
1541 
1542 			/* Unfinished range? */
1543 			if (run)
1544 				printf("%d", irqs[actual - 1]);
1545 			printf(" for MSI\n");
1546 		}
1547 	}
1548 
1549 	/* Update control register with actual count and enable MSI. */
1550 	ctrl = cfg->msi.msi_ctrl;
1551 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1552 	ctrl |= (ffs(actual) - 1) << 4;
1553 	cfg->msi.msi_ctrl = ctrl;
1554 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1555 
1556 	/* Update counts of alloc'd messages. */
1557 	cfg->msi.msi_alloc = actual;
1558 	*count = actual;
1559 	return (0);
1560 }
1561 
/*
 * Release the MSI (or MSI-X) messages associated with this device.
 * MSI-X is tried first; if no MSI-X messages are allocated we fall
 * back to plain MSI.  Returns EBUSY if any backing SYS_RES_IRQ
 * resource is still allocated, ENODEV if there is nothing to release.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ so it can be handed back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1605 
1606 /*
1607  * Return the max supported MSI messages this device supports.
1608  * Basically, assuming the MD code can alloc messages, this function
1609  * should return the maximum value that pci_alloc_msi() can return.
1610  * Thus, it is subject to the tunables, etc.
1611  */
1612 int
1613 pci_msi_count_method(device_t dev, device_t child)
1614 {
1615 	struct pci_devinfo *dinfo = device_get_ivars(child);
1616 	pcicfgregs *cfg = &dinfo->cfg;
1617 
1618 	if (pci_do_msi && cfg->msi.msi_location != 0)
1619 		return (cfg->msi.msi_msgnum);
1620 	return (0);
1621 }
1622 
1623 /* free pcicfgregs structure and all depending data structures */
1624 
1625 int
1626 pci_freecfg(struct pci_devinfo *dinfo)
1627 {
1628 	struct devlist *devlist_head;
1629 	int i;
1630 
1631 	devlist_head = &pci_devq;
1632 
1633 	if (dinfo->cfg.vpd.vpd_reg) {
1634 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1635 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1636 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1637 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1638 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1639 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1640 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1641 	}
1642 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1643 	free(dinfo, M_DEVBUF);
1644 
1645 	/* increment the generation count */
1646 	pci_generation++;
1647 
1648 	/* we're losing one device */
1649 	pci_numdevs--;
1650 	return (0);
1651 }
1652 
/*
 * PCI power management
 */
1656 int
1657 pci_set_powerstate_method(device_t dev, device_t child, int state)
1658 {
1659 	struct pci_devinfo *dinfo = device_get_ivars(child);
1660 	pcicfgregs *cfg = &dinfo->cfg;
1661 	uint16_t status;
1662 	int result, oldstate, highest, delay;
1663 
1664 	if (cfg->pp.pp_cap == 0)
1665 		return (EOPNOTSUPP);
1666 
1667 	/*
1668 	 * Optimize a no state change request away.  While it would be OK to
1669 	 * write to the hardware in theory, some devices have shown odd
1670 	 * behavior when going from D3 -> D3.
1671 	 */
1672 	oldstate = pci_get_powerstate(child);
1673 	if (oldstate == state)
1674 		return (0);
1675 
1676 	/*
1677 	 * The PCI power management specification states that after a state
1678 	 * transition between PCI power states, system software must
1679 	 * guarantee a minimal delay before the function accesses the device.
1680 	 * Compute the worst case delay that we need to guarantee before we
1681 	 * access the device.  Many devices will be responsive much more
1682 	 * quickly than this delay, but there are some that don't respond
1683 	 * instantly to state changes.  Transitions to/from D3 state require
1684 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1685 	 * is done below with DELAY rather than a sleeper function because
1686 	 * this function can be called from contexts where we cannot sleep.
1687 	 */
1688 	highest = (oldstate > state) ? oldstate : state;
1689 	if (highest == PCI_POWERSTATE_D3)
1690 	    delay = 10000;
1691 	else if (highest == PCI_POWERSTATE_D2)
1692 	    delay = 200;
1693 	else
1694 	    delay = 0;
1695 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1696 	    & ~PCIM_PSTAT_DMASK;
1697 	result = 0;
1698 	switch (state) {
1699 	case PCI_POWERSTATE_D0:
1700 		status |= PCIM_PSTAT_D0;
1701 		break;
1702 	case PCI_POWERSTATE_D1:
1703 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1704 			return (EOPNOTSUPP);
1705 		status |= PCIM_PSTAT_D1;
1706 		break;
1707 	case PCI_POWERSTATE_D2:
1708 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1709 			return (EOPNOTSUPP);
1710 		status |= PCIM_PSTAT_D2;
1711 		break;
1712 	case PCI_POWERSTATE_D3:
1713 		status |= PCIM_PSTAT_D3;
1714 		break;
1715 	default:
1716 		return (EINVAL);
1717 	}
1718 
1719 	if (bootverbose)
1720 		printf(
1721 		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1722 		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1723 		    oldstate, state);
1724 
1725 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1726 	if (delay)
1727 		DELAY(delay);
1728 	return (0);
1729 }
1730 
1731 int
1732 pci_get_powerstate_method(device_t dev, device_t child)
1733 {
1734 	struct pci_devinfo *dinfo = device_get_ivars(child);
1735 	pcicfgregs *cfg = &dinfo->cfg;
1736 	uint16_t status;
1737 	int result;
1738 
1739 	if (cfg->pp.pp_cap != 0) {
1740 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1741 		switch (status & PCIM_PSTAT_DMASK) {
1742 		case PCIM_PSTAT_D0:
1743 			result = PCI_POWERSTATE_D0;
1744 			break;
1745 		case PCIM_PSTAT_D1:
1746 			result = PCI_POWERSTATE_D1;
1747 			break;
1748 		case PCIM_PSTAT_D2:
1749 			result = PCI_POWERSTATE_D2;
1750 			break;
1751 		case PCIM_PSTAT_D3:
1752 			result = PCI_POWERSTATE_D3;
1753 			break;
1754 		default:
1755 			result = PCI_POWERSTATE_UNKNOWN;
1756 			break;
1757 		}
1758 	} else {
1759 		/* No support, device is always at D0 */
1760 		result = PCI_POWERSTATE_D0;
1761 	}
1762 	return (result);
1763 }
1764 
1765 /*
1766  * Some convenience functions for PCI device drivers.
1767  */
1768 
1769 static __inline void
1770 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1771 {
1772 	uint16_t	command;
1773 
1774 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1775 	command |= bit;
1776 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1777 }
1778 
1779 static __inline void
1780 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1781 {
1782 	uint16_t	command;
1783 
1784 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1785 	command &= ~bit;
1786 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1787 }
1788 
/* Turn on the bus-master enable bit for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1795 
/* Turn off the bus-master enable bit for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1802 
1803 int
1804 pci_enable_io_method(device_t dev, device_t child, int space)
1805 {
1806 	uint16_t command;
1807 	uint16_t bit;
1808 	char *error;
1809 
1810 	bit = 0;
1811 	error = NULL;
1812 
1813 	switch(space) {
1814 	case SYS_RES_IOPORT:
1815 		bit = PCIM_CMD_PORTEN;
1816 		error = "port";
1817 		break;
1818 	case SYS_RES_MEMORY:
1819 		bit = PCIM_CMD_MEMEN;
1820 		error = "memory";
1821 		break;
1822 	default:
1823 		return (EINVAL);
1824 	}
1825 	pci_set_command_bit(dev, child, bit);
1826 	/* Some devices seem to need a brief stall here, what do to? */
1827 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1828 	if (command & bit)
1829 		return (0);
1830 	device_printf(child, "failed to enable %s mapping!\n", error);
1831 	return (ENXIO);
1832 }
1833 
1834 int
1835 pci_disable_io_method(device_t dev, device_t child, int space)
1836 {
1837 	uint16_t command;
1838 	uint16_t bit;
1839 	char *error;
1840 
1841 	bit = 0;
1842 	error = NULL;
1843 
1844 	switch(space) {
1845 	case SYS_RES_IOPORT:
1846 		bit = PCIM_CMD_PORTEN;
1847 		error = "port";
1848 		break;
1849 	case SYS_RES_MEMORY:
1850 		bit = PCIM_CMD_MEMEN;
1851 		error = "memory";
1852 		break;
1853 	default:
1854 		return (EINVAL);
1855 	}
1856 	pci_clear_command_bit(dev, child, bit);
1857 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1858 	if (command & bit) {
1859 		device_printf(child, "failed to disable %s mapping!\n", error);
1860 		return (ENXIO);
1861 	}
1862 	return (0);
1863 }
1864 
1865 /*
1866  * New style pci driver.  Parent device is either a pci-host-bridge or a
1867  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1868  */
1869 
/*
 * Dump a human-readable summary of a newly discovered device's config
 * header and capabilities to the console.  Only prints under
 * bootverbose; no-op otherwise.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{
	int i;

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based ('a' == INTA#); 0 means no interrupt. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* Vital Product Data: decode a few well-known keywords. */
		if (cfg->vpd.vpd_reg) {
			printf("\tVPD Ident: %s\n", cfg->vpd.vpd_ident);
			for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
				struct vpd_readonly *vrop;
				vrop = &cfg->vpd.vpd_ros[i];
				if (strncmp("CP", vrop->keyword, 2) == 0)
					printf("\tCP: id %d, BAR%d, off %#x\n",
					    vrop->value[0], vrop->value[1],
					    le16toh(
					      *(uint16_t *)&vrop->value[2]));
				else if (strncmp("RV", vrop->keyword, 2) == 0)
					printf("\tRV: %#hhx\n", vrop->value[0]);
				else
					printf("\t%.2s: %s\n", vrop->keyword,
					    vrop->value);
			}
			for (i = 0; i < cfg->vpd.vpd_wcnt; i++) {
				struct vpd_write *vwp;
				vwp = &cfg->vpd.vpd_w[i];
				if (strncmp("RW", vwp->keyword, 2) != 0)
					printf("\t%.2s(%#x-%#x): %s\n",
					    vwp->keyword, vwp->start,
					    vwp->start + vwp->len, vwp->value);
			}
		}
		/* MSI capability summary. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability summary (table/PBA BAR placement). */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1952 
1953 static int
1954 pci_porten(device_t pcib, int b, int s, int f)
1955 {
1956 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1957 		& PCIM_CMD_PORTEN) != 0;
1958 }
1959 
1960 static int
1961 pci_memen(device_t pcib, int b, int s, int f)
1962 {
1963 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1964 		& PCIM_CMD_MEMEN) != 0;
1965 }
1966 
1967 /*
1968  * Add a resource based on a pci map register. Return 1 if the map
1969  * register is a 32bit map register or 2 if it is a 64bit register.
1970  */
1971 static int
1972 pci_add_map(device_t pcib, device_t bus, device_t dev,
1973     int b, int s, int f, int reg, struct resource_list *rl, int force,
1974     int prefetch)
1975 {
1976 	uint32_t map;
1977 	pci_addr_t base;
1978 	pci_addr_t start, end, count;
1979 	uint8_t ln2size;
1980 	uint8_t ln2range;
1981 	uint32_t testval;
1982 	uint16_t cmd;
1983 	int type;
1984 	int barlen;
1985 	struct resource *res;
1986 
1987 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1988 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1989 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1990 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1991 
1992 	if (pci_maptype(map) & PCI_MAPMEM)
1993 		type = SYS_RES_MEMORY;
1994 	else
1995 		type = SYS_RES_IOPORT;
1996 	ln2size = pci_mapsize(testval);
1997 	ln2range = pci_maprange(testval);
1998 	base = pci_mapbase(map);
1999 	barlen = ln2range == 64 ? 2 : 1;
2000 
2001 	/*
2002 	 * For I/O registers, if bottom bit is set, and the next bit up
2003 	 * isn't clear, we know we have a BAR that doesn't conform to the
2004 	 * spec, so ignore it.  Also, sanity check the size of the data
2005 	 * areas to the type of memory involved.  Memory must be at least
2006 	 * 16 bytes in size, while I/O ranges must be at least 4.
2007 	 */
2008 	if ((testval & 0x1) == 0x1 &&
2009 	    (testval & 0x2) != 0)
2010 		return (barlen);
2011 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2012 	    (type == SYS_RES_IOPORT && ln2size < 2))
2013 		return (barlen);
2014 
2015 	if (ln2range == 64)
2016 		/* Read the other half of a 64bit map register */
2017 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2018 	if (bootverbose) {
2019 		printf("\tmap[%02x]: type %x, range %2d, base %#jx, size %2d",
2020 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2021 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2022 			printf(", port disabled\n");
2023 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2024 			printf(", memory disabled\n");
2025 		else
2026 			printf(", enabled\n");
2027 	}
2028 
2029 	/*
2030 	 * If base is 0, then we have problems.  It is best to ignore
2031 	 * such entries for the moment.  These will be allocated later if
2032 	 * the driver specifically requests them.  However, some
2033 	 * removable busses look better when all resources are allocated,
2034 	 * so allow '0' to be overriden.
2035 	 *
2036 	 * Similarly treat maps whose values is the same as the test value
2037 	 * read back.  These maps have had all f's written to them by the
2038 	 * BIOS in an attempt to disable the resources.
2039 	 */
2040 	if (!force && (base == 0 || map == testval))
2041 		return (barlen);
2042 	if ((u_long)base != base) {
2043 		device_printf(bus,
2044 		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2045 		return (barlen);
2046 	}
2047 
2048 	/*
2049 	 * This code theoretically does the right thing, but has
2050 	 * undesirable side effects in some cases where peripherals
2051 	 * respond oddly to having these bits enabled.  Let the user
2052 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2053 	 * default).
2054 	 */
2055 	if (pci_enable_io_modes) {
2056 		/* Turn on resources that have been left off by a lazy BIOS */
2057 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2058 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2059 			cmd |= PCIM_CMD_PORTEN;
2060 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2061 		}
2062 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2063 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2064 			cmd |= PCIM_CMD_MEMEN;
2065 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2066 		}
2067 	} else {
2068 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2069 			return (barlen);
2070 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2071 			return (barlen);
2072 	}
2073 
2074 	count = 1 << ln2size;
2075 	if (base == 0 || base == pci_mapbase(testval)) {
2076 		start = 0;	/* Let the parent deside */
2077 		end = ~0ULL;
2078 	} else {
2079 		start = base;
2080 		end = base + (1 << ln2size) - 1;
2081 	}
2082 	resource_list_add(rl, type, reg, start, end, count);
2083 
2084 	/*
2085 	 * Not quite sure what to do on failure of allocating the resource
2086 	 * since I can postulate several right answers.
2087 	 */
2088 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2089 	    prefetch ? RF_PREFETCHABLE : 0);
2090 	if (res == NULL)
2091 		return (barlen);
2092 	start = rman_get_start(res);
2093 	if ((u_long)start != start) {
2094 		/* Wait a minute!  this platform can't do this address. */
2095 		device_printf(bus,
2096 		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2097 		    b, s, f, reg, (uintmax_t)start);
2098 		resource_list_release(rl, bus, dev, type, reg, res);
2099 		return (barlen);
2100 	}
2101 	pci_write_config(dev, reg, start, 4);
2102 	if (ln2range == 64)
2103 		pci_write_config(dev, reg + 4, start >> 32, 4);
2104 	return (barlen);
2105 }
2106 
2107 /*
2108  * For ATA devices we need to decide early what addressing mode to use.
2109  * Legacy demands that the primary and secondary ATA ports sits on the
2110  * same addresses that old ISA hardware did. This dictates that we use
2111  * those addresses and ignore the BAR's if we cannot set PCI native
2112  * addressing mode.
2113  */
2114 static void
2115 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2116     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2117 {
2118 	int rid, type, progif;
2119 #if 0
2120 	/* if this device supports PCI native addressing use it */
2121 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2122 	if ((progif & 0x8a) == 0x8a) {
2123 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2124 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2125 			printf("Trying ATA native PCI addressing mode\n");
2126 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2127 		}
2128 	}
2129 #endif
2130 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2131 	type = SYS_RES_IOPORT;
2132 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2133 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2134 		    prefetchmask & (1 << 0));
2135 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2136 		    prefetchmask & (1 << 1));
2137 	} else {
2138 		rid = PCIR_BAR(0);
2139 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2140 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2141 		    0);
2142 		rid = PCIR_BAR(1);
2143 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2144 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2145 		    0);
2146 	}
2147 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2148 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2149 		    prefetchmask & (1 << 2));
2150 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2151 		    prefetchmask & (1 << 3));
2152 	} else {
2153 		rid = PCIR_BAR(2);
2154 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2155 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2156 		    0);
2157 		rid = PCIR_BAR(3);
2158 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2159 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2160 		    0);
2161 	}
2162 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2163 	    prefetchmask & (1 << 4));
2164 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2165 	    prefetchmask & (1 << 5));
2166 }
2167 
/*
 * Determine the IRQ for a device and record it as the rid 0 SYS_RES_IRQ
 * resource.  Precedence: a hw.pci%d.%d.INT%c.irq tunable, then (depending
 * on force_route) either the bus's interrupt routing or the value already
 * in the intline register.  Does nothing for devices with no intpin.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (255 means "no IRQ" on PCI). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2214 
/*
 * Walk a device's BARs and populate its resource list, apply quirk
 * resources, and assign its interrupt.  'force' and 'prefetchmask' are
 * passed through to pci_add_map() for each BAR.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/*
	 * ATA devices needs special map treatment: an IDE controller in
	 * legacy mode (or with no BARs programmed at all) decodes the
	 * fixed ISA-compatible ports instead of its BARs.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 2 for a 64-bit BAR, skipping its
		 * upper half. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2267 
/*
 * Scan every slot/function on the given bus number and add a child
 * device for each function that responds.  dinfo_size lets subclassed
 * busses embed pci_devinfo in a larger per-device structure.
 */
void
pci_add_children(device_t dev, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* An out-of-range header type means nothing at this slot. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function bit: probe all 8 functions, else just 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2299 
/*
 * Attach a new child device for the function described by dinfo: create
 * the newbus child, snapshot its config space, then restore it (which
 * presumably re-arms state cleared at probe time — confirm against
 * pci_cfg_save/pci_cfg_restore) and probe its BARs.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2311 
/*
 * Generic PCI bus probe: always matches, but at low priority so more
 * specific subclasses (e.g. ACPI-aware PCI busses) win.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2321 
/*
 * Attach the PCI bus: discover our physical bus number from the parent
 * bridge, enumerate children, and let them attach.
 */
static int
pci_attach(device_t dev)
{
	int busno;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our bus number is.
	 */
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "physical bus=%d\n", busno);

	pci_add_children(dev, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2341 
2342 int
2343 pci_suspend(device_t dev)
2344 {
2345 	int dstate, error, i, numdevs;
2346 	device_t acpi_dev, child, *devlist;
2347 	struct pci_devinfo *dinfo;
2348 
2349 	/*
2350 	 * Save the PCI configuration space for each child and set the
2351 	 * device in the appropriate power state for this sleep state.
2352 	 */
2353 	acpi_dev = NULL;
2354 	if (pci_do_power_resume)
2355 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2356 	device_get_children(dev, &devlist, &numdevs);
2357 	for (i = 0; i < numdevs; i++) {
2358 		child = devlist[i];
2359 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2360 		pci_cfg_save(child, dinfo, 0);
2361 	}
2362 
2363 	/* Suspend devices before potentially powering them down. */
2364 	error = bus_generic_suspend(dev);
2365 	if (error) {
2366 		free(devlist, M_TEMP);
2367 		return (error);
2368 	}
2369 
2370 	/*
2371 	 * Always set the device to D3.  If ACPI suggests a different
2372 	 * power state, use it instead.  If ACPI is not present, the
2373 	 * firmware is responsible for managing device power.  Skip
2374 	 * children who aren't attached since they are powered down
2375 	 * separately.  Only manage type 0 devices for now.
2376 	 */
2377 	for (i = 0; acpi_dev && i < numdevs; i++) {
2378 		child = devlist[i];
2379 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2380 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2381 			dstate = PCI_POWERSTATE_D3;
2382 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2383 			pci_set_powerstate(child, dstate);
2384 		}
2385 	}
2386 	free(devlist, M_TEMP);
2387 	return (0);
2388 }
2389 
2390 int
2391 pci_resume(device_t dev)
2392 {
2393 	int i, numdevs;
2394 	device_t acpi_dev, child, *devlist;
2395 	struct pci_devinfo *dinfo;
2396 
2397 	/*
2398 	 * Set each child to D0 and restore its PCI configuration space.
2399 	 */
2400 	acpi_dev = NULL;
2401 	if (pci_do_power_resume)
2402 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2403 	device_get_children(dev, &devlist, &numdevs);
2404 	for (i = 0; i < numdevs; i++) {
2405 		/*
2406 		 * Notify ACPI we're going to D0 but ignore the result.  If
2407 		 * ACPI is not present, the firmware is responsible for
2408 		 * managing device power.  Only manage type 0 devices for now.
2409 		 */
2410 		child = devlist[i];
2411 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2412 		if (acpi_dev && device_is_attached(child) &&
2413 		    dinfo->cfg.hdrtype == 0) {
2414 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2415 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2416 		}
2417 
2418 		/* Now the device is powered up, restore its config space. */
2419 		pci_cfg_restore(child, dinfo);
2420 	}
2421 	free(devlist, M_TEMP);
2422 	return (bus_generic_resume(dev));
2423 }
2424 
2425 static void
2426 pci_load_vendor_data(void)
2427 {
2428 	caddr_t vendordata, info;
2429 
2430 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2431 		info = preload_search_info(vendordata, MODINFO_ADDR);
2432 		pci_vendordata = *(char **)info;
2433 		info = preload_search_info(vendordata, MODINFO_SIZE);
2434 		pci_vendordata_size = *(size_t *)info;
2435 		/* terminate the database */
2436 		pci_vendordata[pci_vendordata_size] = '\n';
2437 	}
2438 }
2439 
2440 void
2441 pci_driver_added(device_t dev, driver_t *driver)
2442 {
2443 	int numdevs;
2444 	device_t *devlist;
2445 	device_t child;
2446 	struct pci_devinfo *dinfo;
2447 	int i;
2448 
2449 	if (bootverbose)
2450 		device_printf(dev, "driver added\n");
2451 	DEVICE_IDENTIFY(driver, dev);
2452 	device_get_children(dev, &devlist, &numdevs);
2453 	for (i = 0; i < numdevs; i++) {
2454 		child = devlist[i];
2455 		if (device_get_state(child) != DS_NOTPRESENT)
2456 			continue;
2457 		dinfo = device_get_ivars(child);
2458 		pci_print_verbose(dinfo);
2459 		if (bootverbose)
2460 			printf("pci%d:%d:%d: reprobing on driver added\n",
2461 			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2462 		pci_cfg_restore(child, dinfo);
2463 		if (device_probe_and_attach(child) != 0)
2464 			pci_cfg_save(child, dinfo, 1);
2465 	}
2466 	free(devlist, M_TEMP);
2467 }
2468 
/*
 * Print a one-line description of a child device and its resources
 * during attach.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
2494 
/*
 * Class/subclass description table used by pci_probe_nomatch() when a
 * device is not in the loaded vendor database.  A subclass of -1 is the
 * fallback description for the whole class; the list is terminated by a
 * NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2581 
2582 void
2583 pci_probe_nomatch(device_t dev, device_t child)
2584 {
2585 	int	i;
2586 	char	*cp, *scp, *device;
2587 
2588 	/*
2589 	 * Look for a listing for this device in a loaded device database.
2590 	 */
2591 	if ((device = pci_describe_device(child)) != NULL) {
2592 		device_printf(dev, "<%s>", device);
2593 		free(device, M_DEVBUF);
2594 	} else {
2595 		/*
2596 		 * Scan the class/subclass descriptions for a general
2597 		 * description.
2598 		 */
2599 		cp = "unknown";
2600 		scp = NULL;
2601 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2602 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2603 				if (pci_nomatch_tab[i].subclass == -1) {
2604 					cp = pci_nomatch_tab[i].desc;
2605 				} else if (pci_nomatch_tab[i].subclass ==
2606 				    pci_get_subclass(child)) {
2607 					scp = pci_nomatch_tab[i].desc;
2608 				}
2609 			}
2610 		}
2611 		device_printf(dev, "<%s%s%s>",
2612 		    cp ? cp : "",
2613 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2614 		    scp ? scp : "");
2615 	}
2616 	printf(" at device %d.%d (no driver attached)\n",
2617 	    pci_get_slot(child), pci_get_function(child));
2618 	if (pci_do_power_nodriver)
2619 		pci_cfg_save(child,
2620 		    (struct pci_devinfo *) device_get_ivars(child), 1);
2621 	return;
2622 }
2623 
2624 /*
2625  * Parse the PCI device database, if loaded, and return a pointer to a
2626  * description of the device.
2627  *
2628  * The database is flat text formatted as follows:
2629  *
2630  * Any line not in a valid format is ignored.
2631  * Lines are terminated with newline '\n' characters.
2632  *
2633  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2634  * the vendor name.
2635  *
2636  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2637  * - devices cannot be listed without a corresponding VENDOR line.
2638  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2639  * another TAB, then the device name.
2640  */
2641 
2642 /*
2643  * Assuming (ptr) points to the beginning of a line in the database,
2644  * return the vendor or device and description of the next entry.
2645  * The value of (vendor) or (device) inappropriate for the entry type
2646  * is set to -1.  Returns nonzero at the end of the database.
2647  *
 * Note that this is somewhat fragile in the face of corrupt data;
2649  * we attempt to safeguard against this by spamming the end of the
2650  * database with a newline when we initialise.
2651  */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Flag both fields invalid; the match below sets exactly one. */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the database from the cursor. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? (no leading TAB) */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? (leading TAB) */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
2697 
/*
 * Return a malloc'ed (M_DEVBUF) "vendor, device" description of the
 * given device from the loaded database, or NULL if none is available.
 * The caller owns the returned string and must free it.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: stop searching. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back on the numeric device id when no name matched. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
2750 
/*
 * Bus interface: read a PCI instance variable for a child device.
 * Most values come straight from the cached config registers.
 * Returns 0 on success, EINVAL/ENOENT on unsupported ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2830 
/*
 * Bus interface: write a PCI instance variable for a child device.
 * Only the interrupt pin is writable; identification ivars are
 * read-only and return EINVAL.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
2862 
2863 
2864 #include "opt_ddb.h"
2865 #ifdef DDB
2866 #include <ddb/ddb.h>
2867 #include <sys/cons.h>
2868 
2869 /*
2870  * List resources based on pci map registers, used for within ddb
2871  */
2872 
/* DDB "show pciregs": dump id/class info for every known PCI device. */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Driverless devices print as "none<n>". */
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2912 #endif /* DDB */
2913 
/*
 * Lazily allocate a resource for a BAR that was not added to the
 * resource list at enumeration time.  Sizes the BAR, sanity checks the
 * requested type against it, allocates from the parent, and programs
 * the allocated address into the BAR.  Returns NULL on any failure
 * (the original BAR contents are written back in that case).
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	if (pci_maptype(testval) & PCI_MAPMEM) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* The allocation must be naturally aligned for the BAR. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Restore (or program) the BAR, including the 64-bit upper half. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3002 
3003 
/*
 * BUS_ALLOC_RESOURCE method for the PCI bus.  For direct children this
 * performs lazy resource assignment: interrupts may be routed on first
 * use, BAR resources are sized/allocated on demand via pci_alloc_map(),
 * and previously reserved entries are handed back (activating them if
 * the caller asked for RF_ACTIVE).  Everything else falls through to
 * the generic resource-list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids that map to real BARs get I/O enabled. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			/* No existing entry: size and allocate the BAR now. */
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return NULL;
			return (rle->res);
		}
	}
	/* Default path: let the generic resource-list code handle it. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3080 
3081 void
3082 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3083 {
3084 	struct pci_devinfo *dinfo;
3085 	struct resource_list *rl;
3086 	struct resource_list_entry *rle;
3087 
3088 	if (device_get_parent(child) != dev)
3089 		return;
3090 
3091 	dinfo = device_get_ivars(child);
3092 	rl = &dinfo->resources;
3093 	rle = resource_list_find(rl, type, rid);
3094 	if (rle) {
3095 		if (rle->res) {
3096 			if (rman_get_device(rle->res) != dev ||
3097 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3098 				device_printf(dev, "delete_resource: "
3099 				    "Resource still owned by child, oops. "
3100 				    "(type=%d, rid=%d, addr=%lx)\n",
3101 				    rle->type, rle->rid,
3102 				    rman_get_start(rle->res));
3103 				return;
3104 			}
3105 			bus_release_resource(dev, type, rid, rle->res);
3106 		}
3107 		resource_list_delete(rl, type, rid);
3108 	}
3109 	/*
3110 	 * Why do we turn off the PCI configuration BAR when we delete a
3111 	 * resource? -- imp
3112 	 */
3113 	pci_write_config(child, rid, 0, 4);
3114 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3115 }
3116 
3117 struct resource_list *
3118 pci_get_resource_list (device_t dev, device_t child)
3119 {
3120 	struct pci_devinfo *dinfo = device_get_ivars(child);
3121 
3122 	return (&dinfo->resources);
3123 }
3124 
3125 uint32_t
3126 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3127 {
3128 	struct pci_devinfo *dinfo = device_get_ivars(child);
3129 	pcicfgregs *cfg = &dinfo->cfg;
3130 
3131 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3132 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3133 }
3134 
3135 void
3136 pci_write_config_method(device_t dev, device_t child, int reg,
3137     uint32_t val, int width)
3138 {
3139 	struct pci_devinfo *dinfo = device_get_ivars(child);
3140 	pcicfgregs *cfg = &dinfo->cfg;
3141 
3142 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3143 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3144 }
3145 
3146 int
3147 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3148     size_t buflen)
3149 {
3150 
3151 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3152 	    pci_get_function(child));
3153 	return (0);
3154 }
3155 
3156 int
3157 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3158     size_t buflen)
3159 {
3160 	struct pci_devinfo *dinfo;
3161 	pcicfgregs *cfg;
3162 
3163 	dinfo = device_get_ivars(child);
3164 	cfg = &dinfo->cfg;
3165 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3166 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3167 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3168 	    cfg->progif);
3169 	return (0);
3170 }
3171 
3172 int
3173 pci_assign_interrupt_method(device_t dev, device_t child)
3174 {
3175 	struct pci_devinfo *dinfo = device_get_ivars(child);
3176 	pcicfgregs *cfg = &dinfo->cfg;
3177 
3178 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3179 	    cfg->intpin));
3180 }
3181 
3182 static int
3183 pci_modevent(module_t mod, int what, void *arg)
3184 {
3185 	static struct cdev *pci_cdev;
3186 
3187 	switch (what) {
3188 	case MOD_LOAD:
3189 		STAILQ_INIT(&pci_devq);
3190 		pci_generation = 0;
3191 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3192 		    "pci");
3193 		pci_load_vendor_data();
3194 		break;
3195 
3196 	case MOD_UNLOAD:
3197 		destroy_dev(pci_cdev);
3198 		break;
3199 	}
3200 
3201 	return (0);
3202 }
3203 
/*
 * Restore the config-space registers that pci_cfg_save() cached,
 * typically after a suspend/resume or power-state transition.  Power is
 * raised to D0 first, since lower states reset BARs and other registers.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Reprogram the BARs and the writable type-0 header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3250 
/*
 * Snapshot the writable type-0 header registers into the devinfo cache
 * so pci_cfg_restore() can put them back later, and — when setstate is
 * non-zero and the pci_do_power_nodriver policy allows it — place the
 * device in the D3 power state.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;	/* PCI base class, used for the power-down policy */
	int ps;		/* current power state */

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Deliberate fallthrough: each level powers down strictly more. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3334