xref: /freebsd/sys/dev/pci/pci.c (revision f0a75d274af375d15b97b830966b99a02b7db911)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
/*
 * Forward declarations for the static helpers defined later in this file.
 * NOTE(review): several prototypes use "unsigned" where the definitions
 * below use uint32_t (e.g. pci_mapbase, pci_mapsize) -- identical on the
 * supported platforms, but worth unifying.
 */
static uint32_t		pci_mapbase(unsigned mapreg);
static const char	*pci_maptype(unsigned mapreg);
static int		pci_mapsize(unsigned testval);
static int		pci_maprange(unsigned mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg);
/* Disabled write-side VPD helper; see the #if 0 body below. */
#if 0
static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static int		pci_msi_blacklisted(void);
105 
/*
 * newbus method dispatch table for the PCI bus driver: device lifecycle,
 * generic bus resource plumbing, and the PCI-specific kobj interface
 * (config-space access, power states, VPD, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminator entry -- required by the kobj method table format. */
	{ 0, 0 }
};

DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Register the "pci" driver as a child of "pcib" (PCI bridge) devices. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* In-memory copy of the vendor description database, loaded on demand. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
166 
167 
/*
 * Per-device quirk table entry, keyed by the combined 32-bit
 * device-ID/vendor-ID value read from config space.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* Zero devid terminates the table. */
	{ 0 }
};
211 
/* map register information -- flag bits describing a decoded BAR */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI functions (populated by pci_read_device). */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped on every list change */
uint32_t pci_numdevs = 0;
/* Set while parsing capabilities when a PCI-X / PCIe root is detected. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_vpd = 1;
TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
    "Enable support for VPD.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267 
268 /* Find a device_t by bus/slot/function */
269 
270 device_t
271 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272 {
273 	struct pci_devinfo *dinfo;
274 
275 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276 		if ((dinfo->cfg.bus == bus) &&
277 		    (dinfo->cfg.slot == slot) &&
278 		    (dinfo->cfg.func == func)) {
279 			return (dinfo->cfg.dev);
280 		}
281 	}
282 
283 	return (NULL);
284 }
285 
286 /* Find a device_t by vendor/device ID */
287 
288 device_t
289 pci_find_device(uint16_t vendor, uint16_t device)
290 {
291 	struct pci_devinfo *dinfo;
292 
293 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294 		if ((dinfo->cfg.vendor == vendor) &&
295 		    (dinfo->cfg.device == device)) {
296 			return (dinfo->cfg.dev);
297 		}
298 	}
299 
300 	return (NULL);
301 }
302 
303 /* return base address of memory or port map */
304 
305 static uint32_t
306 pci_mapbase(uint32_t mapreg)
307 {
308 
309 	if (PCI_BAR_MEM(mapreg))
310 		return (mapreg & PCIM_BAR_MEM_BASE);
311 	else
312 		return (mapreg & PCIM_BAR_IO_BASE);
313 }
314 
315 /* return map type of memory or port map */
316 
317 static const char *
318 pci_maptype(unsigned mapreg)
319 {
320 
321 	if (PCI_BAR_IO(mapreg))
322 		return ("I/O Port");
323 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
324 		return ("Prefetchable Memory");
325 	return ("Memory");
326 }
327 
328 /* return log2 of map size decoded for memory or port map */
329 
330 static int
331 pci_mapsize(uint32_t testval)
332 {
333 	int ln2size;
334 
335 	testval = pci_mapbase(testval);
336 	ln2size = 0;
337 	if (testval != 0) {
338 		while ((testval & 1) == 0)
339 		{
340 			ln2size++;
341 			testval >>= 1;
342 		}
343 	}
344 	return (ln2size);
345 }
346 
347 /* return log2 of address range supported by map register */
348 
349 static int
350 pci_maprange(unsigned mapreg)
351 {
352 	int ln2range = 0;
353 
354 	if (PCI_BAR_IO(mapreg))
355 		ln2range = 32;
356 	else
357 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
358 		case PCIM_BAR_MEM_32:
359 			ln2range = 32;
360 			break;
361 		case PCIM_BAR_MEM_1MB:
362 			ln2range = 20;
363 			break;
364 		case PCIM_BAR_MEM_64:
365 			ln2range = 64;
366 			break;
367 		}
368 	return (ln2range);
369 }
370 
371 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
372 
373 static void
374 pci_fixancient(pcicfgregs *cfg)
375 {
376 	if (cfg->hdrtype != 0)
377 		return;
378 
379 	/* PCI to PCI bridges use header type 1 */
380 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
381 		cfg->hdrtype = 1;
382 }
383 
384 /* extract header type specific config data */
385 
386 static void
387 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
388 {
389 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
390 	switch (cfg->hdrtype) {
391 	case 0:
392 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
393 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
394 		cfg->nummaps	    = PCI_MAXMAPS_0;
395 		break;
396 	case 1:
397 		cfg->nummaps	    = PCI_MAXMAPS_1;
398 		break;
399 	case 2:
400 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
401 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
402 		cfg->nummaps	    = PCI_MAXMAPS_2;
403 		break;
404 	}
405 #undef REG
406 }
407 
/*
 * Read the configuration header of function b/s/f into a freshly
 * allocated pci_devinfo of the given size (callers may allocate a
 * larger, derived structure), link it onto the global device list, and
 * fill in the pci_conf snapshot.  Returns NULL if no device responds
 * at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device readback means nothing is present. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		/*
		 * NOTE(review): malloc(9) with M_WAITOK cannot return
		 * NULL, so this check is dead code -- harmless, though.
		 */
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard (type-independent) header fields. */
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Separate the multi-function bit from the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Fill the pciio(4) conf record used by userland ioctls. */
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
480 
481 static void
482 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
483 {
484 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
485 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
486 #if defined(__i386__) || defined(__amd64__)
487 	uint64_t addr;
488 #endif
489 	uint32_t val;
490 	int	ptr, nextptr, ptrptr;
491 
492 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
493 	case 0:
494 	case 1:
495 		ptrptr = PCIR_CAP_PTR;
496 		break;
497 	case 2:
498 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
499 		break;
500 	default:
501 		return;		/* no extended capabilities support */
502 	}
503 	nextptr = REG(ptrptr, 1);	/* sanity check? */
504 
505 	/*
506 	 * Read capability entries.
507 	 */
508 	while (nextptr != 0) {
509 		/* Sanity check */
510 		if (nextptr > 255) {
511 			printf("illegal PCI extended capability offset %d\n",
512 			    nextptr);
513 			return;
514 		}
515 		/* Find the next entry */
516 		ptr = nextptr;
517 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
518 
519 		/* Process this entry */
520 		switch (REG(ptr + PCICAP_ID, 1)) {
521 		case PCIY_PMG:		/* PCI power management */
522 			if (cfg->pp.pp_cap == 0) {
523 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
524 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
525 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
526 				if ((nextptr - ptr) > PCIR_POWER_DATA)
527 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
528 			}
529 			break;
530 #if defined(__i386__) || defined(__amd64__)
531 		case PCIY_HT:		/* HyperTransport */
532 			/* Determine HT-specific capability type. */
533 			val = REG(ptr + PCIR_HT_COMMAND, 2);
534 			switch (val & PCIM_HTCMD_CAP_MASK) {
535 			case PCIM_HTCAP_MSI_MAPPING:
536 				/* Sanity check the mapping window. */
537 				addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
538 				addr <<= 32;
539 				addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
540 				if (addr != MSI_INTEL_ADDR_BASE)
541 					device_printf(pcib,
542 		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
543 					    cfg->bus, cfg->slot, cfg->func,
544 					    (long long)addr);
545 
546 				/* Enable MSI -> HT mapping. */
547 				val |= PCIM_HTCMD_MSI_ENABLE;
548 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
549 				break;
550 			}
551 			break;
552 #endif
553 		case PCIY_MSI:		/* PCI MSI */
554 			cfg->msi.msi_location = ptr;
555 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
556 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
557 						     PCIM_MSICTRL_MMC_MASK)>>1);
558 			break;
559 		case PCIY_MSIX:		/* PCI MSI-X */
560 			cfg->msix.msix_location = ptr;
561 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
562 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
563 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
564 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
565 			cfg->msix.msix_table_bar = PCIR_BAR(val &
566 			    PCIM_MSIX_BIR_MASK);
567 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
568 			val = REG(ptr + PCIR_MSIX_PBA, 4);
569 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
570 			    PCIM_MSIX_BIR_MASK);
571 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
572 			break;
573 		case PCIY_VPD:		/* PCI Vital Product Data */
574 			cfg->vpd.vpd_reg = ptr;
575 			break;
576 		case PCIY_SUBVENDOR:
577 			/* Should always be true. */
578 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
579 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
580 				cfg->subvendor = val & 0xffff;
581 				cfg->subdevice = val >> 16;
582 			}
583 			break;
584 		case PCIY_PCIX:		/* PCI-X */
585 			/*
586 			 * Assume we have a PCI-X chipset if we have
587 			 * at least one PCI-PCI bridge with a PCI-X
588 			 * capability.  Note that some systems with
589 			 * PCI-express or HT chipsets might match on
590 			 * this check as well.
591 			 */
592 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
593 				pcix_chipset = 1;
594 			break;
595 		case PCIY_EXPRESS:	/* PCI-express */
596 			/*
597 			 * Assume we have a PCI-express chipset if we have
598 			 * at least one PCI-express root port.
599 			 */
600 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
601 			if ((val & PCIM_EXP_FLAGS_TYPE) ==
602 			    PCIM_EXP_TYPE_ROOT_PORT)
603 				pcie_chipset = 1;
604 			break;
605 		default:
606 			break;
607 		}
608 	}
609 /* REG and WREG use carry through to next functions */
610 }
611 
612 /*
613  * PCI Vital Product Data
614  */
615 static uint32_t
616 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
617 {
618 
619 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
620 
621 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
622 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
623 		DELAY(1);	/* limit looping */
624 
625 	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
626 }
627 
/* Disabled write-side counterpart of pci_read_vpd_reg(); kept for reference. */
#if 0
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Writing the address with bit 15 set starts a write cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Bit 15 clears when the write completes; unbounded poll. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
642 
/* Streaming reader state for parsing VPD one byte at a time. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
651 
652 static uint8_t
653 vpd_nextbyte(struct vpd_readstate *vrs)
654 {
655 	uint8_t byte;
656 
657 	if (vrs->bytesinval == 0) {
658 		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
659 		    vrs->off));
660 		vrs->off += 4;
661 		byte = vrs->val & 0xff;
662 		vrs->bytesinval = 3;
663 	} else {
664 		vrs->val = vrs->val >> 8;
665 		byte = vrs->val & 0xff;
666 		vrs->bytesinval--;
667 	}
668 
669 	vrs->cksum += byte;
670 	return (byte);
671 }
672 
673 static void
674 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
675 {
676 	struct vpd_readstate vrs;
677 	int state;
678 	int name;
679 	int remain;
680 	int end;
681 	int i;
682 	uint8_t byte;
683 	int alloc, off;		/* alloc/off for RO/W arrays */
684 	int cksumvalid;
685 	int dflen;
686 
687 	if (!pci_do_vpd) {
688 		cfg->vpd.vpd_cached = 1;
689 		return;
690 	}
691 
692 	/* init vpd reader */
693 	vrs.bytesinval = 0;
694 	vrs.off = 0;
695 	vrs.pcib = pcib;
696 	vrs.cfg = cfg;
697 	vrs.cksum = 0;
698 
699 	state = 0;
700 	name = remain = i = 0;	/* shut up stupid gcc */
701 	alloc = off = 0;	/* shut up stupid gcc */
702 	dflen = 0;		/* shut up stupid gcc */
703 	end = 0;
704 	cksumvalid = -1;
705 	for (; !end;) {
706 		byte = vpd_nextbyte(&vrs);
707 #if 0
708 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
709 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
710 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
711 #endif
712 		switch (state) {
713 		case 0:		/* item name */
714 			if (byte & 0x80) {
715 				remain = vpd_nextbyte(&vrs);
716 				remain |= vpd_nextbyte(&vrs) << 8;
717 				if (remain > (0x7f*4 - vrs.off)) {
718 					end = 1;
719 					printf(
720 			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
721 					    cfg->bus, cfg->slot, cfg->func,
722 					    remain);
723 				}
724 				name = byte & 0x7f;
725 			} else {
726 				remain = byte & 0x7;
727 				name = (byte >> 3) & 0xf;
728 			}
729 			switch (name) {
730 			case 0x2:	/* String */
731 				cfg->vpd.vpd_ident = malloc(remain + 1,
732 				    M_DEVBUF, M_WAITOK);
733 				i = 0;
734 				state = 1;
735 				break;
736 			case 0xf:	/* End */
737 				end = 1;
738 				state = -1;
739 				break;
740 			case 0x10:	/* VPD-R */
741 				alloc = 8;
742 				off = 0;
743 				cfg->vpd.vpd_ros = malloc(alloc *
744 				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
745 				    M_WAITOK);
746 				state = 2;
747 				break;
748 			case 0x11:	/* VPD-W */
749 				alloc = 8;
750 				off = 0;
751 				cfg->vpd.vpd_w = malloc(alloc *
752 				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
753 				    M_WAITOK);
754 				state = 5;
755 				break;
756 			default:	/* Invalid data, abort */
757 				end = 1;
758 				continue;
759 			}
760 			break;
761 
762 		case 1:	/* Identifier String */
763 			cfg->vpd.vpd_ident[i++] = byte;
764 			remain--;
765 			if (remain == 0)  {
766 				cfg->vpd.vpd_ident[i] = '\0';
767 				state = 0;
768 			}
769 			break;
770 
771 		case 2:	/* VPD-R Keyword Header */
772 			if (off == alloc) {
773 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
774 				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
775 				    M_DEVBUF, M_WAITOK);
776 			}
777 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
778 			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
779 			dflen = vpd_nextbyte(&vrs);
780 			if (dflen == 0 &&
781 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
782 			    2) == 0) {
783 				/*
784 				 * if this happens, we can't trust the rest
785 				 * of the VPD.
786 				 */
787 				printf("pci%d:%d:%d: bad keyword length: %d\n",
788 				    cfg->bus, cfg->slot, cfg->func, dflen);
789 				cksumvalid = 0;
790 				end = 1;
791 				break;
792 			} else if (dflen == 0) {
793 				cfg->vpd.vpd_ros[off].value = malloc(1 *
794 				    sizeof *cfg->vpd.vpd_ros[off].value,
795 				    M_DEVBUF, M_WAITOK);
796 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
797 			} else
798 				cfg->vpd.vpd_ros[off].value = malloc(
799 				    (dflen + 1) *
800 				    sizeof *cfg->vpd.vpd_ros[off].value,
801 				    M_DEVBUF, M_WAITOK);
802 			remain -= 3;
803 			i = 0;
804 			/* keep in sync w/ state 3's transistions */
805 			if (dflen == 0 && remain == 0)
806 				state = 0;
807 			else if (dflen == 0)
808 				state = 2;
809 			else
810 				state = 3;
811 			break;
812 
813 		case 3:	/* VPD-R Keyword Value */
814 			cfg->vpd.vpd_ros[off].value[i++] = byte;
815 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
816 			    "RV", 2) == 0 && cksumvalid == -1) {
817 				if (vrs.cksum == 0)
818 					cksumvalid = 1;
819 				else {
820 					printf(
821 				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
822 					    cfg->bus, cfg->slot, cfg->func,
823 					    vrs.cksum);
824 					cksumvalid = 0;
825 					end = 1;
826 					break;
827 				}
828 			}
829 			dflen--;
830 			remain--;
831 			/* keep in sync w/ state 2's transistions */
832 			if (dflen == 0)
833 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
834 			if (dflen == 0 && remain == 0) {
835 				cfg->vpd.vpd_rocnt = off;
836 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
837 				    off * sizeof *cfg->vpd.vpd_ros,
838 				    M_DEVBUF, M_WAITOK);
839 				state = 0;
840 			} else if (dflen == 0)
841 				state = 2;
842 			break;
843 
844 		case 4:
845 			remain--;
846 			if (remain == 0)
847 				state = 0;
848 			break;
849 
850 		case 5:	/* VPD-W Keyword Header */
851 			if (off == alloc) {
852 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
853 				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
854 				    M_DEVBUF, M_WAITOK);
855 			}
856 			cfg->vpd.vpd_w[off].keyword[0] = byte;
857 			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
858 			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
859 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
860 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
861 			    sizeof *cfg->vpd.vpd_w[off].value,
862 			    M_DEVBUF, M_WAITOK);
863 			remain -= 3;
864 			i = 0;
865 			/* keep in sync w/ state 6's transistions */
866 			if (dflen == 0 && remain == 0)
867 				state = 0;
868 			else if (dflen == 0)
869 				state = 5;
870 			else
871 				state = 6;
872 			break;
873 
874 		case 6:	/* VPD-W Keyword Value */
875 			cfg->vpd.vpd_w[off].value[i++] = byte;
876 			dflen--;
877 			remain--;
878 			/* keep in sync w/ state 5's transistions */
879 			if (dflen == 0)
880 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
881 			if (dflen == 0 && remain == 0) {
882 				cfg->vpd.vpd_wcnt = off;
883 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
884 				    off * sizeof *cfg->vpd.vpd_w,
885 				    M_DEVBUF, M_WAITOK);
886 				state = 0;
887 			} else if (dflen == 0)
888 				state = 5;
889 			break;
890 
891 		default:
892 			printf("pci%d:%d:%d: invalid state: %d\n",
893 			    cfg->bus, cfg->slot, cfg->func, state);
894 			end = 1;
895 			break;
896 		}
897 	}
898 
899 	if (cksumvalid == 0) {
900 		/* read-only data bad, clean up */
901 		for (; off; off--)
902 			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
903 
904 		free(cfg->vpd.vpd_ros, M_DEVBUF);
905 		cfg->vpd.vpd_ros = NULL;
906 	}
907 	cfg->vpd.vpd_cached = 1;
908 #undef REG
909 #undef WREG
910 }
911 
912 int
913 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
914 {
915 	struct pci_devinfo *dinfo = device_get_ivars(child);
916 	pcicfgregs *cfg = &dinfo->cfg;
917 
918 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
919 		pci_read_vpd(device_get_parent(dev), cfg);
920 
921 	*identptr = cfg->vpd.vpd_ident;
922 
923 	if (*identptr == NULL)
924 		return (ENXIO);
925 
926 	return (0);
927 }
928 
929 int
930 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
931 	const char **vptr)
932 {
933 	struct pci_devinfo *dinfo = device_get_ivars(child);
934 	pcicfgregs *cfg = &dinfo->cfg;
935 	int i;
936 
937 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
938 		pci_read_vpd(device_get_parent(dev), cfg);
939 
940 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
941 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
942 		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
943 			*vptr = cfg->vpd.vpd_ros[i].value;
944 		}
945 
946 	if (i != cfg->vpd.vpd_rocnt)
947 		return (0);
948 
949 	*vptr = NULL;
950 	return (ENXIO);
951 }
952 
953 /*
954  * Return the offset in configuration space of the requested extended
955  * capability entry or 0 if the specified capability was not found.
956  */
957 int
958 pci_find_extcap_method(device_t dev, device_t child, int capability,
959     int *capreg)
960 {
961 	struct pci_devinfo *dinfo = device_get_ivars(child);
962 	pcicfgregs *cfg = &dinfo->cfg;
963 	u_int32_t status;
964 	u_int8_t ptr;
965 
966 	/*
967 	 * Check the CAP_LIST bit of the PCI status register first.
968 	 */
969 	status = pci_read_config(child, PCIR_STATUS, 2);
970 	if (!(status & PCIM_STATUS_CAPPRESENT))
971 		return (ENXIO);
972 
973 	/*
974 	 * Determine the start pointer of the capabilities list.
975 	 */
976 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
977 	case 0:
978 	case 1:
979 		ptr = PCIR_CAP_PTR;
980 		break;
981 	case 2:
982 		ptr = PCIR_CAP_PTR_2;
983 		break;
984 	default:
985 		/* XXX: panic? */
986 		return (ENXIO);		/* no extended capabilities support */
987 	}
988 	ptr = pci_read_config(child, ptr, 1);
989 
990 	/*
991 	 * Traverse the capabilities list.
992 	 */
993 	while (ptr != 0) {
994 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
995 			if (capreg != NULL)
996 				*capreg = ptr;
997 			return (0);
998 		}
999 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1000 	}
1001 
1002 	return (ENOENT);
1003 }
1004 
1005 /*
1006  * Support for MSI-X message interrupts.
1007  */
1008 void
1009 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1010 {
1011 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1012 	pcicfgregs *cfg = &dinfo->cfg;
1013 	uint32_t offset;
1014 
1015 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1016 	offset = cfg->msix.msix_table_offset + index * 16;
1017 	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
1018 	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
1019 	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
1020 }
1021 
1022 void
1023 pci_mask_msix(device_t dev, u_int index)
1024 {
1025 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1026 	pcicfgregs *cfg = &dinfo->cfg;
1027 	uint32_t offset, val;
1028 
1029 	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
1030 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1031 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1032 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1033 		val |= PCIM_MSIX_VCTRL_MASK;
1034 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1035 	}
1036 }
1037 
1038 void
1039 pci_unmask_msix(device_t dev, u_int index)
1040 {
1041 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1042 	pcicfgregs *cfg = &dinfo->cfg;
1043 	uint32_t offset, val;
1044 
1045 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1046 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1047 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1048 	if (val & PCIM_MSIX_VCTRL_MASK) {
1049 		val &= ~PCIM_MSIX_VCTRL_MASK;
1050 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1051 	}
1052 }
1053 
1054 int
1055 pci_pending_msix(device_t dev, u_int index)
1056 {
1057 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1058 	pcicfgregs *cfg = &dinfo->cfg;
1059 	uint32_t offset, bit;
1060 
1061 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1062 	offset = cfg->msix.msix_pba_offset + (index / 32) * 4;
1063 	bit = 1 << index % 32;
1064 	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1065 }
1066 
1067 /*
1068  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1069  * returned in *count.  After this function returns, each message will be
1070  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1071  */
1072 int
1073 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1074 {
1075 	struct pci_devinfo *dinfo = device_get_ivars(child);
1076 	pcicfgregs *cfg = &dinfo->cfg;
1077 	struct resource_list_entry *rle;
1078 	int actual, error, i, irq, max;
1079 
1080 	/* Don't let count == 0 get us into trouble. */
1081 	if (*count == 0)
1082 		return (EINVAL);
1083 
1084 	/* If rid 0 is allocated, then fail. */
1085 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1086 	if (rle != NULL && rle->res != NULL)
1087 		return (ENXIO);
1088 
1089 	/* Already have allocated messages? */
1090 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1091 		return (ENXIO);
1092 
1093 	/* If MSI is blacklisted for this system, fail. */
1094 	if (pci_msi_blacklisted())
1095 		return (ENXIO);
1096 
1097 	/* MSI-X capability present? */
1098 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1099 		return (ENODEV);
1100 
1101 	/* Make sure the appropriate BARs are mapped. */
1102 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1103 	    cfg->msix.msix_table_bar);
1104 	if (rle == NULL || rle->res == NULL ||
1105 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1106 		return (ENXIO);
1107 	cfg->msix.msix_table_res = rle->res;
1108 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1109 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1110 		    cfg->msix.msix_pba_bar);
1111 		if (rle == NULL || rle->res == NULL ||
1112 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1113 			return (ENXIO);
1114 	}
1115 	cfg->msix.msix_pba_res = rle->res;
1116 
1117 	if (bootverbose)
1118 		device_printf(child,
1119 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1120 		    *count, cfg->msix.msix_msgnum);
1121 	max = min(*count, cfg->msix.msix_msgnum);
1122 	for (i = 0; i < max; i++) {
1123 		/* Allocate a message. */
1124 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
1125 		    &irq);
1126 		if (error)
1127 			break;
1128 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1129 		    irq, 1);
1130 	}
1131 	actual = i;
1132 
1133 	if (bootverbose) {
1134 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1135 		if (actual == 1)
1136 			device_printf(child, "using IRQ %lu for MSI-X\n",
1137 			    rle->start);
1138 		else {
1139 			int run;
1140 
1141 			/*
1142 			 * Be fancy and try to print contiguous runs of
1143 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1144 			 * 'run' is true if we are in a range.
1145 			 */
1146 			device_printf(child, "using IRQs %lu", rle->start);
1147 			irq = rle->start;
1148 			run = 0;
1149 			for (i = 1; i < actual; i++) {
1150 				rle = resource_list_find(&dinfo->resources,
1151 				    SYS_RES_IRQ, i + 1);
1152 
1153 				/* Still in a run? */
1154 				if (rle->start == irq + 1) {
1155 					run = 1;
1156 					irq++;
1157 					continue;
1158 				}
1159 
1160 				/* Finish previous range. */
1161 				if (run) {
1162 					printf("-%d", irq);
1163 					run = 0;
1164 				}
1165 
1166 				/* Start new range. */
1167 				printf(",%lu", rle->start);
1168 				irq = rle->start;
1169 			}
1170 
1171 			/* Unfinished range? */
1172 			if (run)
1173 				printf("-%d", irq);
1174 			printf(" for MSI-X\n");
1175 		}
1176 	}
1177 
1178 	/* Mask all vectors. */
1179 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1180 		pci_mask_msix(child, i);
1181 
1182 	/* Update control register to enable MSI-X. */
1183 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1184 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1185 	    cfg->msix.msix_ctrl, 2);
1186 
1187 	/* Update counts of alloc'd messages. */
1188 	cfg->msix.msix_alloc = actual;
1189 	*count = actual;
1190 	return (0);
1191 }
1192 
1193 /*
1194  * By default, pci_alloc_msix() will assign the allocated IRQ resources to
1195  * the first N messages in the MSI-X table.  However, device drivers may
1196  * want to use different layouts in the case that they do not allocate a
1197  * full table.  This method allows the driver to specify what layout it
1198  * wants.  It must be called after a successful pci_alloc_msix() but
1199  * before any of the associated SYS_RES_IRQ resources are allocated via
1200  * bus_alloc_resource().  The 'indices' array contains N (where N equals
1201  * the 'count' returned from pci_alloc_msix()) message indices.  The
1202  * indices are 1-based (meaning the first message is at index 1).  On
1203  * successful return, each of the messages in the 'indices' array will
1204  * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
1205  * if indices contains { 2, 4 }, then upon successful return, the 'child'
1206  * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
1207  */
int
pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, error, i, j, *irqs;

	/* Sanity check the indices: 1-based and within the message count. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
			return (EINVAL);

	/* Check for duplicates. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
			if (indices[i] == indices[j])
				return (EINVAL);

	/*
	 * Make sure none of the resources are allocated.  The rids are
	 * not necessarily contiguous, so scan until msix_alloc entries
	 * have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Save the IRQ values and free the existing resources. */
	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		irqs[count] = rle->start;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Map the IRQ values to the new message indices and rids. */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
		    irqs[i], irqs[i], 1);

		/*
		 * The indices in the backend code (PCIB_* methods and the
		 * MI helper routines for MD code such as pci_enable_msix())
		 * are all zero-based.  However, the indices passed to this
		 * function are 1-based so that they correspond 1:1 with the
		 * SYS_RES_IRQ resource IDs.
		 */
		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
		    indices[i] - 1, irqs[i]);
		KASSERT(error == 0, ("Failed to remap MSI-X message"));
	}
	if (bootverbose) {
		if (cfg->msix.msix_alloc == 1)
			device_printf(child,
			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
		else {
			device_printf(child, "Remapped MSI-X IRQs to indices");
			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
				printf(" %d,", indices[i]);
			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
		}
	}
	free(irqs, M_TEMP);

	return (0);
}
1279 
/*
 * Disable MSI-X and return all allocated messages to the parent
 * bridge.  Returns ENODEV if no MSI-X messages are allocated, EBUSY if
 * any of the SYS_RES_IRQ resources are still held by the driver, and 0
 * on success.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/*
	 * Make sure none of the resources are allocated.  The rids are
	 * not necessarily contiguous (see pci_remap_msix_method()), so
	 * scan until msix_alloc entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1322 
1323 /*
1324  * Return the max supported MSI-X messages this device supports.
1325  * Basically, assuming the MD code can alloc messages, this function
1326  * should return the maximum value that pci_alloc_msix() can return.
1327  * Thus, it is subject to the tunables, etc.
1328  */
1329 int
1330 pci_msix_count_method(device_t dev, device_t child)
1331 {
1332 	struct pci_devinfo *dinfo = device_get_ivars(child);
1333 	pcicfgregs *cfg = &dinfo->cfg;
1334 
1335 	if (pci_do_msix && cfg->msix.msix_location != 0)
1336 		return (cfg->msix.msix_msgnum);
1337 	return (0);
1338 }
1339 
1340 /*
1341  * Support for MSI message signalled interrupts.
1342  */
1343 void
1344 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1345 {
1346 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1347 	pcicfgregs *cfg = &dinfo->cfg;
1348 
1349 	/* Write data and address values. */
1350 	cfg->msi.msi_addr = address;
1351 	cfg->msi.msi_data = data;
1352 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1353 	    address & 0xffffffff, 4);
1354 	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1355 		pci_write_config(dev, cfg->msi.msi_location +
1356 		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1357 		pci_write_config(dev, cfg->msi.msi_location +
1358 		    PCIR_MSI_DATA_64BIT, data, 2);
1359 	} else
1360 		pci_write_config(dev, cfg->msi.msi_location +
1361 		    PCIR_MSI_DATA, data, 2);
1362 
1363 	/* Enable MSI in the control register. */
1364 	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1365 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1366 	    cfg->msi.msi_ctrl, 2);
1367 }
1368 
1369 /*
1370  * Restore MSI registers during resume.  If MSI is enabled then
1371  * restore the data and address registers in addition to the control
1372  * register.
1373  */
1374 static void
1375 pci_resume_msi(device_t dev)
1376 {
1377 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1378 	pcicfgregs *cfg = &dinfo->cfg;
1379 	uint64_t address;
1380 	uint16_t data;
1381 
1382 	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1383 		address = cfg->msi.msi_addr;
1384 		data = cfg->msi.msi_data;
1385 		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
1386 		    address & 0xffffffff, 4);
1387 		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
1388 			pci_write_config(dev, cfg->msi.msi_location +
1389 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1390 			pci_write_config(dev, cfg->msi.msi_location +
1391 			    PCIR_MSI_DATA_64BIT, data, 2);
1392 		} else
1393 			pci_write_config(dev, cfg->msi.msi_location +
1394 			    PCIR_MSI_DATA, data, 2);
1395 	}
1396 	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
1397 	    cfg->msi.msi_ctrl, 2);
1398 }
1399 
1400 /*
1401  * Returns true if the specified device is blacklisted because MSI
1402  * doesn't work.
1403  */
1404 int
1405 pci_msi_device_blacklisted(device_t dev)
1406 {
1407 	struct pci_quirk *q;
1408 
1409 	if (!pci_honor_msi_blacklist)
1410 		return (0);
1411 
1412 	for (q = &pci_quirks[0]; q->devid; q++) {
1413 		if (q->devid == pci_get_devid(dev) &&
1414 		    q->type == PCI_QUIRK_DISABLE_MSI)
1415 			return (1);
1416 	}
1417 	return (0);
1418 }
1419 
1420 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1422  * we just check for blacklisted chipsets as represented by the
1423  * host-PCI bridge at device 0:0:0.  In the future, it may become
1424  * necessary to check other system attributes, such as the kenv values
1425  * that give the motherboard manufacturer and model number.
1426  */
1427 static int
1428 pci_msi_blacklisted(void)
1429 {
1430 	device_t dev;
1431 
1432 	if (!pci_honor_msi_blacklist)
1433 		return (0);
1434 
1435 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1436 	if (!(pcie_chipset || pcix_chipset))
1437 		return (1);
1438 
1439 	dev = pci_find_bsf(0, 0, 0);
1440 	if (dev != NULL)
1441 		return (pci_msi_device_blacklisted(dev));
1442 	return (0);
1443 }
1444 
1445 /*
1446  * Attempt to allocate *count MSI messages.  The actual number allocated is
1447  * returned in *count.  After this function returns, each message will be
1448  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1449  */
1450 int
1451 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1452 {
1453 	struct pci_devinfo *dinfo = device_get_ivars(child);
1454 	pcicfgregs *cfg = &dinfo->cfg;
1455 	struct resource_list_entry *rle;
1456 	int actual, error, i, irqs[32];
1457 	uint16_t ctrl;
1458 
1459 	/* Don't let count == 0 get us into trouble. */
1460 	if (*count == 0)
1461 		return (EINVAL);
1462 
1463 	/* If rid 0 is allocated, then fail. */
1464 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1465 	if (rle != NULL && rle->res != NULL)
1466 		return (ENXIO);
1467 
1468 	/* Already have allocated messages? */
1469 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1470 		return (ENXIO);
1471 
1472 	/* If MSI is blacklisted for this system, fail. */
1473 	if (pci_msi_blacklisted())
1474 		return (ENXIO);
1475 
1476 	/* MSI capability present? */
1477 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1478 		return (ENODEV);
1479 
1480 	if (bootverbose)
1481 		device_printf(child,
1482 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1483 		    *count, cfg->msi.msi_msgnum);
1484 
1485 	/* Don't ask for more than the device supports. */
1486 	actual = min(*count, cfg->msi.msi_msgnum);
1487 
1488 	/* Don't ask for more than 32 messages. */
1489 	actual = min(actual, 32);
1490 
1491 	/* MSI requires power of 2 number of messages. */
1492 	if (!powerof2(actual))
1493 		return (EINVAL);
1494 
1495 	for (;;) {
1496 		/* Try to allocate N messages. */
1497 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1498 		    cfg->msi.msi_msgnum, irqs);
1499 		if (error == 0)
1500 			break;
1501 		if (actual == 1)
1502 			return (error);
1503 
1504 		/* Try N / 2. */
1505 		actual >>= 1;
1506 	}
1507 
1508 	/*
1509 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1510 	 * resources in the irqs[] array, so add new resources
1511 	 * starting at rid 1.
1512 	 */
1513 	for (i = 0; i < actual; i++)
1514 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1515 		    irqs[i], irqs[i], 1);
1516 
1517 	if (bootverbose) {
1518 		if (actual == 1)
1519 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1520 		else {
1521 			int run;
1522 
1523 			/*
1524 			 * Be fancy and try to print contiguous runs
1525 			 * of IRQ values as ranges.  'run' is true if
1526 			 * we are in a range.
1527 			 */
1528 			device_printf(child, "using IRQs %d", irqs[0]);
1529 			run = 0;
1530 			for (i = 1; i < actual; i++) {
1531 
1532 				/* Still in a run? */
1533 				if (irqs[i] == irqs[i - 1] + 1) {
1534 					run = 1;
1535 					continue;
1536 				}
1537 
1538 				/* Finish previous range. */
1539 				if (run) {
1540 					printf("-%d", irqs[i - 1]);
1541 					run = 0;
1542 				}
1543 
1544 				/* Start new range. */
1545 				printf(",%d", irqs[i]);
1546 			}
1547 
1548 			/* Unfinished range? */
1549 			if (run)
1550 				printf("%d", irqs[actual - 1]);
1551 			printf(" for MSI\n");
1552 		}
1553 	}
1554 
1555 	/* Update control register with actual count and enable MSI. */
1556 	ctrl = cfg->msi.msi_ctrl;
1557 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1558 	ctrl |= (ffs(actual) - 1) << 4;
1559 	cfg->msi.msi_ctrl = ctrl;
1560 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1561 
1562 	/* Update counts of alloc'd messages. */
1563 	cfg->msi.msi_alloc = actual;
1564 	*count = actual;
1565 	return (0);
1566 }
1567 
1568 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means no MSI-X messages were allocated. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  Collect the
	 * IRQ values while scanning so they can be handed back below.
	 */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1611 
1612 /*
1613  * Return the max supported MSI messages this device supports.
1614  * Basically, assuming the MD code can alloc messages, this function
1615  * should return the maximum value that pci_alloc_msi() can return.
1616  * Thus, it is subject to the tunables, etc.
1617  */
1618 int
1619 pci_msi_count_method(device_t dev, device_t child)
1620 {
1621 	struct pci_devinfo *dinfo = device_get_ivars(child);
1622 	pcicfgregs *cfg = &dinfo->cfg;
1623 
1624 	if (pci_do_msi && cfg->msi.msi_location != 0)
1625 		return (cfg->msi.msi_msgnum);
1626 	return (0);
1627 }
1628 
1629 /* free pcicfgregs structure and all depending data structures */
1630 
1631 int
1632 pci_freecfg(struct pci_devinfo *dinfo)
1633 {
1634 	struct devlist *devlist_head;
1635 	int i;
1636 
1637 	devlist_head = &pci_devq;
1638 
1639 	if (dinfo->cfg.vpd.vpd_reg) {
1640 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1641 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1642 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1643 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1644 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1645 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1646 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1647 	}
1648 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1649 	free(dinfo, M_DEVBUF);
1650 
1651 	/* increment the generation count */
1652 	pci_generation++;
1653 
1654 	/* we're losing one device */
1655 	pci_numdevs--;
1656 	return (0);
1657 }
1658 
1659 /*
1660  * PCI power manangement
1661  */
1662 int
1663 pci_set_powerstate_method(device_t dev, device_t child, int state)
1664 {
1665 	struct pci_devinfo *dinfo = device_get_ivars(child);
1666 	pcicfgregs *cfg = &dinfo->cfg;
1667 	uint16_t status;
1668 	int result, oldstate, highest, delay;
1669 
1670 	if (cfg->pp.pp_cap == 0)
1671 		return (EOPNOTSUPP);
1672 
1673 	/*
1674 	 * Optimize a no state change request away.  While it would be OK to
1675 	 * write to the hardware in theory, some devices have shown odd
1676 	 * behavior when going from D3 -> D3.
1677 	 */
1678 	oldstate = pci_get_powerstate(child);
1679 	if (oldstate == state)
1680 		return (0);
1681 
1682 	/*
1683 	 * The PCI power management specification states that after a state
1684 	 * transition between PCI power states, system software must
1685 	 * guarantee a minimal delay before the function accesses the device.
1686 	 * Compute the worst case delay that we need to guarantee before we
1687 	 * access the device.  Many devices will be responsive much more
1688 	 * quickly than this delay, but there are some that don't respond
1689 	 * instantly to state changes.  Transitions to/from D3 state require
1690 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1691 	 * is done below with DELAY rather than a sleeper function because
1692 	 * this function can be called from contexts where we cannot sleep.
1693 	 */
1694 	highest = (oldstate > state) ? oldstate : state;
1695 	if (highest == PCI_POWERSTATE_D3)
1696 	    delay = 10000;
1697 	else if (highest == PCI_POWERSTATE_D2)
1698 	    delay = 200;
1699 	else
1700 	    delay = 0;
1701 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1702 	    & ~PCIM_PSTAT_DMASK;
1703 	result = 0;
1704 	switch (state) {
1705 	case PCI_POWERSTATE_D0:
1706 		status |= PCIM_PSTAT_D0;
1707 		break;
1708 	case PCI_POWERSTATE_D1:
1709 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1710 			return (EOPNOTSUPP);
1711 		status |= PCIM_PSTAT_D1;
1712 		break;
1713 	case PCI_POWERSTATE_D2:
1714 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1715 			return (EOPNOTSUPP);
1716 		status |= PCIM_PSTAT_D2;
1717 		break;
1718 	case PCI_POWERSTATE_D3:
1719 		status |= PCIM_PSTAT_D3;
1720 		break;
1721 	default:
1722 		return (EINVAL);
1723 	}
1724 
1725 	if (bootverbose)
1726 		printf(
1727 		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1728 		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1729 		    oldstate, state);
1730 
1731 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1732 	if (delay)
1733 		DELAY(delay);
1734 	return (0);
1735 }
1736 
1737 int
1738 pci_get_powerstate_method(device_t dev, device_t child)
1739 {
1740 	struct pci_devinfo *dinfo = device_get_ivars(child);
1741 	pcicfgregs *cfg = &dinfo->cfg;
1742 	uint16_t status;
1743 	int result;
1744 
1745 	if (cfg->pp.pp_cap != 0) {
1746 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1747 		switch (status & PCIM_PSTAT_DMASK) {
1748 		case PCIM_PSTAT_D0:
1749 			result = PCI_POWERSTATE_D0;
1750 			break;
1751 		case PCIM_PSTAT_D1:
1752 			result = PCI_POWERSTATE_D1;
1753 			break;
1754 		case PCIM_PSTAT_D2:
1755 			result = PCI_POWERSTATE_D2;
1756 			break;
1757 		case PCIM_PSTAT_D3:
1758 			result = PCI_POWERSTATE_D3;
1759 			break;
1760 		default:
1761 			result = PCI_POWERSTATE_UNKNOWN;
1762 			break;
1763 		}
1764 	} else {
1765 		/* No support, device is always at D0 */
1766 		result = PCI_POWERSTATE_D0;
1767 	}
1768 	return (result);
1769 }
1770 
1771 /*
1772  * Some convenience functions for PCI device drivers.
1773  */
1774 
1775 static __inline void
1776 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1777 {
1778 	uint16_t	command;
1779 
1780 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1781 	command |= bit;
1782 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1783 }
1784 
1785 static __inline void
1786 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1787 {
1788 	uint16_t	command;
1789 
1790 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1791 	command &= ~bit;
1792 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1793 }
1794 
/*
 * Enable bus mastering for the child so it may initiate DMA.
 * Always succeeds.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1801 
/*
 * Disable bus mastering for the child.  Always succeeds.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1808 
1809 int
1810 pci_enable_io_method(device_t dev, device_t child, int space)
1811 {
1812 	uint16_t command;
1813 	uint16_t bit;
1814 	char *error;
1815 
1816 	bit = 0;
1817 	error = NULL;
1818 
1819 	switch(space) {
1820 	case SYS_RES_IOPORT:
1821 		bit = PCIM_CMD_PORTEN;
1822 		error = "port";
1823 		break;
1824 	case SYS_RES_MEMORY:
1825 		bit = PCIM_CMD_MEMEN;
1826 		error = "memory";
1827 		break;
1828 	default:
1829 		return (EINVAL);
1830 	}
1831 	pci_set_command_bit(dev, child, bit);
1832 	/* Some devices seem to need a brief stall here, what do to? */
1833 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1834 	if (command & bit)
1835 		return (0);
1836 	device_printf(child, "failed to enable %s mapping!\n", error);
1837 	return (ENXIO);
1838 }
1839 
1840 int
1841 pci_disable_io_method(device_t dev, device_t child, int space)
1842 {
1843 	uint16_t command;
1844 	uint16_t bit;
1845 	char *error;
1846 
1847 	bit = 0;
1848 	error = NULL;
1849 
1850 	switch(space) {
1851 	case SYS_RES_IOPORT:
1852 		bit = PCIM_CMD_PORTEN;
1853 		error = "port";
1854 		break;
1855 	case SYS_RES_MEMORY:
1856 		bit = PCIM_CMD_MEMEN;
1857 		error = "memory";
1858 		break;
1859 	default:
1860 		return (EINVAL);
1861 	}
1862 	pci_clear_command_bit(dev, child, bit);
1863 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1864 	if (command & bit) {
1865 		device_printf(child, "failed to disable %s mapping!\n", error);
1866 		return (ENXIO);
1867 	}
1868 	return (0);
1869 }
1870 
1871 /*
1872  * New style pci driver.  Parent device is either a pci-host-bridge or a
1873  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1874  */
1875 
/*
 * Dump the interesting fields of a device's config header to the
 * console.  Only does anything when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity and location. */
		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Interrupt routing, if the device uses an intpin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1932 
1933 static int
1934 pci_porten(device_t pcib, int b, int s, int f)
1935 {
1936 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1937 		& PCIM_CMD_PORTEN) != 0;
1938 }
1939 
1940 static int
1941 pci_memen(device_t pcib, int b, int s, int f)
1942 {
1943 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1944 		& PCIM_CMD_MEMEN) != 0;
1945 }
1946 
1947 /*
1948  * Add a resource based on a pci map register. Return 1 if the map
1949  * register is a 32bit map register or 2 if it is a 64bit register.
1950  */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	/*
	 * Standard BAR sizing probe: save the current value, write all
	 * 1s, read back the size/type mask, then restore the original.
	 */
	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

	if (PCI_BAR_MEM(map))
		type = SYS_RES_MEMORY;
	else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	ln2range = pci_maprange(testval);
	base = pci_mapbase(map);
	/* A 64-bit BAR occupies two consecutive 32-bit registers. */
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	/* Skip addresses this platform's u_long cannot represent. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		/* Decoding is disabled and we may not enable it; skip BAR. */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	/* NOTE(review): 1 << ln2size is an int shift; for BARs larger
	 * than 2GB this presumably should be widened — confirm. */
	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Not quite sure what to do on failure of allocating the resource
	 * since I can postulate several right answers.
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL)
		return (barlen);
	start = rman_get_start(res);
	if ((u_long)start != start) {
		/* Wait a minute!  this platform can't do this address. */
		device_printf(bus,
		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
		    b, s, f, reg, (uintmax_t)start);
		resource_list_release(rl, bus, dev, type, reg, res);
		return (barlen);
	}
	/* Program the BAR with the address we finally settled on. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2085 
2086 /*
2087  * For ATA devices we need to decide early what addressing mode to use.
2088  * Legacy demands that the primary and secondary ATA ports sits on the
2089  * same addresses that old ISA hardware did. This dictates that we use
2090  * those addresses and ignore the BAR's if we cannot set PCI native
2091  * addressing mode.
2092  */
2093 static void
2094 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2095     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2096 {
2097 	int rid, type, progif;
2098 #if 0
2099 	/* if this device supports PCI native addressing use it */
2100 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2101 	if ((progif & 0x8a) == 0x8a) {
2102 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2103 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2104 			printf("Trying ATA native PCI addressing mode\n");
2105 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2106 		}
2107 	}
2108 #endif
2109 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2110 	type = SYS_RES_IOPORT;
2111 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2112 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2113 		    prefetchmask & (1 << 0));
2114 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2115 		    prefetchmask & (1 << 1));
2116 	} else {
2117 		rid = PCIR_BAR(0);
2118 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2119 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2120 		    0);
2121 		rid = PCIR_BAR(1);
2122 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2123 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2124 		    0);
2125 	}
2126 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2127 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2128 		    prefetchmask & (1 << 2));
2129 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2130 		    prefetchmask & (1 << 3));
2131 	} else {
2132 		rid = PCIR_BAR(2);
2133 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2134 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2135 		    0);
2136 		rid = PCIR_BAR(3);
2137 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2138 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2139 		    0);
2140 	}
2141 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2142 	    prefetchmask & (1 << 4));
2143 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2144 	    prefetchmask & (1 << 5));
2145 }
2146 
2147 static void
2148 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2149 {
2150 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2151 	pcicfgregs *cfg = &dinfo->cfg;
2152 	char tunable_name[64];
2153 	int irq;
2154 
2155 	/* Has to have an intpin to have an interrupt. */
2156 	if (cfg->intpin == 0)
2157 		return;
2158 
2159 	/* Let the user override the IRQ with a tunable. */
2160 	irq = PCI_INVALID_IRQ;
2161 	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
2162 	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2163 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2164 		irq = PCI_INVALID_IRQ;
2165 
2166 	/*
2167 	 * If we didn't get an IRQ via the tunable, then we either use the
2168 	 * IRQ value in the intline register or we ask the bus to route an
2169 	 * interrupt for us.  If force_route is true, then we only use the
2170 	 * value in the intline register if the bus was unable to assign an
2171 	 * IRQ.
2172 	 */
2173 	if (!PCI_INTERRUPT_VALID(irq)) {
2174 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2175 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2176 		if (!PCI_INTERRUPT_VALID(irq))
2177 			irq = cfg->intline;
2178 	}
2179 
2180 	/* If after all that we don't have an IRQ, just bail. */
2181 	if (!PCI_INTERRUPT_VALID(irq))
2182 		return;
2183 
2184 	/* Update the config register if it changed. */
2185 	if (irq != cfg->intline) {
2186 		cfg->intline = irq;
2187 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2188 	}
2189 
2190 	/* Add this IRQ as rid 0 interrupt resource. */
2191 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
2192 }
2193 
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns the number of BAR slots consumed
		 * (1 for a 32-bit BAR, 2 for a 64-bit BAR). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2246 
2247 void
2248 pci_add_children(device_t dev, int busno, size_t dinfo_size)
2249 {
2250 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2251 	device_t pcib = device_get_parent(dev);
2252 	struct pci_devinfo *dinfo;
2253 	int maxslots;
2254 	int s, f, pcifunchigh;
2255 	uint8_t hdrtype;
2256 
2257 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2258 	    ("dinfo_size too small"));
2259 	maxslots = PCIB_MAXSLOTS(pcib);
2260 	for (s = 0; s <= maxslots; s++) {
2261 		pcifunchigh = 0;
2262 		f = 0;
2263 		DELAY(1);
2264 		hdrtype = REG(PCIR_HDRTYPE, 1);
2265 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2266 			continue;
2267 		if (hdrtype & PCIM_MFDEV)
2268 			pcifunchigh = PCI_FUNCMAX;
2269 		for (f = 0; f <= pcifunchigh; f++) {
2270 			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
2271 			if (dinfo != NULL) {
2272 				pci_add_child(dev, dinfo);
2273 			}
2274 		}
2275 	}
2276 #undef REG
2277 }
2278 
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	/* Create the device_t and attach the devinfo as its ivars. */
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * NOTE(review): config state is saved and then immediately
	 * restored; presumably pci_cfg_restore() also brings the device
	 * to a usable power state before the BAR scan below — confirm
	 * against its definition.
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	/* Size BARs and wire the interrupt (force=0, no prefetch hints). */
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2290 
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/*
	 * Allow other subclasses to override this driver: a large
	 * negative probe value loses to any more specific PCI bus
	 * driver that also matches.
	 */
	return (-1000);
}
2300 
static int
pci_attach(device_t dev)
{
	int busno;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our bus number is.
	 */
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "physical bus=%d\n", busno);

	/* Enumerate every slot/function on this bus. */
	pci_add_children(dev, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2320 
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	/* NOTE(review): return value of device_get_children() is
	 * ignored here — presumably it cannot fail in this context. */
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2368 
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2403 
2404 static void
2405 pci_load_vendor_data(void)
2406 {
2407 	caddr_t vendordata, info;
2408 
2409 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2410 		info = preload_search_info(vendordata, MODINFO_ADDR);
2411 		pci_vendordata = *(char **)info;
2412 		info = preload_search_info(vendordata, MODINFO_SIZE);
2413 		pci_vendordata_size = *(size_t *)info;
2414 		/* terminate the database */
2415 		pci_vendordata[pci_vendordata_size] = '\n';
2416 	}
2417 }
2418 
2419 void
2420 pci_driver_added(device_t dev, driver_t *driver)
2421 {
2422 	int numdevs;
2423 	device_t *devlist;
2424 	device_t child;
2425 	struct pci_devinfo *dinfo;
2426 	int i;
2427 
2428 	if (bootverbose)
2429 		device_printf(dev, "driver added\n");
2430 	DEVICE_IDENTIFY(driver, dev);
2431 	device_get_children(dev, &devlist, &numdevs);
2432 	for (i = 0; i < numdevs; i++) {
2433 		child = devlist[i];
2434 		if (device_get_state(child) != DS_NOTPRESENT)
2435 			continue;
2436 		dinfo = device_get_ivars(child);
2437 		pci_print_verbose(dinfo);
2438 		if (bootverbose)
2439 			printf("pci%d:%d:%d: reprobing on driver added\n",
2440 			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
2441 		pci_cfg_restore(child, dinfo);
2442 		if (device_probe_and_attach(child) != 0)
2443 			pci_cfg_save(child, dinfo, 1);
2444 	}
2445 	free(devlist, M_TEMP);
2446 }
2447 
2448 int
2449 pci_print_child(device_t dev, device_t child)
2450 {
2451 	struct pci_devinfo *dinfo;
2452 	struct resource_list *rl;
2453 	int retval = 0;
2454 
2455 	dinfo = device_get_ivars(child);
2456 	rl = &dinfo->resources;
2457 
2458 	retval += bus_print_child_header(dev, child);
2459 
2460 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2461 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2462 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2463 	if (device_get_flags(dev))
2464 		retval += printf(" flags %#x", device_get_flags(dev));
2465 
2466 	retval += printf(" at device %d.%d", pci_get_slot(child),
2467 	    pci_get_function(child));
2468 
2469 	retval += bus_print_child_footer(dev, child);
2470 
2471 	return (retval);
2472 }
2473 
/*
 * Class/subclass -> description table used by pci_probe_nomatch() to
 * print a generic name for devices no driver attached to.  An entry
 * with subclass -1 supplies the default description for the class.
 */
static struct
{
	int	class;		/* PCI base class (PCIC_*) */
	int	subclass;	/* PCI subclass (PCIS_*), or -1 for the class default */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}	/* sentinel: desc == NULL terminates scans */
};
2560 
2561 void
2562 pci_probe_nomatch(device_t dev, device_t child)
2563 {
2564 	int	i;
2565 	char	*cp, *scp, *device;
2566 
2567 	/*
2568 	 * Look for a listing for this device in a loaded device database.
2569 	 */
2570 	if ((device = pci_describe_device(child)) != NULL) {
2571 		device_printf(dev, "<%s>", device);
2572 		free(device, M_DEVBUF);
2573 	} else {
2574 		/*
2575 		 * Scan the class/subclass descriptions for a general
2576 		 * description.
2577 		 */
2578 		cp = "unknown";
2579 		scp = NULL;
2580 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2581 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2582 				if (pci_nomatch_tab[i].subclass == -1) {
2583 					cp = pci_nomatch_tab[i].desc;
2584 				} else if (pci_nomatch_tab[i].subclass ==
2585 				    pci_get_subclass(child)) {
2586 					scp = pci_nomatch_tab[i].desc;
2587 				}
2588 			}
2589 		}
2590 		device_printf(dev, "<%s%s%s>",
2591 		    cp ? cp : "",
2592 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2593 		    scp ? scp : "");
2594 	}
2595 	printf(" at device %d.%d (no driver attached)\n",
2596 	    pci_get_slot(child), pci_get_function(child));
2597 	if (pci_do_power_nodriver)
2598 		pci_cfg_save(child,
2599 		    (struct pci_devinfo *) device_get_ivars(child), 1);
2600 	return;
2601 }
2602 
2603 /*
2604  * Parse the PCI device database, if loaded, and return a pointer to a
2605  * description of the device.
2606  *
2607  * The database is flat text formatted as follows:
2608  *
2609  * Any line not in a valid format is ignored.
2610  * Lines are terminated with newline '\n' characters.
2611  *
2612  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2613  * the vendor name.
2614  *
2615  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2616  * - devices cannot be listed without a corresponding VENDOR line.
2617  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2618  * another TAB, then the device name.
2619  */
2620 
2621 /*
2622  * Assuming (ptr) points to the beginning of a line in the database,
2623  * return the vendor or device and description of the next entry.
2624  * The value of (vendor) or (device) inappropriate for the entry type
2625  * is set to -1.  Returns nonzero at the end of the database.
2626  *
2627  * Note that this is slightly unrobust in the face of corrupt data;
2628  * we attempt to safeguard against this by spamming the end of the
2629  * database with a newline when we initialise.
2630  */
2631 static int
2632 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2633 {
2634 	char	*cp = *ptr;
2635 	int	left;
2636 
2637 	*device = -1;
2638 	*vendor = -1;
2639 	**desc = '\0';
2640 	for (;;) {
2641 		left = pci_vendordata_size - (cp - pci_vendordata);
2642 		if (left <= 0) {
2643 			*ptr = cp;
2644 			return(1);
2645 		}
2646 
2647 		/* vendor entry? */
2648 		if (*cp != '\t' &&
2649 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2650 			break;
2651 		/* device entry? */
2652 		if (*cp == '\t' &&
2653 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2654 			break;
2655 
2656 		/* skip to next line */
2657 		while (*cp != '\n' && left > 0) {
2658 			cp++;
2659 			left--;
2660 		}
2661 		if (*cp == '\n') {
2662 			cp++;
2663 			left--;
2664 		}
2665 	}
2666 	/* skip to next line */
2667 	while (*cp != '\n' && left > 0) {
2668 		cp++;
2669 		left--;
2670 	}
2671 	if (*cp == '\n' && left > 0)
2672 		cp++;
2673 	*ptr = cp;
2674 	return(0);
2675 }
2676 
2677 static char *
2678 pci_describe_device(device_t dev)
2679 {
2680 	int	vendor, device;
2681 	char	*desc, *vp, *dp, *line;
2682 
2683 	desc = vp = dp = NULL;
2684 
2685 	/*
2686 	 * If we have no vendor data, we can't do anything.
2687 	 */
2688 	if (pci_vendordata == NULL)
2689 		goto out;
2690 
2691 	/*
2692 	 * Scan the vendor data looking for this device
2693 	 */
2694 	line = pci_vendordata;
2695 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2696 		goto out;
2697 	for (;;) {
2698 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
2699 			goto out;
2700 		if (vendor == pci_get_vendor(dev))
2701 			break;
2702 	}
2703 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
2704 		goto out;
2705 	for (;;) {
2706 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
2707 			*dp = 0;
2708 			break;
2709 		}
2710 		if (vendor != -1) {
2711 			*dp = 0;
2712 			break;
2713 		}
2714 		if (device == pci_get_device(dev))
2715 			break;
2716 	}
2717 	if (dp[0] == '\0')
2718 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
2719 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
2720 	    NULL)
2721 		sprintf(desc, "%s, %s", vp, dp);
2722  out:
2723 	if (vp != NULL)
2724 		free(vp, M_DEVBUF);
2725 	if (dp != NULL)
2726 		free(dp, M_DEVBUF);
2727 	return(desc);
2728 }
2729 
/*
 * Bus interface: read a per-device instance variable.  All values are
 * served from the config-header fields cached in the child's devinfo;
 * no config-space access is performed here.  Returns ENOENT for
 * unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, same layout as pci_quirks. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
2809 
/*
 * Bus interface: write a per-device instance variable.  Only the
 * interrupt pin is writable; the identity-related ivars are read-only
 * (EINVAL), and anything else is unknown (ENOENT).
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		/* Cached copy only; the config register is not updated. */
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
2841 
2842 
2843 #include "opt_ddb.h"
2844 #ifdef DDB
2845 #include <ddb/ddb.h>
2846 #include <sys/cons.h>
2847 
2848 /*
2849  * List resources based on pci map registers, used for within ddb
2850  */
2851 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * a one-line config-header summary for each enumerated PCI device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without an attached driver print as "noneN". */
		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
2891 #endif /* DDB */
2892 
/*
 * Lazily allocate a resource for a BAR that was skipped at enumeration
 * time (see pci_add_map()).  Sizes the BAR, overrides the caller's
 * requested count/alignment with the BAR's real values, allocates from
 * the parent, and programs the BAR with the resulting address.  The
 * BAR register is rewritten on every exit path (success or failure).
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		/* 64-bit BAR: pick up the high half from the next register. */
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Restore (or program) the BAR on all paths. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2981 
2982 
/*
 * Allocate a resource on behalf of a direct PCI child.
 *
 * For direct children this performs lazy allocation: IRQs are routed on
 * first use (unless MSI/MSI-X has already been set up), BAR-backed
 * memory/ioport resources are sized and programmed via pci_alloc_map()
 * the first time they are requested, and previously reserved entries
 * are handed back (activating them on demand).  Requests from
 * grandchildren and anything not handled above fall through to the
 * generic resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids within the BAR range map to real BARs. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			/*
			 * No resource list entry yet: size and program the
			 * BAR, creating the entry as a side effect.
			 */
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic path for everything else. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3059 
3060 void
3061 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3062 {
3063 	struct pci_devinfo *dinfo;
3064 	struct resource_list *rl;
3065 	struct resource_list_entry *rle;
3066 
3067 	if (device_get_parent(child) != dev)
3068 		return;
3069 
3070 	dinfo = device_get_ivars(child);
3071 	rl = &dinfo->resources;
3072 	rle = resource_list_find(rl, type, rid);
3073 	if (rle) {
3074 		if (rle->res) {
3075 			if (rman_get_device(rle->res) != dev ||
3076 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3077 				device_printf(dev, "delete_resource: "
3078 				    "Resource still owned by child, oops. "
3079 				    "(type=%d, rid=%d, addr=%lx)\n",
3080 				    rle->type, rle->rid,
3081 				    rman_get_start(rle->res));
3082 				return;
3083 			}
3084 			bus_release_resource(dev, type, rid, rle->res);
3085 		}
3086 		resource_list_delete(rl, type, rid);
3087 	}
3088 	/*
3089 	 * Why do we turn off the PCI configuration BAR when we delete a
3090 	 * resource? -- imp
3091 	 */
3092 	pci_write_config(child, rid, 0, 4);
3093 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3094 }
3095 
3096 struct resource_list *
3097 pci_get_resource_list (device_t dev, device_t child)
3098 {
3099 	struct pci_devinfo *dinfo = device_get_ivars(child);
3100 
3101 	return (&dinfo->resources);
3102 }
3103 
3104 uint32_t
3105 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3106 {
3107 	struct pci_devinfo *dinfo = device_get_ivars(child);
3108 	pcicfgregs *cfg = &dinfo->cfg;
3109 
3110 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3111 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3112 }
3113 
3114 void
3115 pci_write_config_method(device_t dev, device_t child, int reg,
3116     uint32_t val, int width)
3117 {
3118 	struct pci_devinfo *dinfo = device_get_ivars(child);
3119 	pcicfgregs *cfg = &dinfo->cfg;
3120 
3121 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3122 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3123 }
3124 
3125 int
3126 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3127     size_t buflen)
3128 {
3129 
3130 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3131 	    pci_get_function(child));
3132 	return (0);
3133 }
3134 
3135 int
3136 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3137     size_t buflen)
3138 {
3139 	struct pci_devinfo *dinfo;
3140 	pcicfgregs *cfg;
3141 
3142 	dinfo = device_get_ivars(child);
3143 	cfg = &dinfo->cfg;
3144 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3145 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3146 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3147 	    cfg->progif);
3148 	return (0);
3149 }
3150 
3151 int
3152 pci_assign_interrupt_method(device_t dev, device_t child)
3153 {
3154 	struct pci_devinfo *dinfo = device_get_ivars(child);
3155 	pcicfgregs *cfg = &dinfo->cfg;
3156 
3157 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3158 	    cfg->intpin));
3159 }
3160 
/*
 * Module event handler for the PCI bus driver.
 *
 * On load: initializes the global device queue and generation counter,
 * creates the /dev/pci control device, and loads the vendor data file.
 * On unload: destroys the control device.  Other events are ignored
 * and reported as success.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* Retains the control device across load/unload events. */
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
		    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
3182 
/*
 * Restore the configuration registers previously saved by
 * pci_cfg_save() into the device's config space: the BARs, expansion
 * ROM, command register, interrupt routing, timing registers, and
 * class/revision bytes, followed by any saved MSI state.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite the saved copies of the type 0 header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3229 
/*
 * Save the device's writable type 0 header registers into the cached
 * copy in dinfo->cfg so pci_cfg_restore() can later reprogram them.
 * When setstate is non-zero, optionally power the device down to D3
 * according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3313