xref: /freebsd/sys/dev/pci/pci.c (revision 85999a0155e389415cc476110fd5614baf543a55)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
/* BAR decoding helpers. */
static uint32_t		pci_mapbase(unsigned mapreg);
static const char	*pci_maptype(unsigned mapreg);
static int		pci_mapsize(unsigned testval);
static int		pci_maprange(unsigned mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

/* Device enumeration, resource setup, and driver glue. */
static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
/* Capability list and Vital Product Data parsing. */
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static uint32_t		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg);
#if 0
static void		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static int		pci_msi_blacklisted(void);
105 
/* Method table wiring this driver into newbus and the PCI kobj interface. */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Terminator entry; the kobj framework stops scanning here. */
	{ 0, 0 }
};
157 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Register the pci driver as a child of pcib (PCI bridge) devices. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Contents of the pciids vendor database, loaded by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/* One entry in the per-device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
176 
/*
 * Table of known-broken devices.  devid is the 32-bit concatenation of
 * device ID (high 16 bits) and vendor ID (low 16 bits).
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* Terminator: devid 0 ends the scan. */
	{ 0 }
};
211 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices, plus a change generation. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set while walking capability lists if a PCIe/PCI-X chipset is detected. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_vpd = 1;
TUNABLE_INT("hw.pci.enable_vpd", &pci_do_vpd);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_vpd, CTLFLAG_RW, &pci_do_vpd, 1,
    "Enable support for VPD.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
267 
268 /* Find a device_t by bus/slot/function */
269 
270 device_t
271 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
272 {
273 	struct pci_devinfo *dinfo;
274 
275 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
276 		if ((dinfo->cfg.bus == bus) &&
277 		    (dinfo->cfg.slot == slot) &&
278 		    (dinfo->cfg.func == func)) {
279 			return (dinfo->cfg.dev);
280 		}
281 	}
282 
283 	return (NULL);
284 }
285 
286 /* Find a device_t by vendor/device ID */
287 
288 device_t
289 pci_find_device(uint16_t vendor, uint16_t device)
290 {
291 	struct pci_devinfo *dinfo;
292 
293 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
294 		if ((dinfo->cfg.vendor == vendor) &&
295 		    (dinfo->cfg.device == device)) {
296 			return (dinfo->cfg.dev);
297 		}
298 	}
299 
300 	return (NULL);
301 }
302 
303 /* return base address of memory or port map */
304 
305 static uint32_t
306 pci_mapbase(uint32_t mapreg)
307 {
308 
309 	if (PCI_BAR_MEM(mapreg))
310 		return (mapreg & PCIM_BAR_MEM_BASE);
311 	else
312 		return (mapreg & PCIM_BAR_IO_BASE);
313 }
314 
315 /* return map type of memory or port map */
316 
317 static const char *
318 pci_maptype(unsigned mapreg)
319 {
320 
321 	if (PCI_BAR_IO(mapreg))
322 		return ("I/O Port");
323 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
324 		return ("Prefetchable Memory");
325 	return ("Memory");
326 }
327 
328 /* return log2 of map size decoded for memory or port map */
329 
330 static int
331 pci_mapsize(uint32_t testval)
332 {
333 	int ln2size;
334 
335 	testval = pci_mapbase(testval);
336 	ln2size = 0;
337 	if (testval != 0) {
338 		while ((testval & 1) == 0)
339 		{
340 			ln2size++;
341 			testval >>= 1;
342 		}
343 	}
344 	return (ln2size);
345 }
346 
347 /* return log2 of address range supported by map register */
348 
349 static int
350 pci_maprange(unsigned mapreg)
351 {
352 	int ln2range = 0;
353 
354 	if (PCI_BAR_IO(mapreg))
355 		ln2range = 32;
356 	else
357 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
358 		case PCIM_BAR_MEM_32:
359 			ln2range = 32;
360 			break;
361 		case PCIM_BAR_MEM_1MB:
362 			ln2range = 20;
363 			break;
364 		case PCIM_BAR_MEM_64:
365 			ln2range = 64;
366 			break;
367 		}
368 	return (ln2range);
369 }
370 
371 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
372 
373 static void
374 pci_fixancient(pcicfgregs *cfg)
375 {
376 	if (cfg->hdrtype != 0)
377 		return;
378 
379 	/* PCI to PCI bridges use header type 1 */
380 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
381 		cfg->hdrtype = 1;
382 }
383 
384 /* extract header type specific config data */
385 
/*
 * Extract header-type-specific config data: subvendor/subdevice IDs (not
 * present in type 1 bridge headers) and the number of BARs for this
 * header layout.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case 2:		/* CardBus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
407 
408 /* read configuration header into pcicfgregs structure */
409 struct pci_devinfo *
410 pci_read_device(device_t pcib, int b, int s, int f, size_t size)
411 {
412 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
413 	pcicfgregs *cfg = NULL;
414 	struct pci_devinfo *devlist_entry;
415 	struct devlist *devlist_head;
416 
417 	devlist_head = &pci_devq;
418 
419 	devlist_entry = NULL;
420 
421 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
422 		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
423 		if (devlist_entry == NULL)
424 			return (NULL);
425 
426 		cfg = &devlist_entry->cfg;
427 
428 		cfg->bus		= b;
429 		cfg->slot		= s;
430 		cfg->func		= f;
431 		cfg->vendor		= REG(PCIR_VENDOR, 2);
432 		cfg->device		= REG(PCIR_DEVICE, 2);
433 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
434 		cfg->statreg		= REG(PCIR_STATUS, 2);
435 		cfg->baseclass		= REG(PCIR_CLASS, 1);
436 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
437 		cfg->progif		= REG(PCIR_PROGIF, 1);
438 		cfg->revid		= REG(PCIR_REVID, 1);
439 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
440 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
441 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
442 		cfg->intpin		= REG(PCIR_INTPIN, 1);
443 		cfg->intline		= REG(PCIR_INTLINE, 1);
444 
445 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
446 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
447 
448 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
449 		cfg->hdrtype		&= ~PCIM_MFDEV;
450 
451 		pci_fixancient(cfg);
452 		pci_hdrtypedata(pcib, b, s, f, cfg);
453 
454 		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
455 			pci_read_extcap(pcib, cfg);
456 
457 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
458 
459 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
460 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
461 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
462 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
463 
464 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
465 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
466 		devlist_entry->conf.pc_vendor = cfg->vendor;
467 		devlist_entry->conf.pc_device = cfg->device;
468 
469 		devlist_entry->conf.pc_class = cfg->baseclass;
470 		devlist_entry->conf.pc_subclass = cfg->subclass;
471 		devlist_entry->conf.pc_progif = cfg->progif;
472 		devlist_entry->conf.pc_revid = cfg->revid;
473 
474 		pci_numdevs++;
475 		pci_generation++;
476 	}
477 	return (devlist_entry);
478 #undef REG
479 }
480 
481 static void
482 pci_read_extcap(device_t pcib, pcicfgregs *cfg)
483 {
484 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
485 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
486 #if defined(__i386__) || defined(__amd64__)
487 	uint64_t addr;
488 #endif
489 	uint32_t val;
490 	int	ptr, nextptr, ptrptr;
491 
492 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
493 	case 0:
494 	case 1:
495 		ptrptr = PCIR_CAP_PTR;
496 		break;
497 	case 2:
498 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
499 		break;
500 	default:
501 		return;		/* no extended capabilities support */
502 	}
503 	nextptr = REG(ptrptr, 1);	/* sanity check? */
504 
505 	/*
506 	 * Read capability entries.
507 	 */
508 	while (nextptr != 0) {
509 		/* Sanity check */
510 		if (nextptr > 255) {
511 			printf("illegal PCI extended capability offset %d\n",
512 			    nextptr);
513 			return;
514 		}
515 		/* Find the next entry */
516 		ptr = nextptr;
517 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
518 
519 		/* Process this entry */
520 		switch (REG(ptr + PCICAP_ID, 1)) {
521 		case PCIY_PMG:		/* PCI power management */
522 			if (cfg->pp.pp_cap == 0) {
523 				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
524 				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
525 				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
526 				if ((nextptr - ptr) > PCIR_POWER_DATA)
527 					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
528 			}
529 			break;
530 #if defined(__i386__) || defined(__amd64__)
531 		case PCIY_HT:		/* HyperTransport */
532 			/* Determine HT-specific capability type. */
533 			val = REG(ptr + PCIR_HT_COMMAND, 2);
534 			switch (val & PCIM_HTCMD_CAP_MASK) {
535 			case PCIM_HTCAP_MSI_MAPPING:
536 				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
537 					/* Sanity check the mapping window. */
538 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
539 					    4);
540 					addr <<= 32;
541 					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
542 					    4);
543 					if (addr != MSI_INTEL_ADDR_BASE)
544 						device_printf(pcib,
545 		    "HT Bridge at %d:%d:%d has non-default MSI window 0x%llx\n",
546 						    cfg->bus, cfg->slot,
547 						    cfg->func, (long long)addr);
548 				}
549 
550 				/* Enable MSI -> HT mapping. */
551 				val |= PCIM_HTCMD_MSI_ENABLE;
552 				WREG(ptr + PCIR_HT_COMMAND, val, 2);
553 				break;
554 			}
555 			break;
556 #endif
557 		case PCIY_MSI:		/* PCI MSI */
558 			cfg->msi.msi_location = ptr;
559 			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
560 			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
561 						     PCIM_MSICTRL_MMC_MASK)>>1);
562 			break;
563 		case PCIY_MSIX:		/* PCI MSI-X */
564 			cfg->msix.msix_location = ptr;
565 			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
566 			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
567 			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
568 			val = REG(ptr + PCIR_MSIX_TABLE, 4);
569 			cfg->msix.msix_table_bar = PCIR_BAR(val &
570 			    PCIM_MSIX_BIR_MASK);
571 			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
572 			val = REG(ptr + PCIR_MSIX_PBA, 4);
573 			cfg->msix.msix_pba_bar = PCIR_BAR(val &
574 			    PCIM_MSIX_BIR_MASK);
575 			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
576 			break;
577 		case PCIY_VPD:		/* PCI Vital Product Data */
578 			cfg->vpd.vpd_reg = ptr;
579 			break;
580 		case PCIY_SUBVENDOR:
581 			/* Should always be true. */
582 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
583 				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
584 				cfg->subvendor = val & 0xffff;
585 				cfg->subdevice = val >> 16;
586 			}
587 			break;
588 		case PCIY_PCIX:		/* PCI-X */
589 			/*
590 			 * Assume we have a PCI-X chipset if we have
591 			 * at least one PCI-PCI bridge with a PCI-X
592 			 * capability.  Note that some systems with
593 			 * PCI-express or HT chipsets might match on
594 			 * this check as well.
595 			 */
596 			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
597 				pcix_chipset = 1;
598 			break;
599 		case PCIY_EXPRESS:	/* PCI-express */
600 			/*
601 			 * Assume we have a PCI-express chipset if we have
602 			 * at least one PCI-express root port.
603 			 */
604 			val = REG(ptr + PCIR_EXPRESS_FLAGS, 2);
605 			if ((val & PCIM_EXP_FLAGS_TYPE) ==
606 			    PCIM_EXP_TYPE_ROOT_PORT)
607 				pcie_chipset = 1;
608 			break;
609 		default:
610 			break;
611 		}
612 	}
613 /* REG and WREG use carry through to next functions */
614 }
615 
616 /*
617  * PCI Vital Product Data
618  */
619 static uint32_t
620 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg)
621 {
622 
623 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
624 
625 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
626 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000)
627 		DELAY(1);	/* limit looping */
628 
629 	return (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
630 }
631 
#if 0
/*
 * Write a 4-byte-aligned dword into the device's VPD area.  Currently
 * compiled out; kept as a reference implementation.  The write is
 * started by setting bit 15 (F) of the address register, which the
 * device clears on completion.
 */
static void
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000)
		DELAY(1);	/* limit looping */

	return;
}
#endif
646 
/* Cursor state for streaming bytes out of a device's VPD area. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last dword fetched */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next dword offset to fetch */
	uint8_t		cksum;		/* running byte sum (RV checksum) */
};
655 
656 static uint8_t
657 vpd_nextbyte(struct vpd_readstate *vrs)
658 {
659 	uint8_t byte;
660 
661 	if (vrs->bytesinval == 0) {
662 		vrs->val = le32toh(pci_read_vpd_reg(vrs->pcib, vrs->cfg,
663 		    vrs->off));
664 		vrs->off += 4;
665 		byte = vrs->val & 0xff;
666 		vrs->bytesinval = 3;
667 	} else {
668 		vrs->val = vrs->val >> 8;
669 		byte = vrs->val & 0xff;
670 		vrs->bytesinval--;
671 	}
672 
673 	vrs->cksum += byte;
674 	return (byte);
675 }
676 
/*
 * Parse the device's Vital Product Data into cfg->vpd: the identifier
 * string, the read-only (VPD-R) keyword array, and the writable (VPD-W)
 * keyword array.  Implemented as a byte-driven state machine:
 *   0 = expecting a resource item header
 *   1 = consuming the identifier string
 *   2/3 = VPD-R keyword header / value bytes
 *   5/6 = VPD-W keyword header / value bytes
 *   4 = skip bytes (currently unreachable from other states)
 * On exit cfg->vpd.vpd_cached is set so the parse runs at most once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int end;
	int i;
	uint8_t byte;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;

	/* VPD support can be disabled via the hw.pci.enable_vpd tunable. */
	if (!pci_do_vpd) {
		cfg->vpd.vpd_cached = 1;
		return;
	}

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	end = 0;
	cksumvalid = -1;
	for (; !end;) {
		byte = vpd_nextbyte(&vrs);
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			/* Bit 7 distinguishes large (16-bit length) from
			 * small (3-bit length) resource data types. */
			if (byte & 0x80) {
				remain = vpd_nextbyte(&vrs);
				remain |= vpd_nextbyte(&vrs) << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					end = 1;
					printf(
			    "pci%d:%d:%d: invalid vpd data, remain %#x\n",
					    cfg->bus, cfg->slot, cfg->func,
					    remain);
				}
				name = byte & 0x7f;
			} else {
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				end = 1;
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof *cfg->vpd.vpd_ros, M_DEVBUF,
				    M_WAITOK);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof *cfg->vpd.vpd_w, M_DEVBUF,
				    M_WAITOK);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				end = 1;
				continue;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array geometrically as needed. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			cfg->vpd.vpd_ros[off].keyword[1] = vpd_nextbyte(&vrs);
			dflen = vpd_nextbyte(&vrs);
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf("pci%d:%d:%d: bad keyword length: %d\n",
				    cfg->bus, cfg->slot, cfg->func, dflen);
				cksumvalid = 0;
				end = 1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof *cfg->vpd.vpd_ros[off].value,
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/* The RV keyword carries the checksum byte: the sum
			 * of all VPD bytes through it must be zero. */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					printf(
				    "pci%d:%d:%d: bad VPD cksum, remain %hhu\n",
					    cfg->bus, cfg->slot, cfg->func,
					    vrs.cksum);
					cksumvalid = 0;
					end = 1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof *cfg->vpd.vpd_ros,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			cfg->vpd.vpd_w[off].keyword[1] = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].len = dflen = vpd_nextbyte(&vrs);
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof *cfg->vpd.vpd_w[off].value,
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof *cfg->vpd.vpd_w,
				    M_DEVBUF, M_WAITOK);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d: invalid state: %d\n",
			    cfg->bus, cfg->slot, cfg->func, state);
			end = 1;
			break;
		}
	}

	if (cksumvalid == 0) {
		/* read-only data bad, clean up */
		/*
		 * NOTE(review): this loop frees indices off..1 but never
		 * index 0 (leak), and when aborting from state 2 the entry
		 * at index `off` has an uninitialized value pointer (the
		 * array is allocated without M_ZERO), so freeing it looks
		 * unsafe.  Worth confirming and fixing upstream.
		 */
		for (; off; off--)
			free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);

		free(cfg->vpd.vpd_ros, M_DEVBUF);
		cfg->vpd.vpd_ros = NULL;
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
915 
916 int
917 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
918 {
919 	struct pci_devinfo *dinfo = device_get_ivars(child);
920 	pcicfgregs *cfg = &dinfo->cfg;
921 
922 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
923 		pci_read_vpd(device_get_parent(dev), cfg);
924 
925 	*identptr = cfg->vpd.vpd_ident;
926 
927 	if (*identptr == NULL)
928 		return (ENXIO);
929 
930 	return (0);
931 }
932 
933 int
934 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
935 	const char **vptr)
936 {
937 	struct pci_devinfo *dinfo = device_get_ivars(child);
938 	pcicfgregs *cfg = &dinfo->cfg;
939 	int i;
940 
941 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
942 		pci_read_vpd(device_get_parent(dev), cfg);
943 
944 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
945 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
946 		    sizeof cfg->vpd.vpd_ros[i].keyword) == 0) {
947 			*vptr = cfg->vpd.vpd_ros[i].value;
948 		}
949 
950 	if (i != cfg->vpd.vpd_rocnt)
951 		return (0);
952 
953 	*vptr = NULL;
954 	return (ENXIO);
955 }
956 
957 /*
958  * Return the offset in configuration space of the requested extended
959  * capability entry or 0 if the specified capability was not found.
960  */
961 int
962 pci_find_extcap_method(device_t dev, device_t child, int capability,
963     int *capreg)
964 {
965 	struct pci_devinfo *dinfo = device_get_ivars(child);
966 	pcicfgregs *cfg = &dinfo->cfg;
967 	u_int32_t status;
968 	u_int8_t ptr;
969 
970 	/*
971 	 * Check the CAP_LIST bit of the PCI status register first.
972 	 */
973 	status = pci_read_config(child, PCIR_STATUS, 2);
974 	if (!(status & PCIM_STATUS_CAPPRESENT))
975 		return (ENXIO);
976 
977 	/*
978 	 * Determine the start pointer of the capabilities list.
979 	 */
980 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
981 	case 0:
982 	case 1:
983 		ptr = PCIR_CAP_PTR;
984 		break;
985 	case 2:
986 		ptr = PCIR_CAP_PTR_2;
987 		break;
988 	default:
989 		/* XXX: panic? */
990 		return (ENXIO);		/* no extended capabilities support */
991 	}
992 	ptr = pci_read_config(child, ptr, 1);
993 
994 	/*
995 	 * Traverse the capabilities list.
996 	 */
997 	while (ptr != 0) {
998 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
999 			if (capreg != NULL)
1000 				*capreg = ptr;
1001 			return (0);
1002 		}
1003 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1004 	}
1005 
1006 	return (ENOENT);
1007 }
1008 
1009 /*
1010  * Support for MSI-X message interrupts.
1011  */
/*
 * Support for MSI-X message interrupts.
 */

/*
 * Program MSI-X table entry `index` with the given message address and
 * data.  Each table entry is 16 bytes: address low, address high, data,
 * vector control.
 */
void
pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint32_t offset;

	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
	offset = cfg->msix.msix_table_offset + index * 16;
	bus_write_4(cfg->msix.msix_table_res, offset, address & 0xffffffff);
	bus_write_4(cfg->msix.msix_table_res, offset + 4, address >> 32);
	bus_write_4(cfg->msix.msix_table_res, offset + 8, data);
}
1025 
1026 void
1027 pci_mask_msix(device_t dev, u_int index)
1028 {
1029 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1030 	pcicfgregs *cfg = &dinfo->cfg;
1031 	uint32_t offset, val;
1032 
1033 	KASSERT(cfg->msix.msix_msgnum > index, ("bogus index"));
1034 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1035 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1036 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1037 		val |= PCIM_MSIX_VCTRL_MASK;
1038 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1039 	}
1040 }
1041 
1042 void
1043 pci_unmask_msix(device_t dev, u_int index)
1044 {
1045 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1046 	pcicfgregs *cfg = &dinfo->cfg;
1047 	uint32_t offset, val;
1048 
1049 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1050 	offset = cfg->msix.msix_table_offset + index * 16 + 12;
1051 	val = bus_read_4(cfg->msix.msix_table_res, offset);
1052 	if (val & PCIM_MSIX_VCTRL_MASK) {
1053 		val &= ~PCIM_MSIX_VCTRL_MASK;
1054 		bus_write_4(cfg->msix.msix_table_res, offset, val);
1055 	}
1056 }
1057 
1058 int
1059 pci_pending_msix(device_t dev, u_int index)
1060 {
1061 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1062 	pcicfgregs *cfg = &dinfo->cfg;
1063 	uint32_t offset, bit;
1064 
1065 	KASSERT(cfg->msix.msix_alloc > index, ("bogus index"));
1066 	offset = cfg->msix.msix_pba_offset + (index / 32) * 4;
1067 	bit = 1 << index % 32;
1068 	return (bus_read_4(cfg->msix.msix_pba_res, offset) & bit);
1069 }
1070 
/*
 * Attempt to allocate *count MSI-X messages.  The actual number allocated is
 * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
 *
 * Returns 0 on success; EINVAL for a zero request; ENXIO if the legacy
 * interrupt (rid 0) is in use, messages are already allocated, MSI is
 * blacklisted, or the table/PBA BARs are not mapped; ENODEV if the
 * device has no MSI-X capability or MSI-X is administratively disabled.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	/* (Rid 0 is the legacy INTx interrupt; it is mutually exclusive
	 * with MSI-X.) */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	/* The driver must have an active mapping of the BAR(s) holding
	 * the vector table and the pending-bit array before we can
	 * program them. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If table and PBA share a BAR, 'rle' still refers to the table BAR. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i,
		    &irq);
		if (error)
			break;
		/* Expose the message as SYS_RES_IRQ rid i + 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* Allocation stops at the first failure; keep what we got. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	/* Drivers unmask individual vectors as they set up handlers. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	*count = actual;
	return (0);
}
1196 
/*
 * By default, pci_alloc_msix() will assign the allocated IRQ resources to
 * the first N messages in the MSI-X table.  However, device drivers may
 * want to use different layouts in the case that they do not allocate a
 * full table.  This method allows the driver to specify what layout it
 * wants.  It must be called after a successful pci_alloc_msix() but
 * before any of the associated SYS_RES_IRQ resources are allocated via
 * bus_alloc_resource().  The 'indices' array contains N (where N equals
 * the 'count' returned from pci_alloc_msix()) message indices.  The
 * indices are 1-based (meaning the first message is at index 1).  On
 * successful return, each of the messages in the 'indices' array will
 * have an associated SYS_RES_IRQ whose rid is equal to the index.  Thus,
 * if indices contains { 2, 4 }, then upon successful return, the 'child'
 * device will have two SYS_RES_IRQ resources available at rids 2 and 4.
 *
 * Returns 0 on success, EINVAL for out-of-range or duplicate indices,
 * or EBUSY if any of the current SYS_RES_IRQ resources are allocated.
 */
int
pci_remap_msix_method(device_t dev, device_t child, u_int *indices)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, error, i, j, *irqs;

	/* Sanity check the indices. */
	/* Each index must be in [1, msix_msgnum]. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum)
			return (EINVAL);

	/* Check for duplicates. */
	for (i = 0; i < cfg->msix.msix_alloc; i++)
		for (j = i + 1; j < cfg->msix.msix_alloc; j++)
			if (indices[i] == indices[j])
				return (EINVAL);

	/* Make sure none of the resources are allocated. */
	/*
	 * The rids may be non-contiguous (e.g. after an earlier remap),
	 * so walk rids upward until msix_alloc entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Save the IRQ values and free the existing resources. */
	irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK);
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		irqs[count] = rle->start;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Map the IRQ values to the new message indices and rids. */
	for (i = 0; i < cfg->msix.msix_alloc; i++) {
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i],
		    irqs[i], irqs[i], 1);

		/*
		 * The indices in the backend code (PCIB_* methods and the
		 * MI helper routines for MD code such as pci_enable_msix())
		 * are all zero-based.  However, the indices passed to this
		 * function are 1-based so that the correspond 1:1 with the
		 * SYS_RES_IRQ resource IDs.
		 */
		error = PCIB_REMAP_MSIX(device_get_parent(dev), child,
		    indices[i] - 1, irqs[i]);
		/* Inputs were validated above, so a failure is a bug. */
		KASSERT(error == 0, ("Failed to remap MSI-X message"));
	}
	if (bootverbose) {
		if (cfg->msix.msix_alloc == 1)
			device_printf(child,
			    "Remapped MSI-X IRQ to index %d\n", indices[0]);
		else {
			device_printf(child, "Remapped MSI-X IRQs to indices");
			for (i = 0; i < cfg->msix.msix_alloc - 1; i++)
				printf(" %d,", indices[i]);
			printf(" %d\n", indices[cfg->msix.msix_alloc - 1]);
		}
	}
	free(irqs, M_TEMP);

	return (0);
}
1283 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the capability's control register, hand the vectors back to the
 * parent bridge, and delete the SYS_RES_IRQ entries.
 *
 * Returns 0 on success, ENODEV if no MSI-X messages are allocated, or
 * EBUSY if any of the SYS_RES_IRQ resources are still allocated.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int count, i;

	/* Do we have any messages to release? */
	if (cfg->msix.msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	/*
	 * Rids may be non-contiguous after pci_remap_msix(), so scan
	 * upward until msix_alloc entries have been seen.
	 */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		if (rle->res != NULL)
			return (EBUSY);
		count++;
	}

	/* Update control register to disable MSI-X. */
	cfg->msix.msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Release the messages. */
	for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i);
		if (rle == NULL)
			continue;
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    rle->start);
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i);
		count++;
	}

	/* Update alloc count. */
	cfg->msix.msix_alloc = 0;
	return (0);
}
1326 
1327 /*
1328  * Return the max supported MSI-X messages this device supports.
1329  * Basically, assuming the MD code can alloc messages, this function
1330  * should return the maximum value that pci_alloc_msix() can return.
1331  * Thus, it is subject to the tunables, etc.
1332  */
1333 int
1334 pci_msix_count_method(device_t dev, device_t child)
1335 {
1336 	struct pci_devinfo *dinfo = device_get_ivars(child);
1337 	pcicfgregs *cfg = &dinfo->cfg;
1338 
1339 	if (pci_do_msix && cfg->msix.msix_location != 0)
1340 		return (cfg->msix.msix_msgnum);
1341 	return (0);
1342 }
1343 
/*
 * Support for MSI message signalled interrupts.
 */
/*
 * Program the device's MSI address/data registers and set the MSI
 * enable bit in the capability's control register.  Caches the
 * address and data in the softc so pci_resume_msi() can restore them.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;

	/* Write data and address values. */
	cfg->msi.msi_addr = address;
	cfg->msi.msi_data = data;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	/*
	 * For 64-bit capable functions the data register sits after the
	 * upper address dword, so its offset differs.
	 */
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA_64BIT, data, 2);
	} else
		pci_write_config(dev, cfg->msi.msi_location +
		    PCIR_MSI_DATA, data, 2);

	/* Enable MSI in the control register. */
	cfg->msi.msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1372 
/*
 * Restore MSI registers during resume.  If MSI is enabled then
 * restore the data and address registers in addition to the control
 * register.
 */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	uint64_t address;
	uint16_t data;

	/* Only rewrite address/data if MSI was enabled at suspend time. */
	if (cfg->msi.msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = cfg->msi.msi_addr;
		data = cfg->msi.msi_data;
		pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* 64-bit functions keep the data register at a different
		 * offset, after the upper address dword. */
		if (cfg->msi.msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, cfg->msi.msi_location +
			    PCIR_MSI_DATA, data, 2);
	}
	/* The control register is restored unconditionally. */
	pci_write_config(dev, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);
}
1403 
1404 /*
1405  * Returns true if the specified device is blacklisted because MSI
1406  * doesn't work.
1407  */
1408 int
1409 pci_msi_device_blacklisted(device_t dev)
1410 {
1411 	struct pci_quirk *q;
1412 
1413 	if (!pci_honor_msi_blacklist)
1414 		return (0);
1415 
1416 	for (q = &pci_quirks[0]; q->devid; q++) {
1417 		if (q->devid == pci_get_devid(dev) &&
1418 		    q->type == PCI_QUIRK_DISABLE_MSI)
1419 			return (1);
1420 	}
1421 	return (0);
1422 }
1423 
1424 /*
1425  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1426  * we just check for blacklisted chipsets as represented by the
1427  * host-PCI bridge at device 0:0:0.  In the future, it may become
1428  * necessary to check other system attributes, such as the kenv values
1429  * that give the motherboard manufacturer and model number.
1430  */
1431 static int
1432 pci_msi_blacklisted(void)
1433 {
1434 	device_t dev;
1435 
1436 	if (!pci_honor_msi_blacklist)
1437 		return (0);
1438 
1439 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1440 	if (!(pcie_chipset || pcix_chipset))
1441 		return (1);
1442 
1443 	dev = pci_find_bsf(0, 0, 0);
1444 	if (dev != NULL)
1445 		return (pci_msi_device_blacklisted(dev));
1446 	return (0);
1447 }
1448 
1449 /*
1450  * Attempt to allocate *count MSI messages.  The actual number allocated is
1451  * returned in *count.  After this function returns, each message will be
1452  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1453  */
1454 int
1455 pci_alloc_msi_method(device_t dev, device_t child, int *count)
1456 {
1457 	struct pci_devinfo *dinfo = device_get_ivars(child);
1458 	pcicfgregs *cfg = &dinfo->cfg;
1459 	struct resource_list_entry *rle;
1460 	int actual, error, i, irqs[32];
1461 	uint16_t ctrl;
1462 
1463 	/* Don't let count == 0 get us into trouble. */
1464 	if (*count == 0)
1465 		return (EINVAL);
1466 
1467 	/* If rid 0 is allocated, then fail. */
1468 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1469 	if (rle != NULL && rle->res != NULL)
1470 		return (ENXIO);
1471 
1472 	/* Already have allocated messages? */
1473 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1474 		return (ENXIO);
1475 
1476 	/* If MSI is blacklisted for this system, fail. */
1477 	if (pci_msi_blacklisted())
1478 		return (ENXIO);
1479 
1480 	/* MSI capability present? */
1481 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
1482 		return (ENODEV);
1483 
1484 	if (bootverbose)
1485 		device_printf(child,
1486 		    "attempting to allocate %d MSI vectors (%d supported)\n",
1487 		    *count, cfg->msi.msi_msgnum);
1488 
1489 	/* Don't ask for more than the device supports. */
1490 	actual = min(*count, cfg->msi.msi_msgnum);
1491 
1492 	/* Don't ask for more than 32 messages. */
1493 	actual = min(actual, 32);
1494 
1495 	/* MSI requires power of 2 number of messages. */
1496 	if (!powerof2(actual))
1497 		return (EINVAL);
1498 
1499 	for (;;) {
1500 		/* Try to allocate N messages. */
1501 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
1502 		    cfg->msi.msi_msgnum, irqs);
1503 		if (error == 0)
1504 			break;
1505 		if (actual == 1)
1506 			return (error);
1507 
1508 		/* Try N / 2. */
1509 		actual >>= 1;
1510 	}
1511 
1512 	/*
1513 	 * We now have N actual messages mapped onto SYS_RES_IRQ
1514 	 * resources in the irqs[] array, so add new resources
1515 	 * starting at rid 1.
1516 	 */
1517 	for (i = 0; i < actual; i++)
1518 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1519 		    irqs[i], irqs[i], 1);
1520 
1521 	if (bootverbose) {
1522 		if (actual == 1)
1523 			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
1524 		else {
1525 			int run;
1526 
1527 			/*
1528 			 * Be fancy and try to print contiguous runs
1529 			 * of IRQ values as ranges.  'run' is true if
1530 			 * we are in a range.
1531 			 */
1532 			device_printf(child, "using IRQs %d", irqs[0]);
1533 			run = 0;
1534 			for (i = 1; i < actual; i++) {
1535 
1536 				/* Still in a run? */
1537 				if (irqs[i] == irqs[i - 1] + 1) {
1538 					run = 1;
1539 					continue;
1540 				}
1541 
1542 				/* Finish previous range. */
1543 				if (run) {
1544 					printf("-%d", irqs[i - 1]);
1545 					run = 0;
1546 				}
1547 
1548 				/* Start new range. */
1549 				printf(",%d", irqs[i]);
1550 			}
1551 
1552 			/* Unfinished range? */
1553 			if (run)
1554 				printf("%d", irqs[actual - 1]);
1555 			printf(" for MSI\n");
1556 		}
1557 	}
1558 
1559 	/* Update control register with actual count and enable MSI. */
1560 	ctrl = cfg->msi.msi_ctrl;
1561 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1562 	ctrl |= (ffs(actual) - 1) << 4;
1563 	cfg->msi.msi_ctrl = ctrl;
1564 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1565 
1566 	/* Update counts of alloc'd messages. */
1567 	cfg->msi.msi_alloc = actual;
1568 	*count = actual;
1569 	return (0);
1570 }
1571 
/* Release the MSI messages associated with this device. */
/*
 * Handles both MSI-X and plain MSI: MSI-X is tried first and wins if
 * any MSI-X messages are allocated.  Returns 0 on success, ENODEV if
 * nothing is allocated, or EBUSY if any SYS_RES_IRQ resource is still
 * allocated by the driver.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	/* Only ENODEV ("no MSI-X allocated") falls through to MSI. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (cfg->msi.msi_alloc == 0)
		return (ENODEV);
	KASSERT(cfg->msi.msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	/* MSI rids are always contiguous starting at 1, unlike MSI-X. */
	for (i = 0; i < cfg->msi.msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count and disable MSI. */
	cfg->msi.msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE);
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL,
	    cfg->msi.msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, cfg->msi.msi_alloc,
	    irqs);
	for (i = 0; i < cfg->msi.msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	cfg->msi.msi_alloc = 0;
	return (0);
}
1615 
1616 /*
1617  * Return the max supported MSI messages this device supports.
1618  * Basically, assuming the MD code can alloc messages, this function
1619  * should return the maximum value that pci_alloc_msi() can return.
1620  * Thus, it is subject to the tunables, etc.
1621  */
1622 int
1623 pci_msi_count_method(device_t dev, device_t child)
1624 {
1625 	struct pci_devinfo *dinfo = device_get_ivars(child);
1626 	pcicfgregs *cfg = &dinfo->cfg;
1627 
1628 	if (pci_do_msi && cfg->msi.msi_location != 0)
1629 		return (cfg->msi.msi_msgnum);
1630 	return (0);
1631 }
1632 
1633 /* free pcicfgregs structure and all depending data structures */
1634 
1635 int
1636 pci_freecfg(struct pci_devinfo *dinfo)
1637 {
1638 	struct devlist *devlist_head;
1639 	int i;
1640 
1641 	devlist_head = &pci_devq;
1642 
1643 	if (dinfo->cfg.vpd.vpd_reg) {
1644 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
1645 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
1646 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
1647 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
1648 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
1649 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
1650 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
1651 	}
1652 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
1653 	free(dinfo, M_DEVBUF);
1654 
1655 	/* increment the generation count */
1656 	pci_generation++;
1657 
1658 	/* we're losing one device */
1659 	pci_numdevs--;
1660 	return (0);
1661 }
1662 
1663 /*
1664  * PCI power manangement
1665  */
1666 int
1667 pci_set_powerstate_method(device_t dev, device_t child, int state)
1668 {
1669 	struct pci_devinfo *dinfo = device_get_ivars(child);
1670 	pcicfgregs *cfg = &dinfo->cfg;
1671 	uint16_t status;
1672 	int result, oldstate, highest, delay;
1673 
1674 	if (cfg->pp.pp_cap == 0)
1675 		return (EOPNOTSUPP);
1676 
1677 	/*
1678 	 * Optimize a no state change request away.  While it would be OK to
1679 	 * write to the hardware in theory, some devices have shown odd
1680 	 * behavior when going from D3 -> D3.
1681 	 */
1682 	oldstate = pci_get_powerstate(child);
1683 	if (oldstate == state)
1684 		return (0);
1685 
1686 	/*
1687 	 * The PCI power management specification states that after a state
1688 	 * transition between PCI power states, system software must
1689 	 * guarantee a minimal delay before the function accesses the device.
1690 	 * Compute the worst case delay that we need to guarantee before we
1691 	 * access the device.  Many devices will be responsive much more
1692 	 * quickly than this delay, but there are some that don't respond
1693 	 * instantly to state changes.  Transitions to/from D3 state require
1694 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
1695 	 * is done below with DELAY rather than a sleeper function because
1696 	 * this function can be called from contexts where we cannot sleep.
1697 	 */
1698 	highest = (oldstate > state) ? oldstate : state;
1699 	if (highest == PCI_POWERSTATE_D3)
1700 	    delay = 10000;
1701 	else if (highest == PCI_POWERSTATE_D2)
1702 	    delay = 200;
1703 	else
1704 	    delay = 0;
1705 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
1706 	    & ~PCIM_PSTAT_DMASK;
1707 	result = 0;
1708 	switch (state) {
1709 	case PCI_POWERSTATE_D0:
1710 		status |= PCIM_PSTAT_D0;
1711 		break;
1712 	case PCI_POWERSTATE_D1:
1713 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
1714 			return (EOPNOTSUPP);
1715 		status |= PCIM_PSTAT_D1;
1716 		break;
1717 	case PCI_POWERSTATE_D2:
1718 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
1719 			return (EOPNOTSUPP);
1720 		status |= PCIM_PSTAT_D2;
1721 		break;
1722 	case PCI_POWERSTATE_D3:
1723 		status |= PCIM_PSTAT_D3;
1724 		break;
1725 	default:
1726 		return (EINVAL);
1727 	}
1728 
1729 	if (bootverbose)
1730 		printf(
1731 		    "pci%d:%d:%d: Transition from D%d to D%d\n",
1732 		    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func,
1733 		    oldstate, state);
1734 
1735 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
1736 	if (delay)
1737 		DELAY(delay);
1738 	return (0);
1739 }
1740 
1741 int
1742 pci_get_powerstate_method(device_t dev, device_t child)
1743 {
1744 	struct pci_devinfo *dinfo = device_get_ivars(child);
1745 	pcicfgregs *cfg = &dinfo->cfg;
1746 	uint16_t status;
1747 	int result;
1748 
1749 	if (cfg->pp.pp_cap != 0) {
1750 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
1751 		switch (status & PCIM_PSTAT_DMASK) {
1752 		case PCIM_PSTAT_D0:
1753 			result = PCI_POWERSTATE_D0;
1754 			break;
1755 		case PCIM_PSTAT_D1:
1756 			result = PCI_POWERSTATE_D1;
1757 			break;
1758 		case PCIM_PSTAT_D2:
1759 			result = PCI_POWERSTATE_D2;
1760 			break;
1761 		case PCIM_PSTAT_D3:
1762 			result = PCI_POWERSTATE_D3;
1763 			break;
1764 		default:
1765 			result = PCI_POWERSTATE_UNKNOWN;
1766 			break;
1767 		}
1768 	} else {
1769 		/* No support, device is always at D0 */
1770 		result = PCI_POWERSTATE_D0;
1771 	}
1772 	return (result);
1773 }
1774 
1775 /*
1776  * Some convenience functions for PCI device drivers.
1777  */
1778 
1779 static __inline void
1780 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
1781 {
1782 	uint16_t	command;
1783 
1784 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1785 	command |= bit;
1786 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1787 }
1788 
1789 static __inline void
1790 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
1791 {
1792 	uint16_t	command;
1793 
1794 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1795 	command &= ~bit;
1796 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
1797 }
1798 
/* Enable bus mastering for 'child'; always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1805 
/* Disable bus mastering for 'child'; always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
1812 
1813 int
1814 pci_enable_io_method(device_t dev, device_t child, int space)
1815 {
1816 	uint16_t command;
1817 	uint16_t bit;
1818 	char *error;
1819 
1820 	bit = 0;
1821 	error = NULL;
1822 
1823 	switch(space) {
1824 	case SYS_RES_IOPORT:
1825 		bit = PCIM_CMD_PORTEN;
1826 		error = "port";
1827 		break;
1828 	case SYS_RES_MEMORY:
1829 		bit = PCIM_CMD_MEMEN;
1830 		error = "memory";
1831 		break;
1832 	default:
1833 		return (EINVAL);
1834 	}
1835 	pci_set_command_bit(dev, child, bit);
1836 	/* Some devices seem to need a brief stall here, what do to? */
1837 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1838 	if (command & bit)
1839 		return (0);
1840 	device_printf(child, "failed to enable %s mapping!\n", error);
1841 	return (ENXIO);
1842 }
1843 
1844 int
1845 pci_disable_io_method(device_t dev, device_t child, int space)
1846 {
1847 	uint16_t command;
1848 	uint16_t bit;
1849 	char *error;
1850 
1851 	bit = 0;
1852 	error = NULL;
1853 
1854 	switch(space) {
1855 	case SYS_RES_IOPORT:
1856 		bit = PCIM_CMD_PORTEN;
1857 		error = "port";
1858 		break;
1859 	case SYS_RES_MEMORY:
1860 		bit = PCIM_CMD_MEMEN;
1861 		error = "memory";
1862 		break;
1863 	default:
1864 		return (EINVAL);
1865 	}
1866 	pci_clear_command_bit(dev, child, bit);
1867 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
1868 	if (command & bit) {
1869 		device_printf(child, "failed to disable %s mapping!\n", error);
1870 		return (ENXIO);
1871 	}
1872 	return (0);
1873 }
1874 
1875 /*
1876  * New style pci driver.  Parent device is either a pci-host-bridge or a
1877  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
1878  */
1879 
/*
 * Dump the standard config header fields plus power-management, MSI
 * and MSI-X capability summaries to the console.  Only prints when
 * booted with -v (bootverbose).
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tbus=%d, slot=%d, func=%d\n",
		    cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin == 0 means the function uses no interrupt pin. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
1936 
1937 static int
1938 pci_porten(device_t pcib, int b, int s, int f)
1939 {
1940 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1941 		& PCIM_CMD_PORTEN) != 0;
1942 }
1943 
1944 static int
1945 pci_memen(device_t pcib, int b, int s, int f)
1946 {
1947 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
1948 		& PCIM_CMD_MEMEN) != 0;
1949 }
1950 
1951 /*
1952  * Add a resource based on a pci map register. Return 1 if the map
1953  * register is a 32bit map register or 2 if it is a 64bit register.
1954  */
1955 static int
1956 pci_add_map(device_t pcib, device_t bus, device_t dev,
1957     int b, int s, int f, int reg, struct resource_list *rl, int force,
1958     int prefetch)
1959 {
1960 	uint32_t map;
1961 	pci_addr_t base;
1962 	pci_addr_t start, end, count;
1963 	uint8_t ln2size;
1964 	uint8_t ln2range;
1965 	uint32_t testval;
1966 	uint16_t cmd;
1967 	int type;
1968 	int barlen;
1969 	struct resource *res;
1970 
1971 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1972 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
1973 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
1974 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
1975 
1976 	if (PCI_BAR_MEM(map))
1977 		type = SYS_RES_MEMORY;
1978 	else
1979 		type = SYS_RES_IOPORT;
1980 	ln2size = pci_mapsize(testval);
1981 	ln2range = pci_maprange(testval);
1982 	base = pci_mapbase(map);
1983 	barlen = ln2range == 64 ? 2 : 1;
1984 
1985 	/*
1986 	 * For I/O registers, if bottom bit is set, and the next bit up
1987 	 * isn't clear, we know we have a BAR that doesn't conform to the
1988 	 * spec, so ignore it.  Also, sanity check the size of the data
1989 	 * areas to the type of memory involved.  Memory must be at least
1990 	 * 16 bytes in size, while I/O ranges must be at least 4.
1991 	 */
1992 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
1993 		return (barlen);
1994 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
1995 	    (type == SYS_RES_IOPORT && ln2size < 2))
1996 		return (barlen);
1997 
1998 	if (ln2range == 64)
1999 		/* Read the other half of a 64bit map register */
2000 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2001 	if (bootverbose) {
2002 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2003 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2004 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2005 			printf(", port disabled\n");
2006 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2007 			printf(", memory disabled\n");
2008 		else
2009 			printf(", enabled\n");
2010 	}
2011 
2012 	/*
2013 	 * If base is 0, then we have problems.  It is best to ignore
2014 	 * such entries for the moment.  These will be allocated later if
2015 	 * the driver specifically requests them.  However, some
2016 	 * removable busses look better when all resources are allocated,
2017 	 * so allow '0' to be overriden.
2018 	 *
2019 	 * Similarly treat maps whose values is the same as the test value
2020 	 * read back.  These maps have had all f's written to them by the
2021 	 * BIOS in an attempt to disable the resources.
2022 	 */
2023 	if (!force && (base == 0 || map == testval))
2024 		return (barlen);
2025 	if ((u_long)base != base) {
2026 		device_printf(bus,
2027 		    "pci%d:%d:%d bar %#x too many address bits", b, s, f, reg);
2028 		return (barlen);
2029 	}
2030 
2031 	/*
2032 	 * This code theoretically does the right thing, but has
2033 	 * undesirable side effects in some cases where peripherals
2034 	 * respond oddly to having these bits enabled.  Let the user
2035 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2036 	 * default).
2037 	 */
2038 	if (pci_enable_io_modes) {
2039 		/* Turn on resources that have been left off by a lazy BIOS */
2040 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2041 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2042 			cmd |= PCIM_CMD_PORTEN;
2043 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2044 		}
2045 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2046 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2047 			cmd |= PCIM_CMD_MEMEN;
2048 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2049 		}
2050 	} else {
2051 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2052 			return (barlen);
2053 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2054 			return (barlen);
2055 	}
2056 
2057 	count = 1 << ln2size;
2058 	if (base == 0 || base == pci_mapbase(testval)) {
2059 		start = 0;	/* Let the parent deside */
2060 		end = ~0ULL;
2061 	} else {
2062 		start = base;
2063 		end = base + (1 << ln2size) - 1;
2064 	}
2065 	resource_list_add(rl, type, reg, start, end, count);
2066 
2067 	/*
2068 	 * Not quite sure what to do on failure of allocating the resource
2069 	 * since I can postulate several right answers.
2070 	 */
2071 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2072 	    prefetch ? RF_PREFETCHABLE : 0);
2073 	if (res == NULL)
2074 		return (barlen);
2075 	start = rman_get_start(res);
2076 	if ((u_long)start != start) {
2077 		/* Wait a minute!  this platform can't do this address. */
2078 		device_printf(bus,
2079 		    "pci%d.%d.%x bar %#x start %#jx, too many bits.",
2080 		    b, s, f, reg, (uintmax_t)start);
2081 		resource_list_release(rl, bus, dev, type, reg, res);
2082 		return (barlen);
2083 	}
2084 	pci_write_config(dev, reg, start, 4);
2085 	if (ln2range == 64)
2086 		pci_write_config(dev, reg + 4, start >> 32, 4);
2087 	return (barlen);
2088 }
2089 
2090 /*
2091  * For ATA devices we need to decide early what addressing mode to use.
2092  * Legacy demands that the primary and secondary ATA ports sits on the
2093  * same addresses that old ISA hardware did. This dictates that we use
2094  * those addresses and ignore the BAR's if we cannot set PCI native
2095  * addressing mode.
2096  */
2097 static void
2098 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2099     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2100 {
2101 	int rid, type, progif;
2102 #if 0
2103 	/* if this device supports PCI native addressing use it */
2104 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2105 	if ((progif & 0x8a) == 0x8a) {
2106 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2107 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2108 			printf("Trying ATA native PCI addressing mode\n");
2109 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2110 		}
2111 	}
2112 #endif
2113 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2114 	type = SYS_RES_IOPORT;
2115 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2116 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2117 		    prefetchmask & (1 << 0));
2118 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2119 		    prefetchmask & (1 << 1));
2120 	} else {
2121 		rid = PCIR_BAR(0);
2122 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2123 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2124 		    0);
2125 		rid = PCIR_BAR(1);
2126 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2127 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2128 		    0);
2129 	}
2130 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2131 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2132 		    prefetchmask & (1 << 2));
2133 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2134 		    prefetchmask & (1 << 3));
2135 	} else {
2136 		rid = PCIR_BAR(2);
2137 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2138 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2139 		    0);
2140 		rid = PCIR_BAR(3);
2141 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2142 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2143 		    0);
2144 	}
2145 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2146 	    prefetchmask & (1 << 4));
2147 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2148 	    prefetchmask & (1 << 5));
2149 }
2150 
/*
 * Pick an IRQ for 'dev' and record it as rid 0 in the device's resource
 * list.  In order of preference the IRQ comes from a hw.pci%d.%d.INT%c.irq
 * tunable, from the parent bus's interrupt routing (always consulted when
 * 'force_route' is set), or from the device's intline config register.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name), "hw.pci%d.%d.INT%c.irq",
	    cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2197 
/*
 * Populate the resource list of 'dev' with its BARs, any quirked map
 * registers, and its interrupt.  'force' and 'prefetchmask' are passed
 * through to pci_add_map() for each BAR.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2; 64bit BARs take two slots. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2250 
/*
 * Enumerate PCI bus 'busno': probe every slot/function below 'dev' and
 * add a child device for each function that responds.  'dinfo_size'
 * allows a subclassed bus to embed struct pci_devinfo in a larger
 * per-device structure.
 */
void
pci_add_children(device_t dev, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		/* Short pause before the first config read of each slot. */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an unknown/absent header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may implement functions 1..7. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, busno, s, f, dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2282 
/*
 * Create a newbus child device for 'dinfo' on 'bus', initialise its
 * resource list, snapshot its config registers and write them back,
 * then add its BAR and interrupt resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save then restore: note the ordering is deliberate. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2294 
2295 static int
2296 pci_probe(device_t dev)
2297 {
2298 
2299 	device_set_desc(dev, "PCI bus");
2300 
2301 	/* Allow other subclasses to override this driver. */
2302 	return (-1000);
2303 }
2304 
2305 static int
2306 pci_attach(device_t dev)
2307 {
2308 	int busno;
2309 
2310 	/*
2311 	 * Since there can be multiple independantly numbered PCI
2312 	 * busses on systems with multiple PCI domains, we can't use
2313 	 * the unit number to decide which bus we are probing. We ask
2314 	 * the parent pcib what our bus number is.
2315 	 */
2316 	busno = pcib_get_bus(dev);
2317 	if (bootverbose)
2318 		device_printf(dev, "physical bus=%d\n", busno);
2319 
2320 	pci_add_children(dev, busno, sizeof(struct pci_devinfo));
2321 
2322 	return (bus_generic_attach(dev));
2323 }
2324 
/*
 * Suspend the bus: save each child's config space, suspend the children,
 * and then (when power management on resume is enabled and ACPI is
 * present) place attached type 0 devices into a low power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2372 
/*
 * Resume the bus: power each child back to D0 (when ACPI-driven power
 * management is enabled), restore its saved config space, and then
 * resume the children generically.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2407 
2408 static void
2409 pci_load_vendor_data(void)
2410 {
2411 	caddr_t vendordata, info;
2412 
2413 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2414 		info = preload_search_info(vendordata, MODINFO_ADDR);
2415 		pci_vendordata = *(char **)info;
2416 		info = preload_search_info(vendordata, MODINFO_SIZE);
2417 		pci_vendordata_size = *(size_t *)info;
2418 		/* terminate the database */
2419 		pci_vendordata[pci_vendordata_size] = '\n';
2420 	}
2421 }
2422 
/*
 * Bus hook invoked when a new driver is registered: let the driver
 * identify any children, then re-probe every child that still has no
 * driver, restoring its config state first and saving it again (with
 * possible power-down) if the probe fails once more.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only children without an attached driver are candidates. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.bus, dinfo->cfg.slot, dinfo->cfg.func);
		/* Bring the saved config state back before probing. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2451 
2452 int
2453 pci_print_child(device_t dev, device_t child)
2454 {
2455 	struct pci_devinfo *dinfo;
2456 	struct resource_list *rl;
2457 	int retval = 0;
2458 
2459 	dinfo = device_get_ivars(child);
2460 	rl = &dinfo->resources;
2461 
2462 	retval += bus_print_child_header(dev, child);
2463 
2464 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
2465 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
2466 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
2467 	if (device_get_flags(dev))
2468 		retval += printf(" flags %#x", device_get_flags(dev));
2469 
2470 	retval += printf(" at device %d.%d", pci_get_slot(child),
2471 	    pci_get_function(child));
2472 
2473 	retval += bus_print_child_footer(dev, child);
2474 
2475 	return (retval);
2476 }
2477 
/*
 * Table mapping PCI class/subclass codes to human-readable descriptions,
 * used by pci_probe_nomatch() for devices with no attached driver.  An
 * entry with subclass -1 supplies the generic name for the whole class;
 * the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2564 
2565 void
2566 pci_probe_nomatch(device_t dev, device_t child)
2567 {
2568 	int	i;
2569 	char	*cp, *scp, *device;
2570 
2571 	/*
2572 	 * Look for a listing for this device in a loaded device database.
2573 	 */
2574 	if ((device = pci_describe_device(child)) != NULL) {
2575 		device_printf(dev, "<%s>", device);
2576 		free(device, M_DEVBUF);
2577 	} else {
2578 		/*
2579 		 * Scan the class/subclass descriptions for a general
2580 		 * description.
2581 		 */
2582 		cp = "unknown";
2583 		scp = NULL;
2584 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
2585 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
2586 				if (pci_nomatch_tab[i].subclass == -1) {
2587 					cp = pci_nomatch_tab[i].desc;
2588 				} else if (pci_nomatch_tab[i].subclass ==
2589 				    pci_get_subclass(child)) {
2590 					scp = pci_nomatch_tab[i].desc;
2591 				}
2592 			}
2593 		}
2594 		device_printf(dev, "<%s%s%s>",
2595 		    cp ? cp : "",
2596 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
2597 		    scp ? scp : "");
2598 	}
2599 	printf(" at device %d.%d (no driver attached)\n",
2600 	    pci_get_slot(child), pci_get_function(child));
2601 	if (pci_do_power_nodriver)
2602 		pci_cfg_save(child,
2603 		    (struct pci_devinfo *) device_get_ivars(child), 1);
2604 	return;
2605 }
2606 
2607 /*
2608  * Parse the PCI device database, if loaded, and return a pointer to a
2609  * description of the device.
2610  *
2611  * The database is flat text formatted as follows:
2612  *
2613  * Any line not in a valid format is ignored.
2614  * Lines are terminated with newline '\n' characters.
2615  *
2616  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
2617  * the vendor name.
2618  *
2619  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
2620  * - devices cannot be listed without a corresponding VENDOR line.
2621  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
2622  * another TAB, then the device name.
2623  */
2624 
2625 /*
2626  * Assuming (ptr) points to the beginning of a line in the database,
2627  * return the vendor or device and description of the next entry.
2628  * The value of (vendor) or (device) inappropriate for the entry type
2629  * is set to -1.  Returns nonzero at the end of the database.
2630  *
2631  * Note that this is slightly unrobust in the face of corrupt data;
2632  * we attempt to safeguard against this by spamming the end of the
2633  * database with a newline when we initialise.
2634  */
2635 static int
2636 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
2637 {
2638 	char	*cp = *ptr;
2639 	int	left;
2640 
2641 	*device = -1;
2642 	*vendor = -1;
2643 	**desc = '\0';
2644 	for (;;) {
2645 		left = pci_vendordata_size - (cp - pci_vendordata);
2646 		if (left <= 0) {
2647 			*ptr = cp;
2648 			return(1);
2649 		}
2650 
2651 		/* vendor entry? */
2652 		if (*cp != '\t' &&
2653 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
2654 			break;
2655 		/* device entry? */
2656 		if (*cp == '\t' &&
2657 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
2658 			break;
2659 
2660 		/* skip to next line */
2661 		while (*cp != '\n' && left > 0) {
2662 			cp++;
2663 			left--;
2664 		}
2665 		if (*cp == '\n') {
2666 			cp++;
2667 			left--;
2668 		}
2669 	}
2670 	/* skip to next line */
2671 	while (*cp != '\n' && left > 0) {
2672 		cp++;
2673 		left--;
2674 	}
2675 	if (*cp == '\n' && left > 0)
2676 		cp++;
2677 	*ptr = cp;
2678 	return(0);
2679 }
2680 
/*
 * Build a malloc'd (M_DEVBUF) "vendor, device" description string for
 * 'dev' from the loaded vendor database, or NULL if the database is
 * absent, allocation fails, or the vendor is not listed.  The caller is
 * responsible for freeing the returned string.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffer for the vendor description. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* 80-byte scratch buffer for the device description. */
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Ran into the next vendor section: stop looking. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw device id when there was no match. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
2733 
2734 int
2735 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
2736 {
2737 	struct pci_devinfo *dinfo;
2738 	pcicfgregs *cfg;
2739 
2740 	dinfo = device_get_ivars(child);
2741 	cfg = &dinfo->cfg;
2742 
2743 	switch (which) {
2744 	case PCI_IVAR_ETHADDR:
2745 		/*
2746 		 * The generic accessor doesn't deal with failure, so
2747 		 * we set the return value, then return an error.
2748 		 */
2749 		*((uint8_t **) result) = NULL;
2750 		return (EINVAL);
2751 	case PCI_IVAR_SUBVENDOR:
2752 		*result = cfg->subvendor;
2753 		break;
2754 	case PCI_IVAR_SUBDEVICE:
2755 		*result = cfg->subdevice;
2756 		break;
2757 	case PCI_IVAR_VENDOR:
2758 		*result = cfg->vendor;
2759 		break;
2760 	case PCI_IVAR_DEVICE:
2761 		*result = cfg->device;
2762 		break;
2763 	case PCI_IVAR_DEVID:
2764 		*result = (cfg->device << 16) | cfg->vendor;
2765 		break;
2766 	case PCI_IVAR_CLASS:
2767 		*result = cfg->baseclass;
2768 		break;
2769 	case PCI_IVAR_SUBCLASS:
2770 		*result = cfg->subclass;
2771 		break;
2772 	case PCI_IVAR_PROGIF:
2773 		*result = cfg->progif;
2774 		break;
2775 	case PCI_IVAR_REVID:
2776 		*result = cfg->revid;
2777 		break;
2778 	case PCI_IVAR_INTPIN:
2779 		*result = cfg->intpin;
2780 		break;
2781 	case PCI_IVAR_IRQ:
2782 		*result = cfg->intline;
2783 		break;
2784 	case PCI_IVAR_BUS:
2785 		*result = cfg->bus;
2786 		break;
2787 	case PCI_IVAR_SLOT:
2788 		*result = cfg->slot;
2789 		break;
2790 	case PCI_IVAR_FUNCTION:
2791 		*result = cfg->func;
2792 		break;
2793 	case PCI_IVAR_CMDREG:
2794 		*result = cfg->cmdreg;
2795 		break;
2796 	case PCI_IVAR_CACHELNSZ:
2797 		*result = cfg->cachelnsz;
2798 		break;
2799 	case PCI_IVAR_MINGNT:
2800 		*result = cfg->mingnt;
2801 		break;
2802 	case PCI_IVAR_MAXLAT:
2803 		*result = cfg->maxlat;
2804 		break;
2805 	case PCI_IVAR_LATTIMER:
2806 		*result = cfg->lattimer;
2807 		break;
2808 	default:
2809 		return (ENOENT);
2810 	}
2811 	return (0);
2812 }
2813 
2814 int
2815 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
2816 {
2817 	struct pci_devinfo *dinfo;
2818 
2819 	dinfo = device_get_ivars(child);
2820 
2821 	switch (which) {
2822 	case PCI_IVAR_INTPIN:
2823 		dinfo->cfg.intpin = value;
2824 		return (0);
2825 	case PCI_IVAR_ETHADDR:
2826 	case PCI_IVAR_SUBVENDOR:
2827 	case PCI_IVAR_SUBDEVICE:
2828 	case PCI_IVAR_VENDOR:
2829 	case PCI_IVAR_DEVICE:
2830 	case PCI_IVAR_DEVID:
2831 	case PCI_IVAR_CLASS:
2832 	case PCI_IVAR_SUBCLASS:
2833 	case PCI_IVAR_PROGIF:
2834 	case PCI_IVAR_REVID:
2835 	case PCI_IVAR_IRQ:
2836 	case PCI_IVAR_BUS:
2837 	case PCI_IVAR_SLOT:
2838 	case PCI_IVAR_FUNCTION:
2839 		return (EINVAL);	/* disallow for now */
2840 
2841 	default:
2842 		return (ENOENT);
2843 	}
2844 }
2845 
2846 
2847 #include "opt_ddb.h"
2848 #ifdef DDB
2849 #include <ddb/ddb.h>
2850 #include <sys/cons.h>
2851 
2852 /*
2853  * List resources based on pci map registers, used for within ddb
2854  */
2855 
2856 DB_SHOW_COMMAND(pciregs, db_pci_dump)
2857 {
2858 	struct pci_devinfo *dinfo;
2859 	struct devlist *devlist_head;
2860 	struct pci_conf *p;
2861 	const char *name;
2862 	int i, error, none_count;
2863 
2864 	none_count = 0;
2865 	/* get the head of the device queue */
2866 	devlist_head = &pci_devq;
2867 
2868 	/*
2869 	 * Go through the list of devices and print out devices
2870 	 */
2871 	for (error = 0, i = 0,
2872 	     dinfo = STAILQ_FIRST(devlist_head);
2873 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
2874 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
2875 
2876 		/* Populate pd_name and pd_unit */
2877 		name = NULL;
2878 		if (dinfo->cfg.dev)
2879 			name = device_get_name(dinfo->cfg.dev);
2880 
2881 		p = &dinfo->conf;
2882 		db_printf("%s%d@pci%d:%d:%d:\tclass=0x%06x card=0x%08x "
2883 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
2884 			(name && *name) ? name : "none",
2885 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
2886 			none_count++,
2887 			p->pc_sel.pc_bus, p->pc_sel.pc_dev,
2888 			p->pc_sel.pc_func, (p->pc_class << 16) |
2889 			(p->pc_subclass << 8) | p->pc_progif,
2890 			(p->pc_subdevice << 16) | p->pc_subvendor,
2891 			(p->pc_device << 16) | p->pc_vendor,
2892 			p->pc_revid, p->pc_hdr);
2893 	}
2894 }
2895 #endif /* DDB */
2896 
/*
 * Lazily allocate a resource backing the BAR at config offset '*rid' of
 * 'child'.  The BAR is sized by the write-all-ones probe, the requested
 * size/alignment is overridden to match the hardware, the resource is
 * allocated from the parent, entered into the child's resource list, and
 * the resulting address is written back into the BAR.  Returns NULL if
 * the BAR is unimplemented or the type does not match what the BAR
 * reports.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;
	/* The requested resource type must agree with what the BAR says. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned; raise the alignment if needed. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Write the (possibly unchanged) address back into the BAR. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
2985 
2986 
/*
 * BUS_ALLOC_RESOURCE method for the PCI bus.
 *
 * For devices that are our immediate children we perform lazy resource
 * allocation: legacy INTx interrupts are routed on first request, and
 * memory/ioport BARs that have no resource-list entry yet are sized and
 * programmed by pci_alloc_map().  Requests we don't special-case fall
 * through to the generic resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids that map to real BARs get the I/O enable. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			/* No entry yet: size and program the BAR lazily. */
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	/* Fall back to the generic resource-list allocator. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3063 
3064 void
3065 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3066 {
3067 	struct pci_devinfo *dinfo;
3068 	struct resource_list *rl;
3069 	struct resource_list_entry *rle;
3070 
3071 	if (device_get_parent(child) != dev)
3072 		return;
3073 
3074 	dinfo = device_get_ivars(child);
3075 	rl = &dinfo->resources;
3076 	rle = resource_list_find(rl, type, rid);
3077 	if (rle) {
3078 		if (rle->res) {
3079 			if (rman_get_device(rle->res) != dev ||
3080 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3081 				device_printf(dev, "delete_resource: "
3082 				    "Resource still owned by child, oops. "
3083 				    "(type=%d, rid=%d, addr=%lx)\n",
3084 				    rle->type, rle->rid,
3085 				    rman_get_start(rle->res));
3086 				return;
3087 			}
3088 			bus_release_resource(dev, type, rid, rle->res);
3089 		}
3090 		resource_list_delete(rl, type, rid);
3091 	}
3092 	/*
3093 	 * Why do we turn off the PCI configuration BAR when we delete a
3094 	 * resource? -- imp
3095 	 */
3096 	pci_write_config(child, rid, 0, 4);
3097 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3098 }
3099 
3100 struct resource_list *
3101 pci_get_resource_list (device_t dev, device_t child)
3102 {
3103 	struct pci_devinfo *dinfo = device_get_ivars(child);
3104 
3105 	return (&dinfo->resources);
3106 }
3107 
3108 uint32_t
3109 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3110 {
3111 	struct pci_devinfo *dinfo = device_get_ivars(child);
3112 	pcicfgregs *cfg = &dinfo->cfg;
3113 
3114 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3115 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3116 }
3117 
3118 void
3119 pci_write_config_method(device_t dev, device_t child, int reg,
3120     uint32_t val, int width)
3121 {
3122 	struct pci_devinfo *dinfo = device_get_ivars(child);
3123 	pcicfgregs *cfg = &dinfo->cfg;
3124 
3125 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3126 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3127 }
3128 
3129 int
3130 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3131     size_t buflen)
3132 {
3133 
3134 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3135 	    pci_get_function(child));
3136 	return (0);
3137 }
3138 
3139 int
3140 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3141     size_t buflen)
3142 {
3143 	struct pci_devinfo *dinfo;
3144 	pcicfgregs *cfg;
3145 
3146 	dinfo = device_get_ivars(child);
3147 	cfg = &dinfo->cfg;
3148 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3149 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3150 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3151 	    cfg->progif);
3152 	return (0);
3153 }
3154 
3155 int
3156 pci_assign_interrupt_method(device_t dev, device_t child)
3157 {
3158 	struct pci_devinfo *dinfo = device_get_ivars(child);
3159 	pcicfgregs *cfg = &dinfo->cfg;
3160 
3161 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3162 	    cfg->intpin));
3163 }
3164 
3165 static int
3166 pci_modevent(module_t mod, int what, void *arg)
3167 {
3168 	static struct cdev *pci_cdev;
3169 
3170 	switch (what) {
3171 	case MOD_LOAD:
3172 		STAILQ_INIT(&pci_devq);
3173 		pci_generation = 0;
3174 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3175 		    "pci");
3176 		pci_load_vendor_data();
3177 		break;
3178 
3179 	case MOD_UNLOAD:
3180 		destroy_dev(pci_cdev);
3181 		break;
3182 	}
3183 
3184 	return (0);
3185 }
3186 
/*
 * Restore the config-space state previously snapshotted into 'dinfo'
 * by pci_cfg_save(): BARs, expansion ROM, command register, interrupt
 * routing, timers, and MSI state.  Used after a power transition has
 * clobbered the device's registers.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 *
	 * NOTE(review): this assumes cfg.hdrtype has the multi-function
	 * bit already masked off when stored -- confirm against the
	 * header-parsing code.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Reprogram each BAR from the saved copy. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the writable portion of the type 0 header. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/*
	 * Restore MSI configuration if it is present.  If MSI is enabled,
	 * then restore the data and addr registers.
	 */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
}
3233 
/*
 * Snapshot the device's config-space state into 'dinfo' so that
 * pci_cfg_restore() can put it back later.  If 'setstate' is non-zero,
 * additionally power the device down to D3 according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	/* Snapshot the BARs and expansion ROM address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	/* cls is fetched before the !setstate early return; harmless. */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/*
	 * NOTE(review): a pci_do_power_nodriver value outside 0-3 matches
	 * no case and falls through to the power-down below (same as 3) --
	 * presumably the sysctl is clamped; confirm.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3317