xref: /freebsd/sys/dev/pci/pci.c (revision c2bce4a2fcf3083607e00a1734b47c249751c8a8)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
/*
 * True if config register 'reg' is the expansion-ROM BAR for the header
 * type recorded in 'cfg' (the ROM BAR lives at a different offset in
 * type-0 vs. type-1 headers).
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
75 
76 
77 static pci_addr_t	pci_mapbase(uint64_t mapreg);
78 static const char	*pci_maptype(uint64_t mapreg);
79 static int		pci_mapsize(uint64_t testval);
80 static int		pci_maprange(uint64_t mapreg);
81 static pci_addr_t	pci_rombase(uint64_t mapreg);
82 static int		pci_romsize(uint64_t testval);
83 static void		pci_fixancient(pcicfgregs *cfg);
84 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
85 
86 static int		pci_porten(device_t dev);
87 static int		pci_memen(device_t dev);
88 static void		pci_assign_interrupt(device_t bus, device_t dev,
89 			    int force_route);
90 static int		pci_add_map(device_t bus, device_t dev, int reg,
91 			    struct resource_list *rl, int force, int prefetch);
92 static int		pci_probe(device_t dev);
93 static int		pci_attach(device_t dev);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_enable_msix(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix(device_t dev, u_int index);
115 static void		pci_unmask_msix(device_t dev, u_int index);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pci_remap_intr_method(device_t bus, device_t dev,
120 			    u_int irq);
121 
/*
 * Method dispatch table for the generic PCI bus driver.  Entries map
 * newbus device/bus/PCI interface methods either to local pci_*
 * implementations or to the bus_generic_* defaults.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
174 
175 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
176 
177 static devclass_t pci_devclass;
178 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
179 MODULE_VERSION(pci, 1);
180 
181 static char	*pci_vendordata;
182 static size_t	pci_vendordata_size;
183 
184 
/*
 * A per-device workaround entry.  'devid' packs the PCI device id in
 * the upper 16 bits and the vendor id in the lower 16 bits (e.g.
 * 0x71138086 = device 0x7113, vendor 0x8086/Intel).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-type-specific argument (e.g. register offset) */
	int	arg2;	/* quirk-type-specific argument (unused so far) */
};
194 
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* zero devid terminates the table */
};
235 
236 /* map register information */
237 #define	PCI_MAPMEM	0x01	/* memory map */
238 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
239 #define	PCI_MAPPORT	0x04	/* port map */
240 
241 struct devlist pci_devq;
242 uint32_t pci_generation;
243 uint32_t pci_numdevs = 0;
244 static int pcie_chipset, pcix_chipset;
245 
246 /* sysctl vars */
247 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
248 
249 static int pci_enable_io_modes = 1;
250 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
251 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
252     &pci_enable_io_modes, 1,
253     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
254 enable these bits correctly.  We'd like to do this all the time, but there\n\
255 are some peripherals that this causes problems with.");
256 
257 static int pci_do_power_nodriver = 0;
258 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
259 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
260     &pci_do_power_nodriver, 0,
261   "Place a function into D3 state when no driver attaches to it.  0 means\n\
262 disable.  1 means conservatively place devices into D3 state.  2 means\n\
263 agressively place devices into D3 state.  3 means put absolutely everything\n\
264 in D3 state.");
265 
266 int pci_do_power_resume = 1;
267 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
268 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
269     &pci_do_power_resume, 1,
270   "Transition from D3 -> D0 on resume.");
271 
272 int pci_do_power_suspend = 1;
273 TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
274 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
275     &pci_do_power_suspend, 1,
276   "Transition from D0 -> D3 on suspend.");
277 
278 static int pci_do_msi = 1;
279 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
280 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
281     "Enable support for MSI interrupts");
282 
283 static int pci_do_msix = 1;
284 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
285 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
286     "Enable support for MSI-X interrupts");
287 
288 static int pci_honor_msi_blacklist = 1;
289 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
290 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
291     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
292 
293 #if defined(__i386__) || defined(__amd64__)
294 static int pci_usb_takeover = 1;
295 #else
296 static int pci_usb_takeover = 0;
297 #endif
298 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
299 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
300     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
301 Disable this if you depend on BIOS emulation of USB devices, that is\n\
302 you use USB devices (like keyboard or mouse) but do not load USB drivers");
303 
304 /* Find a device_t by bus/slot/function in domain 0 */
305 
306 device_t
307 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
308 {
309 
310 	return (pci_find_dbsf(0, bus, slot, func));
311 }
312 
313 /* Find a device_t by domain/bus/slot/function */
314 
315 device_t
316 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
317 {
318 	struct pci_devinfo *dinfo;
319 
320 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
321 		if ((dinfo->cfg.domain == domain) &&
322 		    (dinfo->cfg.bus == bus) &&
323 		    (dinfo->cfg.slot == slot) &&
324 		    (dinfo->cfg.func == func)) {
325 			return (dinfo->cfg.dev);
326 		}
327 	}
328 
329 	return (NULL);
330 }
331 
332 /* Find a device_t by vendor/device ID */
333 
334 device_t
335 pci_find_device(uint16_t vendor, uint16_t device)
336 {
337 	struct pci_devinfo *dinfo;
338 
339 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
340 		if ((dinfo->cfg.vendor == vendor) &&
341 		    (dinfo->cfg.device == device)) {
342 			return (dinfo->cfg.dev);
343 		}
344 	}
345 
346 	return (NULL);
347 }
348 
349 static int
350 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
351 {
352 	va_list ap;
353 	int retval;
354 
355 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
356 	    cfg->func);
357 	va_start(ap, fmt);
358 	retval += vprintf(fmt, ap);
359 	va_end(ap);
360 	return (retval);
361 }
362 
363 /* return base address of memory or port map */
364 
365 static pci_addr_t
366 pci_mapbase(uint64_t mapreg)
367 {
368 
369 	if (PCI_BAR_MEM(mapreg))
370 		return (mapreg & PCIM_BAR_MEM_BASE);
371 	else
372 		return (mapreg & PCIM_BAR_IO_BASE);
373 }
374 
375 /* return map type of memory or port map */
376 
377 static const char *
378 pci_maptype(uint64_t mapreg)
379 {
380 
381 	if (PCI_BAR_IO(mapreg))
382 		return ("I/O Port");
383 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
384 		return ("Prefetchable Memory");
385 	return ("Memory");
386 }
387 
388 /* return log2 of map size decoded for memory or port map */
389 
390 static int
391 pci_mapsize(uint64_t testval)
392 {
393 	int ln2size;
394 
395 	testval = pci_mapbase(testval);
396 	ln2size = 0;
397 	if (testval != 0) {
398 		while ((testval & 1) == 0)
399 		{
400 			ln2size++;
401 			testval >>= 1;
402 		}
403 	}
404 	return (ln2size);
405 }
406 
/* return base address of device ROM */

static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable bit and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
415 
416 /* return log2 of map size decided for device ROM */
417 
418 static int
419 pci_romsize(uint64_t testval)
420 {
421 	int ln2size;
422 
423 	testval = pci_rombase(testval);
424 	ln2size = 0;
425 	if (testval != 0) {
426 		while ((testval & 1) == 0)
427 		{
428 			ln2size++;
429 			testval >>= 1;
430 		}
431 	}
432 	return (ln2size);
433 }
434 
435 /* return log2 of address range supported by map register */
436 
437 static int
438 pci_maprange(uint64_t mapreg)
439 {
440 	int ln2range = 0;
441 
442 	if (PCI_BAR_IO(mapreg))
443 		ln2range = 32;
444 	else
445 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
446 		case PCIM_BAR_MEM_32:
447 			ln2range = 32;
448 			break;
449 		case PCIM_BAR_MEM_1MB:
450 			ln2range = 20;
451 			break;
452 		case PCIM_BAR_MEM_64:
453 			ln2range = 64;
454 			break;
455 		}
456 	return (ln2range);
457 }
458 
459 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
460 
461 static void
462 pci_fixancient(pcicfgregs *cfg)
463 {
464 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
465 		return;
466 
467 	/* PCI to PCI bridges use header type 1 */
468 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
469 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
470 }
471 
/*
 * Extract header-type-specific config data into 'cfg': the number of
 * BARs (nummaps) and, where the header defines them, the subsystem
 * vendor/device IDs.  Type-1 (bridge) headers carry no subsystem IDs
 * in fixed registers, so only nummaps is set for them here.
 */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
495 
/*
 * Read the configuration header of the function at domain/bus/slot/func
 * into a freshly allocated pci_devinfo of 'size' bytes (callers may
 * allocate a larger structure that embeds pci_devinfo at its start).
 *
 * Returns NULL if no device responds at that address (vendor/device
 * reads as all-ones), otherwise the new entry, which is also appended
 * to the global pci_devq list with its pc_conf fields mirrored from
 * the parsed registers.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones means no device is present at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		/* NOTE(review): with M_WAITOK the NULL check below should
		 * be unreachable; kept as defensive coding. */
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Walk the capability list if the device advertises one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the pciconf view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
571 
/*
 * Walk the device's PCI capability list and record the location and
 * key registers of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCIe) in 'cfg'.
 * Devices whose header type has no capability pointer are skipped.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer register depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Only honor the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
729 
730 /*
731  * PCI Vital Product Data
732  */
733 
734 #define	PCI_VPD_TIMEOUT		1000000
735 
736 static int
737 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
738 {
739 	int count = PCI_VPD_TIMEOUT;
740 
741 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
742 
743 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
744 
745 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
746 		if (--count < 0)
747 			return (ENXIO);
748 		DELAY(1);	/* limit looping */
749 	}
750 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
751 
752 	return (0);
753 }
754 
#if 0
/*
 * Write one 32-bit word of VPD data to VPD address 'reg': store the
 * word in the data register, write 'reg' with the flag bit (0x8000)
 * set, then poll until the hardware clears the flag to acknowledge
 * the write.  Returns 0 on success, ENXIO on timeout.
 *
 * Currently compiled out: nothing in this file writes VPD.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
774 
775 #undef PCI_VPD_TIMEOUT
776 
777 struct vpd_readstate {
778 	device_t	pcib;
779 	pcicfgregs	*cfg;
780 	uint32_t	val;
781 	int		bytesinval;
782 	int		off;
783 	uint8_t		cksum;
784 };
785 
786 static int
787 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
788 {
789 	uint32_t reg;
790 	uint8_t byte;
791 
792 	if (vrs->bytesinval == 0) {
793 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
794 			return (ENXIO);
795 		vrs->val = le32toh(reg);
796 		vrs->off += 4;
797 		byte = vrs->val & 0xff;
798 		vrs->bytesinval = 3;
799 	} else {
800 		vrs->val = vrs->val >> 8;
801 		byte = vrs->val & 0xff;
802 		vrs->bytesinval--;
803 	}
804 
805 	vrs->cksum += byte;
806 	*data = byte;
807 	return (0);
808 }
809 
810 static void
811 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
812 {
813 	struct vpd_readstate vrs;
814 	int state;
815 	int name;
816 	int remain;
817 	int i;
818 	int alloc, off;		/* alloc/off for RO/W arrays */
819 	int cksumvalid;
820 	int dflen;
821 	uint8_t byte;
822 	uint8_t byte2;
823 
824 	/* init vpd reader */
825 	vrs.bytesinval = 0;
826 	vrs.off = 0;
827 	vrs.pcib = pcib;
828 	vrs.cfg = cfg;
829 	vrs.cksum = 0;
830 
831 	state = 0;
832 	name = remain = i = 0;	/* shut up stupid gcc */
833 	alloc = off = 0;	/* shut up stupid gcc */
834 	dflen = 0;		/* shut up stupid gcc */
835 	cksumvalid = -1;
836 	while (state >= 0) {
837 		if (vpd_nextbyte(&vrs, &byte)) {
838 			state = -2;
839 			break;
840 		}
841 #if 0
842 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
843 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
844 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
845 #endif
846 		switch (state) {
847 		case 0:		/* item name */
848 			if (byte & 0x80) {
849 				if (vpd_nextbyte(&vrs, &byte2)) {
850 					state = -2;
851 					break;
852 				}
853 				remain = byte2;
854 				if (vpd_nextbyte(&vrs, &byte2)) {
855 					state = -2;
856 					break;
857 				}
858 				remain |= byte2 << 8;
859 				if (remain > (0x7f*4 - vrs.off)) {
860 					state = -1;
861 					printf(
862 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
863 					    cfg->domain, cfg->bus, cfg->slot,
864 					    cfg->func, remain);
865 				}
866 				name = byte & 0x7f;
867 			} else {
868 				remain = byte & 0x7;
869 				name = (byte >> 3) & 0xf;
870 			}
871 			switch (name) {
872 			case 0x2:	/* String */
873 				cfg->vpd.vpd_ident = malloc(remain + 1,
874 				    M_DEVBUF, M_WAITOK);
875 				i = 0;
876 				state = 1;
877 				break;
878 			case 0xf:	/* End */
879 				state = -1;
880 				break;
881 			case 0x10:	/* VPD-R */
882 				alloc = 8;
883 				off = 0;
884 				cfg->vpd.vpd_ros = malloc(alloc *
885 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
886 				    M_WAITOK | M_ZERO);
887 				state = 2;
888 				break;
889 			case 0x11:	/* VPD-W */
890 				alloc = 8;
891 				off = 0;
892 				cfg->vpd.vpd_w = malloc(alloc *
893 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
894 				    M_WAITOK | M_ZERO);
895 				state = 5;
896 				break;
897 			default:	/* Invalid data, abort */
898 				state = -1;
899 				break;
900 			}
901 			break;
902 
903 		case 1:	/* Identifier String */
904 			cfg->vpd.vpd_ident[i++] = byte;
905 			remain--;
906 			if (remain == 0)  {
907 				cfg->vpd.vpd_ident[i] = '\0';
908 				state = 0;
909 			}
910 			break;
911 
912 		case 2:	/* VPD-R Keyword Header */
913 			if (off == alloc) {
914 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
915 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
916 				    M_DEVBUF, M_WAITOK | M_ZERO);
917 			}
918 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
919 			if (vpd_nextbyte(&vrs, &byte2)) {
920 				state = -2;
921 				break;
922 			}
923 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
924 			if (vpd_nextbyte(&vrs, &byte2)) {
925 				state = -2;
926 				break;
927 			}
928 			dflen = byte2;
929 			if (dflen == 0 &&
930 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
931 			    2) == 0) {
932 				/*
933 				 * if this happens, we can't trust the rest
934 				 * of the VPD.
935 				 */
936 				printf(
937 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
938 				    cfg->domain, cfg->bus, cfg->slot,
939 				    cfg->func, dflen);
940 				cksumvalid = 0;
941 				state = -1;
942 				break;
943 			} else if (dflen == 0) {
944 				cfg->vpd.vpd_ros[off].value = malloc(1 *
945 				    sizeof(*cfg->vpd.vpd_ros[off].value),
946 				    M_DEVBUF, M_WAITOK);
947 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
948 			} else
949 				cfg->vpd.vpd_ros[off].value = malloc(
950 				    (dflen + 1) *
951 				    sizeof(*cfg->vpd.vpd_ros[off].value),
952 				    M_DEVBUF, M_WAITOK);
953 			remain -= 3;
954 			i = 0;
955 			/* keep in sync w/ state 3's transistions */
956 			if (dflen == 0 && remain == 0)
957 				state = 0;
958 			else if (dflen == 0)
959 				state = 2;
960 			else
961 				state = 3;
962 			break;
963 
964 		case 3:	/* VPD-R Keyword Value */
965 			cfg->vpd.vpd_ros[off].value[i++] = byte;
966 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
967 			    "RV", 2) == 0 && cksumvalid == -1) {
968 				if (vrs.cksum == 0)
969 					cksumvalid = 1;
970 				else {
971 					if (bootverbose)
972 						printf(
973 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
974 						    cfg->domain, cfg->bus,
975 						    cfg->slot, cfg->func,
976 						    vrs.cksum);
977 					cksumvalid = 0;
978 					state = -1;
979 					break;
980 				}
981 			}
982 			dflen--;
983 			remain--;
984 			/* keep in sync w/ state 2's transistions */
985 			if (dflen == 0)
986 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
987 			if (dflen == 0 && remain == 0) {
988 				cfg->vpd.vpd_rocnt = off;
989 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
990 				    off * sizeof(*cfg->vpd.vpd_ros),
991 				    M_DEVBUF, M_WAITOK | M_ZERO);
992 				state = 0;
993 			} else if (dflen == 0)
994 				state = 2;
995 			break;
996 
997 		case 4:
998 			remain--;
999 			if (remain == 0)
1000 				state = 0;
1001 			break;
1002 
1003 		case 5:	/* VPD-W Keyword Header */
1004 			if (off == alloc) {
1005 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1006 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1007 				    M_DEVBUF, M_WAITOK | M_ZERO);
1008 			}
1009 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1010 			if (vpd_nextbyte(&vrs, &byte2)) {
1011 				state = -2;
1012 				break;
1013 			}
1014 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1015 			if (vpd_nextbyte(&vrs, &byte2)) {
1016 				state = -2;
1017 				break;
1018 			}
1019 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1020 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1021 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1022 			    sizeof(*cfg->vpd.vpd_w[off].value),
1023 			    M_DEVBUF, M_WAITOK);
1024 			remain -= 3;
1025 			i = 0;
1026 			/* keep in sync w/ state 6's transitions */
1027 			if (dflen == 0 && remain == 0)
1028 				state = 0;
1029 			else if (dflen == 0)
1030 				state = 5;
1031 			else
1032 				state = 6;
1033 			break;
1034 
1035 		case 6:	/* VPD-W Keyword Value */
1036 			cfg->vpd.vpd_w[off].value[i++] = byte;
1037 			dflen--;
1038 			remain--;
1039 			/* keep in sync w/ state 5's transitions */
1040 			if (dflen == 0)
1041 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1042 			if (dflen == 0 && remain == 0) {
1043 				cfg->vpd.vpd_wcnt = off;
1044 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1045 				    off * sizeof(*cfg->vpd.vpd_w),
1046 				    M_DEVBUF, M_WAITOK | M_ZERO);
1047 				state = 0;
1048 			} else if (dflen == 0)
1049 				state = 5;
1050 			break;
1051 
1052 		default:
1053 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1054 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1055 			    state);
1056 			state = -1;
1057 			break;
1058 		}
1059 	}
1060 
1061 	if (cksumvalid == 0 || state < -1) {
1062 		/* read-only data bad, clean up */
1063 		if (cfg->vpd.vpd_ros != NULL) {
1064 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1065 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1066 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1067 			cfg->vpd.vpd_ros = NULL;
1068 		}
1069 	}
1070 	if (state < -1) {
1071 		/* I/O error, clean up */
1072 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1073 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1074 		if (cfg->vpd.vpd_ident != NULL) {
1075 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1076 			cfg->vpd.vpd_ident = NULL;
1077 		}
1078 		if (cfg->vpd.vpd_w != NULL) {
1079 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1080 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1081 			free(cfg->vpd.vpd_w, M_DEVBUF);
1082 			cfg->vpd.vpd_w = NULL;
1083 		}
1084 	}
1085 	cfg->vpd.vpd_cached = 1;
1086 #undef REG
1087 #undef WREG
1088 }
1089 
1090 int
1091 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1092 {
1093 	struct pci_devinfo *dinfo = device_get_ivars(child);
1094 	pcicfgregs *cfg = &dinfo->cfg;
1095 
1096 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1097 		pci_read_vpd(device_get_parent(dev), cfg);
1098 
1099 	*identptr = cfg->vpd.vpd_ident;
1100 
1101 	if (*identptr == NULL)
1102 		return (ENXIO);
1103 
1104 	return (0);
1105 }
1106 
1107 int
1108 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1109 	const char **vptr)
1110 {
1111 	struct pci_devinfo *dinfo = device_get_ivars(child);
1112 	pcicfgregs *cfg = &dinfo->cfg;
1113 	int i;
1114 
1115 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1116 		pci_read_vpd(device_get_parent(dev), cfg);
1117 
1118 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1119 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1120 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1121 			*vptr = cfg->vpd.vpd_ros[i].value;
1122 		}
1123 
1124 	if (i != cfg->vpd.vpd_rocnt)
1125 		return (0);
1126 
1127 	*vptr = NULL;
1128 	return (ENXIO);
1129 }
1130 
1131 /*
1132  * Find the requested extended capability and return the offset in
1133  * configuration space via the pointer provided. The function returns
1134  * 0 on success and error code otherwise.
1135  */
1136 int
1137 pci_find_extcap_method(device_t dev, device_t child, int capability,
1138     int *capreg)
1139 {
1140 	struct pci_devinfo *dinfo = device_get_ivars(child);
1141 	pcicfgregs *cfg = &dinfo->cfg;
1142 	u_int32_t status;
1143 	u_int8_t ptr;
1144 
1145 	/*
1146 	 * Check the CAP_LIST bit of the PCI status register first.
1147 	 */
1148 	status = pci_read_config(child, PCIR_STATUS, 2);
1149 	if (!(status & PCIM_STATUS_CAPPRESENT))
1150 		return (ENXIO);
1151 
1152 	/*
1153 	 * Determine the start pointer of the capabilities list.
1154 	 */
1155 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1156 	case PCIM_HDRTYPE_NORMAL:
1157 	case PCIM_HDRTYPE_BRIDGE:
1158 		ptr = PCIR_CAP_PTR;
1159 		break;
1160 	case PCIM_HDRTYPE_CARDBUS:
1161 		ptr = PCIR_CAP_PTR_2;
1162 		break;
1163 	default:
1164 		/* XXX: panic? */
1165 		return (ENXIO);		/* no extended capabilities support */
1166 	}
1167 	ptr = pci_read_config(child, ptr, 1);
1168 
1169 	/*
1170 	 * Traverse the capabilities list.
1171 	 */
1172 	while (ptr != 0) {
1173 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1174 			if (capreg != NULL)
1175 				*capreg = ptr;
1176 			return (0);
1177 		}
1178 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1179 	}
1180 
1181 	return (ENOENT);
1182 }
1183 
1184 /*
1185  * Support for MSI-X message interrupts.
1186  */
1187 void
1188 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1189 {
1190 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1191 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1192 	uint32_t offset;
1193 
1194 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1195 	offset = msix->msix_table_offset + index * 16;
1196 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1197 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1198 	bus_write_4(msix->msix_table_res, offset + 8, data);
1199 
1200 	/* Enable MSI -> HT mapping. */
1201 	pci_ht_map_msi(dev, address);
1202 }
1203 
1204 void
1205 pci_mask_msix(device_t dev, u_int index)
1206 {
1207 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1208 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1209 	uint32_t offset, val;
1210 
1211 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1212 	offset = msix->msix_table_offset + index * 16 + 12;
1213 	val = bus_read_4(msix->msix_table_res, offset);
1214 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1215 		val |= PCIM_MSIX_VCTRL_MASK;
1216 		bus_write_4(msix->msix_table_res, offset, val);
1217 	}
1218 }
1219 
1220 void
1221 pci_unmask_msix(device_t dev, u_int index)
1222 {
1223 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1224 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1225 	uint32_t offset, val;
1226 
1227 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1228 	offset = msix->msix_table_offset + index * 16 + 12;
1229 	val = bus_read_4(msix->msix_table_res, offset);
1230 	if (val & PCIM_MSIX_VCTRL_MASK) {
1231 		val &= ~PCIM_MSIX_VCTRL_MASK;
1232 		bus_write_4(msix->msix_table_res, offset, val);
1233 	}
1234 }
1235 
1236 int
1237 pci_pending_msix(device_t dev, u_int index)
1238 {
1239 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1240 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1241 	uint32_t offset, bit;
1242 
1243 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1244 	offset = msix->msix_pba_offset + (index / 32) * 4;
1245 	bit = 1 << index % 32;
1246 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1247 }
1248 
1249 /*
1250  * Restore MSI-X registers and table during resume.  If MSI-X is
1251  * enabled then walk the virtual table to restore the actual MSI-X
1252  * table.
1253  */
1254 static void
1255 pci_resume_msix(device_t dev)
1256 {
1257 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1258 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1259 	struct msix_table_entry *mte;
1260 	struct msix_vector *mv;
1261 	int i;
1262 
1263 	if (msix->msix_alloc > 0) {
1264 		/* First, mask all vectors. */
1265 		for (i = 0; i < msix->msix_msgnum; i++)
1266 			pci_mask_msix(dev, i);
1267 
1268 		/* Second, program any messages with at least one handler. */
1269 		for (i = 0; i < msix->msix_table_len; i++) {
1270 			mte = &msix->msix_table[i];
1271 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1272 				continue;
1273 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1274 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1275 			pci_unmask_msix(dev, i);
1276 		}
1277 	}
1278 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1279 	    msix->msix_ctrl, 2);
1280 }
1281 
1282 /*
1283  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1284  * returned in *count.  After this function returns, each message will be
1285  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1286  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BARs holding
	 * the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table's BAR, 'rle' is still the table's. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Cap the request at what the device advertises. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' is how many messages we successfully allocated. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/*
	 * Allocate and initialize vector data and virtual table.
	 * Initially the mapping is the identity: table entry i uses
	 * vector i + 1 (mte_vector is 1-based; 0 means unused).
	 */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1418 
1419 /*
1420  * By default, pci_alloc_msix() will assign the allocated IRQ
1421  * resources consecutively to the first N messages in the MSI-X table.
1422  * However, device drivers may want to use different layouts if they
1423  * either receive fewer messages than they asked for, or they wish to
1424  * populate the MSI-X table sparsely.  This method allows the driver
1425  * to specify what layout it wants.  It must be called after a
1426  * successful pci_alloc_msix() but before any of the associated
1427  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1428  *
1429  * The 'vectors' array contains 'count' message vectors.  The array
1430  * maps directly to the MSI-X table in that index 0 in the array
1431  * specifies the vector for the first message in the MSI-X table, etc.
1432  * The vector value in each array index can either be 0 to indicate
1433  * that no vector should be assigned to a message slot, or it can be a
1434  * number from 1 to N (where N is the count returned from a
1435  * successful call to pci_alloc_msix()) to indicate which message
1436  * vector (IRQ) to be used for the corresponding message.
1437  *
1438  * On successful return, each message with a non-zero vector will have
1439  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1440  * 1.  Additionally, if any of the IRQs allocated via the previous
1441  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1442  * will be freed back to the system automatically.
1443  *
1444  * For example, suppose a driver has a MSI-X table with 6 messages and
1445  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1446  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1447  * C.  After the call to pci_alloc_msix(), the device will be setup to
1448  * have an MSI-X table of ABC--- (where - means no vector assigned).
1449  * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1450  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1451  * be freed back to the system.  This device will also have valid
1452  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1453  *
1454  * In any case, the SYS_RES_IRQ rid X will always map to the message
1455  * at MSI-X table index X - 1 and will only be valid if a vector is
1456  * assigned to that table entry.
1457  */
1458 int
1459 pci_remap_msix_method(device_t dev, device_t child, int count,
1460     const u_int *vectors)
1461 {
1462 	struct pci_devinfo *dinfo = device_get_ivars(child);
1463 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1464 	struct resource_list_entry *rle;
1465 	int i, irq, j, *used;
1466 
1467 	/*
1468 	 * Have to have at least one message in the table but the
1469 	 * table can't be bigger than the actual MSI-X table in the
1470 	 * device.
1471 	 */
1472 	if (count == 0 || count > msix->msix_msgnum)
1473 		return (EINVAL);
1474 
1475 	/* Sanity check the vectors. */
1476 	for (i = 0; i < count; i++)
1477 		if (vectors[i] > msix->msix_alloc)
1478 			return (EINVAL);
1479 
1480 	/*
1481 	 * Make sure there aren't any holes in the vectors to be used.
1482 	 * It's a big pain to support it, and it doesn't really make
1483 	 * sense anyway.  Also, at least one vector must be used.
1484 	 */
1485 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1486 	    M_ZERO);
1487 	for (i = 0; i < count; i++)
1488 		if (vectors[i] != 0)
1489 			used[vectors[i] - 1] = 1;
1490 	for (i = 0; i < msix->msix_alloc - 1; i++)
1491 		if (used[i] == 0 && used[i + 1] == 1) {
1492 			free(used, M_DEVBUF);
1493 			return (EINVAL);
1494 		}
1495 	if (used[0] != 1) {
1496 		free(used, M_DEVBUF);
1497 		return (EINVAL);
1498 	}
1499 
1500 	/* Make sure none of the resources are allocated. */
1501 	for (i = 0; i < msix->msix_table_len; i++) {
1502 		if (msix->msix_table[i].mte_vector == 0)
1503 			continue;
1504 		if (msix->msix_table[i].mte_handlers > 0)
1505 			return (EBUSY);
1506 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1507 		KASSERT(rle != NULL, ("missing resource"));
1508 		if (rle->res != NULL)
1509 			return (EBUSY);
1510 	}
1511 
1512 	/* Free the existing resource list entries. */
1513 	for (i = 0; i < msix->msix_table_len; i++) {
1514 		if (msix->msix_table[i].mte_vector == 0)
1515 			continue;
1516 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1517 	}
1518 
1519 	/*
1520 	 * Build the new virtual table keeping track of which vectors are
1521 	 * used.
1522 	 */
1523 	free(msix->msix_table, M_DEVBUF);
1524 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1525 	    M_DEVBUF, M_WAITOK | M_ZERO);
1526 	for (i = 0; i < count; i++)
1527 		msix->msix_table[i].mte_vector = vectors[i];
1528 	msix->msix_table_len = count;
1529 
1530 	/* Free any unused IRQs and resize the vectors array if necessary. */
1531 	j = msix->msix_alloc - 1;
1532 	if (used[j] == 0) {
1533 		struct msix_vector *vec;
1534 
1535 		while (used[j] == 0) {
1536 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1537 			    msix->msix_vectors[j].mv_irq);
1538 			j--;
1539 		}
1540 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1541 		    M_WAITOK);
1542 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1543 		    (j + 1));
1544 		free(msix->msix_vectors, M_DEVBUF);
1545 		msix->msix_vectors = vec;
1546 		msix->msix_alloc = j + 1;
1547 	}
1548 	free(used, M_DEVBUF);
1549 
1550 	/* Map the IRQs onto the rids. */
1551 	for (i = 0; i < count; i++) {
1552 		if (vectors[i] == 0)
1553 			continue;
1554 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1555 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1556 		    irq, 1);
1557 	}
1558 
1559 	if (bootverbose) {
1560 		device_printf(child, "Remapped MSI-X IRQs as: ");
1561 		for (i = 0; i < count; i++) {
1562 			if (i != 0)
1563 				printf(", ");
1564 			if (vectors[i] == 0)
1565 				printf("---");
1566 			else
1567 				printf("%d",
1568 				    msix->msix_vectors[vectors[i]].mv_irq);
1569 		}
1570 		printf("\n");
1571 	}
1572 
1573 	return (0);
1574 }
1575 
1576 static int
1577 pci_release_msix(device_t dev, device_t child)
1578 {
1579 	struct pci_devinfo *dinfo = device_get_ivars(child);
1580 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1581 	struct resource_list_entry *rle;
1582 	int i;
1583 
1584 	/* Do we have any messages to release? */
1585 	if (msix->msix_alloc == 0)
1586 		return (ENODEV);
1587 
1588 	/* Make sure none of the resources are allocated. */
1589 	for (i = 0; i < msix->msix_table_len; i++) {
1590 		if (msix->msix_table[i].mte_vector == 0)
1591 			continue;
1592 		if (msix->msix_table[i].mte_handlers > 0)
1593 			return (EBUSY);
1594 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1595 		KASSERT(rle != NULL, ("missing resource"));
1596 		if (rle->res != NULL)
1597 			return (EBUSY);
1598 	}
1599 
1600 	/* Update control register to disable MSI-X. */
1601 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1602 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1603 	    msix->msix_ctrl, 2);
1604 
1605 	/* Free the resource list entries. */
1606 	for (i = 0; i < msix->msix_table_len; i++) {
1607 		if (msix->msix_table[i].mte_vector == 0)
1608 			continue;
1609 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1610 	}
1611 	free(msix->msix_table, M_DEVBUF);
1612 	msix->msix_table_len = 0;
1613 
1614 	/* Release the IRQs. */
1615 	for (i = 0; i < msix->msix_alloc; i++)
1616 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1617 		    msix->msix_vectors[i].mv_irq);
1618 	free(msix->msix_vectors, M_DEVBUF);
1619 	msix->msix_alloc = 0;
1620 	return (0);
1621 }
1622 
1623 /*
1624  * Return the max supported MSI-X messages this device supports.
1625  * Basically, assuming the MD code can alloc messages, this function
1626  * should return the maximum value that pci_alloc_msix() can return.
1627  * Thus, it is subject to the tunables, etc.
1628  */
1629 int
1630 pci_msix_count_method(device_t dev, device_t child)
1631 {
1632 	struct pci_devinfo *dinfo = device_get_ivars(child);
1633 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1634 
1635 	if (pci_do_msix && msix->msix_location != 0)
1636 		return (msix->msix_msgnum);
1637 	return (0);
1638 }
1639 
1640 /*
1641  * HyperTransport MSI mapping control
1642  */
1643 void
1644 pci_ht_map_msi(device_t dev, uint64_t addr)
1645 {
1646 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1647 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1648 
1649 	if (!ht->ht_msimap)
1650 		return;
1651 
1652 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1653 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1654 		/* Enable MSI -> HT mapping. */
1655 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1656 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1657 		    ht->ht_msictrl, 2);
1658 	}
1659 
1660 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1661 		/* Disable MSI -> HT mapping. */
1662 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1663 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1664 		    ht->ht_msictrl, 2);
1665 	}
1666 }
1667 
1668 int
1669 pci_get_max_read_req(device_t dev)
1670 {
1671 	int cap;
1672 	uint16_t val;
1673 
1674 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1675 		return (0);
1676 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1677 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1678 	val >>= 12;
1679 	return (1 << (val + 7));
1680 }
1681 
1682 int
1683 pci_set_max_read_req(device_t dev, int size)
1684 {
1685 	int cap;
1686 	uint16_t val;
1687 
1688 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1689 		return (0);
1690 	if (size < 128)
1691 		size = 128;
1692 	if (size > 4096)
1693 		size = 4096;
1694 	size = (1 << (fls(size) - 1));
1695 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1696 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1697 	val |= (fls(size) - 8) << 12;
1698 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1699 	return (size);
1700 }
1701 
1702 /*
1703  * Support for MSI message signalled interrupts.
1704  */
1705 void
1706 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1707 {
1708 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1709 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1710 
1711 	/* Write data and address values. */
1712 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1713 	    address & 0xffffffff, 4);
1714 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1715 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1716 		    address >> 32, 4);
1717 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1718 		    data, 2);
1719 	} else
1720 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1721 		    2);
1722 
1723 	/* Enable MSI in the control register. */
1724 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1725 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1726 	    2);
1727 
1728 	/* Enable MSI -> HT mapping. */
1729 	pci_ht_map_msi(dev, address);
1730 }
1731 
1732 void
1733 pci_disable_msi(device_t dev)
1734 {
1735 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1736 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1737 
1738 	/* Disable MSI -> HT mapping. */
1739 	pci_ht_map_msi(dev, 0);
1740 
1741 	/* Disable MSI in the control register. */
1742 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1743 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1744 	    2);
1745 }
1746 
1747 /*
1748  * Restore MSI registers during resume.  If MSI is enabled then
1749  * restore the data and address registers in addition to the control
1750  * register.
1751  */
1752 static void
1753 pci_resume_msi(device_t dev)
1754 {
1755 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1756 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1757 	uint64_t address;
1758 	uint16_t data;
1759 
1760 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1761 		address = msi->msi_addr;
1762 		data = msi->msi_data;
1763 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1764 		    address & 0xffffffff, 4);
1765 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1766 			pci_write_config(dev, msi->msi_location +
1767 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1768 			pci_write_config(dev, msi->msi_location +
1769 			    PCIR_MSI_DATA_64BIT, data, 2);
1770 		} else
1771 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1772 			    data, 2);
1773 	}
1774 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1775 	    2);
1776 }
1777 
1778 static int
1779 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1780 {
1781 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1782 	pcicfgregs *cfg = &dinfo->cfg;
1783 	struct resource_list_entry *rle;
1784 	struct msix_table_entry *mte;
1785 	struct msix_vector *mv;
1786 	uint64_t addr;
1787 	uint32_t data;
1788 	int error, i, j;
1789 
1790 	/*
1791 	 * Handle MSI first.  We try to find this IRQ among our list
1792 	 * of MSI IRQs.  If we find it, we request updated address and
1793 	 * data registers and apply the results.
1794 	 */
1795 	if (cfg->msi.msi_alloc > 0) {
1796 
1797 		/* If we don't have any active handlers, nothing to do. */
1798 		if (cfg->msi.msi_handlers == 0)
1799 			return (0);
1800 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1801 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1802 			    i + 1);
1803 			if (rle->start == irq) {
1804 				error = PCIB_MAP_MSI(device_get_parent(bus),
1805 				    dev, irq, &addr, &data);
1806 				if (error)
1807 					return (error);
1808 				pci_disable_msi(dev);
1809 				dinfo->cfg.msi.msi_addr = addr;
1810 				dinfo->cfg.msi.msi_data = data;
1811 				pci_enable_msi(dev, addr, data);
1812 				return (0);
1813 			}
1814 		}
1815 		return (ENOENT);
1816 	}
1817 
1818 	/*
1819 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1820 	 * we request the updated mapping info.  If that works, we go
1821 	 * through all the slots that use this IRQ and update them.
1822 	 */
1823 	if (cfg->msix.msix_alloc > 0) {
1824 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1825 			mv = &cfg->msix.msix_vectors[i];
1826 			if (mv->mv_irq == irq) {
1827 				error = PCIB_MAP_MSI(device_get_parent(bus),
1828 				    dev, irq, &addr, &data);
1829 				if (error)
1830 					return (error);
1831 				mv->mv_address = addr;
1832 				mv->mv_data = data;
1833 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1834 					mte = &cfg->msix.msix_table[j];
1835 					if (mte->mte_vector != i + 1)
1836 						continue;
1837 					if (mte->mte_handlers == 0)
1838 						continue;
1839 					pci_mask_msix(dev, j);
1840 					pci_enable_msix(dev, j, addr, data);
1841 					pci_unmask_msix(dev, j);
1842 				}
1843 			}
1844 		}
1845 		return (ENOENT);
1846 	}
1847 
1848 	return (ENOENT);
1849 }
1850 
1851 /*
1852  * Returns true if the specified device is blacklisted because MSI
1853  * doesn't work.
1854  */
1855 int
1856 pci_msi_device_blacklisted(device_t dev)
1857 {
1858 	struct pci_quirk *q;
1859 
1860 	if (!pci_honor_msi_blacklist)
1861 		return (0);
1862 
1863 	for (q = &pci_quirks[0]; q->devid; q++) {
1864 		if (q->devid == pci_get_devid(dev) &&
1865 		    q->type == PCI_QUIRK_DISABLE_MSI)
1866 			return (1);
1867 	}
1868 	return (0);
1869 }
1870 
1871 /*
1872  * Returns true if a specified chipset supports MSI when it is
1873  * emulated hardware in a virtual machine.
1874  */
1875 static int
1876 pci_msi_vm_chipset(device_t dev)
1877 {
1878 	struct pci_quirk *q;
1879 
1880 	for (q = &pci_quirks[0]; q->devid; q++) {
1881 		if (q->devid == pci_get_devid(dev) &&
1882 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1883 			return (1);
1884 	}
1885 	return (0);
1886 }
1887 
1888 /*
1889  * Determine if MSI is blacklisted globally on this system.  Currently,
1890  * we just check for blacklisted chipsets as represented by the
1891  * host-PCI bridge at device 0:0:0.  In the future, it may become
1892  * necessary to check other system attributes, such as the kenv values
1893  * that give the motherboard manufacturer and model number.
1894  */
1895 static int
1896 pci_msi_blacklisted(void)
1897 {
1898 	device_t dev;
1899 
1900 	if (!pci_honor_msi_blacklist)
1901 		return (0);
1902 
1903 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1904 	if (!(pcie_chipset || pcix_chipset)) {
1905 		if (vm_guest != VM_GUEST_NO) {
1906 			dev = pci_find_bsf(0, 0, 0);
1907 			if (dev != NULL)
1908 				return (pci_msi_vm_chipset(dev) == 0);
1909 		}
1910 		return (1);
1911 	}
1912 
1913 	dev = pci_find_bsf(0, 0, 0);
1914 	if (dev != NULL)
1915 		return (pci_msi_device_blacklisted(dev));
1916 	return (0);
1917 }
1918 
1919 /*
1920  * Attempt to allocate *count MSI messages.  The actual number allocated is
1921  * returned in *count.  After this function returns, each message will be
1922  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1923  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/* Keep halving the request until the bridge grants it. */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2042 
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/*
	 * Try MSI-X first.  pci_release_msix() returns ENODEV only when
	 * no MSI-X messages are allocated; any other result (success or
	 * failure) is final.
	 */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  Collect the
	 * IRQ numbers while walking the list so they can be handed back
	 * to the parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/*
	 * Update control register with 0 count.  The caller must have
	 * already disabled MSI before releasing the messages.
	 */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2091 
2092 /*
2093  * Return the max supported MSI messages this device supports.
2094  * Basically, assuming the MD code can alloc messages, this function
2095  * should return the maximum value that pci_alloc_msi() can return.
2096  * Thus, it is subject to the tunables, etc.
2097  */
2098 int
2099 pci_msi_count_method(device_t dev, device_t child)
2100 {
2101 	struct pci_devinfo *dinfo = device_get_ivars(child);
2102 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2103 
2104 	if (pci_do_msi && msi->msi_location != 0)
2105 		return (msi->msi_msgnum);
2106 	return (0);
2107 }
2108 
/* free pcicfgregs structure and all depending data structures */

int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free the VPD identifier and read-only/writable keyword arrays. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Free the saved BAR records (safe variant: entries are freed). */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink from the global device list and release the devinfo. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2142 
/*
 * PCI power manangement
 */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int result, oldstate, highest, delay;

	/* No power management capability: cannot change state. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the other PMCSR bits; only the D-state field changes. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	result = 0;
	/*
	 * Merge in the requested state, rejecting D1/D2 when the
	 * capability register says the device does not support them.
	 */
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
		    state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2218 
2219 int
2220 pci_get_powerstate_method(device_t dev, device_t child)
2221 {
2222 	struct pci_devinfo *dinfo = device_get_ivars(child);
2223 	pcicfgregs *cfg = &dinfo->cfg;
2224 	uint16_t status;
2225 	int result;
2226 
2227 	if (cfg->pp.pp_cap != 0) {
2228 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2229 		switch (status & PCIM_PSTAT_DMASK) {
2230 		case PCIM_PSTAT_D0:
2231 			result = PCI_POWERSTATE_D0;
2232 			break;
2233 		case PCIM_PSTAT_D1:
2234 			result = PCI_POWERSTATE_D1;
2235 			break;
2236 		case PCIM_PSTAT_D2:
2237 			result = PCI_POWERSTATE_D2;
2238 			break;
2239 		case PCIM_PSTAT_D3:
2240 			result = PCI_POWERSTATE_D3;
2241 			break;
2242 		default:
2243 			result = PCI_POWERSTATE_UNKNOWN;
2244 			break;
2245 		}
2246 	} else {
2247 		/* No support, device is always at D0 */
2248 		result = PCI_POWERSTATE_D0;
2249 	}
2250 	return (result);
2251 }
2252 
2253 /*
2254  * Some convenience functions for PCI device drivers.
2255  */
2256 
2257 static __inline void
2258 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2259 {
2260 	uint16_t	command;
2261 
2262 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2263 	command |= bit;
2264 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2265 }
2266 
2267 static __inline void
2268 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2269 {
2270 	uint16_t	command;
2271 
2272 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2273 	command &= ~bit;
2274 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2275 }
2276 
2277 int
2278 pci_enable_busmaster_method(device_t dev, device_t child)
2279 {
2280 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2281 	return (0);
2282 }
2283 
2284 int
2285 pci_disable_busmaster_method(device_t dev, device_t child)
2286 {
2287 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2288 	return (0);
2289 }
2290 
2291 int
2292 pci_enable_io_method(device_t dev, device_t child, int space)
2293 {
2294 	uint16_t bit;
2295 
2296 	switch(space) {
2297 	case SYS_RES_IOPORT:
2298 		bit = PCIM_CMD_PORTEN;
2299 		break;
2300 	case SYS_RES_MEMORY:
2301 		bit = PCIM_CMD_MEMEN;
2302 		break;
2303 	default:
2304 		return (EINVAL);
2305 	}
2306 	pci_set_command_bit(dev, child, bit);
2307 	return (0);
2308 }
2309 
2310 int
2311 pci_disable_io_method(device_t dev, device_t child, int space)
2312 {
2313 	uint16_t bit;
2314 
2315 	switch(space) {
2316 	case SYS_RES_IOPORT:
2317 		bit = PCIM_CMD_PORTEN;
2318 		break;
2319 	case SYS_RES_MEMORY:
2320 		bit = PCIM_CMD_MEMEN;
2321 		break;
2322 	default:
2323 		return (EINVAL);
2324 	}
2325 	pci_clear_command_bit(dev, child, bit);
2326 	return (0);
2327 }
2328 
2329 /*
2330  * New style pci driver.  Parent device is either a pci-host-bridge or a
2331  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2332  */
2333 
/*
 * Dump the interesting fields of a device's parsed config header when
 * booting verbose: IDs, location, class, command/status, timing, the
 * interrupt pin/line, and the power-management, MSI, and MSI-X
 * capabilities if present.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin 0 means the function uses no INTx interrupt. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Re-read the live power state for the report. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use separate ones. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2390 
2391 static int
2392 pci_porten(device_t dev)
2393 {
2394 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2395 }
2396 
2397 static int
2398 pci_memen(device_t dev)
2399 {
2400 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2401 }
2402 
/*
 * Read a BAR's current value into *mapp and its sizing value (the value
 * read back after writing all 1's) into *testvalp, restoring the
 * original contents afterwards.  Handles 64-bit memory BARs and the
 * expansion ROM BAR.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps bit 0 (the ROM enable bit) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR occupies this register and the next one. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2466 
/*
 * Program the BAR described by 'pm' with 'base' (both halves for a
 * 64-bit BAR) and refresh the cached pm_value from the hardware.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Read back so pm_value reflects what the device actually latched. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev, pm->pm_reg + 4, 4) << 32;
}
2486 
2487 struct pci_map *
2488 pci_find_bar(device_t dev, int reg)
2489 {
2490 	struct pci_devinfo *dinfo;
2491 	struct pci_map *pm;
2492 
2493 	dinfo = device_get_ivars(dev);
2494 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2495 		if (pm->pm_reg == reg)
2496 			return (pm);
2497 	}
2498 	return (NULL);
2499 }
2500 
2501 int
2502 pci_bar_enabled(device_t dev, struct pci_map *pm)
2503 {
2504 	struct pci_devinfo *dinfo;
2505 	uint16_t cmd;
2506 
2507 	dinfo = device_get_ivars(dev);
2508 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2509 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2510 		return (0);
2511 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2512 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2513 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2514 	else
2515 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2516 }
2517 
/*
 * Record a newly-sized BAR in the device's map list.  The list is kept
 * sorted by config register offset; duplicates trigger an assertion.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry after which the new record keeps the list sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2542 
/* Write the cached values of all saved BARs back to the device. */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		/* The ROM BAR is always 32-bit; others may be 64-bit. */
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2562 
2563 /*
2564  * Add a resource based on a pci map register. Return 1 if the map
2565  * register is a 32bit map register or 2 if it is a 64bit register.
2566  */
2567 static int
2568 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2569     int force, int prefetch)
2570 {
2571 	struct pci_map *pm;
2572 	pci_addr_t base, map, testval;
2573 	pci_addr_t start, end, count;
2574 	int barlen, basezero, maprange, mapsize, type;
2575 	uint16_t cmd;
2576 	struct resource *res;
2577 
2578 	pci_read_bar(dev, reg, &map, &testval);
2579 	if (PCI_BAR_MEM(map)) {
2580 		type = SYS_RES_MEMORY;
2581 		if (map & PCIM_BAR_MEM_PREFETCH)
2582 			prefetch = 1;
2583 	} else
2584 		type = SYS_RES_IOPORT;
2585 	mapsize = pci_mapsize(testval);
2586 	base = pci_mapbase(map);
2587 #ifdef __PCI_BAR_ZERO_VALID
2588 	basezero = 0;
2589 #else
2590 	basezero = base == 0;
2591 #endif
2592 	maprange = pci_maprange(map);
2593 	barlen = maprange == 64 ? 2 : 1;
2594 
2595 	/*
2596 	 * For I/O registers, if bottom bit is set, and the next bit up
2597 	 * isn't clear, we know we have a BAR that doesn't conform to the
2598 	 * spec, so ignore it.  Also, sanity check the size of the data
2599 	 * areas to the type of memory involved.  Memory must be at least
2600 	 * 16 bytes in size, while I/O ranges must be at least 4.
2601 	 */
2602 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2603 		return (barlen);
2604 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2605 	    (type == SYS_RES_IOPORT && mapsize < 2))
2606 		return (barlen);
2607 
2608 	/* Save a record of this BAR. */
2609 	pm = pci_add_bar(dev, reg, map, mapsize);
2610 	if (bootverbose) {
2611 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2612 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2613 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2614 			printf(", port disabled\n");
2615 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2616 			printf(", memory disabled\n");
2617 		else
2618 			printf(", enabled\n");
2619 	}
2620 
2621 	/*
2622 	 * If base is 0, then we have problems if this architecture does
2623 	 * not allow that.  It is best to ignore such entries for the
2624 	 * moment.  These will be allocated later if the driver specifically
2625 	 * requests them.  However, some removable busses look better when
2626 	 * all resources are allocated, so allow '0' to be overriden.
2627 	 *
2628 	 * Similarly treat maps whose values is the same as the test value
2629 	 * read back.  These maps have had all f's written to them by the
2630 	 * BIOS in an attempt to disable the resources.
2631 	 */
2632 	if (!force && (basezero || map == testval))
2633 		return (barlen);
2634 	if ((u_long)base != base) {
2635 		device_printf(bus,
2636 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2637 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2638 		    pci_get_function(dev), reg);
2639 		return (barlen);
2640 	}
2641 
2642 	/*
2643 	 * This code theoretically does the right thing, but has
2644 	 * undesirable side effects in some cases where peripherals
2645 	 * respond oddly to having these bits enabled.  Let the user
2646 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2647 	 * default).
2648 	 */
2649 	if (pci_enable_io_modes) {
2650 		/* Turn on resources that have been left off by a lazy BIOS */
2651 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2652 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2653 			cmd |= PCIM_CMD_PORTEN;
2654 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2655 		}
2656 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2657 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2658 			cmd |= PCIM_CMD_MEMEN;
2659 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2660 		}
2661 	} else {
2662 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2663 			return (barlen);
2664 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2665 			return (barlen);
2666 	}
2667 
2668 	count = (pci_addr_t)1 << mapsize;
2669 	if (basezero || base == pci_mapbase(testval)) {
2670 		start = 0;	/* Let the parent decide. */
2671 		end = ~0ULL;
2672 	} else {
2673 		start = base;
2674 		end = base + count - 1;
2675 	}
2676 	resource_list_add(rl, type, reg, start, end, count);
2677 
2678 	/*
2679 	 * Try to allocate the resource for this BAR from our parent
2680 	 * so that this resource range is already reserved.  The
2681 	 * driver for this device will later inherit this resource in
2682 	 * pci_alloc_resource().
2683 	 */
2684 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2685 	    prefetch ? RF_PREFETCHABLE : 0);
2686 	if (res == NULL) {
2687 		/*
2688 		 * If the allocation fails, clear the BAR and delete
2689 		 * the resource list entry to force
2690 		 * pci_alloc_resource() to allocate resources from the
2691 		 * parent.
2692 		 */
2693 		resource_list_delete(rl, type, reg);
2694 		start = 0;
2695 	} else
2696 		start = rman_get_start(res);
2697 	pci_write_bar(dev, pm, start);
2698 	return (barlen);
2699 }
2700 
2701 /*
2702  * For ATA devices we need to decide early what addressing mode to use.
2703  * Legacy demands that the primary and secondary ATA ports sits on the
2704  * same addresses that old ISA hardware did. This dictates that we use
2705  * those addresses and ignore the BAR's if we cannot set PCI native
2706  * addressing mode.
2707  */
2708 static void
2709 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2710     uint32_t prefetchmask)
2711 {
2712 	struct resource *r;
2713 	int rid, type, progif;
2714 #if 0
2715 	/* if this device supports PCI native addressing use it */
2716 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2717 	if ((progif & 0x8a) == 0x8a) {
2718 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2719 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2720 			printf("Trying ATA native PCI addressing mode\n");
2721 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2722 		}
2723 	}
2724 #endif
2725 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2726 	type = SYS_RES_IOPORT;
2727 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2728 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2729 		    prefetchmask & (1 << 0));
2730 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2731 		    prefetchmask & (1 << 1));
2732 	} else {
2733 		rid = PCIR_BAR(0);
2734 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2735 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2736 		    0x1f7, 8, 0);
2737 		rid = PCIR_BAR(1);
2738 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2739 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2740 		    0x3f6, 1, 0);
2741 	}
2742 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2743 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2744 		    prefetchmask & (1 << 2));
2745 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2746 		    prefetchmask & (1 << 3));
2747 	} else {
2748 		rid = PCIR_BAR(2);
2749 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2750 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2751 		    0x177, 8, 0);
2752 		rid = PCIR_BAR(3);
2753 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2754 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2755 		    0x376, 1, 0);
2756 	}
2757 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2758 	    prefetchmask & (1 << 4));
2759 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2760 	    prefetchmask & (1 << 5));
2761 }
2762 
/*
 * Determine the INTx IRQ for a device — from a loader tunable, the
 * bus's interrupt routing, or the intline register — and record it as
 * the rid 0 SYS_RES_IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject tunable values outside the usable 1..254 range. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2810 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's register space (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 x 1ms for the SMM driver to release OHCI_IR. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2847 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* UHCI registers live in I/O port space, unlike OHCI/EHCI. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2871 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's register space (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk
	 * the extended-capability list in config space looking for the
	 * legacy-support (LEGSUP) capability.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* A set BIOS semaphore means the BIOS owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 x 1ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2927 
/*
 * Populate a device's resource list: size and reserve its BARs (with
 * special handling for legacy ATA controllers and quirked devices),
 * assign its INTx interrupt, and perform early USB takeover from SMM
 * where enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2982 
/*
 * Enumerate every slot/function on the given bus and add a child
 * device for each PCI function that responds.
 *
 * dinfo_size lets subclassed busses (e.g. cardbus) embed struct
 * pci_devinfo at the head of a larger per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand: read config space of slot 's', function 'f' on this bus. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;		/* the REG() below must read function 0 */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Scan all functions only if function 0 is multi-function. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3015 
/*
 * Create a device_t for an enumerated PCI function and hook the
 * config-space state in 'dinfo' to it as instance variables.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Snapshot the registers, then write them back before decoding
	 * BARs — presumably this normalizes device state (e.g. after a
	 * firmware power-down); confirm against pci_cfg_save()/
	 * pci_cfg_restore().
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3027 
/*
 * Device probe method for the generic PCI bus driver itself.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3037 
/*
 * Attach the PCI bus: obtain our domain/bus numbers from the parent
 * bridge, enumerate all child functions, then attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3057 
3058 static void
3059 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3060     int state)
3061 {
3062 	device_t child, pcib;
3063 	struct pci_devinfo *dinfo;
3064 	int dstate, i;
3065 
3066 	/*
3067 	 * Set the device to the given state.  If the firmware suggests
3068 	 * a different power state, use it instead.  If power management
3069 	 * is not present, the firmware is responsible for managing
3070 	 * device power.  Skip children who aren't attached since they
3071 	 * are handled separately.
3072 	 */
3073 	pcib = device_get_parent(dev);
3074 	for (i = 0; i < numdevs; i++) {
3075 		child = devlist[i];
3076 		dinfo = device_get_ivars(child);
3077 		dstate = state;
3078 		if (device_is_attached(child) &&
3079 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3080 			pci_set_powerstate(child, dstate);
3081 	}
3082 }
3083 
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children via the generic bus code, then (if the
 * pci_do_power_suspend tunable is set) power them down to D3.
 *
 * Returns 0 on success or the first error encountered; on error the
 * children are left un-suspended.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3115 
/*
 * Bus resume method: power children back to D0 (if
 * pci_do_power_resume is set), restore their config space, then
 * resume them — "critical" device classes (display, memory, bridges,
 * base peripherals) first, everything else in a second pass.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/*
		 * Driverless devices get re-saved (and powered down by
		 * pci_cfg_save's setstate argument) since nothing else
		 * will manage them.
		 */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			/* Already resumed in the first pass. */
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3171 
/*
 * Locate the preloaded "pci_vendor_data" module (the PCI ID database)
 * and publish it through pci_vendordata/pci_vendordata_size for the
 * device-description code.  Quietly does nothing if no database was
 * preloaded.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database: the parser relies on a
			 * trailing newline.  NOTE(review): this stores one
			 * byte past 'sz' — presumably the preload area is
			 * padded enough for this; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3191 
/*
 * Bus driver_added method: when a new PCI driver is loaded, give it a
 * chance to claim any still-unattached children.  Config state is
 * restored before the probe and re-saved (with power-down) if no
 * driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3220 
/*
 * Bus setup_intr method.  After installing the handler via the
 * generic bus code, program the child's interrupt hardware: rid 0 is
 * legacy INTx (make sure it isn't masked), any other rid is an
 * MSI/MSI-X message that must be mapped by the parent bridge and
 * enabled on first use.  Handler reference counts track when the
 * message needs (de)programming.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the message lazily, on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid selects a table entry (1-based). */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/*
		 * On the success path we fall through to here with
		 * error == 0, so this only fires after a "goto bad".
		 */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3312 
3313 int
3314 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3315     void *cookie)
3316 {
3317 	struct msix_table_entry *mte;
3318 	struct resource_list_entry *rle;
3319 	struct pci_devinfo *dinfo;
3320 	int error, rid;
3321 
3322 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3323 		return (EINVAL);
3324 
3325 	/* If this isn't a direct child, just bail out */
3326 	if (device_get_parent(child) != dev)
3327 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3328 
3329 	rid = rman_get_rid(irq);
3330 	if (rid == 0) {
3331 		/* Mask INTx */
3332 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3333 	} else {
3334 		/*
3335 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3336 		 * decrement the appropriate handlers count and mask the
3337 		 * MSI-X message, or disable MSI messages if the count
3338 		 * drops to 0.
3339 		 */
3340 		dinfo = device_get_ivars(child);
3341 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3342 		if (rle->res != irq)
3343 			return (EINVAL);
3344 		if (dinfo->cfg.msi.msi_alloc > 0) {
3345 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3346 			    ("MSI-X index too high"));
3347 			if (dinfo->cfg.msi.msi_handlers == 0)
3348 				return (EINVAL);
3349 			dinfo->cfg.msi.msi_handlers--;
3350 			if (dinfo->cfg.msi.msi_handlers == 0)
3351 				pci_disable_msi(child);
3352 		} else {
3353 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3354 			    ("No MSI or MSI-X interrupts allocated"));
3355 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3356 			    ("MSI-X index too high"));
3357 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3358 			if (mte->mte_handlers == 0)
3359 				return (EINVAL);
3360 			mte->mte_handlers--;
3361 			if (mte->mte_handlers == 0)
3362 				pci_mask_msix(child, rid - 1);
3363 		}
3364 	}
3365 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3366 	if (rid > 0)
3367 		KASSERT(error == 0,
3368 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3369 	return (error);
3370 }
3371 
3372 int
3373 pci_print_child(device_t dev, device_t child)
3374 {
3375 	struct pci_devinfo *dinfo;
3376 	struct resource_list *rl;
3377 	int retval = 0;
3378 
3379 	dinfo = device_get_ivars(child);
3380 	rl = &dinfo->resources;
3381 
3382 	retval += bus_print_child_header(dev, child);
3383 
3384 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3385 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3386 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3387 	if (device_get_flags(dev))
3388 		retval += printf(" flags %#x", device_get_flags(dev));
3389 
3390 	retval += printf(" at device %d.%d", pci_get_slot(child),
3391 	    pci_get_function(child));
3392 
3393 	retval += bus_print_child_footer(dev, child);
3394 
3395 	return (retval);
3396 }
3397 
/*
 * Class/subclass -> human-readable description table, consulted by
 * pci_probe_nomatch() when no driver attaches and the vendor database
 * has no entry for the device.  A subclass of -1 supplies the generic
 * fallback description for the whole class.  Terminated by a NULL
 * desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3489 
3490 void
3491 pci_probe_nomatch(device_t dev, device_t child)
3492 {
3493 	int	i;
3494 	char	*cp, *scp, *device;
3495 
3496 	/*
3497 	 * Look for a listing for this device in a loaded device database.
3498 	 */
3499 	if ((device = pci_describe_device(child)) != NULL) {
3500 		device_printf(dev, "<%s>", device);
3501 		free(device, M_DEVBUF);
3502 	} else {
3503 		/*
3504 		 * Scan the class/subclass descriptions for a general
3505 		 * description.
3506 		 */
3507 		cp = "unknown";
3508 		scp = NULL;
3509 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3510 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3511 				if (pci_nomatch_tab[i].subclass == -1) {
3512 					cp = pci_nomatch_tab[i].desc;
3513 				} else if (pci_nomatch_tab[i].subclass ==
3514 				    pci_get_subclass(child)) {
3515 					scp = pci_nomatch_tab[i].desc;
3516 				}
3517 			}
3518 		}
3519 		device_printf(dev, "<%s%s%s>",
3520 		    cp ? cp : "",
3521 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3522 		    scp ? scp : "");
3523 	}
3524 	printf(" at device %d.%d (no driver attached)\n",
3525 	    pci_get_slot(child), pci_get_function(child));
3526 	pci_cfg_save(child, device_get_ivars(child), 1);
3527 	return;
3528 }
3529 
3530 /*
3531  * Parse the PCI device database, if loaded, and return a pointer to a
3532  * description of the device.
3533  *
3534  * The database is flat text formatted as follows:
3535  *
3536  * Any line not in a valid format is ignored.
3537  * Lines are terminated with newline '\n' characters.
3538  *
3539  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3540  * the vendor name.
3541  *
3542  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3543  * - devices cannot be listed without a corresponding VENDOR line.
3544  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3545  * another TAB, then the device name.
3546  */
3547 
3548 /*
3549  * Assuming (ptr) points to the beginning of a line in the database,
3550  * return the vendor or device and description of the next entry.
3551  * The value of (vendor) or (device) inappropriate for the entry type
3552  * is set to -1.  Returns nonzero at the end of the database.
3553  *
 * Note that this parser is somewhat fragile in the face of corrupt
 * data; we guard against running past the end of the database by
 * appending a terminating newline when we initialize it.
3557  */
3558 static int
3559 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3560 {
3561 	char	*cp = *ptr;
3562 	int	left;
3563 
3564 	*device = -1;
3565 	*vendor = -1;
3566 	**desc = '\0';
3567 	for (;;) {
3568 		left = pci_vendordata_size - (cp - pci_vendordata);
3569 		if (left <= 0) {
3570 			*ptr = cp;
3571 			return(1);
3572 		}
3573 
3574 		/* vendor entry? */
3575 		if (*cp != '\t' &&
3576 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3577 			break;
3578 		/* device entry? */
3579 		if (*cp == '\t' &&
3580 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3581 			break;
3582 
3583 		/* skip to next line */
3584 		while (*cp != '\n' && left > 0) {
3585 			cp++;
3586 			left--;
3587 		}
3588 		if (*cp == '\n') {
3589 			cp++;
3590 			left--;
3591 		}
3592 	}
3593 	/* skip to next line */
3594 	while (*cp != '\n' && left > 0) {
3595 		cp++;
3596 		left--;
3597 	}
3598 	if (*cp == '\n' && left > 0)
3599 		cp++;
3600 	*ptr = cp;
3601 	return(0);
3602 }
3603 
/*
 * Build a "<vendor>, <device>" description string for 'dev' from the
 * loaded PCI ID database.  Returns a malloc'd (M_DEVBUF) string the
 * caller must free, or NULL if no database is loaded, the vendor is
 * unknown, or allocation fails.  An unknown device under a known
 * vendor is rendered as its hex device ID.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte description buffers — the parser bounds its copies. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Ran into the next vendor: no device entry found. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* Fall back to the raw hex device ID. */
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3656 
/*
 * Bus read_ivar method: export the cached config-space identity and
 * location values backing the pci_get_*() accessors.  Returns 0 on
 * success, EINVAL for PCI_IVAR_ETHADDR (not supported by the generic
 * bus), or ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device:vendor, matching pci_quirks devid form. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3739 
3740 int
3741 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3742 {
3743 	struct pci_devinfo *dinfo;
3744 
3745 	dinfo = device_get_ivars(child);
3746 
3747 	switch (which) {
3748 	case PCI_IVAR_INTPIN:
3749 		dinfo->cfg.intpin = value;
3750 		return (0);
3751 	case PCI_IVAR_ETHADDR:
3752 	case PCI_IVAR_SUBVENDOR:
3753 	case PCI_IVAR_SUBDEVICE:
3754 	case PCI_IVAR_VENDOR:
3755 	case PCI_IVAR_DEVICE:
3756 	case PCI_IVAR_DEVID:
3757 	case PCI_IVAR_CLASS:
3758 	case PCI_IVAR_SUBCLASS:
3759 	case PCI_IVAR_PROGIF:
3760 	case PCI_IVAR_REVID:
3761 	case PCI_IVAR_IRQ:
3762 	case PCI_IVAR_DOMAIN:
3763 	case PCI_IVAR_BUS:
3764 	case PCI_IVAR_SLOT:
3765 	case PCI_IVAR_FUNCTION:
3766 		return (EINVAL);	/* disallow for now */
3767 
3768 	default:
3769 		return (ENOENT);
3770 	}
3771 }
3772 
3773 
3774 #include "opt_ddb.h"
3775 #ifdef DDB
3776 #include <ddb/ddb.h>
3777 #include <sys/cons.h>
3778 
3779 /*
3780  * List resources based on pci map registers, used for within ddb
3781  */
3782 
/*
 * "show pciregs" DDB command: walk the global pci_devq list and print
 * a one-line summary (name/unit, selector, class, card/chip IDs,
 * revision, header type) for each known PCI function.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number entries with no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unnamed devices print as "noneN" via none_count++. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3822 #endif /* DDB */
3823 
/*
 * Lazily reserve the backing resource for one of a child's BARs.
 * Sizes the BAR (or reuses a previously probed struct pci_map),
 * checks that the requested resource type matches the BAR's kind,
 * allocates a suitably sized and aligned range from our parent,
 * records it on the child's resource list as RLE_RESERVED, and
 * programs the BAR with the assigned address.
 *
 * Returns the reserved (not yet activated) resource, or NULL on any
 * failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs decode naturally aligned ranges of their own size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	/* Mark it reserved so teardown knows we own the reservation. */
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
3929 
3930 
/*
 * Bus alloc_resource method.  For direct children this performs lazy
 * allocation: legacy IRQs may be routed on first use, and I/O port /
 * memory BARs are reserved on demand via pci_reserve_map().  Requests
 * from deeper descendants are passed straight up to our parent.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	/* Hand the (possibly just-reserved) entry out from the list. */
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3981 
/*
 * Activate a resource previously allocated for a direct child.
 * After the generic activation succeeds, make the device actually
 * decode the range: a device ROM gets its enable bit set in the BAR,
 * and I/O port / memory decoding is switched on in the command
 * register via PCI_ENABLE_IO().
 */
int
pci_activate_resource(device_t dev, device_t child, int type, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	/* Let the generic bus code map/activate the resource first. */
	error = bus_generic_activate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Enable decoding in the command register when activating BARs. */
	if (device_get_parent(child) == dev) {
		/* Device ROMs need their decoding explicitly enabled. */
		dinfo = device_get_ivars(child);
		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r) | PCIM_BIOS_ENABLE);
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			error = PCI_ENABLE_IO(dev, child, type);
			break;
		}
	}
	return (error);
}
4009 
/*
 * Deactivate a resource for a direct child.  For a device ROM the
 * bare base address is written back to the BAR, which clears the
 * ROM enable bit so the device stops decoding the range.  Note that
 * port/memory decoding in the command register is deliberately left
 * alone here; only ROM decoding is disabled.
 */
int
pci_deactivate_resource(device_t dev, device_t child, int type,
    int rid, struct resource *r)
{
	struct pci_devinfo *dinfo;
	int error;

	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
	if (error)
		return (error);

	/* Disable decoding for device ROMs. */
	if (device_get_parent(child) == dev) {
		dinfo = device_get_ivars(child);
		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
			pci_write_bar(child, pci_find_bar(child, rid),
			    rman_get_start(r));
	}
	return (0);
}
4030 
/*
 * Detach and destroy a PCI child device, releasing everything the
 * bus reserved on its behalf.  Teardown order matters: detach the
 * driver, turn off decoding, release/unreserve resources, free the
 * list, then delete the device and its config state.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * A resource still active or busy here was leaked
			 * by the (already detached) driver: complain, then
			 * force-release it so the unreserve below can
			 * proceed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4070 
/*
 * Delete one entry from a child's resource list, first releasing the
 * bus's reservation if one is held.  Refuses (with a diagnostic) to
 * delete a resource the child still has active or allocated.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* We only track resources for our immediate children. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4113 
4114 struct resource_list *
4115 pci_get_resource_list (device_t dev, device_t child)
4116 {
4117 	struct pci_devinfo *dinfo = device_get_ivars(child);
4118 
4119 	return (&dinfo->resources);
4120 }
4121 
4122 uint32_t
4123 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4124 {
4125 	struct pci_devinfo *dinfo = device_get_ivars(child);
4126 	pcicfgregs *cfg = &dinfo->cfg;
4127 
4128 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4129 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4130 }
4131 
4132 void
4133 pci_write_config_method(device_t dev, device_t child, int reg,
4134     uint32_t val, int width)
4135 {
4136 	struct pci_devinfo *dinfo = device_get_ivars(child);
4137 	pcicfgregs *cfg = &dinfo->cfg;
4138 
4139 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4140 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4141 }
4142 
4143 int
4144 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4145     size_t buflen)
4146 {
4147 
4148 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4149 	    pci_get_function(child));
4150 	return (0);
4151 }
4152 
4153 int
4154 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4155     size_t buflen)
4156 {
4157 	struct pci_devinfo *dinfo;
4158 	pcicfgregs *cfg;
4159 
4160 	dinfo = device_get_ivars(child);
4161 	cfg = &dinfo->cfg;
4162 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4163 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4164 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4165 	    cfg->progif);
4166 	return (0);
4167 }
4168 
4169 int
4170 pci_assign_interrupt_method(device_t dev, device_t child)
4171 {
4172 	struct pci_devinfo *dinfo = device_get_ivars(child);
4173 	pcicfgregs *cfg = &dinfo->cfg;
4174 
4175 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4176 	    cfg->intpin));
4177 }
4178 
4179 static int
4180 pci_modevent(module_t mod, int what, void *arg)
4181 {
4182 	static struct cdev *pci_cdev;
4183 
4184 	switch (what) {
4185 	case MOD_LOAD:
4186 		STAILQ_INIT(&pci_devq);
4187 		pci_generation = 0;
4188 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4189 		    "pci");
4190 		pci_load_vendor_data();
4191 		break;
4192 
4193 	case MOD_UNLOAD:
4194 		destroy_dev(pci_cdev);
4195 		break;
4196 	}
4197 
4198 	return (0);
4199 }
4200 
/*
 * Restore a device's saved type 0 configuration registers, e.g.
 * after a suspend/resume or D3->D0 power transition.  The power-up
 * must happen before any register writes (see below), and MSI/MSI-X
 * state is reprogrammed last.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Reprogram the BARs from the cached values, then the rest. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4242 
/*
 * Snapshot a device's writable type 0 configuration registers into
 * the cached pcicfgregs so pci_cfg_restore() can put them back, and
 * optionally (setstate != 0, subject to the pci_do_power_nodriver
 * policy) power the device down to D3.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	/* NOTE: cls is read before the !setstate return; harmless, as
	 * pci_get_class() is a config-space read with no side effects. */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each policy level deliberately falls through to the next. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4322