xref: /freebsd/sys/dev/pci/pci.c (revision 59c7ad52aaa5b26e503871334672af0f58f9c2e8)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/ehcireg.h>
66 #include <dev/usb/controller/ohcireg.h>
67 #include <dev/usb/controller/uhcireg.h>
68 
69 #include "pcib_if.h"
70 #include "pci_if.h"
71 
/*
 * Evaluate to true if config register 'reg' is the expansion-ROM BAR for
 * the given header type (normal vs. PCI-PCI bridge).  'reg' is
 * parenthesized so that expression arguments expand safely.
 */
#define	PCIR_IS_BIOS(cfg, reg)						\
	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && (reg) == PCIR_BIOS) || \
	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && (reg) == PCIR_BIOS_1))
75 
76 
/*
 * Forward declarations of functions private to this file; see the
 * definitions below for per-function documentation.
 */
static pci_addr_t	pci_mapbase(uint64_t mapreg);
static const char	*pci_maptype(uint64_t mapreg);
static int		pci_mapsize(uint64_t testval);
static int		pci_maprange(uint64_t mapreg);
static pci_addr_t	pci_rombase(uint64_t mapreg);
static int		pci_romsize(uint64_t testval);
static void		pci_fixancient(pcicfgregs *cfg);
static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);

static int		pci_porten(device_t dev);
static int		pci_memen(device_t dev);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t bus, device_t dev, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pci_remap_intr_method(device_t bus, device_t dev,
			    u_int irq);
121 
/*
 * kobj method table for the PCI bus driver: the standard device and bus
 * interface entry points plus the PCI-specific methods declared in
 * pci_if.m (config space access, power states, MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* table terminator */
};
175 
/* Declare the "pci" driver class and attach it below each pcib bridge. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Vendor data file contents; filled in by pci_load_vendor_data(). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
184 
185 
/* Per-device workaround entry; matched against config-space IDs. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-specific argument (e.g. map register offset) */
	int	arg2;	/* second quirk-specific argument */
};
195 
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* list terminator */
};
236 
/* map register information (flags describing what a BAR decodes) */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every enumerated PCI function, plus bookkeeping. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped whenever pci_devq changes */
uint32_t pci_numdevs = 0;
/* Set when any PCIe/PCI-X capability is found; see pci_read_cap(). */
static int pcie_chipset, pcix_chipset;
246 
/* sysctl vars: loader tunables and knobs controlling PCI bus behavior */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* USB early takeover defaults on only where BIOS legacy USB is common. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD | CTLFLAG_TUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
304 
305 /* Find a device_t by bus/slot/function in domain 0 */
306 
/*
 * Convenience wrapper: look up the device_t for bus/slot/function
 * in PCI domain 0.
 */
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	return (pci_find_dbsf(0, bus, slot, func));
}
313 
314 /* Find a device_t by domain/bus/slot/function */
315 
316 device_t
317 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
318 {
319 	struct pci_devinfo *dinfo;
320 
321 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
322 		if ((dinfo->cfg.domain == domain) &&
323 		    (dinfo->cfg.bus == bus) &&
324 		    (dinfo->cfg.slot == slot) &&
325 		    (dinfo->cfg.func == func)) {
326 			return (dinfo->cfg.dev);
327 		}
328 	}
329 
330 	return (NULL);
331 }
332 
333 /* Find a device_t by vendor/device ID */
334 
335 device_t
336 pci_find_device(uint16_t vendor, uint16_t device)
337 {
338 	struct pci_devinfo *dinfo;
339 
340 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
341 		if ((dinfo->cfg.vendor == vendor) &&
342 		    (dinfo->cfg.device == device)) {
343 			return (dinfo->cfg.dev);
344 		}
345 	}
346 
347 	return (NULL);
348 }
349 
350 static int
351 pci_printf(pcicfgregs *cfg, const char *fmt, ...)
352 {
353 	va_list ap;
354 	int retval;
355 
356 	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
357 	    cfg->func);
358 	va_start(ap, fmt);
359 	retval += vprintf(fmt, ap);
360 	va_end(ap);
361 	return (retval);
362 }
363 
364 /* return base address of memory or port map */
365 
366 static pci_addr_t
367 pci_mapbase(uint64_t mapreg)
368 {
369 
370 	if (PCI_BAR_MEM(mapreg))
371 		return (mapreg & PCIM_BAR_MEM_BASE);
372 	else
373 		return (mapreg & PCIM_BAR_IO_BASE);
374 }
375 
376 /* return map type of memory or port map */
377 
378 static const char *
379 pci_maptype(uint64_t mapreg)
380 {
381 
382 	if (PCI_BAR_IO(mapreg))
383 		return ("I/O Port");
384 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
385 		return ("Prefetchable Memory");
386 	return ("Memory");
387 }
388 
389 /* return log2 of map size decoded for memory or port map */
390 
391 static int
392 pci_mapsize(uint64_t testval)
393 {
394 	int ln2size;
395 
396 	testval = pci_mapbase(testval);
397 	ln2size = 0;
398 	if (testval != 0) {
399 		while ((testval & 1) == 0)
400 		{
401 			ln2size++;
402 			testval >>= 1;
403 		}
404 	}
405 	return (ln2size);
406 }
407 
408 /* return base address of device ROM */
409 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Keep only the address bits of the expansion-ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
416 
/* return log2 of map size decoded for device ROM */
418 
419 static int
420 pci_romsize(uint64_t testval)
421 {
422 	int ln2size;
423 
424 	testval = pci_rombase(testval);
425 	ln2size = 0;
426 	if (testval != 0) {
427 		while ((testval & 1) == 0)
428 		{
429 			ln2size++;
430 			testval >>= 1;
431 		}
432 	}
433 	return (ln2size);
434 }
435 
436 /* return log2 of address range supported by map register */
437 
438 static int
439 pci_maprange(uint64_t mapreg)
440 {
441 	int ln2range = 0;
442 
443 	if (PCI_BAR_IO(mapreg))
444 		ln2range = 32;
445 	else
446 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
447 		case PCIM_BAR_MEM_32:
448 			ln2range = 32;
449 			break;
450 		case PCIM_BAR_MEM_1MB:
451 			ln2range = 20;
452 			break;
453 		case PCIM_BAR_MEM_64:
454 			ln2range = 64;
455 			break;
456 		}
457 	return (ln2range);
458 }
459 
460 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
461 
462 static void
463 pci_fixancient(pcicfgregs *cfg)
464 {
465 	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
466 		return;
467 
468 	/* PCI to PCI bridges use header type 1 */
469 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
470 		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
471 }
472 
473 /* extract header type specific config data */
474 
/*
 * Extract header-type specific config data: the subvendor/subdevice IDs
 * and the number of BARs for normal, bridge and cardbus headers.  For
 * bridges the subvendor/subdevice come later from the PCIY_SUBVENDOR
 * capability (see pci_read_cap()).
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
496 
497 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of function domain/b/s/f into a newly
 * allocated pci_devinfo and link it onto the global device list.
 * 'size' lets callers allocate a larger structure that embeds the
 * pci_devinfo as its first member.  Returns NULL if no device responds
 * at this address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device ID means nothing responded. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Cache the commonly used config header registers. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list if the device has one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the interesting fields into the pciio conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
572 
/*
 * Walk the PCI capability list of a newly-read function and record the
 * location and contents of the capabilities this driver cares about:
 * power management, HyperTransport, MSI, MSI-X, VPD, bridge subvendor,
 * PCI-X and PCI-express.  Note: REG/WREG deliberately stay defined for
 * the VPD helper functions below.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability pointer lives at a header-type dependent offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* offset of first capability */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check: offsets must stay inside config space. */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				/* Only record the data reg if it fits. */
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index + offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}


#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
730 
731 /*
732  * PCI Vital Product Data
733  */
734 
735 #define	PCI_VPD_TIMEOUT		1000000
736 
/*
 * Read one 32-bit word of VPD data at 4-byte-aligned offset 'reg'.
 * Writes the address register, then polls the completion flag (bit 15)
 * for up to PCI_VPD_TIMEOUT microseconds.  Returns 0 on success or
 * ENXIO on timeout.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Wait until the device sets the flag bit to signal completion. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
755 
#if 0
/*
 * Write one 32-bit word of VPD data at 4-byte-aligned offset 'reg'
 * (mirror of pci_read_vpd_reg; bit 15 set in the address register
 * requests a write and clears on completion).  Currently unused.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Wait for the device to clear the flag bit. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
775 
776 #undef PCI_VPD_TIMEOUT
777 
/* Cursor state for reading VPD data sequentially, one byte at a time. */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit word being consumed */
	int		bytesinval;	/* unread bytes remaining in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
786 
787 static int
788 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
789 {
790 	uint32_t reg;
791 	uint8_t byte;
792 
793 	if (vrs->bytesinval == 0) {
794 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
795 			return (ENXIO);
796 		vrs->val = le32toh(reg);
797 		vrs->off += 4;
798 		byte = vrs->val & 0xff;
799 		vrs->bytesinval = 3;
800 	} else {
801 		vrs->val = vrs->val >> 8;
802 		byte = vrs->val & 0xff;
803 		vrs->bytesinval--;
804 	}
805 
806 	vrs->cksum += byte;
807 	*data = byte;
808 	return (0);
809 }
810 
811 static void
812 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
813 {
814 	struct vpd_readstate vrs;
815 	int state;
816 	int name;
817 	int remain;
818 	int i;
819 	int alloc, off;		/* alloc/off for RO/W arrays */
820 	int cksumvalid;
821 	int dflen;
822 	uint8_t byte;
823 	uint8_t byte2;
824 
825 	/* init vpd reader */
826 	vrs.bytesinval = 0;
827 	vrs.off = 0;
828 	vrs.pcib = pcib;
829 	vrs.cfg = cfg;
830 	vrs.cksum = 0;
831 
832 	state = 0;
833 	name = remain = i = 0;	/* shut up stupid gcc */
834 	alloc = off = 0;	/* shut up stupid gcc */
835 	dflen = 0;		/* shut up stupid gcc */
836 	cksumvalid = -1;
837 	while (state >= 0) {
838 		if (vpd_nextbyte(&vrs, &byte)) {
839 			state = -2;
840 			break;
841 		}
842 #if 0
843 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
844 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
845 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
846 #endif
847 		switch (state) {
848 		case 0:		/* item name */
849 			if (byte & 0x80) {
850 				if (vpd_nextbyte(&vrs, &byte2)) {
851 					state = -2;
852 					break;
853 				}
854 				remain = byte2;
855 				if (vpd_nextbyte(&vrs, &byte2)) {
856 					state = -2;
857 					break;
858 				}
859 				remain |= byte2 << 8;
860 				if (remain > (0x7f*4 - vrs.off)) {
861 					state = -1;
862 					printf(
863 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
864 					    cfg->domain, cfg->bus, cfg->slot,
865 					    cfg->func, remain);
866 				}
867 				name = byte & 0x7f;
868 			} else {
869 				remain = byte & 0x7;
870 				name = (byte >> 3) & 0xf;
871 			}
872 			switch (name) {
873 			case 0x2:	/* String */
874 				cfg->vpd.vpd_ident = malloc(remain + 1,
875 				    M_DEVBUF, M_WAITOK);
876 				i = 0;
877 				state = 1;
878 				break;
879 			case 0xf:	/* End */
880 				state = -1;
881 				break;
882 			case 0x10:	/* VPD-R */
883 				alloc = 8;
884 				off = 0;
885 				cfg->vpd.vpd_ros = malloc(alloc *
886 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
887 				    M_WAITOK | M_ZERO);
888 				state = 2;
889 				break;
890 			case 0x11:	/* VPD-W */
891 				alloc = 8;
892 				off = 0;
893 				cfg->vpd.vpd_w = malloc(alloc *
894 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
895 				    M_WAITOK | M_ZERO);
896 				state = 5;
897 				break;
898 			default:	/* Invalid data, abort */
899 				state = -1;
900 				break;
901 			}
902 			break;
903 
904 		case 1:	/* Identifier String */
905 			cfg->vpd.vpd_ident[i++] = byte;
906 			remain--;
907 			if (remain == 0)  {
908 				cfg->vpd.vpd_ident[i] = '\0';
909 				state = 0;
910 			}
911 			break;
912 
913 		case 2:	/* VPD-R Keyword Header */
914 			if (off == alloc) {
915 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
916 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
917 				    M_DEVBUF, M_WAITOK | M_ZERO);
918 			}
919 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
920 			if (vpd_nextbyte(&vrs, &byte2)) {
921 				state = -2;
922 				break;
923 			}
924 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
925 			if (vpd_nextbyte(&vrs, &byte2)) {
926 				state = -2;
927 				break;
928 			}
929 			dflen = byte2;
930 			if (dflen == 0 &&
931 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
932 			    2) == 0) {
933 				/*
934 				 * if this happens, we can't trust the rest
935 				 * of the VPD.
936 				 */
937 				printf(
938 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
939 				    cfg->domain, cfg->bus, cfg->slot,
940 				    cfg->func, dflen);
941 				cksumvalid = 0;
942 				state = -1;
943 				break;
944 			} else if (dflen == 0) {
945 				cfg->vpd.vpd_ros[off].value = malloc(1 *
946 				    sizeof(*cfg->vpd.vpd_ros[off].value),
947 				    M_DEVBUF, M_WAITOK);
948 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
949 			} else
950 				cfg->vpd.vpd_ros[off].value = malloc(
951 				    (dflen + 1) *
952 				    sizeof(*cfg->vpd.vpd_ros[off].value),
953 				    M_DEVBUF, M_WAITOK);
954 			remain -= 3;
955 			i = 0;
956 			/* keep in sync w/ state 3's transistions */
957 			if (dflen == 0 && remain == 0)
958 				state = 0;
959 			else if (dflen == 0)
960 				state = 2;
961 			else
962 				state = 3;
963 			break;
964 
965 		case 3:	/* VPD-R Keyword Value */
966 			cfg->vpd.vpd_ros[off].value[i++] = byte;
967 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
968 			    "RV", 2) == 0 && cksumvalid == -1) {
969 				if (vrs.cksum == 0)
970 					cksumvalid = 1;
971 				else {
972 					if (bootverbose)
973 						printf(
974 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
975 						    cfg->domain, cfg->bus,
976 						    cfg->slot, cfg->func,
977 						    vrs.cksum);
978 					cksumvalid = 0;
979 					state = -1;
980 					break;
981 				}
982 			}
983 			dflen--;
984 			remain--;
985 			/* keep in sync w/ state 2's transistions */
986 			if (dflen == 0)
987 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
988 			if (dflen == 0 && remain == 0) {
989 				cfg->vpd.vpd_rocnt = off;
990 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
991 				    off * sizeof(*cfg->vpd.vpd_ros),
992 				    M_DEVBUF, M_WAITOK | M_ZERO);
993 				state = 0;
994 			} else if (dflen == 0)
995 				state = 2;
996 			break;
997 
998 		case 4:
999 			remain--;
1000 			if (remain == 0)
1001 				state = 0;
1002 			break;
1003 
1004 		case 5:	/* VPD-W Keyword Header */
1005 			if (off == alloc) {
1006 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1007 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1008 				    M_DEVBUF, M_WAITOK | M_ZERO);
1009 			}
1010 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1011 			if (vpd_nextbyte(&vrs, &byte2)) {
1012 				state = -2;
1013 				break;
1014 			}
1015 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1016 			if (vpd_nextbyte(&vrs, &byte2)) {
1017 				state = -2;
1018 				break;
1019 			}
1020 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1021 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1022 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1023 			    sizeof(*cfg->vpd.vpd_w[off].value),
1024 			    M_DEVBUF, M_WAITOK);
1025 			remain -= 3;
1026 			i = 0;
1027 			/* keep in sync w/ state 6's transistions */
1028 			if (dflen == 0 && remain == 0)
1029 				state = 0;
1030 			else if (dflen == 0)
1031 				state = 5;
1032 			else
1033 				state = 6;
1034 			break;
1035 
1036 		case 6:	/* VPD-W Keyword Value */
1037 			cfg->vpd.vpd_w[off].value[i++] = byte;
1038 			dflen--;
1039 			remain--;
1040 			/* keep in sync w/ state 5's transistions */
1041 			if (dflen == 0)
1042 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1043 			if (dflen == 0 && remain == 0) {
1044 				cfg->vpd.vpd_wcnt = off;
1045 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1046 				    off * sizeof(*cfg->vpd.vpd_w),
1047 				    M_DEVBUF, M_WAITOK | M_ZERO);
1048 				state = 0;
1049 			} else if (dflen == 0)
1050 				state = 5;
1051 			break;
1052 
1053 		default:
1054 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1055 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1056 			    state);
1057 			state = -1;
1058 			break;
1059 		}
1060 	}
1061 
1062 	if (cksumvalid == 0 || state < -1) {
1063 		/* read-only data bad, clean up */
1064 		if (cfg->vpd.vpd_ros != NULL) {
1065 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1066 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1067 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1068 			cfg->vpd.vpd_ros = NULL;
1069 		}
1070 	}
1071 	if (state < -1) {
1072 		/* I/O error, clean up */
1073 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1074 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1075 		if (cfg->vpd.vpd_ident != NULL) {
1076 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1077 			cfg->vpd.vpd_ident = NULL;
1078 		}
1079 		if (cfg->vpd.vpd_w != NULL) {
1080 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1081 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1082 			free(cfg->vpd.vpd_w, M_DEVBUF);
1083 			cfg->vpd.vpd_w = NULL;
1084 		}
1085 	}
1086 	cfg->vpd.vpd_cached = 1;
1087 #undef REG
1088 #undef WREG
1089 }
1090 
1091 int
1092 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1093 {
1094 	struct pci_devinfo *dinfo = device_get_ivars(child);
1095 	pcicfgregs *cfg = &dinfo->cfg;
1096 
1097 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1098 		pci_read_vpd(device_get_parent(dev), cfg);
1099 
1100 	*identptr = cfg->vpd.vpd_ident;
1101 
1102 	if (*identptr == NULL)
1103 		return (ENXIO);
1104 
1105 	return (0);
1106 }
1107 
1108 int
1109 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1110 	const char **vptr)
1111 {
1112 	struct pci_devinfo *dinfo = device_get_ivars(child);
1113 	pcicfgregs *cfg = &dinfo->cfg;
1114 	int i;
1115 
1116 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1117 		pci_read_vpd(device_get_parent(dev), cfg);
1118 
1119 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1120 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1121 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1122 			*vptr = cfg->vpd.vpd_ros[i].value;
1123 		}
1124 
1125 	if (i != cfg->vpd.vpd_rocnt)
1126 		return (0);
1127 
1128 	*vptr = NULL;
1129 	return (ENXIO);
1130 }
1131 
1132 /*
1133  * Find the requested extended capability and return the offset in
1134  * configuration space via the pointer provided. The function returns
1135  * 0 on success and error code otherwise.
1136  */
1137 int
1138 pci_find_extcap_method(device_t dev, device_t child, int capability,
1139     int *capreg)
1140 {
1141 	struct pci_devinfo *dinfo = device_get_ivars(child);
1142 	pcicfgregs *cfg = &dinfo->cfg;
1143 	u_int32_t status;
1144 	u_int8_t ptr;
1145 
1146 	/*
1147 	 * Check the CAP_LIST bit of the PCI status register first.
1148 	 */
1149 	status = pci_read_config(child, PCIR_STATUS, 2);
1150 	if (!(status & PCIM_STATUS_CAPPRESENT))
1151 		return (ENXIO);
1152 
1153 	/*
1154 	 * Determine the start pointer of the capabilities list.
1155 	 */
1156 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1157 	case PCIM_HDRTYPE_NORMAL:
1158 	case PCIM_HDRTYPE_BRIDGE:
1159 		ptr = PCIR_CAP_PTR;
1160 		break;
1161 	case PCIM_HDRTYPE_CARDBUS:
1162 		ptr = PCIR_CAP_PTR_2;
1163 		break;
1164 	default:
1165 		/* XXX: panic? */
1166 		return (ENXIO);		/* no extended capabilities support */
1167 	}
1168 	ptr = pci_read_config(child, ptr, 1);
1169 
1170 	/*
1171 	 * Traverse the capabilities list.
1172 	 */
1173 	while (ptr != 0) {
1174 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1175 			if (capreg != NULL)
1176 				*capreg = ptr;
1177 			return (0);
1178 		}
1179 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1180 	}
1181 
1182 	return (ENOENT);
1183 }
1184 
1185 /*
1186  * Support for MSI-X message interrupts.
1187  */
1188 void
1189 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1190 {
1191 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1192 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1193 	uint32_t offset;
1194 
1195 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1196 	offset = msix->msix_table_offset + index * 16;
1197 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1198 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1199 	bus_write_4(msix->msix_table_res, offset + 8, data);
1200 
1201 	/* Enable MSI -> HT mapping. */
1202 	pci_ht_map_msi(dev, address);
1203 }
1204 
1205 void
1206 pci_mask_msix(device_t dev, u_int index)
1207 {
1208 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1209 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1210 	uint32_t offset, val;
1211 
1212 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1213 	offset = msix->msix_table_offset + index * 16 + 12;
1214 	val = bus_read_4(msix->msix_table_res, offset);
1215 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1216 		val |= PCIM_MSIX_VCTRL_MASK;
1217 		bus_write_4(msix->msix_table_res, offset, val);
1218 	}
1219 }
1220 
1221 void
1222 pci_unmask_msix(device_t dev, u_int index)
1223 {
1224 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1225 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1226 	uint32_t offset, val;
1227 
1228 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1229 	offset = msix->msix_table_offset + index * 16 + 12;
1230 	val = bus_read_4(msix->msix_table_res, offset);
1231 	if (val & PCIM_MSIX_VCTRL_MASK) {
1232 		val &= ~PCIM_MSIX_VCTRL_MASK;
1233 		bus_write_4(msix->msix_table_res, offset, val);
1234 	}
1235 }
1236 
1237 int
1238 pci_pending_msix(device_t dev, u_int index)
1239 {
1240 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1242 	uint32_t offset, bit;
1243 
1244 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1245 	offset = msix->msix_pba_offset + (index / 32) * 4;
1246 	bit = 1 << index % 32;
1247 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1248 }
1249 
1250 /*
1251  * Restore MSI-X registers and table during resume.  If MSI-X is
1252  * enabled then walk the virtual table to restore the actual MSI-X
1253  * table.
1254  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Write back the cached MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1282 
1283 /*
1284  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1285  * returned in *count.  After this function returns, each message will be
1286  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1287  */
1288 int
1289 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1290 {
1291 	struct pci_devinfo *dinfo = device_get_ivars(child);
1292 	pcicfgregs *cfg = &dinfo->cfg;
1293 	struct resource_list_entry *rle;
1294 	int actual, error, i, irq, max;
1295 
1296 	/* Don't let count == 0 get us into trouble. */
1297 	if (*count == 0)
1298 		return (EINVAL);
1299 
1300 	/* If rid 0 is allocated, then fail. */
1301 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1302 	if (rle != NULL && rle->res != NULL)
1303 		return (ENXIO);
1304 
1305 	/* Already have allocated messages? */
1306 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1307 		return (ENXIO);
1308 
1309 	/* If MSI is blacklisted for this system, fail. */
1310 	if (pci_msi_blacklisted())
1311 		return (ENXIO);
1312 
1313 	/* MSI-X capability present? */
1314 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1315 		return (ENODEV);
1316 
1317 	/* Make sure the appropriate BARs are mapped. */
1318 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1319 	    cfg->msix.msix_table_bar);
1320 	if (rle == NULL || rle->res == NULL ||
1321 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1322 		return (ENXIO);
1323 	cfg->msix.msix_table_res = rle->res;
1324 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1325 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1326 		    cfg->msix.msix_pba_bar);
1327 		if (rle == NULL || rle->res == NULL ||
1328 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1329 			return (ENXIO);
1330 	}
1331 	cfg->msix.msix_pba_res = rle->res;
1332 
1333 	if (bootverbose)
1334 		device_printf(child,
1335 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1336 		    *count, cfg->msix.msix_msgnum);
1337 	max = min(*count, cfg->msix.msix_msgnum);
1338 	for (i = 0; i < max; i++) {
1339 		/* Allocate a message. */
1340 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1341 		if (error)
1342 			break;
1343 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1344 		    irq, 1);
1345 	}
1346 	actual = i;
1347 
1348 	if (bootverbose) {
1349 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1350 		if (actual == 1)
1351 			device_printf(child, "using IRQ %lu for MSI-X\n",
1352 			    rle->start);
1353 		else {
1354 			int run;
1355 
1356 			/*
1357 			 * Be fancy and try to print contiguous runs of
1358 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1359 			 * 'run' is true if we are in a range.
1360 			 */
1361 			device_printf(child, "using IRQs %lu", rle->start);
1362 			irq = rle->start;
1363 			run = 0;
1364 			for (i = 1; i < actual; i++) {
1365 				rle = resource_list_find(&dinfo->resources,
1366 				    SYS_RES_IRQ, i + 1);
1367 
1368 				/* Still in a run? */
1369 				if (rle->start == irq + 1) {
1370 					run = 1;
1371 					irq++;
1372 					continue;
1373 				}
1374 
1375 				/* Finish previous range. */
1376 				if (run) {
1377 					printf("-%d", irq);
1378 					run = 0;
1379 				}
1380 
1381 				/* Start new range. */
1382 				printf(",%lu", rle->start);
1383 				irq = rle->start;
1384 			}
1385 
1386 			/* Unfinished range? */
1387 			if (run)
1388 				printf("-%d", irq);
1389 			printf(" for MSI-X\n");
1390 		}
1391 	}
1392 
1393 	/* Mask all vectors. */
1394 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1395 		pci_mask_msix(child, i);
1396 
1397 	/* Allocate and initialize vector data and virtual table. */
1398 	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
1399 	    M_DEVBUF, M_WAITOK | M_ZERO);
1400 	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
1401 	    M_DEVBUF, M_WAITOK | M_ZERO);
1402 	for (i = 0; i < actual; i++) {
1403 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1404 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1405 		cfg->msix.msix_table[i].mte_vector = i + 1;
1406 	}
1407 
1408 	/* Update control register to enable MSI-X. */
1409 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1410 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1411 	    cfg->msix.msix_ctrl, 2);
1412 
1413 	/* Update counts of alloc'd messages. */
1414 	cfg->msix.msix_alloc = actual;
1415 	cfg->msix.msix_table_len = actual;
1416 	*count = actual;
1417 	return (0);
1418 }
1419 
1420 /*
1421  * By default, pci_alloc_msix() will assign the allocated IRQ
1422  * resources consecutively to the first N messages in the MSI-X table.
1423  * However, device drivers may want to use different layouts if they
1424  * either receive fewer messages than they asked for, or they wish to
1425  * populate the MSI-X table sparsely.  This method allows the driver
1426  * to specify what layout it wants.  It must be called after a
1427  * successful pci_alloc_msix() but before any of the associated
1428  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1429  *
1430  * The 'vectors' array contains 'count' message vectors.  The array
1431  * maps directly to the MSI-X table in that index 0 in the array
1432  * specifies the vector for the first message in the MSI-X table, etc.
1433  * The vector value in each array index can either be 0 to indicate
1434  * that no vector should be assigned to a message slot, or it can be a
1435  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1437  * vector (IRQ) to be used for the corresponding message.
1438  *
1439  * On successful return, each message with a non-zero vector will have
1440  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1441  * 1.  Additionally, if any of the IRQs allocated via the previous
1442  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1443  * will be freed back to the system automatically.
1444  *
1445  * For example, suppose a driver has a MSI-X table with 6 messages and
1446  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1447  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1448  * C.  After the call to pci_alloc_msix(), the device will be setup to
1449  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1451  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1452  * be freed back to the system.  This device will also have valid
1453  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1454  *
1455  * In any case, the SYS_RES_IRQ rid X will always map to the message
1456  * at MSI-X table index X - 1 and will only be valid if a vector is
1457  * assigned to that table entry.
1458  */
1459 int
1460 pci_remap_msix_method(device_t dev, device_t child, int count,
1461     const u_int *vectors)
1462 {
1463 	struct pci_devinfo *dinfo = device_get_ivars(child);
1464 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1465 	struct resource_list_entry *rle;
1466 	int i, irq, j, *used;
1467 
1468 	/*
1469 	 * Have to have at least one message in the table but the
1470 	 * table can't be bigger than the actual MSI-X table in the
1471 	 * device.
1472 	 */
1473 	if (count == 0 || count > msix->msix_msgnum)
1474 		return (EINVAL);
1475 
1476 	/* Sanity check the vectors. */
1477 	for (i = 0; i < count; i++)
1478 		if (vectors[i] > msix->msix_alloc)
1479 			return (EINVAL);
1480 
1481 	/*
1482 	 * Make sure there aren't any holes in the vectors to be used.
1483 	 * It's a big pain to support it, and it doesn't really make
1484 	 * sense anyway.  Also, at least one vector must be used.
1485 	 */
1486 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1487 	    M_ZERO);
1488 	for (i = 0; i < count; i++)
1489 		if (vectors[i] != 0)
1490 			used[vectors[i] - 1] = 1;
1491 	for (i = 0; i < msix->msix_alloc - 1; i++)
1492 		if (used[i] == 0 && used[i + 1] == 1) {
1493 			free(used, M_DEVBUF);
1494 			return (EINVAL);
1495 		}
1496 	if (used[0] != 1) {
1497 		free(used, M_DEVBUF);
1498 		return (EINVAL);
1499 	}
1500 
1501 	/* Make sure none of the resources are allocated. */
1502 	for (i = 0; i < msix->msix_table_len; i++) {
1503 		if (msix->msix_table[i].mte_vector == 0)
1504 			continue;
1505 		if (msix->msix_table[i].mte_handlers > 0)
1506 			return (EBUSY);
1507 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1508 		KASSERT(rle != NULL, ("missing resource"));
1509 		if (rle->res != NULL)
1510 			return (EBUSY);
1511 	}
1512 
1513 	/* Free the existing resource list entries. */
1514 	for (i = 0; i < msix->msix_table_len; i++) {
1515 		if (msix->msix_table[i].mte_vector == 0)
1516 			continue;
1517 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1518 	}
1519 
1520 	/*
1521 	 * Build the new virtual table keeping track of which vectors are
1522 	 * used.
1523 	 */
1524 	free(msix->msix_table, M_DEVBUF);
1525 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1526 	    M_DEVBUF, M_WAITOK | M_ZERO);
1527 	for (i = 0; i < count; i++)
1528 		msix->msix_table[i].mte_vector = vectors[i];
1529 	msix->msix_table_len = count;
1530 
1531 	/* Free any unused IRQs and resize the vectors array if necessary. */
1532 	j = msix->msix_alloc - 1;
1533 	if (used[j] == 0) {
1534 		struct msix_vector *vec;
1535 
1536 		while (used[j] == 0) {
1537 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1538 			    msix->msix_vectors[j].mv_irq);
1539 			j--;
1540 		}
1541 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1542 		    M_WAITOK);
1543 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1544 		    (j + 1));
1545 		free(msix->msix_vectors, M_DEVBUF);
1546 		msix->msix_vectors = vec;
1547 		msix->msix_alloc = j + 1;
1548 	}
1549 	free(used, M_DEVBUF);
1550 
1551 	/* Map the IRQs onto the rids. */
1552 	for (i = 0; i < count; i++) {
1553 		if (vectors[i] == 0)
1554 			continue;
1555 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1556 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1557 		    irq, 1);
1558 	}
1559 
1560 	if (bootverbose) {
1561 		device_printf(child, "Remapped MSI-X IRQs as: ");
1562 		for (i = 0; i < count; i++) {
1563 			if (i != 0)
1564 				printf(", ");
1565 			if (vectors[i] == 0)
1566 				printf("---");
1567 			else
1568 				printf("%d",
1569 				    msix->msix_vectors[vectors[i]].mv_irq);
1570 		}
1571 		printf("\n");
1572 	}
1573 
1574 	return (0);
1575 }
1576 
/*
 * Disable MSI-X for 'child' and release everything allocated by
 * pci_alloc_msix(): the virtual table, the resource list entries, and
 * the IRQs themselves.  Returns 0 on success, ENODEV if nothing is
 * allocated, or EBUSY if any message still has an active handler or
 * an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1623 
1624 /*
1625  * Return the max supported MSI-X messages this device supports.
1626  * Basically, assuming the MD code can alloc messages, this function
1627  * should return the maximum value that pci_alloc_msix() can return.
1628  * Thus, it is subject to the tunables, etc.
1629  */
1630 int
1631 pci_msix_count_method(device_t dev, device_t child)
1632 {
1633 	struct pci_devinfo *dinfo = device_get_ivars(child);
1634 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635 
1636 	if (pci_do_msix && msix->msix_location != 0)
1637 		return (msix->msix_msgnum);
1638 	return (0);
1639 }
1640 
1641 /*
1642  * HyperTransport MSI mapping control
1643  */
1644 void
1645 pci_ht_map_msi(device_t dev, uint64_t addr)
1646 {
1647 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1648 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1649 
1650 	if (!ht->ht_msimap)
1651 		return;
1652 
1653 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1654 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1655 		/* Enable MSI -> HT mapping. */
1656 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1657 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1658 		    ht->ht_msictrl, 2);
1659 	}
1660 
1661 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1662 		/* Disable MSI -> HT mapping. */
1663 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1664 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1665 		    ht->ht_msictrl, 2);
1666 	}
1667 }
1668 
1669 int
1670 pci_get_max_read_req(device_t dev)
1671 {
1672 	int cap;
1673 	uint16_t val;
1674 
1675 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1676 		return (0);
1677 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1678 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1679 	val >>= 12;
1680 	return (1 << (val + 7));
1681 }
1682 
1683 int
1684 pci_set_max_read_req(device_t dev, int size)
1685 {
1686 	int cap;
1687 	uint16_t val;
1688 
1689 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1690 		return (0);
1691 	if (size < 128)
1692 		size = 128;
1693 	if (size > 4096)
1694 		size = 4096;
1695 	size = (1 << (fls(size) - 1));
1696 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1697 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1698 	val |= (fls(size) - 8) << 12;
1699 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1700 	return (size);
1701 }
1702 
1703 /*
1704  * Support for MSI message signalled interrupts.
1705  */
1706 void
1707 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1708 {
1709 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1710 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1711 
1712 	/* Write data and address values. */
1713 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1714 	    address & 0xffffffff, 4);
1715 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1716 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1717 		    address >> 32, 4);
1718 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1719 		    data, 2);
1720 	} else
1721 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1722 		    2);
1723 
1724 	/* Enable MSI in the control register. */
1725 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1726 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1727 	    2);
1728 
1729 	/* Enable MSI -> HT mapping. */
1730 	pci_ht_map_msi(dev, address);
1731 }
1732 
1733 void
1734 pci_disable_msi(device_t dev)
1735 {
1736 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1737 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1738 
1739 	/* Disable MSI -> HT mapping. */
1740 	pci_ht_map_msi(dev, 0);
1741 
1742 	/* Disable MSI in the control register. */
1743 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1744 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1745 	    2);
1746 }
1747 
1748 /*
1749  * Restore MSI registers during resume.  If MSI is enabled then
1750  * restore the data and address registers in addition to the control
1751  * register.
1752  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	/* Only reprogram address/data if MSI was enabled before suspend. */
	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		/* The data register moves when the 64-bit address is used. */
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the cached control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1778 
1779 static int
1780 pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
1781 {
1782 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1783 	pcicfgregs *cfg = &dinfo->cfg;
1784 	struct resource_list_entry *rle;
1785 	struct msix_table_entry *mte;
1786 	struct msix_vector *mv;
1787 	uint64_t addr;
1788 	uint32_t data;
1789 	int error, i, j;
1790 
1791 	/*
1792 	 * Handle MSI first.  We try to find this IRQ among our list
1793 	 * of MSI IRQs.  If we find it, we request updated address and
1794 	 * data registers and apply the results.
1795 	 */
1796 	if (cfg->msi.msi_alloc > 0) {
1797 
1798 		/* If we don't have any active handlers, nothing to do. */
1799 		if (cfg->msi.msi_handlers == 0)
1800 			return (0);
1801 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1802 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1803 			    i + 1);
1804 			if (rle->start == irq) {
1805 				error = PCIB_MAP_MSI(device_get_parent(bus),
1806 				    dev, irq, &addr, &data);
1807 				if (error)
1808 					return (error);
1809 				pci_disable_msi(dev);
1810 				dinfo->cfg.msi.msi_addr = addr;
1811 				dinfo->cfg.msi.msi_data = data;
1812 				pci_enable_msi(dev, addr, data);
1813 				return (0);
1814 			}
1815 		}
1816 		return (ENOENT);
1817 	}
1818 
1819 	/*
1820 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1821 	 * we request the updated mapping info.  If that works, we go
1822 	 * through all the slots that use this IRQ and update them.
1823 	 */
1824 	if (cfg->msix.msix_alloc > 0) {
1825 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1826 			mv = &cfg->msix.msix_vectors[i];
1827 			if (mv->mv_irq == irq) {
1828 				error = PCIB_MAP_MSI(device_get_parent(bus),
1829 				    dev, irq, &addr, &data);
1830 				if (error)
1831 					return (error);
1832 				mv->mv_address = addr;
1833 				mv->mv_data = data;
1834 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1835 					mte = &cfg->msix.msix_table[j];
1836 					if (mte->mte_vector != i + 1)
1837 						continue;
1838 					if (mte->mte_handlers == 0)
1839 						continue;
1840 					pci_mask_msix(dev, j);
1841 					pci_enable_msix(dev, j, addr, data);
1842 					pci_unmask_msix(dev, j);
1843 				}
1844 			}
1845 		}
1846 		return (ENOENT);
1847 	}
1848 
1849 	return (ENOENT);
1850 }
1851 
1852 /*
1853  * Returns true if the specified device is blacklisted because MSI
1854  * doesn't work.
1855  */
1856 int
1857 pci_msi_device_blacklisted(device_t dev)
1858 {
1859 	struct pci_quirk *q;
1860 
1861 	if (!pci_honor_msi_blacklist)
1862 		return (0);
1863 
1864 	for (q = &pci_quirks[0]; q->devid; q++) {
1865 		if (q->devid == pci_get_devid(dev) &&
1866 		    q->type == PCI_QUIRK_DISABLE_MSI)
1867 			return (1);
1868 	}
1869 	return (0);
1870 }
1871 
1872 /*
1873  * Returns true if a specified chipset supports MSI when it is
1874  * emulated hardware in a virtual machine.
1875  */
1876 static int
1877 pci_msi_vm_chipset(device_t dev)
1878 {
1879 	struct pci_quirk *q;
1880 
1881 	for (q = &pci_quirks[0]; q->devid; q++) {
1882 		if (q->devid == pci_get_devid(dev) &&
1883 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1884 			return (1);
1885 	}
1886 	return (0);
1887 }
1888 
1889 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1891  * we just check for blacklisted chipsets as represented by the
1892  * host-PCI bridge at device 0:0:0.  In the future, it may become
1893  * necessary to check other system attributes, such as the kenv values
1894  * that give the motherboard manufacturer and model number.
1895  */
1896 static int
1897 pci_msi_blacklisted(void)
1898 {
1899 	device_t dev;
1900 
1901 	if (!pci_honor_msi_blacklist)
1902 		return (0);
1903 
1904 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1905 	if (!(pcie_chipset || pcix_chipset)) {
1906 		if (vm_guest != VM_GUEST_NO) {
1907 			dev = pci_find_bsf(0, 0, 0);
1908 			if (dev != NULL)
1909 				return (pci_msi_vm_chipset(dev) == 0);
1910 		}
1911 		return (1);
1912 	}
1913 
1914 	dev = pci_find_bsf(0, 0, 0);
1915 	if (dev != NULL)
1916 		return (pci_msi_device_blacklisted(dev));
1917 	return (0);
1918 }
1919 
1920 /*
1921  * Attempt to allocate *count MSI messages.  The actual number allocated is
1922  * returned in *count.  After this function returns, each message will be
1923  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1924  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	/* irqs[] is sized 32 because MSI supports at most 32 messages. */
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Halving on failure keeps 'actual' a power of two, so each
	 * retry still satisfies the MSI message-count constraint.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple
	 * Message Enable field holds log2 of the message count, hence
	 * the ffs(actual) - 1 shifted into place.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2043 
2044 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] above holds at most 32 entries; enforce that bound. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  A driver that
	 * still has interrupt handlers set up or IRQ resources allocated
	 * must tear those down before the messages can be released.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2092 
2093 /*
2094  * Return the max supported MSI messages this device supports.
2095  * Basically, assuming the MD code can alloc messages, this function
2096  * should return the maximum value that pci_alloc_msi() can return.
2097  * Thus, it is subject to the tunables, etc.
2098  */
2099 int
2100 pci_msi_count_method(device_t dev, device_t child)
2101 {
2102 	struct pci_devinfo *dinfo = device_get_ivars(child);
2103 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2104 
2105 	if (pci_do_msi && msi->msi_location != 0)
2106 		return (msi->msi_msgnum);
2107 	return (0);
2108 }
2109 
2110 /* free pcicfgregs structure and all depending data structures */
2111 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	struct pci_map *pm, *next;
	int i;

	devlist_head = &pci_devq;

	/* Free the VPD ident string and read-only/writable keyword arrays. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* _SAFE variant required: each entry is freed while iterating. */
	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
		free(pm, M_DEVBUF);
	}
	/* Unlink from the global device list, then free the devinfo itself. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
2143 
2144 /*
2145  * PCI power manangement
2146  */
2147 int
2148 pci_set_powerstate_method(device_t dev, device_t child, int state)
2149 {
2150 	struct pci_devinfo *dinfo = device_get_ivars(child);
2151 	pcicfgregs *cfg = &dinfo->cfg;
2152 	uint16_t status;
2153 	int result, oldstate, highest, delay;
2154 
2155 	if (cfg->pp.pp_cap == 0)
2156 		return (EOPNOTSUPP);
2157 
2158 	/*
2159 	 * Optimize a no state change request away.  While it would be OK to
2160 	 * write to the hardware in theory, some devices have shown odd
2161 	 * behavior when going from D3 -> D3.
2162 	 */
2163 	oldstate = pci_get_powerstate(child);
2164 	if (oldstate == state)
2165 		return (0);
2166 
2167 	/*
2168 	 * The PCI power management specification states that after a state
2169 	 * transition between PCI power states, system software must
2170 	 * guarantee a minimal delay before the function accesses the device.
2171 	 * Compute the worst case delay that we need to guarantee before we
2172 	 * access the device.  Many devices will be responsive much more
2173 	 * quickly than this delay, but there are some that don't respond
2174 	 * instantly to state changes.  Transitions to/from D3 state require
2175 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2176 	 * is done below with DELAY rather than a sleeper function because
2177 	 * this function can be called from contexts where we cannot sleep.
2178 	 */
2179 	highest = (oldstate > state) ? oldstate : state;
2180 	if (highest == PCI_POWERSTATE_D3)
2181 	    delay = 10000;
2182 	else if (highest == PCI_POWERSTATE_D2)
2183 	    delay = 200;
2184 	else
2185 	    delay = 0;
2186 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2187 	    & ~PCIM_PSTAT_DMASK;
2188 	result = 0;
2189 	switch (state) {
2190 	case PCI_POWERSTATE_D0:
2191 		status |= PCIM_PSTAT_D0;
2192 		break;
2193 	case PCI_POWERSTATE_D1:
2194 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2195 			return (EOPNOTSUPP);
2196 		status |= PCIM_PSTAT_D1;
2197 		break;
2198 	case PCI_POWERSTATE_D2:
2199 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2200 			return (EOPNOTSUPP);
2201 		status |= PCIM_PSTAT_D2;
2202 		break;
2203 	case PCI_POWERSTATE_D3:
2204 		status |= PCIM_PSTAT_D3;
2205 		break;
2206 	default:
2207 		return (EINVAL);
2208 	}
2209 
2210 	if (bootverbose)
2211 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2212 		    state);
2213 
2214 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2215 	if (delay)
2216 		DELAY(delay);
2217 	return (0);
2218 }
2219 
2220 int
2221 pci_get_powerstate_method(device_t dev, device_t child)
2222 {
2223 	struct pci_devinfo *dinfo = device_get_ivars(child);
2224 	pcicfgregs *cfg = &dinfo->cfg;
2225 	uint16_t status;
2226 	int result;
2227 
2228 	if (cfg->pp.pp_cap != 0) {
2229 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2230 		switch (status & PCIM_PSTAT_DMASK) {
2231 		case PCIM_PSTAT_D0:
2232 			result = PCI_POWERSTATE_D0;
2233 			break;
2234 		case PCIM_PSTAT_D1:
2235 			result = PCI_POWERSTATE_D1;
2236 			break;
2237 		case PCIM_PSTAT_D2:
2238 			result = PCI_POWERSTATE_D2;
2239 			break;
2240 		case PCIM_PSTAT_D3:
2241 			result = PCI_POWERSTATE_D3;
2242 			break;
2243 		default:
2244 			result = PCI_POWERSTATE_UNKNOWN;
2245 			break;
2246 		}
2247 	} else {
2248 		/* No support, device is always at D0 */
2249 		result = PCI_POWERSTATE_D0;
2250 	}
2251 	return (result);
2252 }
2253 
2254 /*
2255  * Some convenience functions for PCI device drivers.
2256  */
2257 
2258 static __inline void
2259 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2260 {
2261 	uint16_t	command;
2262 
2263 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2264 	command |= bit;
2265 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2266 }
2267 
2268 static __inline void
2269 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2270 {
2271 	uint16_t	command;
2272 
2273 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2274 	command &= ~bit;
2275 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2276 }
2277 
2278 int
2279 pci_enable_busmaster_method(device_t dev, device_t child)
2280 {
2281 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2282 	return (0);
2283 }
2284 
2285 int
2286 pci_disable_busmaster_method(device_t dev, device_t child)
2287 {
2288 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2289 	return (0);
2290 }
2291 
2292 int
2293 pci_enable_io_method(device_t dev, device_t child, int space)
2294 {
2295 	uint16_t bit;
2296 
2297 	switch(space) {
2298 	case SYS_RES_IOPORT:
2299 		bit = PCIM_CMD_PORTEN;
2300 		break;
2301 	case SYS_RES_MEMORY:
2302 		bit = PCIM_CMD_MEMEN;
2303 		break;
2304 	default:
2305 		return (EINVAL);
2306 	}
2307 	pci_set_command_bit(dev, child, bit);
2308 	return (0);
2309 }
2310 
2311 int
2312 pci_disable_io_method(device_t dev, device_t child, int space)
2313 {
2314 	uint16_t bit;
2315 
2316 	switch(space) {
2317 	case SYS_RES_IOPORT:
2318 		bit = PCIM_CMD_PORTEN;
2319 		break;
2320 	case SYS_RES_MEMORY:
2321 		bit = PCIM_CMD_MEMEN;
2322 		break;
2323 	default:
2324 		return (EINVAL);
2325 	}
2326 	pci_clear_command_bit(dev, child, bit);
2327 	return (0);
2328 }
2329 
2330 /*
2331  * New style pci driver.  Parent device is either a pci-host-bridge or a
2332  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2333  */
2334 
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	/* Only emit the detailed probe information when booting verbose. */
	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin values 1..4 are printed as 'a'..'d'. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power-management capability: supported and current states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability: message count plus optional features. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability: message count and which BARs hold
		 * the vector table and the pending-bit array. */
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2391 
2392 static int
2393 pci_porten(device_t dev)
2394 {
2395 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2396 }
2397 
2398 static int
2399 pci_memen(device_t dev)
2400 {
2401 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2402 }
2403 
/*
 * Read a BAR's current value into *mapp and its sizing value (all 1's
 * written, then read back) into *testvalp, restoring the original
 * contents afterwards.  Handles 64-bit memory BARs and the ROM BAR.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* 0xfffffffe keeps bit 0 (the ROM enable bit) clear. */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	/* A 64-bit BAR's upper half lives in the next dword register. */
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	/* Re-enable decoding only after the BAR holds a sane value again. */
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2467 
/*
 * Program the BAR described by 'pm' with 'base' (both halves for a
 * 64-bit BAR) and refresh pm->pm_value with what the device actually
 * latched, since read-only bits may differ from what was written.
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Read back the value the hardware accepted. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev, pm->pm_reg + 4, 4) << 32;
}
2487 
2488 struct pci_map *
2489 pci_find_bar(device_t dev, int reg)
2490 {
2491 	struct pci_devinfo *dinfo;
2492 	struct pci_map *pm;
2493 
2494 	dinfo = device_get_ivars(dev);
2495 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2496 		if (pm->pm_reg == reg)
2497 			return (pm);
2498 	}
2499 	return (NULL);
2500 }
2501 
2502 int
2503 pci_bar_enabled(device_t dev, struct pci_map *pm)
2504 {
2505 	struct pci_devinfo *dinfo;
2506 	uint16_t cmd;
2507 
2508 	dinfo = device_get_ivars(dev);
2509 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2510 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2511 		return (0);
2512 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2513 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2514 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2515 	else
2516 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2517 }
2518 
/*
 * Record a new BAR (register offset, raw value, log2 size) for the
 * device, keeping the per-device list sorted by register offset.
 * Returns the new entry.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/*
	 * Find the entry to insert after: the last one whose successor
	 * is either absent or has a larger register offset.  Duplicate
	 * offsets are a bug.
	 */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	/* prev is NULL only when the list is empty. */
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2543 
/*
 * Re-program every recorded BAR with its saved value, writing both
 * dwords for 64-bit BARs.  The ROM BAR is always treated as 32-bit.
 */
static void
pci_restore_bars(device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm;
	int ln2range;

	dinfo = device_get_ivars(dev);
	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
			ln2range = 32;
		else
			ln2range = pci_maprange(pm->pm_value);
		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
		if (ln2range == 64)
			pci_write_config(dev, pm->pm_reg + 4,
			    pm->pm_value >> 32, 4);
	}
}
2563 
2564 /*
2565  * Add a resource based on a pci map register. Return 1 if the map
2566  * register is a 32bit map register or 2 if it is a 64bit register.
2567  */
2568 static int
2569 pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
2570     int force, int prefetch)
2571 {
2572 	struct pci_map *pm;
2573 	pci_addr_t base, map, testval;
2574 	pci_addr_t start, end, count;
2575 	int barlen, basezero, maprange, mapsize, type;
2576 	uint16_t cmd;
2577 	struct resource *res;
2578 
2579 	pci_read_bar(dev, reg, &map, &testval);
2580 	if (PCI_BAR_MEM(map)) {
2581 		type = SYS_RES_MEMORY;
2582 		if (map & PCIM_BAR_MEM_PREFETCH)
2583 			prefetch = 1;
2584 	} else
2585 		type = SYS_RES_IOPORT;
2586 	mapsize = pci_mapsize(testval);
2587 	base = pci_mapbase(map);
2588 #ifdef __PCI_BAR_ZERO_VALID
2589 	basezero = 0;
2590 #else
2591 	basezero = base == 0;
2592 #endif
2593 	maprange = pci_maprange(map);
2594 	barlen = maprange == 64 ? 2 : 1;
2595 
2596 	/*
2597 	 * For I/O registers, if bottom bit is set, and the next bit up
2598 	 * isn't clear, we know we have a BAR that doesn't conform to the
2599 	 * spec, so ignore it.  Also, sanity check the size of the data
2600 	 * areas to the type of memory involved.  Memory must be at least
2601 	 * 16 bytes in size, while I/O ranges must be at least 4.
2602 	 */
2603 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2604 		return (barlen);
2605 	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
2606 	    (type == SYS_RES_IOPORT && mapsize < 2))
2607 		return (barlen);
2608 
2609 	/* Save a record of this BAR. */
2610 	pm = pci_add_bar(dev, reg, map, mapsize);
2611 	if (bootverbose) {
2612 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2613 		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
2614 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2615 			printf(", port disabled\n");
2616 		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
2617 			printf(", memory disabled\n");
2618 		else
2619 			printf(", enabled\n");
2620 	}
2621 
2622 	/*
2623 	 * If base is 0, then we have problems if this architecture does
2624 	 * not allow that.  It is best to ignore such entries for the
2625 	 * moment.  These will be allocated later if the driver specifically
2626 	 * requests them.  However, some removable busses look better when
2627 	 * all resources are allocated, so allow '0' to be overriden.
2628 	 *
2629 	 * Similarly treat maps whose values is the same as the test value
2630 	 * read back.  These maps have had all f's written to them by the
2631 	 * BIOS in an attempt to disable the resources.
2632 	 */
2633 	if (!force && (basezero || map == testval))
2634 		return (barlen);
2635 	if ((u_long)base != base) {
2636 		device_printf(bus,
2637 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2638 		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
2639 		    pci_get_function(dev), reg);
2640 		return (barlen);
2641 	}
2642 
2643 	/*
2644 	 * This code theoretically does the right thing, but has
2645 	 * undesirable side effects in some cases where peripherals
2646 	 * respond oddly to having these bits enabled.  Let the user
2647 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2648 	 * default).
2649 	 */
2650 	if (pci_enable_io_modes) {
2651 		/* Turn on resources that have been left off by a lazy BIOS */
2652 		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
2653 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2654 			cmd |= PCIM_CMD_PORTEN;
2655 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2656 		}
2657 		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
2658 			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2659 			cmd |= PCIM_CMD_MEMEN;
2660 			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
2661 		}
2662 	} else {
2663 		if (type == SYS_RES_IOPORT && !pci_porten(dev))
2664 			return (barlen);
2665 		if (type == SYS_RES_MEMORY && !pci_memen(dev))
2666 			return (barlen);
2667 	}
2668 
2669 	count = (pci_addr_t)1 << mapsize;
2670 	if (basezero || base == pci_mapbase(testval)) {
2671 		start = 0;	/* Let the parent decide. */
2672 		end = ~0ULL;
2673 	} else {
2674 		start = base;
2675 		end = base + count - 1;
2676 	}
2677 	resource_list_add(rl, type, reg, start, end, count);
2678 
2679 	/*
2680 	 * Try to allocate the resource for this BAR from our parent
2681 	 * so that this resource range is already reserved.  The
2682 	 * driver for this device will later inherit this resource in
2683 	 * pci_alloc_resource().
2684 	 */
2685 	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
2686 	    prefetch ? RF_PREFETCHABLE : 0);
2687 	if (res == NULL) {
2688 		/*
2689 		 * If the allocation fails, clear the BAR and delete
2690 		 * the resource list entry to force
2691 		 * pci_alloc_resource() to allocate resources from the
2692 		 * parent.
2693 		 */
2694 		resource_list_delete(rl, type, reg);
2695 		start = 0;
2696 	} else
2697 		start = rman_get_start(res);
2698 	pci_write_bar(dev, pm, start);
2699 	return (barlen);
2700 }
2701 
2702 /*
2703  * For ATA devices we need to decide early what addressing mode to use.
2704  * Legacy demands that the primary and secondary ATA ports sits on the
2705  * same addresses that old ISA hardware did. This dictates that we use
2706  * those addresses and ignore the BAR's if we cannot set PCI native
2707  * addressing mode.
2708  */
2709 static void
2710 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2711     uint32_t prefetchmask)
2712 {
2713 	struct resource *r;
2714 	int rid, type, progif;
2715 #if 0
2716 	/* if this device supports PCI native addressing use it */
2717 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2718 	if ((progif & 0x8a) == 0x8a) {
2719 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2720 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2721 			printf("Trying ATA native PCI addressing mode\n");
2722 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2723 		}
2724 	}
2725 #endif
2726 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2727 	type = SYS_RES_IOPORT;
2728 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2729 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2730 		    prefetchmask & (1 << 0));
2731 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2732 		    prefetchmask & (1 << 1));
2733 	} else {
2734 		rid = PCIR_BAR(0);
2735 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2736 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2737 		    0x1f7, 8, 0);
2738 		rid = PCIR_BAR(1);
2739 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2740 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2741 		    0x3f6, 1, 0);
2742 	}
2743 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2744 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2745 		    prefetchmask & (1 << 2));
2746 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2747 		    prefetchmask & (1 << 3));
2748 	} else {
2749 		rid = PCIR_BAR(2);
2750 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2751 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2752 		    0x177, 8, 0);
2753 		rid = PCIR_BAR(3);
2754 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2755 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2756 		    0x376, 1, 0);
2757 	}
2758 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2759 	    prefetchmask & (1 << 4));
2760 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2761 	    prefetchmask & (1 << 5));
2762 }
2763 
/*
 * Determine the INTx IRQ for 'dev' (tunable override, intline register,
 * or bus routing) and record it as the rid 0 SYS_RES_IRQ resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * hw.pci<domain>.<bus>.<slot>.INT<pin>.irq.  Only values in
	 * the range 1..254 are accepted.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2811 
2812 /* Perform early OHCI takeover from SMM. */
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		/* Request ownership, then poll up to 100 x 1ms for release. */
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		/* SMM never let go: reset the controller instead. */
		if (ctl & OHCI_IR) {
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2848 
2849 /* Perform early UHCI takeover from SMM. */
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* UHCI registers live in I/O port space, not memory. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2872 
2873 /* Perform early EHCI takeover from SMM. */
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	/* Map the controller's registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/*
	 * Synchronise with the BIOS if it owns the controller.  Walk the
	 * extended-capability list (EECP chain in config space) looking
	 * for the legacy-support capability.
	 */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		/* BIOS semaphore set means the BIOS owns the controller. */
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership, then poll up to 100 x 1ms for release. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2928 
/*
 * Populate the device's resource list: BARs (with special handling for
 * ATA controllers and quirked registers), the INTx interrupt, and early
 * USB controller takeover from SMM where enabled.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB host controllers away from SMM/BIOS ownership early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
2983 
/*
 * Enumerate all slots and functions on the given bus and add a child
 * device for every function that responds.  'dinfo_size' lets subclassed
 * PCI buses allocate a larger per-device info structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots with an invalid/unimplemented header type. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Probe all functions only for multi-function devices. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3016 
/*
 * Attach a single PCI device described by 'dinfo' as a child of 'bus':
 * create the newbus child, snapshot then restore its config space
 * (restoring re-applies saved state over anything a BIOS left behind),
 * and add its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save first, then restore: primes the saved-state copy. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3028 
/* Generic PCI bus probe; always matches. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3038 
/* Attach the PCI bus: discover our bus number and enumerate children. */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3058 
3059 static void
3060 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3061     int state)
3062 {
3063 	device_t child, pcib;
3064 	struct pci_devinfo *dinfo;
3065 	int dstate, i;
3066 
3067 	/*
3068 	 * Set the device to the given state.  If the firmware suggests
3069 	 * a different power state, use it instead.  If power management
3070 	 * is not present, the firmware is responsible for managing
3071 	 * device power.  Skip children who aren't attached since they
3072 	 * are handled separately.
3073 	 */
3074 	pcib = device_get_parent(dev);
3075 	for (i = 0; i < numdevs; i++) {
3076 		child = devlist[i];
3077 		dinfo = device_get_ivars(child);
3078 		dstate = state;
3079 		if (device_is_attached(child) &&
3080 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3081 			pci_set_powerstate(child, dstate);
3082 	}
3083 }
3084 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children, then (optionally) power them down to D3.  Returns 0 or an
 * errno from child enumeration/suspend.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3116 
/*
 * Bus resume method: power children back to D0, restore their config
 * space, then resume drivers — critical device classes (display,
 * memory, bridges, base peripherals) first, everything else after.
 */
int
pci_resume(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	if (pci_do_power_resume)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D0);

	/* Now the device is powered up, restore its config space. */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);

		pci_cfg_restore(child, dinfo);
		/* Driverless devices are powered down again after restore. */
		if (!device_is_attached(child))
			pci_cfg_save(child, dinfo, 1);
	}

	/*
	 * Resume critical devices first, then everything else later.
	 */
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			DEVICE_RESUME(child);
			break;
		}
	}
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		switch (pci_get_class(child)) {
		case PCIC_DISPLAY:
		case PCIC_MEMORY:
		case PCIC_BRIDGE:
		case PCIC_BASEPERIPH:
			break;
		default:
			DEVICE_RESUME(child);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
3172 
/*
 * Locate a loader-preloaded PCI vendor/device description database
 * ("pci_vendor_data" module) and publish it via pci_vendordata /
 * pci_vendordata_size for pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database so the parser never
			 * walks off the end of a final unterminated line.
			 * NOTE(review): this stores at index 'sz', one
			 * byte past the reported size — presumably the
			 * preload area guarantees that slack; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3192 
/*
 * Bus driver_added method: give the new driver a chance to identify
 * and attach to any of our driverless children.  Config space is
 * restored before the probe and re-saved (with powerdown) on failure.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	if (device_get_children(dev, &devlist, &numdevs) != 0)
		return;
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children that have no driver yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
3221 
/*
 * Bus setup_intr method.  In addition to the generic setup, for direct
 * children this programs the MSI/MSI-X address+data registers on first
 * use of a vector and keeps INTx enable/disable consistent with whether
 * message-signalled interrupts are in use.  rid 0 is the legacy INTx
 * resource; rid > 0 selects an MSI or MSI-X vector.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Lazily map the MSI vector on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Unwind the generic setup if vector mapping failed. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3313 
/*
 * Bus teardown_intr method.  Mirrors pci_setup_intr(): for direct
 * children it masks INTx (rid 0), or decrements the MSI/MSI-X handler
 * count and disables/masks the message when the count drops to zero,
 * before performing the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		dinfo = device_get_ivars(child);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	/* MSI/MSI-X state was already torn down; teardown must not fail. */
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3372 
/*
 * Bus print_child method: print the child's resources, flags, and
 * slot/function location.  Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3398 
/*
 * Class/subclass -> description table used by pci_probe_nomatch() to
 * describe devices without a loaded vendor database.  A subclass of -1
 * is the generic description for the whole class; the table is
 * terminated by a NULL desc entry.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3490 
3491 void
3492 pci_probe_nomatch(device_t dev, device_t child)
3493 {
3494 	int	i;
3495 	char	*cp, *scp, *device;
3496 
3497 	/*
3498 	 * Look for a listing for this device in a loaded device database.
3499 	 */
3500 	if ((device = pci_describe_device(child)) != NULL) {
3501 		device_printf(dev, "<%s>", device);
3502 		free(device, M_DEVBUF);
3503 	} else {
3504 		/*
3505 		 * Scan the class/subclass descriptions for a general
3506 		 * description.
3507 		 */
3508 		cp = "unknown";
3509 		scp = NULL;
3510 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3511 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3512 				if (pci_nomatch_tab[i].subclass == -1) {
3513 					cp = pci_nomatch_tab[i].desc;
3514 				} else if (pci_nomatch_tab[i].subclass ==
3515 				    pci_get_subclass(child)) {
3516 					scp = pci_nomatch_tab[i].desc;
3517 				}
3518 			}
3519 		}
3520 		device_printf(dev, "<%s%s%s>",
3521 		    cp ? cp : "",
3522 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3523 		    scp ? scp : "");
3524 	}
3525 	printf(" at device %d.%d (no driver attached)\n",
3526 	    pci_get_slot(child), pci_get_function(child));
3527 	pci_cfg_save(child, device_get_ivars(child), 1);
3528 	return;
3529 }
3530 
3531 /*
3532  * Parse the PCI device database, if loaded, and return a pointer to a
3533  * description of the device.
3534  *
3535  * The database is flat text formatted as follows:
3536  *
3537  * Any line not in a valid format is ignored.
3538  * Lines are terminated with newline '\n' characters.
3539  *
3540  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3541  * the vendor name.
3542  *
3543  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3544  * - devices cannot be listed without a corresponding VENDOR line.
3545  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3546  * another TAB, then the device name.
3547  */
3548 
3549 /*
3550  * Assuming (ptr) points to the beginning of a line in the database,
3551  * return the vendor or device and description of the next entry.
3552  * The value of (vendor) or (device) inappropriate for the entry type
3553  * is set to -1.  Returns nonzero at the end of the database.
3554  *
3555  * Note that this is slightly unrobust in the face of corrupt data;
3556  * we attempt to safeguard against this by spamming the end of the
3557  * database with a newline when we initialise.
3558  */
/*
 * Assuming (ptr) points to the beginning of a line in the database,
 * return the vendor or device and description of the next entry.
 * The value of (vendor) or (device) inappropriate for the entry type
 * is set to -1.  Returns nonzero at the end of the database.
 *
 * NOTE: the "%80[^\n]" conversion can store up to 80 characters plus
 * the terminating NUL, so *desc must point to a buffer of at least
 * 81 bytes.
 *
 * Note that this is slightly unrobust in the face of corrupt data;
 * we attempt to safeguard against this by spamming the end of the
 * database with a newline when we initialise.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? (no leading tab) */
		if (*cp != '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? (leading tab) */
		if (*cp == '\t' &&
		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3604 
3605 static char *
3606 pci_describe_device(device_t dev)
3607 {
3608 	int	vendor, device;
3609 	char	*desc, *vp, *dp, *line;
3610 
3611 	desc = vp = dp = NULL;
3612 
3613 	/*
3614 	 * If we have no vendor data, we can't do anything.
3615 	 */
3616 	if (pci_vendordata == NULL)
3617 		goto out;
3618 
3619 	/*
3620 	 * Scan the vendor data looking for this device
3621 	 */
3622 	line = pci_vendordata;
3623 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3624 		goto out;
3625 	for (;;) {
3626 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3627 			goto out;
3628 		if (vendor == pci_get_vendor(dev))
3629 			break;
3630 	}
3631 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3632 		goto out;
3633 	for (;;) {
3634 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3635 			*dp = 0;
3636 			break;
3637 		}
3638 		if (vendor != -1) {
3639 			*dp = 0;
3640 			break;
3641 		}
3642 		if (device == pci_get_device(dev))
3643 			break;
3644 	}
3645 	if (dp[0] == '\0')
3646 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3647 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3648 	    NULL)
3649 		sprintf(desc, "%s, %s", vp, dp);
3650  out:
3651 	if (vp != NULL)
3652 		free(vp, M_DEVBUF);
3653 	if (dp != NULL)
3654 		free(dp, M_DEVBUF);
3655 	return(desc);
3656 }
3657 
/*
 * Bus read_ivar method: export fields of the child's cached config
 * registers as instance variables.  Returns ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3740 
/*
 * Bus write_ivar method: only the interrupt pin is writable; all other
 * known ivars are read-only (EINVAL), unknown ones return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3773 
3774 
3775 #include "opt_ddb.h"
3776 #ifdef DDB
3777 #include <ddb/ddb.h>
3778 #include <sys/cons.h>
3779 
3780 /*
3781  * List resources based on pci map registers, used for within ddb
3782  */
3783 
/*
 * DDB "show pciregs" command: walk the global PCI device queue and dump
 * the identification registers of every known device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Unattached devices print as "noneN"; none_count is
		 * only incremented on that branch of the ternary.
		 */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3823 #endif /* DDB */
3824 
/*
 * Lazily reserve the resource backing a BAR the first time a child
 * allocates it: size the BAR, reserve a matching range from our
 * parent, record it in the child's resource list as RLE_RESERVED, and
 * program the BAR with the assigned address.  Returns NULL on any
 * failure (bogus BAR, type mismatch, allocation failure).
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
3930 
3931 
/*
 * Bus alloc_resource method.  For direct children this performs lazy
 * allocation: routing a legacy interrupt on first IRQ request and
 * reserving BAR-backed memory/port ranges via pci_reserve_map() before
 * handing the request to the resource list.  Indirect children are
 * passed straight up the tree.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pcicfgregs *cfg = &dinfo->cfg;

	if (device_get_parent(child) != dev)
		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
		    type, rid, start, end, count, flags));

	/*
	 * Perform lazy resource allocation
	 */
	switch (type) {
	case SYS_RES_IRQ:
		/*
		 * Can't alloc legacy interrupt once MSI messages have
		 * been allocated.
		 */
		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
		    cfg->msix.msix_alloc > 0))
			return (NULL);

		/*
		 * If the child device doesn't have an interrupt
		 * routed and is deserving of an interrupt, try to
		 * assign it one.
		 */
		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
		    (cfg->intpin != 0))
			pci_assign_interrupt(dev, child, 0);
		break;
	case SYS_RES_IOPORT:
	case SYS_RES_MEMORY:
#ifdef NEW_PCIB
		/*
		 * PCI-PCI bridge I/O window resources are not BARs.
		 * For those allocations just pass the request up the
		 * tree.
		 */
		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
			switch (*rid) {
			case PCIR_IOBASEL_1:
			case PCIR_MEMBASE_1:
			case PCIR_PMBASEL_1:
				/*
				 * XXX: Should we bother creating a resource
				 * list entry?
				 */
				return (bus_generic_alloc_resource(dev, child,
				    type, rid, start, end, count, flags));
			}
		}
#endif
		/* Reserve resources for this BAR if needed. */
		rle = resource_list_find(rl, type, *rid);
		if (rle == NULL) {
			res = pci_reserve_map(dev, child, type, rid, start, end,
			    count, flags);
			if (res == NULL)
				return (NULL);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
4002 
4003 int
4004 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4005     struct resource *r)
4006 {
4007 	struct pci_devinfo *dinfo;
4008 	int error;
4009 
4010 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4011 	if (error)
4012 		return (error);
4013 
4014 	/* Enable decoding in the command register when activating BARs. */
4015 	if (device_get_parent(child) == dev) {
4016 		/* Device ROMs need their decoding explicitly enabled. */
4017 		dinfo = device_get_ivars(child);
4018 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4019 			pci_write_bar(child, pci_find_bar(child, rid),
4020 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4021 		switch (type) {
4022 		case SYS_RES_IOPORT:
4023 		case SYS_RES_MEMORY:
4024 			error = PCI_ENABLE_IO(dev, child, type);
4025 			break;
4026 		}
4027 	}
4028 	return (error);
4029 }
4030 
4031 int
4032 pci_deactivate_resource(device_t dev, device_t child, int type,
4033     int rid, struct resource *r)
4034 {
4035 	struct pci_devinfo *dinfo;
4036 	int error;
4037 
4038 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4039 	if (error)
4040 		return (error);
4041 
4042 	/* Disable decoding for device ROMs. */
4043 	if (device_get_parent(child) == dev) {
4044 		dinfo = device_get_ivars(child);
4045 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4046 			pci_write_bar(child, pci_find_bar(child, rid),
4047 			    rman_get_start(r));
4048 	}
4049 	return (0);
4050 }
4051 
/*
 * Detach and destroy a child device, tearing down everything recorded
 * in its resource list.  Memory and I/O decoding are disabled before
 * the resources are released.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * Nothing should still be active or busy after
			 * detach; complain and forcibly release such
			 * resources on the child's behalf so the
			 * unreserve below can proceed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4091 
/*
 * Delete one entry from a direct child's resource list.  If the entry
 * still holds a reserved resource it is unreserved first; for BAR
 * resources the BAR itself is cleared so the device stops decoding the
 * range before it is given back.  Entries that are still active or
 * busy are left in place with a diagnostic.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children keep their resource list here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle == NULL)
		return;

	if (rle->res) {
		if (rman_get_flags(rle->res) & RF_ACTIVE ||
		    resource_list_busy(rl, type, rid)) {
			device_printf(dev, "delete_resource: "
			    "Resource still owned by child, oops. "
			    "(type=%d, rid=%d, addr=%lx)\n",
			    type, rid, rman_get_start(rle->res));
			return;
		}

#ifndef __PCI_BAR_ZERO_VALID
		/*
		 * If this is a BAR, clear the BAR so it stops
		 * decoding before releasing the resource.
		 */
		switch (type) {
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			pci_write_bar(child, pci_find_bar(child, rid), 0);
			break;
		}
#endif
		resource_list_unreserve(rl, dev, child, type, rid);
	}
	resource_list_delete(rl, type, rid);
}
4134 
4135 struct resource_list *
4136 pci_get_resource_list (device_t dev, device_t child)
4137 {
4138 	struct pci_devinfo *dinfo = device_get_ivars(child);
4139 
4140 	return (&dinfo->resources);
4141 }
4142 
4143 uint32_t
4144 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4145 {
4146 	struct pci_devinfo *dinfo = device_get_ivars(child);
4147 	pcicfgregs *cfg = &dinfo->cfg;
4148 
4149 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4150 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4151 }
4152 
4153 void
4154 pci_write_config_method(device_t dev, device_t child, int reg,
4155     uint32_t val, int width)
4156 {
4157 	struct pci_devinfo *dinfo = device_get_ivars(child);
4158 	pcicfgregs *cfg = &dinfo->cfg;
4159 
4160 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4161 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4162 }
4163 
4164 int
4165 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4166     size_t buflen)
4167 {
4168 
4169 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4170 	    pci_get_function(child));
4171 	return (0);
4172 }
4173 
4174 int
4175 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4176     size_t buflen)
4177 {
4178 	struct pci_devinfo *dinfo;
4179 	pcicfgregs *cfg;
4180 
4181 	dinfo = device_get_ivars(child);
4182 	cfg = &dinfo->cfg;
4183 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4184 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4185 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4186 	    cfg->progif);
4187 	return (0);
4188 }
4189 
4190 int
4191 pci_assign_interrupt_method(device_t dev, device_t child)
4192 {
4193 	struct pci_devinfo *dinfo = device_get_ivars(child);
4194 	pcicfgregs *cfg = &dinfo->cfg;
4195 
4196 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4197 	    cfg->intpin));
4198 }
4199 
4200 static int
4201 pci_modevent(module_t mod, int what, void *arg)
4202 {
4203 	static struct cdev *pci_cdev;
4204 
4205 	switch (what) {
4206 	case MOD_LOAD:
4207 		STAILQ_INIT(&pci_devq);
4208 		pci_generation = 0;
4209 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4210 		    "pci");
4211 		pci_load_vendor_data();
4212 		break;
4213 
4214 	case MOD_UNLOAD:
4215 		destroy_dev(pci_cdev);
4216 		break;
4217 	}
4218 
4219 	return (0);
4220 }
4221 
/*
 * Restore the config registers cached by pci_cfg_save() to a type 0
 * device, e.g. on resume.  The device is returned to D0 before any
 * registers are written, since the write-back would otherwise be lost.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4263 
/*
 * Snapshot the writable config registers of a type 0 device into the
 * cached pcicfgregs so pci_cfg_restore() can reinstate them later.
 * When setstate is non-zero, the device may then be placed into D3
 * according to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Each policy level falls through into the next, stricter checks first. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4343