xref: /freebsd/sys/dev/pci/pci.c (revision 586f63035fbe5e45cfc971037fd76375661ece26)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 #include <machine/stdarg.h>
55 
56 #if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
57 #include <machine/intr_machdep.h>
58 #endif
59 
60 #include <sys/pciio.h>
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 #include <dev/pci/pci_private.h>
64 
65 #include <dev/usb/controller/xhcireg.h>
66 #include <dev/usb/controller/ehcireg.h>
67 #include <dev/usb/controller/ohcireg.h>
68 #include <dev/usb/controller/uhcireg.h>
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #define	PCIR_IS_BIOS(cfg, reg)						\
74 	(((cfg)->hdrtype == PCIM_HDRTYPE_NORMAL && reg == PCIR_BIOS) ||	\
75 	 ((cfg)->hdrtype == PCIM_HDRTYPE_BRIDGE && reg == PCIR_BIOS_1))
76 
77 
78 static pci_addr_t	pci_mapbase(uint64_t mapreg);
79 static const char	*pci_maptype(uint64_t mapreg);
80 static int		pci_mapsize(uint64_t testval);
81 static int		pci_maprange(uint64_t mapreg);
82 static pci_addr_t	pci_rombase(uint64_t mapreg);
83 static int		pci_romsize(uint64_t testval);
84 static void		pci_fixancient(pcicfgregs *cfg);
85 static int		pci_printf(pcicfgregs *cfg, const char *fmt, ...);
86 
87 static int		pci_porten(device_t dev);
88 static int		pci_memen(device_t dev);
89 static void		pci_assign_interrupt(device_t bus, device_t dev,
90 			    int force_route);
91 static int		pci_add_map(device_t bus, device_t dev, int reg,
92 			    struct resource_list *rl, int force, int prefetch);
93 static int		pci_probe(device_t dev);
94 static int		pci_attach(device_t dev);
95 static void		pci_load_vendor_data(void);
96 static int		pci_describe_parse_line(char **ptr, int *vendor,
97 			    int *device, char **desc);
98 static char		*pci_describe_device(device_t dev);
99 static int		pci_modevent(module_t mod, int what, void *arg);
100 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
101 			    pcicfgregs *cfg);
102 static void		pci_read_cap(device_t pcib, pcicfgregs *cfg);
103 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t *data);
105 #if 0
106 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t data);
108 #endif
109 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
110 static void		pci_disable_msi(device_t dev);
111 static void		pci_enable_msi(device_t dev, uint64_t address,
112 			    uint16_t data);
113 static void		pci_enable_msix(device_t dev, u_int index,
114 			    uint64_t address, uint32_t data);
115 static void		pci_mask_msix(device_t dev, u_int index);
116 static void		pci_unmask_msix(device_t dev, u_int index);
117 static int		pci_msi_blacklisted(void);
118 static void		pci_resume_msi(device_t dev);
119 static void		pci_resume_msix(device_t dev);
120 static int		pci_remap_intr_method(device_t bus, device_t dev,
121 			    u_int irq);
122 
/*
 * Dispatch table for the "pci" bus driver: device lifecycle methods,
 * generic bus methods (resource and interrupt management), and the
 * PCI-specific kobj interface.  Generic bus_* implementations are used
 * wherever PCI needs no special handling.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_adjust_resource,	bus_generic_adjust_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, pci_activate_resource),
	DEVMETHOD(bus_deactivate_resource, pci_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
	DEVMETHOD(bus_remap_intr,	pci_remap_intr_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
176 
/* Declare the "pci" driver class and register it as a child of pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/*
 * Vendor/device description database; presumably filled in by
 * pci_load_vendor_data() (declared above) -- confirm against that code.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
185 
186 
/*
 * One entry in the quirk table below: a device (matched by its combined
 * vendor/device ID) known to need special handling.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
#define	PCI_QUIRK_ENABLE_MSI_VM	3 /* Older chipset in VM where MSI works */
	int	arg1;	/* quirk-type-specific argument (e.g. register offset) */
	int	arg2;	/* quirk-type-specific argument */
};
196 
/*
 * Table of known-broken (or known-good-despite-appearances) devices,
 * terminated by a zero devid entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * Some virtualization environments emulate an older chipset
	 * but support MSI just fine.  QEMU uses the Intel 82440.
	 */
	{ 0x12378086, PCI_QUIRK_ENABLE_MSI_VM,	0,	0 },

	{ 0 }	/* terminator */
};
237 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices (entries added in pci_read_device()). */
struct devlist pci_devq;
/* Bumped each time a device is added, so iterators can detect list changes. */
uint32_t pci_generation;
/* Count of devices currently on pci_devq. */
uint32_t pci_numdevs = 0;
/* Set in pci_read_cap() once a PCIe / PCI-X capable bridge is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

int pci_do_power_suspend = 1;
TUNABLE_INT("hw.pci.do_power_suspend", &pci_do_power_suspend);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_suspend, CTLFLAG_RW,
    &pci_do_power_suspend, 1,
  "Transition from D0 -> D3 on suspend.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Early USB takeover is only useful on x86 where a BIOS may own the HC. */
#if defined(__i386__) || defined(__amd64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RDTUN,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
305 
306 /* Find a device_t by bus/slot/function in domain 0 */
307 
device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: look the device up in domain 0 only. */
	return (pci_find_dbsf(0, bus, slot, func));
}
314 
315 /* Find a device_t by domain/bus/slot/function */
316 
317 device_t
318 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
319 {
320 	struct pci_devinfo *dinfo;
321 
322 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
323 		if ((dinfo->cfg.domain == domain) &&
324 		    (dinfo->cfg.bus == bus) &&
325 		    (dinfo->cfg.slot == slot) &&
326 		    (dinfo->cfg.func == func)) {
327 			return (dinfo->cfg.dev);
328 		}
329 	}
330 
331 	return (NULL);
332 }
333 
334 /* Find a device_t by vendor/device ID */
335 
336 device_t
337 pci_find_device(uint16_t vendor, uint16_t device)
338 {
339 	struct pci_devinfo *dinfo;
340 
341 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
342 		if ((dinfo->cfg.vendor == vendor) &&
343 		    (dinfo->cfg.device == device)) {
344 			return (dinfo->cfg.dev);
345 		}
346 	}
347 
348 	return (NULL);
349 }
350 
351 device_t
352 pci_find_class(uint8_t class, uint8_t subclass)
353 {
354 	struct pci_devinfo *dinfo;
355 
356 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
357 		if (dinfo->cfg.baseclass == class &&
358 		    dinfo->cfg.subclass == subclass) {
359 			return (dinfo->cfg.dev);
360 		}
361 	}
362 
363 	return (NULL);
364 }
365 
/*
 * printf() prefixed with the device's "pci<domain>:<bus>:<slot>:<func>: "
 * location.  Returns the total character count emitted, mirroring printf().
 */
static int
pci_printf(pcicfgregs *cfg, const char *fmt, ...)
{
	va_list ap;
	int retval;

	retval = printf("pci%d:%d:%d:%d: ", cfg->domain, cfg->bus, cfg->slot,
	    cfg->func);
	va_start(ap, fmt);
	retval += vprintf(fmt, ap);
	va_end(ap);
	return (retval);
}
379 
380 /* return base address of memory or port map */
381 
382 static pci_addr_t
383 pci_mapbase(uint64_t mapreg)
384 {
385 
386 	if (PCI_BAR_MEM(mapreg))
387 		return (mapreg & PCIM_BAR_MEM_BASE);
388 	else
389 		return (mapreg & PCIM_BAR_IO_BASE);
390 }
391 
392 /* return map type of memory or port map */
393 
394 static const char *
395 pci_maptype(uint64_t mapreg)
396 {
397 
398 	if (PCI_BAR_IO(mapreg))
399 		return ("I/O Port");
400 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
401 		return ("Prefetchable Memory");
402 	return ("Memory");
403 }
404 
405 /* return log2 of map size decoded for memory or port map */
406 
407 static int
408 pci_mapsize(uint64_t testval)
409 {
410 	int ln2size;
411 
412 	testval = pci_mapbase(testval);
413 	ln2size = 0;
414 	if (testval != 0) {
415 		while ((testval & 1) == 0)
416 		{
417 			ln2size++;
418 			testval >>= 1;
419 		}
420 	}
421 	return (ln2size);
422 }
423 
424 /* return base address of device ROM */
425 
static pci_addr_t
pci_rombase(uint64_t mapreg)
{

	/* Mask off the enable and reserved low bits of the ROM BAR. */
	return (mapreg & PCIM_BIOS_ADDR_MASK);
}
432 
/* return log2 of map size decoded for device ROM */
434 
435 static int
436 pci_romsize(uint64_t testval)
437 {
438 	int ln2size;
439 
440 	testval = pci_rombase(testval);
441 	ln2size = 0;
442 	if (testval != 0) {
443 		while ((testval & 1) == 0)
444 		{
445 			ln2size++;
446 			testval >>= 1;
447 		}
448 	}
449 	return (ln2size);
450 }
451 
452 /* return log2 of address range supported by map register */
453 
454 static int
455 pci_maprange(uint64_t mapreg)
456 {
457 	int ln2range = 0;
458 
459 	if (PCI_BAR_IO(mapreg))
460 		ln2range = 32;
461 	else
462 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
463 		case PCIM_BAR_MEM_32:
464 			ln2range = 32;
465 			break;
466 		case PCIM_BAR_MEM_1MB:
467 			ln2range = 20;
468 			break;
469 		case PCIM_BAR_MEM_64:
470 			ln2range = 64;
471 			break;
472 		}
473 	return (ln2range);
474 }
475 
476 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
477 
static void
pci_fixancient(pcicfgregs *cfg)
{
	/* Only type-0 (normal) headers need adjusting. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/* PCI to PCI bridges use header type 1 */
	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
		cfg->hdrtype = PCIM_HDRTYPE_BRIDGE;
}
488 
489 /* extract header type specific config data */
490 
/* extract header type specific config data */

static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
		/* Type 0: ordinary device; has subvendor IDs and 6 BARs. */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case PCIM_HDRTYPE_BRIDGE:
		/* Type 1: PCI-PCI bridge; fewer BARs, no subvendor regs here. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		/* Type 2: CardBus bridge; subvendor IDs live at different offsets. */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
		break;
	}
#undef REG
}
512 
/*
 * Read the configuration header of the function at domain d, bus b,
 * slot s, function f into a freshly allocated pci_devinfo (of caller
 * supplied "size", which may be larger to hold bus-specific data).
 * The new entry is linked onto the global device list and its pciconf
 * summary is filled in.  Returns NULL if no device responds at that
 * address.  The caller owns the returned entry.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device dword means no device is present. */
	if (REG(PCIR_DEVVENDOR, 4) != 0xfffffffful) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		/* Snapshot the standard (type-independent) header fields. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multifunction flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;
		STAILQ_INIT(&cfg->maps);

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Parse the capability list only if the device claims one. */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_cap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed header into the pciconf(8)-visible form. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
588 
/*
 * Walk the device's PCI capability list and record the location and
 * salient registers of each capability we care about (power management,
 * HyperTransport, MSI, MSI-X, VPD, subvendor, PCI-X, PCI-express) into
 * cfg.  Also sets the global pcie_chipset/pcix_chipset hints.
 */
static void
pci_read_cap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case PCIM_HDRTYPE_NORMAL:
	case PCIM_HDRTYPE_BRIDGE:
		ptrptr = PCIR_CAP_PTR;
		break;
	case PCIM_HDRTYPE_CARDBUS:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			/* Record only the first PM capability found. */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_bse = ptr + PCIR_POWER_BSE;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);

			if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
				cfg->ht.ht_slave = ptr;

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT device at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				} else
					addr = MSI_INTEL_ADDR_BASE;

				cfg->ht.ht_msimap = ptr;
				cfg->ht.ht_msictrl = val;
				cfg->ht.ht_msiaddr = addr;
				break;
			}
#endif
			break;
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA each encode a BAR index plus offset. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) ==
			    PCIM_HDRTYPE_BRIDGE)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}

#if defined(__i386__) || defined(__amd64__) || defined(__powerpc__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif
/* REG and WREG use carry through to next functions */
}
746 
747 /*
748  * PCI Vital Product Data
749  */
750 
751 #define	PCI_VPD_TIMEOUT		1000000
752 
753 static int
754 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
755 {
756 	int count = PCI_VPD_TIMEOUT;
757 
758 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
759 
760 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
761 
762 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
763 		if (--count < 0)
764 			return (ENXIO);
765 		DELAY(1);	/* limit looping */
766 	}
767 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
768 
769 	return (0);
770 }
771 
#if 0
/*
 * Write one 32-bit word of VPD data at 4-byte aligned offset "reg".
 * Mirror image of pci_read_vpd_reg(): a write cycle is started by
 * setting bit 15 in the address register, and the device clears it
 * when the cycle completes.  Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Busy-wait for the device to clear the flag bit. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
791 
792 #undef PCI_VPD_TIMEOUT
793 
/*
 * Cursor state for reading a device's VPD one byte at a time
 * (see vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD byte offset to fetch */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
802 
803 static int
804 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
805 {
806 	uint32_t reg;
807 	uint8_t byte;
808 
809 	if (vrs->bytesinval == 0) {
810 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
811 			return (ENXIO);
812 		vrs->val = le32toh(reg);
813 		vrs->off += 4;
814 		byte = vrs->val & 0xff;
815 		vrs->bytesinval = 3;
816 	} else {
817 		vrs->val = vrs->val >> 8;
818 		byte = vrs->val & 0xff;
819 		vrs->bytesinval--;
820 	}
821 
822 	vrs->cksum += byte;
823 	*data = byte;
824 	return (0);
825 }
826 
827 static void
828 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
829 {
830 	struct vpd_readstate vrs;
831 	int state;
832 	int name;
833 	int remain;
834 	int i;
835 	int alloc, off;		/* alloc/off for RO/W arrays */
836 	int cksumvalid;
837 	int dflen;
838 	uint8_t byte;
839 	uint8_t byte2;
840 
841 	/* init vpd reader */
842 	vrs.bytesinval = 0;
843 	vrs.off = 0;
844 	vrs.pcib = pcib;
845 	vrs.cfg = cfg;
846 	vrs.cksum = 0;
847 
848 	state = 0;
849 	name = remain = i = 0;	/* shut up stupid gcc */
850 	alloc = off = 0;	/* shut up stupid gcc */
851 	dflen = 0;		/* shut up stupid gcc */
852 	cksumvalid = -1;
853 	while (state >= 0) {
854 		if (vpd_nextbyte(&vrs, &byte)) {
855 			state = -2;
856 			break;
857 		}
858 #if 0
859 		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
860 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
861 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
862 #endif
863 		switch (state) {
864 		case 0:		/* item name */
865 			if (byte & 0x80) {
866 				if (vpd_nextbyte(&vrs, &byte2)) {
867 					state = -2;
868 					break;
869 				}
870 				remain = byte2;
871 				if (vpd_nextbyte(&vrs, &byte2)) {
872 					state = -2;
873 					break;
874 				}
875 				remain |= byte2 << 8;
876 				if (remain > (0x7f*4 - vrs.off)) {
877 					state = -1;
878 					printf(
879 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
880 					    cfg->domain, cfg->bus, cfg->slot,
881 					    cfg->func, remain);
882 				}
883 				name = byte & 0x7f;
884 			} else {
885 				remain = byte & 0x7;
886 				name = (byte >> 3) & 0xf;
887 			}
888 			switch (name) {
889 			case 0x2:	/* String */
890 				cfg->vpd.vpd_ident = malloc(remain + 1,
891 				    M_DEVBUF, M_WAITOK);
892 				i = 0;
893 				state = 1;
894 				break;
895 			case 0xf:	/* End */
896 				state = -1;
897 				break;
898 			case 0x10:	/* VPD-R */
899 				alloc = 8;
900 				off = 0;
901 				cfg->vpd.vpd_ros = malloc(alloc *
902 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
903 				    M_WAITOK | M_ZERO);
904 				state = 2;
905 				break;
906 			case 0x11:	/* VPD-W */
907 				alloc = 8;
908 				off = 0;
909 				cfg->vpd.vpd_w = malloc(alloc *
910 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
911 				    M_WAITOK | M_ZERO);
912 				state = 5;
913 				break;
914 			default:	/* Invalid data, abort */
915 				state = -1;
916 				break;
917 			}
918 			break;
919 
920 		case 1:	/* Identifier String */
921 			cfg->vpd.vpd_ident[i++] = byte;
922 			remain--;
923 			if (remain == 0)  {
924 				cfg->vpd.vpd_ident[i] = '\0';
925 				state = 0;
926 			}
927 			break;
928 
929 		case 2:	/* VPD-R Keyword Header */
930 			if (off == alloc) {
931 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
932 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
933 				    M_DEVBUF, M_WAITOK | M_ZERO);
934 			}
935 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
936 			if (vpd_nextbyte(&vrs, &byte2)) {
937 				state = -2;
938 				break;
939 			}
940 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
941 			if (vpd_nextbyte(&vrs, &byte2)) {
942 				state = -2;
943 				break;
944 			}
945 			dflen = byte2;
946 			if (dflen == 0 &&
947 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
948 			    2) == 0) {
949 				/*
950 				 * if this happens, we can't trust the rest
951 				 * of the VPD.
952 				 */
953 				printf(
954 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
955 				    cfg->domain, cfg->bus, cfg->slot,
956 				    cfg->func, dflen);
957 				cksumvalid = 0;
958 				state = -1;
959 				break;
960 			} else if (dflen == 0) {
961 				cfg->vpd.vpd_ros[off].value = malloc(1 *
962 				    sizeof(*cfg->vpd.vpd_ros[off].value),
963 				    M_DEVBUF, M_WAITOK);
964 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
965 			} else
966 				cfg->vpd.vpd_ros[off].value = malloc(
967 				    (dflen + 1) *
968 				    sizeof(*cfg->vpd.vpd_ros[off].value),
969 				    M_DEVBUF, M_WAITOK);
970 			remain -= 3;
971 			i = 0;
972 			/* keep in sync w/ state 3's transistions */
973 			if (dflen == 0 && remain == 0)
974 				state = 0;
975 			else if (dflen == 0)
976 				state = 2;
977 			else
978 				state = 3;
979 			break;
980 
981 		case 3:	/* VPD-R Keyword Value */
982 			cfg->vpd.vpd_ros[off].value[i++] = byte;
983 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
984 			    "RV", 2) == 0 && cksumvalid == -1) {
985 				if (vrs.cksum == 0)
986 					cksumvalid = 1;
987 				else {
988 					if (bootverbose)
989 						printf(
990 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
991 						    cfg->domain, cfg->bus,
992 						    cfg->slot, cfg->func,
993 						    vrs.cksum);
994 					cksumvalid = 0;
995 					state = -1;
996 					break;
997 				}
998 			}
999 			dflen--;
1000 			remain--;
1001 			/* keep in sync w/ state 2's transistions */
1002 			if (dflen == 0)
1003 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1004 			if (dflen == 0 && remain == 0) {
1005 				cfg->vpd.vpd_rocnt = off;
1006 				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
1007 				    off * sizeof(*cfg->vpd.vpd_ros),
1008 				    M_DEVBUF, M_WAITOK | M_ZERO);
1009 				state = 0;
1010 			} else if (dflen == 0)
1011 				state = 2;
1012 			break;
1013 
1014 		case 4:
1015 			remain--;
1016 			if (remain == 0)
1017 				state = 0;
1018 			break;
1019 
1020 		case 5:	/* VPD-W Keyword Header */
1021 			if (off == alloc) {
1022 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1023 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1024 				    M_DEVBUF, M_WAITOK | M_ZERO);
1025 			}
1026 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1027 			if (vpd_nextbyte(&vrs, &byte2)) {
1028 				state = -2;
1029 				break;
1030 			}
1031 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1032 			if (vpd_nextbyte(&vrs, &byte2)) {
1033 				state = -2;
1034 				break;
1035 			}
1036 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1037 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1038 			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
1039 			    sizeof(*cfg->vpd.vpd_w[off].value),
1040 			    M_DEVBUF, M_WAITOK);
1041 			remain -= 3;
1042 			i = 0;
			/* keep in sync w/ state 6's transitions */
1044 			if (dflen == 0 && remain == 0)
1045 				state = 0;
1046 			else if (dflen == 0)
1047 				state = 5;
1048 			else
1049 				state = 6;
1050 			break;
1051 
1052 		case 6:	/* VPD-W Keyword Value */
1053 			cfg->vpd.vpd_w[off].value[i++] = byte;
1054 			dflen--;
1055 			remain--;
			/* keep in sync w/ state 5's transitions */
1057 			if (dflen == 0)
1058 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1059 			if (dflen == 0 && remain == 0) {
1060 				cfg->vpd.vpd_wcnt = off;
1061 				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
1062 				    off * sizeof(*cfg->vpd.vpd_w),
1063 				    M_DEVBUF, M_WAITOK | M_ZERO);
1064 				state = 0;
1065 			} else if (dflen == 0)
1066 				state = 5;
1067 			break;
1068 
1069 		default:
1070 			printf("pci%d:%d:%d:%d: invalid state: %d\n",
1071 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1072 			    state);
1073 			state = -1;
1074 			break;
1075 		}
1076 	}
1077 
1078 	if (cksumvalid == 0 || state < -1) {
1079 		/* read-only data bad, clean up */
1080 		if (cfg->vpd.vpd_ros != NULL) {
1081 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1082 				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1083 			free(cfg->vpd.vpd_ros, M_DEVBUF);
1084 			cfg->vpd.vpd_ros = NULL;
1085 		}
1086 	}
1087 	if (state < -1) {
1088 		/* I/O error, clean up */
1089 		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1090 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1091 		if (cfg->vpd.vpd_ident != NULL) {
1092 			free(cfg->vpd.vpd_ident, M_DEVBUF);
1093 			cfg->vpd.vpd_ident = NULL;
1094 		}
1095 		if (cfg->vpd.vpd_w != NULL) {
1096 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1097 				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1098 			free(cfg->vpd.vpd_w, M_DEVBUF);
1099 			cfg->vpd.vpd_w = NULL;
1100 		}
1101 	}
1102 	cfg->vpd.vpd_cached = 1;
1103 #undef REG
1104 #undef WREG
1105 }
1106 
1107 int
1108 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1109 {
1110 	struct pci_devinfo *dinfo = device_get_ivars(child);
1111 	pcicfgregs *cfg = &dinfo->cfg;
1112 
1113 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1114 		pci_read_vpd(device_get_parent(dev), cfg);
1115 
1116 	*identptr = cfg->vpd.vpd_ident;
1117 
1118 	if (*identptr == NULL)
1119 		return (ENXIO);
1120 
1121 	return (0);
1122 }
1123 
1124 int
1125 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1126 	const char **vptr)
1127 {
1128 	struct pci_devinfo *dinfo = device_get_ivars(child);
1129 	pcicfgregs *cfg = &dinfo->cfg;
1130 	int i;
1131 
1132 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1133 		pci_read_vpd(device_get_parent(dev), cfg);
1134 
1135 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1136 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1137 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1138 			*vptr = cfg->vpd.vpd_ros[i].value;
1139 		}
1140 
1141 	if (i != cfg->vpd.vpd_rocnt)
1142 		return (0);
1143 
1144 	*vptr = NULL;
1145 	return (ENXIO);
1146 }
1147 
1148 /*
1149  * Find the requested extended capability and return the offset in
1150  * configuration space via the pointer provided. The function returns
1151  * 0 on success and error code otherwise.
1152  */
1153 int
1154 pci_find_extcap_method(device_t dev, device_t child, int capability,
1155     int *capreg)
1156 {
1157 	struct pci_devinfo *dinfo = device_get_ivars(child);
1158 	pcicfgregs *cfg = &dinfo->cfg;
1159 	u_int32_t status;
1160 	u_int8_t ptr;
1161 
1162 	/*
1163 	 * Check the CAP_LIST bit of the PCI status register first.
1164 	 */
1165 	status = pci_read_config(child, PCIR_STATUS, 2);
1166 	if (!(status & PCIM_STATUS_CAPPRESENT))
1167 		return (ENXIO);
1168 
1169 	/*
1170 	 * Determine the start pointer of the capabilities list.
1171 	 */
1172 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1173 	case PCIM_HDRTYPE_NORMAL:
1174 	case PCIM_HDRTYPE_BRIDGE:
1175 		ptr = PCIR_CAP_PTR;
1176 		break;
1177 	case PCIM_HDRTYPE_CARDBUS:
1178 		ptr = PCIR_CAP_PTR_2;
1179 		break;
1180 	default:
1181 		/* XXX: panic? */
1182 		return (ENXIO);		/* no extended capabilities support */
1183 	}
1184 	ptr = pci_read_config(child, ptr, 1);
1185 
1186 	/*
1187 	 * Traverse the capabilities list.
1188 	 */
1189 	while (ptr != 0) {
1190 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1191 			if (capreg != NULL)
1192 				*capreg = ptr;
1193 			return (0);
1194 		}
1195 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1196 	}
1197 
1198 	return (ENOENT);
1199 }
1200 
1201 /*
1202  * Support for MSI-X message interrupts.
1203  */
1204 void
1205 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1206 {
1207 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1208 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1209 	uint32_t offset;
1210 
1211 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1212 	offset = msix->msix_table_offset + index * 16;
1213 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1214 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1215 	bus_write_4(msix->msix_table_res, offset + 8, data);
1216 
1217 	/* Enable MSI -> HT mapping. */
1218 	pci_ht_map_msi(dev, address);
1219 }
1220 
1221 void
1222 pci_mask_msix(device_t dev, u_int index)
1223 {
1224 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1225 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1226 	uint32_t offset, val;
1227 
1228 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1229 	offset = msix->msix_table_offset + index * 16 + 12;
1230 	val = bus_read_4(msix->msix_table_res, offset);
1231 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1232 		val |= PCIM_MSIX_VCTRL_MASK;
1233 		bus_write_4(msix->msix_table_res, offset, val);
1234 	}
1235 }
1236 
1237 void
1238 pci_unmask_msix(device_t dev, u_int index)
1239 {
1240 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1241 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1242 	uint32_t offset, val;
1243 
1244 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1245 	offset = msix->msix_table_offset + index * 16 + 12;
1246 	val = bus_read_4(msix->msix_table_res, offset);
1247 	if (val & PCIM_MSIX_VCTRL_MASK) {
1248 		val &= ~PCIM_MSIX_VCTRL_MASK;
1249 		bus_write_4(msix->msix_table_res, offset, val);
1250 	}
1251 }
1252 
1253 int
1254 pci_pending_msix(device_t dev, u_int index)
1255 {
1256 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1257 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1258 	uint32_t offset, bit;
1259 
1260 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1261 	offset = msix->msix_pba_offset + (index / 32) * 4;
1262 	bit = 1 << index % 32;
1263 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1264 }
1265 
1266 /*
1267  * Restore MSI-X registers and table during resume.  If MSI-X is
1268  * enabled then walk the virtual table to restore the actual MSI-X
1269  * table.
1270  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the cached MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1298 
1299 /*
1300  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1301  * returned in *count.  After this function returns, each message will be
1302  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1303  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have already allocated and activated the memory BAR(s) that
	 * hold the MSI-X table and PBA.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		/* The PBA lives in a different BAR than the table. */
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Never ask for more vectors than the device supports. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error) {
			/* Fail outright only if nothing was allocated. */
			if (i == 0)
				return (error);
			break;
		}
		/* IRQs become SYS_RES_IRQ resources starting at rid 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Identity map: table entry i uses 1-based vector i + 1. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1438 
1439 /*
1440  * By default, pci_alloc_msix() will assign the allocated IRQ
1441  * resources consecutively to the first N messages in the MSI-X table.
1442  * However, device drivers may want to use different layouts if they
1443  * either receive fewer messages than they asked for, or they wish to
1444  * populate the MSI-X table sparsely.  This method allows the driver
1445  * to specify what layout it wants.  It must be called after a
1446  * successful pci_alloc_msix() but before any of the associated
1447  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1448  *
1449  * The 'vectors' array contains 'count' message vectors.  The array
1450  * maps directly to the MSI-X table in that index 0 in the array
1451  * specifies the vector for the first message in the MSI-X table, etc.
1452  * The vector value in each array index can either be 0 to indicate
1453  * that no vector should be assigned to a message slot, or it can be a
1454  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1456  * vector (IRQ) to be used for the corresponding message.
1457  *
1458  * On successful return, each message with a non-zero vector will have
1459  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1460  * 1.  Additionally, if any of the IRQs allocated via the previous
1461  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1462  * will be freed back to the system automatically.
1463  *
1464  * For example, suppose a driver has a MSI-X table with 6 messages and
1465  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1466  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1467  * C.  After the call to pci_alloc_msix(), the device will be setup to
1468  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1470  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1471  * be freed back to the system.  This device will also have valid
1472  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1473  *
1474  * In any case, the SYS_RES_IRQ rid X will always map to the message
1475  * at MSI-X table index X - 1 and will only be valid if a vector is
1476  * assigned to that table entry.
1477  */
1478 int
1479 pci_remap_msix_method(device_t dev, device_t child, int count,
1480     const u_int *vectors)
1481 {
1482 	struct pci_devinfo *dinfo = device_get_ivars(child);
1483 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1484 	struct resource_list_entry *rle;
1485 	int i, irq, j, *used;
1486 
1487 	/*
1488 	 * Have to have at least one message in the table but the
1489 	 * table can't be bigger than the actual MSI-X table in the
1490 	 * device.
1491 	 */
1492 	if (count == 0 || count > msix->msix_msgnum)
1493 		return (EINVAL);
1494 
1495 	/* Sanity check the vectors. */
1496 	for (i = 0; i < count; i++)
1497 		if (vectors[i] > msix->msix_alloc)
1498 			return (EINVAL);
1499 
1500 	/*
1501 	 * Make sure there aren't any holes in the vectors to be used.
1502 	 * It's a big pain to support it, and it doesn't really make
1503 	 * sense anyway.  Also, at least one vector must be used.
1504 	 */
1505 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1506 	    M_ZERO);
1507 	for (i = 0; i < count; i++)
1508 		if (vectors[i] != 0)
1509 			used[vectors[i] - 1] = 1;
1510 	for (i = 0; i < msix->msix_alloc - 1; i++)
1511 		if (used[i] == 0 && used[i + 1] == 1) {
1512 			free(used, M_DEVBUF);
1513 			return (EINVAL);
1514 		}
1515 	if (used[0] != 1) {
1516 		free(used, M_DEVBUF);
1517 		return (EINVAL);
1518 	}
1519 
1520 	/* Make sure none of the resources are allocated. */
1521 	for (i = 0; i < msix->msix_table_len; i++) {
1522 		if (msix->msix_table[i].mte_vector == 0)
1523 			continue;
1524 		if (msix->msix_table[i].mte_handlers > 0)
1525 			return (EBUSY);
1526 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1527 		KASSERT(rle != NULL, ("missing resource"));
1528 		if (rle->res != NULL)
1529 			return (EBUSY);
1530 	}
1531 
1532 	/* Free the existing resource list entries. */
1533 	for (i = 0; i < msix->msix_table_len; i++) {
1534 		if (msix->msix_table[i].mte_vector == 0)
1535 			continue;
1536 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1537 	}
1538 
1539 	/*
1540 	 * Build the new virtual table keeping track of which vectors are
1541 	 * used.
1542 	 */
1543 	free(msix->msix_table, M_DEVBUF);
1544 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1545 	    M_DEVBUF, M_WAITOK | M_ZERO);
1546 	for (i = 0; i < count; i++)
1547 		msix->msix_table[i].mte_vector = vectors[i];
1548 	msix->msix_table_len = count;
1549 
1550 	/* Free any unused IRQs and resize the vectors array if necessary. */
1551 	j = msix->msix_alloc - 1;
1552 	if (used[j] == 0) {
1553 		struct msix_vector *vec;
1554 
1555 		while (used[j] == 0) {
1556 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1557 			    msix->msix_vectors[j].mv_irq);
1558 			j--;
1559 		}
1560 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1561 		    M_WAITOK);
1562 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1563 		    (j + 1));
1564 		free(msix->msix_vectors, M_DEVBUF);
1565 		msix->msix_vectors = vec;
1566 		msix->msix_alloc = j + 1;
1567 	}
1568 	free(used, M_DEVBUF);
1569 
1570 	/* Map the IRQs onto the rids. */
1571 	for (i = 0; i < count; i++) {
1572 		if (vectors[i] == 0)
1573 			continue;
1574 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1575 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1576 		    irq, 1);
1577 	}
1578 
1579 	if (bootverbose) {
1580 		device_printf(child, "Remapped MSI-X IRQs as: ");
1581 		for (i = 0; i < count; i++) {
1582 			if (i != 0)
1583 				printf(", ");
1584 			if (vectors[i] == 0)
1585 				printf("---");
1586 			else
1587 				printf("%d",
1588 				    msix->msix_vectors[vectors[i]].mv_irq);
1589 		}
1590 		printf("\n");
1591 	}
1592 
1593 	return (0);
1594 }
1595 
/*
 * Release all MSI-X messages allocated for a child device.  Fails with
 * EBUSY if any message still has a handler established or an allocated
 * SYS_RES_IRQ resource outstanding.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1642 
1643 /*
1644  * Return the max supported MSI-X messages this device supports.
1645  * Basically, assuming the MD code can alloc messages, this function
1646  * should return the maximum value that pci_alloc_msix() can return.
1647  * Thus, it is subject to the tunables, etc.
1648  */
1649 int
1650 pci_msix_count_method(device_t dev, device_t child)
1651 {
1652 	struct pci_devinfo *dinfo = device_get_ivars(child);
1653 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1654 
1655 	if (pci_do_msix && msix->msix_location != 0)
1656 		return (msix->msix_msgnum);
1657 	return (0);
1658 }
1659 
1660 /*
1661  * HyperTransport MSI mapping control
1662  */
1663 void
1664 pci_ht_map_msi(device_t dev, uint64_t addr)
1665 {
1666 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1667 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1668 
1669 	if (!ht->ht_msimap)
1670 		return;
1671 
1672 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1673 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1674 		/* Enable MSI -> HT mapping. */
1675 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1676 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1677 		    ht->ht_msictrl, 2);
1678 	}
1679 
1680 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1681 		/* Disable MSI -> HT mapping. */
1682 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1683 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1684 		    ht->ht_msictrl, 2);
1685 	}
1686 }
1687 
1688 int
1689 pci_get_max_read_req(device_t dev)
1690 {
1691 	int cap;
1692 	uint16_t val;
1693 
1694 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1695 		return (0);
1696 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1697 	val &= PCIM_EXP_CTL_MAX_READ_REQUEST;
1698 	val >>= 12;
1699 	return (1 << (val + 7));
1700 }
1701 
1702 int
1703 pci_set_max_read_req(device_t dev, int size)
1704 {
1705 	int cap;
1706 	uint16_t val;
1707 
1708 	if (pci_find_cap(dev, PCIY_EXPRESS, &cap) != 0)
1709 		return (0);
1710 	if (size < 128)
1711 		size = 128;
1712 	if (size > 4096)
1713 		size = 4096;
1714 	size = (1 << (fls(size) - 1));
1715 	val = pci_read_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, 2);
1716 	val &= ~PCIM_EXP_CTL_MAX_READ_REQUEST;
1717 	val |= (fls(size) - 8) << 12;
1718 	pci_write_config(dev, cap + PCIR_EXPRESS_DEVICE_CTL, val, 2);
1719 	return (size);
1720 }
1721 
1722 /*
1723  * Support for MSI message signalled interrupts.
1724  */
1725 void
1726 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1727 {
1728 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1729 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1730 
1731 	/* Write data and address values. */
1732 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1733 	    address & 0xffffffff, 4);
1734 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1735 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1736 		    address >> 32, 4);
1737 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1738 		    data, 2);
1739 	} else
1740 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1741 		    2);
1742 
1743 	/* Enable MSI in the control register. */
1744 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1745 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1746 	    2);
1747 
1748 	/* Enable MSI -> HT mapping. */
1749 	pci_ht_map_msi(dev, address);
1750 }
1751 
1752 void
1753 pci_disable_msi(device_t dev)
1754 {
1755 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1756 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1757 
1758 	/* Disable MSI -> HT mapping. */
1759 	pci_ht_map_msi(dev, 0);
1760 
1761 	/* Disable MSI in the control register. */
1762 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1763 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1764 	    2);
1765 }
1766 
1767 /*
1768  * Restore MSI registers during resume.  If MSI is enabled then
1769  * restore the data and address registers in addition to the control
1770  * register.
1771  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* MSI was enabled: rewrite address/data from cached copies. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit capable: the data register moves up. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Always restore the cached control register value. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1797 
static int
pci_remap_intr_method(device_t bus, device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update the cache, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/* Update every table slot using vector i+1. */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					/* mte_vector is 1-based. */
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1870 
1871 /*
1872  * Returns true if the specified device is blacklisted because MSI
1873  * doesn't work.
1874  */
1875 int
1876 pci_msi_device_blacklisted(device_t dev)
1877 {
1878 	struct pci_quirk *q;
1879 
1880 	if (!pci_honor_msi_blacklist)
1881 		return (0);
1882 
1883 	for (q = &pci_quirks[0]; q->devid; q++) {
1884 		if (q->devid == pci_get_devid(dev) &&
1885 		    q->type == PCI_QUIRK_DISABLE_MSI)
1886 			return (1);
1887 	}
1888 	return (0);
1889 }
1890 
1891 /*
1892  * Returns true if a specified chipset supports MSI when it is
1893  * emulated hardware in a virtual machine.
1894  */
1895 static int
1896 pci_msi_vm_chipset(device_t dev)
1897 {
1898 	struct pci_quirk *q;
1899 
1900 	for (q = &pci_quirks[0]; q->devid; q++) {
1901 		if (q->devid == pci_get_devid(dev) &&
1902 		    q->type == PCI_QUIRK_ENABLE_MSI_VM)
1903 			return (1);
1904 	}
1905 	return (0);
1906 }
1907 
1908 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1910  * we just check for blacklisted chipsets as represented by the
1911  * host-PCI bridge at device 0:0:0.  In the future, it may become
1912  * necessary to check other system attributes, such as the kenv values
1913  * that give the motherboard manufacturer and model number.
1914  */
1915 static int
1916 pci_msi_blacklisted(void)
1917 {
1918 	device_t dev;
1919 
1920 	if (!pci_honor_msi_blacklist)
1921 		return (0);
1922 
1923 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1924 	if (!(pcie_chipset || pcix_chipset)) {
1925 		if (vm_guest != VM_GUEST_NO) {
1926 			dev = pci_find_bsf(0, 0, 0);
1927 			if (dev != NULL)
1928 				return (pci_msi_vm_chipset(dev) == 0);
1929 		}
1930 		return (1);
1931 	}
1932 
1933 	dev = pci_find_bsf(0, 0, 0);
1934 	if (dev != NULL)
1935 		return (pci_msi_device_blacklisted(dev));
1936 	return (0);
1937 }
1938 
1939 /*
1940  * Attempt to allocate *count MSI messages.  The actual number allocated is
1941  * returned in *count.  After this function returns, each message will be
1942  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1943  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];	/* 32 == architectural MSI maximum */
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Ask the parent bridge for 'actual' messages, halving on failure.
	 * Halving a power of 2 keeps the count a power of 2 as required.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    actual, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/*
	 * Update control register with actual count.  The Multiple Message
	 * Enable (MME) field holds log2 of the number of enabled vectors;
	 * since 'actual' is a power of 2, ffs(actual) - 1 == log2(actual).
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2062 
2063 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated.  This pass also
	 * collects the IRQ numbers so they can be handed back to the
	 * parent bridge below.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2111 
2112 /*
2113  * Return the max supported MSI messages this device supports.
2114  * Basically, assuming the MD code can alloc messages, this function
2115  * should return the maximum value that pci_alloc_msi() can return.
2116  * Thus, it is subject to the tunables, etc.
2117  */
2118 int
2119 pci_msi_count_method(device_t dev, device_t child)
2120 {
2121 	struct pci_devinfo *dinfo = device_get_ivars(child);
2122 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2123 
2124 	if (pci_do_msi && msi->msi_location != 0)
2125 		return (msi->msi_msgnum);
2126 	return (0);
2127 }
2128 
2129 /* free pcicfgregs structure and all depending data structures */
2130 
2131 int
2132 pci_freecfg(struct pci_devinfo *dinfo)
2133 {
2134 	struct devlist *devlist_head;
2135 	struct pci_map *pm, *next;
2136 	int i;
2137 
2138 	devlist_head = &pci_devq;
2139 
2140 	if (dinfo->cfg.vpd.vpd_reg) {
2141 		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2142 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2143 			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2144 		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2145 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2146 			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2147 		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2148 	}
2149 	STAILQ_FOREACH_SAFE(pm, &dinfo->cfg.maps, pm_link, next) {
2150 		free(pm, M_DEVBUF);
2151 	}
2152 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2153 	free(dinfo, M_DEVBUF);
2154 
2155 	/* increment the generation count */
2156 	pci_generation++;
2157 
2158 	/* we're losing one device */
2159 	pci_numdevs--;
2160 	return (0);
2161 }
2162 
2163 /*
2164  * PCI power manangement
2165  */
2166 int
2167 pci_set_powerstate_method(device_t dev, device_t child, int state)
2168 {
2169 	struct pci_devinfo *dinfo = device_get_ivars(child);
2170 	pcicfgregs *cfg = &dinfo->cfg;
2171 	uint16_t status;
2172 	int result, oldstate, highest, delay;
2173 
2174 	if (cfg->pp.pp_cap == 0)
2175 		return (EOPNOTSUPP);
2176 
2177 	/*
2178 	 * Optimize a no state change request away.  While it would be OK to
2179 	 * write to the hardware in theory, some devices have shown odd
2180 	 * behavior when going from D3 -> D3.
2181 	 */
2182 	oldstate = pci_get_powerstate(child);
2183 	if (oldstate == state)
2184 		return (0);
2185 
2186 	/*
2187 	 * The PCI power management specification states that after a state
2188 	 * transition between PCI power states, system software must
2189 	 * guarantee a minimal delay before the function accesses the device.
2190 	 * Compute the worst case delay that we need to guarantee before we
2191 	 * access the device.  Many devices will be responsive much more
2192 	 * quickly than this delay, but there are some that don't respond
2193 	 * instantly to state changes.  Transitions to/from D3 state require
2194 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2195 	 * is done below with DELAY rather than a sleeper function because
2196 	 * this function can be called from contexts where we cannot sleep.
2197 	 */
2198 	highest = (oldstate > state) ? oldstate : state;
2199 	if (highest == PCI_POWERSTATE_D3)
2200 	    delay = 10000;
2201 	else if (highest == PCI_POWERSTATE_D2)
2202 	    delay = 200;
2203 	else
2204 	    delay = 0;
2205 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2206 	    & ~PCIM_PSTAT_DMASK;
2207 	result = 0;
2208 	switch (state) {
2209 	case PCI_POWERSTATE_D0:
2210 		status |= PCIM_PSTAT_D0;
2211 		break;
2212 	case PCI_POWERSTATE_D1:
2213 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2214 			return (EOPNOTSUPP);
2215 		status |= PCIM_PSTAT_D1;
2216 		break;
2217 	case PCI_POWERSTATE_D2:
2218 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2219 			return (EOPNOTSUPP);
2220 		status |= PCIM_PSTAT_D2;
2221 		break;
2222 	case PCI_POWERSTATE_D3:
2223 		status |= PCIM_PSTAT_D3;
2224 		break;
2225 	default:
2226 		return (EINVAL);
2227 	}
2228 
2229 	if (bootverbose)
2230 		pci_printf(cfg, "Transition from D%d to D%d\n", oldstate,
2231 		    state);
2232 
2233 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2234 	if (delay)
2235 		DELAY(delay);
2236 	return (0);
2237 }
2238 
2239 int
2240 pci_get_powerstate_method(device_t dev, device_t child)
2241 {
2242 	struct pci_devinfo *dinfo = device_get_ivars(child);
2243 	pcicfgregs *cfg = &dinfo->cfg;
2244 	uint16_t status;
2245 	int result;
2246 
2247 	if (cfg->pp.pp_cap != 0) {
2248 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2249 		switch (status & PCIM_PSTAT_DMASK) {
2250 		case PCIM_PSTAT_D0:
2251 			result = PCI_POWERSTATE_D0;
2252 			break;
2253 		case PCIM_PSTAT_D1:
2254 			result = PCI_POWERSTATE_D1;
2255 			break;
2256 		case PCIM_PSTAT_D2:
2257 			result = PCI_POWERSTATE_D2;
2258 			break;
2259 		case PCIM_PSTAT_D3:
2260 			result = PCI_POWERSTATE_D3;
2261 			break;
2262 		default:
2263 			result = PCI_POWERSTATE_UNKNOWN;
2264 			break;
2265 		}
2266 	} else {
2267 		/* No support, device is always at D0 */
2268 		result = PCI_POWERSTATE_D0;
2269 	}
2270 	return (result);
2271 }
2272 
2273 /*
2274  * Some convenience functions for PCI device drivers.
2275  */
2276 
2277 static __inline void
2278 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2279 {
2280 	uint16_t	command;
2281 
2282 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2283 	command |= bit;
2284 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2285 }
2286 
2287 static __inline void
2288 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2289 {
2290 	uint16_t	command;
2291 
2292 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2293 	command &= ~bit;
2294 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2295 }
2296 
2297 int
2298 pci_enable_busmaster_method(device_t dev, device_t child)
2299 {
2300 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2301 	return (0);
2302 }
2303 
2304 int
2305 pci_disable_busmaster_method(device_t dev, device_t child)
2306 {
2307 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2308 	return (0);
2309 }
2310 
2311 int
2312 pci_enable_io_method(device_t dev, device_t child, int space)
2313 {
2314 	uint16_t bit;
2315 
2316 	switch(space) {
2317 	case SYS_RES_IOPORT:
2318 		bit = PCIM_CMD_PORTEN;
2319 		break;
2320 	case SYS_RES_MEMORY:
2321 		bit = PCIM_CMD_MEMEN;
2322 		break;
2323 	default:
2324 		return (EINVAL);
2325 	}
2326 	pci_set_command_bit(dev, child, bit);
2327 	return (0);
2328 }
2329 
2330 int
2331 pci_disable_io_method(device_t dev, device_t child, int space)
2332 {
2333 	uint16_t bit;
2334 
2335 	switch(space) {
2336 	case SYS_RES_IOPORT:
2337 		bit = PCIM_CMD_PORTEN;
2338 		break;
2339 	case SYS_RES_MEMORY:
2340 		bit = PCIM_CMD_MEMEN;
2341 		break;
2342 	default:
2343 		return (EINVAL);
2344 	}
2345 	pci_clear_command_bit(dev, child, bit);
2346 	return (0);
2347 }
2348 
2349 /*
2350  * New style pci driver.  Parent device is either a pci-host-bridge or a
2351  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2352  */
2353 
/*
 * When booting verbose, dump the interesting parts of a device's config
 * header: IDs, location, class, timing parameters, interrupt routing,
 * and the power-management/MSI/MSI-X capabilities parsed earlier.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin values 1..4 print as 'a'..'d'. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2410 
2411 static int
2412 pci_porten(device_t dev)
2413 {
2414 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_PORTEN) != 0;
2415 }
2416 
2417 static int
2418 pci_memen(device_t dev)
2419 {
2420 	return (pci_read_config(dev, PCIR_COMMAND, 2) & PCIM_CMD_MEMEN) != 0;
2421 }
2422 
/*
 * Read a BAR's current value into *mapp and its size-probe value into
 * *testvalp, restoring the original BAR contents before returning.
 */
static void
pci_read_bar(device_t dev, int reg, pci_addr_t *mapp, pci_addr_t *testvalp)
{
	struct pci_devinfo *dinfo;
	pci_addr_t map, testval;
	int ln2range;
	uint16_t cmd;

	/*
	 * The device ROM BAR is special.  It is always a 32-bit
	 * memory BAR.  Bit 0 is special and should not be set when
	 * sizing the BAR.
	 */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, reg)) {
		map = pci_read_config(dev, reg, 4);
		/* All 1's except the ROM enable bit (bit 0). */
		pci_write_config(dev, reg, 0xfffffffe, 4);
		testval = pci_read_config(dev, reg, 4);
		pci_write_config(dev, reg, map, 4);
		*mapp = map;
		*testvalp = testval;
		return;
	}

	map = pci_read_config(dev, reg, 4);
	ln2range = pci_maprange(map);
	if (ln2range == 64)
		map |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;

	/*
	 * Disable decoding via the command register before
	 * determining the BAR's length since we will be placing it in
	 * a weird state.
	 */
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	pci_write_config(dev, PCIR_COMMAND,
	    cmd & ~(PCI_BAR_MEM(map) ? PCIM_CMD_MEMEN : PCIM_CMD_PORTEN), 2);

	/*
	 * Determine the BAR's length by writing all 1's.  The bottom
	 * log_2(size) bits of the BAR will stick as 0 when we read
	 * the value back.
	 */
	pci_write_config(dev, reg, 0xffffffff, 4);
	testval = pci_read_config(dev, reg, 4);
	if (ln2range == 64) {
		/* Probe the upper half of a 64-bit BAR as well. */
		pci_write_config(dev, reg + 4, 0xffffffff, 4);
		testval |= (pci_addr_t)pci_read_config(dev, reg + 4, 4) << 32;
	}

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(dev, reg, map, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, map >> 32, 4);
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	*mapp = map;
	*testvalp = testval;
}
2486 
/*
 * Program 'base' into the BAR described by 'pm', then re-read the BAR
 * to cache the value the device actually latched (low type bits are
 * read-only and devices may not implement all address bits).
 */
static void
pci_write_bar(device_t dev, struct pci_map *pm, pci_addr_t base)
{
	struct pci_devinfo *dinfo;
	int ln2range;

	/* The device ROM BAR is always a 32-bit memory BAR. */
	dinfo = device_get_ivars(dev);
	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
		ln2range = 32;
	else
		ln2range = pci_maprange(pm->pm_value);
	pci_write_config(dev, pm->pm_reg, base, 4);
	if (ln2range == 64)
		pci_write_config(dev, pm->pm_reg + 4, base >> 32, 4);
	/* Cache what the device accepted, not what we asked for. */
	pm->pm_value = pci_read_config(dev, pm->pm_reg, 4);
	if (ln2range == 64)
		pm->pm_value |= (pci_addr_t)pci_read_config(dev,
		    pm->pm_reg + 4, 4) << 32;
}
2507 
2508 struct pci_map *
2509 pci_find_bar(device_t dev, int reg)
2510 {
2511 	struct pci_devinfo *dinfo;
2512 	struct pci_map *pm;
2513 
2514 	dinfo = device_get_ivars(dev);
2515 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2516 		if (pm->pm_reg == reg)
2517 			return (pm);
2518 	}
2519 	return (NULL);
2520 }
2521 
2522 int
2523 pci_bar_enabled(device_t dev, struct pci_map *pm)
2524 {
2525 	struct pci_devinfo *dinfo;
2526 	uint16_t cmd;
2527 
2528 	dinfo = device_get_ivars(dev);
2529 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) &&
2530 	    !(pm->pm_value & PCIM_BIOS_ENABLE))
2531 		return (0);
2532 	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
2533 	if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg) || PCI_BAR_MEM(pm->pm_value))
2534 		return ((cmd & PCIM_CMD_MEMEN) != 0);
2535 	else
2536 		return ((cmd & PCIM_CMD_PORTEN) != 0);
2537 }
2538 
/*
 * Allocate a pci_map record for BAR 'reg' and insert it into the
 * device's map list, which is kept sorted by config register offset.
 *
 * NOTE(review): a new entry whose register is smaller than the current
 * list head is appended after the first element; this appears to rely
 * on callers adding BARs in ascending register order — confirm before
 * reusing this helper in a context that doesn't guarantee that.
 */
static struct pci_map *
pci_add_bar(device_t dev, int reg, pci_addr_t value, pci_addr_t size)
{
	struct pci_devinfo *dinfo;
	struct pci_map *pm, *prev;

	dinfo = device_get_ivars(dev);
	pm = malloc(sizeof(*pm), M_DEVBUF, M_WAITOK | M_ZERO);
	pm->pm_reg = reg;
	pm->pm_value = value;
	pm->pm_size = size;
	/* Find the entry to insert after so the list stays sorted. */
	STAILQ_FOREACH(prev, &dinfo->cfg.maps, pm_link) {
		KASSERT(prev->pm_reg != pm->pm_reg, ("duplicate map %02x",
		    reg));
		if (STAILQ_NEXT(prev, pm_link) == NULL ||
		    STAILQ_NEXT(prev, pm_link)->pm_reg > pm->pm_reg)
			break;
	}
	if (prev != NULL)
		STAILQ_INSERT_AFTER(&dinfo->cfg.maps, prev, pm, pm_link);
	else
		STAILQ_INSERT_TAIL(&dinfo->cfg.maps, pm, pm_link);
	return (pm);
}
2563 
2564 static void
2565 pci_restore_bars(device_t dev)
2566 {
2567 	struct pci_devinfo *dinfo;
2568 	struct pci_map *pm;
2569 	int ln2range;
2570 
2571 	dinfo = device_get_ivars(dev);
2572 	STAILQ_FOREACH(pm, &dinfo->cfg.maps, pm_link) {
2573 		if (PCIR_IS_BIOS(&dinfo->cfg, pm->pm_reg))
2574 			ln2range = 32;
2575 		else
2576 			ln2range = pci_maprange(pm->pm_value);
2577 		pci_write_config(dev, pm->pm_reg, pm->pm_value, 4);
2578 		if (ln2range == 64)
2579 			pci_write_config(dev, pm->pm_reg + 4,
2580 			    pm->pm_value >> 32, 4);
2581 	}
2582 }
2583 
2584 /*
2585  * Add a resource based on a pci map register. Return 1 if the map
2586  * register is a 32bit map register or 2 if it is a 64bit register.
2587  */
static int
pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
    int force, int prefetch)
{
	struct pci_map *pm;
	pci_addr_t base, map, testval;
	pci_addr_t start, end, count;
	int barlen, basezero, maprange, mapsize, type;
	uint16_t cmd;
	struct resource *res;

	/*
	 * The BAR may already exist if the device is a CardBus card
	 * whose CIS is stored in this BAR.
	 */
	pm = pci_find_bar(dev, reg);
	if (pm != NULL) {
		maprange = pci_maprange(pm->pm_value);
		barlen = maprange == 64 ? 2 : 1;
		return (barlen);
	}

	pci_read_bar(dev, reg, &map, &testval);
	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	mapsize = pci_mapsize(testval);	/* log2 of the BAR's length */
	base = pci_mapbase(map);
#ifdef __PCI_BAR_ZERO_VALID
	basezero = 0;
#else
	basezero = base == 0;
#endif
	maprange = pci_maprange(map);
	barlen = maprange == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && mapsize < 4) ||
	    (type == SYS_RES_IOPORT && mapsize < 2))
		return (barlen);

	/* Save a record of this BAR. */
	pm = pci_add_bar(dev, reg, map, mapsize);
	if (bootverbose) {
		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), maprange, (uintmax_t)base, mapsize);
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			printf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(dev))
			printf(", memory disabled\n");
		else
			printf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems if this architecture does
	 * not allow that.  It is best to ignore such entries for the
	 * moment.  These will be allocated later if the driver specifically
	 * requests them.  However, some removable buses look better when
	 * all resources are allocated, so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (basezero || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/*
		 * The base does not fit in a u_long (e.g. a 64-bit BAR
		 * on a 32-bit kernel), so it cannot be expressed as a
		 * resource range; skip it.
		 */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev),
		    pci_get_function(dev), reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(dev)) {
			cmd = pci_read_config(dev, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			pci_write_config(dev, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(dev))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(dev))
			return (barlen);
	}

	count = (pci_addr_t)1 << mapsize;
	if (basezero || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ul;
	} else {
		start = base;
		end = base + count - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_reserve(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, clear the BAR and delete
		 * the resource list entry to force
		 * pci_alloc_resource() to allocate resources from the
		 * parent.
		 */
		resource_list_delete(rl, type, reg);
		start = 0;
	} else
		start = rman_get_start(res);
	pci_write_bar(dev, pm, start);
	return (barlen);
}
2732 
2733 /*
2734  * For ATA devices we need to decide early what addressing mode to use.
2735  * Legacy demands that the primary and secondary ATA ports sits on the
2736  * same addresses that old ISA hardware did. This dictates that we use
2737  * those addresses and ignore the BAR's if we cannot set PCI native
2738  * addressing mode.
2739  */
2740 static void
2741 pci_ata_maps(device_t bus, device_t dev, struct resource_list *rl, int force,
2742     uint32_t prefetchmask)
2743 {
2744 	struct resource *r;
2745 	int rid, type, progif;
2746 #if 0
2747 	/* if this device supports PCI native addressing use it */
2748 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2749 	if ((progif & 0x8a) == 0x8a) {
2750 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2751 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2752 			printf("Trying ATA native PCI addressing mode\n");
2753 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2754 		}
2755 	}
2756 #endif
2757 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2758 	type = SYS_RES_IOPORT;
2759 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2760 		pci_add_map(bus, dev, PCIR_BAR(0), rl, force,
2761 		    prefetchmask & (1 << 0));
2762 		pci_add_map(bus, dev, PCIR_BAR(1), rl, force,
2763 		    prefetchmask & (1 << 1));
2764 	} else {
2765 		rid = PCIR_BAR(0);
2766 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
2767 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x1f0,
2768 		    0x1f7, 8, 0);
2769 		rid = PCIR_BAR(1);
2770 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
2771 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x3f6,
2772 		    0x3f6, 1, 0);
2773 	}
2774 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2775 		pci_add_map(bus, dev, PCIR_BAR(2), rl, force,
2776 		    prefetchmask & (1 << 2));
2777 		pci_add_map(bus, dev, PCIR_BAR(3), rl, force,
2778 		    prefetchmask & (1 << 3));
2779 	} else {
2780 		rid = PCIR_BAR(2);
2781 		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
2782 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x170,
2783 		    0x177, 8, 0);
2784 		rid = PCIR_BAR(3);
2785 		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
2786 		r = resource_list_reserve(rl, bus, dev, type, &rid, 0x376,
2787 		    0x376, 1, 0);
2788 	}
2789 	pci_add_map(bus, dev, PCIR_BAR(4), rl, force,
2790 	    prefetchmask & (1 << 4));
2791 	pci_add_map(bus, dev, PCIR_BAR(5), rl, force,
2792 	    prefetchmask & (1 << 5));
2793 }
2794 
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/*
	 * Let the user override the IRQ with a tunable of the form
	 * "hw.pci<domain>.<bus>.<slot>.INT<pin>.irq".  Out-of-range
	 * values (<= 0 or >= 255) are treated as no override.
	 */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2842 
2843 /* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* The SMM BIOS owns the controller; ask it to let go. */
		if (bootverbose)
			printf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100ms for the ownership change to take. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* The SMM did not release; force a reset instead. */
			if (bootverbose)
				printf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2879 
2880 /* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Clear the controller's interrupt-enable register. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2903 
2904 /* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		/* Only the USB legacy-support capability matters here. */
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* The BIOS does not own this controller. */
			continue;
		}
		if (bootverbose)
			printf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Wait up to 100ms for the BIOS to drop its semaphore. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2959 
2960 /* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint8_t eecp;
	uint8_t bios_sem;
	uint8_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All 1's so the first loop iteration reads the capability list. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		eec = bus_read_4(res, eecp);

		/* Only the USB legacy-support capability matters here. */
		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
		if (bios_sem == 0)
			continue;

		if (bootverbose)
			printf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				printf("xhci early: "
				    "SMM does not respond\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
3021 
/*
 * Add this device's resources (BARs and any quirked extra maps) to its
 * resource list, route its interrupt, and take over USB controllers
 * from the BIOS/SMM.  'force' and 'prefetchmask' are passed through to
 * the BAR mapping helpers.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int i;

	/*
	 * ATA devices need special map treatment: controllers in legacy
	 * (compatibility) mode, or whose BARs 0 and 2 read back as zero,
	 * are mapped via pci_ata_maps() instead of the normal BAR walk.
	 */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(bus, dev, rl, force, prefetchmask);
	else
		/*
		 * pci_add_map() returns the number of BAR slots consumed
		 * (presumably 2 for a 64-bit BAR -- its definition is not
		 * visible here), so it drives the loop increment.
		 */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(bus, dev, PCIR_BAR(i), rl, force,
			    prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(bus, dev, q->arg1, rl, force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}

	/* Take USB controllers away from the BIOS/SMM before probing. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3078 
/*
 * Enumerate every slot/function on PCI bus 'busno' in 'domain' and add
 * each device found as a child of 'dev'.  'dinfo_size' allows a
 * subclassed PCI bus to embed struct pci_devinfo in a larger
 * per-device structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Read function 0's header first; it carries the
		 * multi-function bit for the whole slot. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);	/* brief settle delay before the config read */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;	/* empty slot or bogus header type */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3111 
/*
 * Create a device_t for 'dinfo' under 'bus', attach the devinfo as its
 * ivars, and initialize its resource list and config-space state.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Save the current config registers and immediately write them
	 * back -- presumably this normalizes/caches the device's config
	 * state before resources are added (see pci_cfg_save/restore).
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
3123 
/*
 * Probe method for the generic PCI bus driver.
 */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (BUS_PROBE_GENERIC);
}
3133 
/*
 * Attach method: determine this bus's domain and bus number from the
 * parent bridge, then enumerate and attach all children.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
	return (bus_generic_attach(dev));
}
3153 
3154 static void
3155 pci_set_power_children(device_t dev, device_t *devlist, int numdevs,
3156     int state)
3157 {
3158 	device_t child, pcib;
3159 	struct pci_devinfo *dinfo;
3160 	int dstate, i;
3161 
3162 	/*
3163 	 * Set the device to the given state.  If the firmware suggests
3164 	 * a different power state, use it instead.  If power management
3165 	 * is not present, the firmware is responsible for managing
3166 	 * device power.  Skip children who aren't attached since they
3167 	 * are handled separately.
3168 	 */
3169 	pcib = device_get_parent(dev);
3170 	for (i = 0; i < numdevs; i++) {
3171 		child = devlist[i];
3172 		dinfo = device_get_ivars(child);
3173 		dstate = state;
3174 		if (device_is_attached(child) &&
3175 		    PCIB_POWER_FOR_SLEEP(pcib, dev, &dstate) == 0)
3176 			pci_set_powerstate(child, dstate);
3177 	}
3178 }
3179 
/*
 * Bus method: suspend the PCI bus.  Saves each child's config space,
 * suspends the children, then (if the pci_do_power_suspend tunable is
 * set) places them in D3.  Returns 0 or the first error encountered.
 */
int
pci_suspend(device_t dev)
{
	device_t child, *devlist;
	struct pci_devinfo *dinfo;
	int error, i, numdevs;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
		return (error);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}
	if (pci_do_power_suspend)
		pci_set_power_children(dev, devlist, numdevs,
		    PCI_POWERSTATE_D3);
	free(devlist, M_TEMP);
	return (0);
}
3211 
3212 int
3213 pci_resume(device_t dev)
3214 {
3215 	device_t child, *devlist;
3216 	struct pci_devinfo *dinfo;
3217 	int error, i, numdevs;
3218 
3219 	/*
3220 	 * Set each child to D0 and restore its PCI configuration space.
3221 	 */
3222 	if ((error = device_get_children(dev, &devlist, &numdevs)) != 0)
3223 		return (error);
3224 	if (pci_do_power_resume)
3225 		pci_set_power_children(dev, devlist, numdevs,
3226 		    PCI_POWERSTATE_D0);
3227 
3228 	/* Now the device is powered up, restore its config space. */
3229 	for (i = 0; i < numdevs; i++) {
3230 		child = devlist[i];
3231 		dinfo = device_get_ivars(child);
3232 
3233 		pci_cfg_restore(child, dinfo);
3234 		if (!device_is_attached(child))
3235 			pci_cfg_save(child, dinfo, 1);
3236 	}
3237 
3238 	/*
3239 	 * Resume critical devices first, then everything else later.
3240 	 */
3241 	for (i = 0; i < numdevs; i++) {
3242 		child = devlist[i];
3243 		switch (pci_get_class(child)) {
3244 		case PCIC_DISPLAY:
3245 		case PCIC_MEMORY:
3246 		case PCIC_BRIDGE:
3247 		case PCIC_BASEPERIPH:
3248 			DEVICE_RESUME(child);
3249 			break;
3250 		}
3251 	}
3252 	for (i = 0; i < numdevs; i++) {
3253 		child = devlist[i];
3254 		switch (pci_get_class(child)) {
3255 		case PCIC_DISPLAY:
3256 		case PCIC_MEMORY:
3257 		case PCIC_BRIDGE:
3258 		case PCIC_BASEPERIPH:
3259 			break;
3260 		default:
3261 			DEVICE_RESUME(child);
3262 		}
3263 	}
3264 	free(devlist, M_TEMP);
3265 	return (0);
3266 }
3267 
/*
 * Locate the preloaded "pci_vendor_data" module (the flat-text PCI
 * vendor/device database) and publish its address and size in
 * pci_vendordata / pci_vendordata_size.
 */
static void
pci_load_vendor_data(void)
{
	caddr_t data;
	void *ptr;
	size_t sz;

	data = preload_search_by_type("pci_vendor_data");
	if (data != NULL) {
		ptr = preload_fetch_addr(data);
		sz = preload_fetch_size(data);
		if (ptr != NULL && sz != 0) {
			pci_vendordata = ptr;
			pci_vendordata_size = sz;
			/*
			 * Terminate the database with a newline so the
			 * parser always finds an end-of-line.
			 * NOTE(review): this stores one byte past
			 * pci_vendordata_size -- presumably the preload
			 * area guarantees that extra byte; confirm.
			 */
			pci_vendordata[pci_vendordata_size] = '\n';
		}
	}
}
3287 
3288 void
3289 pci_driver_added(device_t dev, driver_t *driver)
3290 {
3291 	int numdevs;
3292 	device_t *devlist;
3293 	device_t child;
3294 	struct pci_devinfo *dinfo;
3295 	int i;
3296 
3297 	if (bootverbose)
3298 		device_printf(dev, "driver added\n");
3299 	DEVICE_IDENTIFY(driver, dev);
3300 	if (device_get_children(dev, &devlist, &numdevs) != 0)
3301 		return;
3302 	for (i = 0; i < numdevs; i++) {
3303 		child = devlist[i];
3304 		if (device_get_state(child) != DS_NOTPRESENT)
3305 			continue;
3306 		dinfo = device_get_ivars(child);
3307 		pci_print_verbose(dinfo);
3308 		if (bootverbose)
3309 			pci_printf(&dinfo->cfg, "reprobing on driver added\n");
3310 		pci_cfg_restore(child, dinfo);
3311 		if (device_probe_and_attach(child) != 0)
3312 			pci_cfg_save(child, dinfo, 1);
3313 	}
3314 	free(devlist, M_TEMP);
3315 }
3316 
/*
 * Bus method: set up an interrupt handler on 'irq' for 'child'.  For
 * legacy INTx (rid 0) this just re-enables INTx delivery.  For MSI and
 * MSI-X it lazily maps the message address/data on first use, enables
 * the message, and tracks per-vector handler counts.  Returns 0 or an
 * errno; on failure the generic handler set up first is torn down.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI address/data lazily on first use. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
			}
			/* Enable the message only for the first handler. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_enable_msi(child, dinfo->cfg.msi.msi_addr,
				    dinfo->cfg.msi.msi_data);
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N corresponds to MSI-X table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* 'error' is only non-zero here via the gotos above. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3408 
3409 int
3410 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3411     void *cookie)
3412 {
3413 	struct msix_table_entry *mte;
3414 	struct resource_list_entry *rle;
3415 	struct pci_devinfo *dinfo;
3416 	int error, rid;
3417 
3418 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3419 		return (EINVAL);
3420 
3421 	/* If this isn't a direct child, just bail out */
3422 	if (device_get_parent(child) != dev)
3423 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3424 
3425 	rid = rman_get_rid(irq);
3426 	if (rid == 0) {
3427 		/* Mask INTx */
3428 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3429 	} else {
3430 		/*
3431 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3432 		 * decrement the appropriate handlers count and mask the
3433 		 * MSI-X message, or disable MSI messages if the count
3434 		 * drops to 0.
3435 		 */
3436 		dinfo = device_get_ivars(child);
3437 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3438 		if (rle->res != irq)
3439 			return (EINVAL);
3440 		if (dinfo->cfg.msi.msi_alloc > 0) {
3441 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3442 			    ("MSI-X index too high"));
3443 			if (dinfo->cfg.msi.msi_handlers == 0)
3444 				return (EINVAL);
3445 			dinfo->cfg.msi.msi_handlers--;
3446 			if (dinfo->cfg.msi.msi_handlers == 0)
3447 				pci_disable_msi(child);
3448 		} else {
3449 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3450 			    ("No MSI or MSI-X interrupts allocated"));
3451 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3452 			    ("MSI-X index too high"));
3453 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3454 			if (mte->mte_handlers == 0)
3455 				return (EINVAL);
3456 			mte->mte_handlers--;
3457 			if (mte->mte_handlers == 0)
3458 				pci_mask_msix(child, rid - 1);
3459 		}
3460 	}
3461 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3462 	if (rid > 0)
3463 		KASSERT(error == 0,
3464 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3465 	return (error);
3466 }
3467 
/*
 * Bus method: print the one-line description of 'child', including its
 * reserved port/memory/IRQ resources and its slot/function address.
 * Returns the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/*
	 * NOTE(review): this prints the flags of the bus ('dev'), not of
	 * 'child' -- possibly intended to be device_get_flags(child);
	 * confirm before changing.
	 */
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3493 
/*
 * Class/subclass description table used by pci_probe_nomatch() when a
 * device has no entry in the loaded vendor database.  A subclass of -1
 * supplies the generic description for the entire class; the table is
 * terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3585 
3586 void
3587 pci_probe_nomatch(device_t dev, device_t child)
3588 {
3589 	int	i;
3590 	char	*cp, *scp, *device;
3591 
3592 	/*
3593 	 * Look for a listing for this device in a loaded device database.
3594 	 */
3595 	if ((device = pci_describe_device(child)) != NULL) {
3596 		device_printf(dev, "<%s>", device);
3597 		free(device, M_DEVBUF);
3598 	} else {
3599 		/*
3600 		 * Scan the class/subclass descriptions for a general
3601 		 * description.
3602 		 */
3603 		cp = "unknown";
3604 		scp = NULL;
3605 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3606 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3607 				if (pci_nomatch_tab[i].subclass == -1) {
3608 					cp = pci_nomatch_tab[i].desc;
3609 				} else if (pci_nomatch_tab[i].subclass ==
3610 				    pci_get_subclass(child)) {
3611 					scp = pci_nomatch_tab[i].desc;
3612 				}
3613 			}
3614 		}
3615 		device_printf(dev, "<%s%s%s>",
3616 		    cp ? cp : "",
3617 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3618 		    scp ? scp : "");
3619 	}
3620 	printf(" at device %d.%d (no driver attached)\n",
3621 	    pci_get_slot(child), pci_get_function(child));
3622 	pci_cfg_save(child, device_get_ivars(child), 1);
3623 	return;
3624 }
3625 
3626 /*
3627  * Parse the PCI device database, if loaded, and return a pointer to a
3628  * description of the device.
3629  *
3630  * The database is flat text formatted as follows:
3631  *
3632  * Any line not in a valid format is ignored.
3633  * Lines are terminated with newline '\n' characters.
3634  *
3635  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3636  * the vendor name.
3637  *
3638  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3639  * - devices cannot be listed without a corresponding VENDOR line.
3640  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3641  * another TAB, then the device name.
3642  */
3643 
3644 /*
3645  * Assuming (ptr) points to the beginning of a line in the database,
3646  * return the vendor or device and description of the next entry.
3647  * The value of (vendor) or (device) inappropriate for the entry type
3648  * is set to -1.  Returns nonzero at the end of the database.
3649  *
3650  * Note that this is slightly unrobust in the face of corrupt data;
3651  * we attempt to safeguard against this by spamming the end of the
3652  * database with a newline when we initialise.
3653  */
3654 static int
3655 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3656 {
3657 	char	*cp = *ptr;
3658 	int	left;
3659 
3660 	*device = -1;
3661 	*vendor = -1;
3662 	**desc = '\0';
3663 	for (;;) {
3664 		left = pci_vendordata_size - (cp - pci_vendordata);
3665 		if (left <= 0) {
3666 			*ptr = cp;
3667 			return(1);
3668 		}
3669 
3670 		/* vendor entry? */
3671 		if (*cp != '\t' &&
3672 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3673 			break;
3674 		/* device entry? */
3675 		if (*cp == '\t' &&
3676 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3677 			break;
3678 
3679 		/* skip to next line */
3680 		while (*cp != '\n' && left > 0) {
3681 			cp++;
3682 			left--;
3683 		}
3684 		if (*cp == '\n') {
3685 			cp++;
3686 			left--;
3687 		}
3688 	}
3689 	/* skip to next line */
3690 	while (*cp != '\n' && left > 0) {
3691 		cp++;
3692 		left--;
3693 	}
3694 	if (*cp == '\n' && left > 0)
3695 		cp++;
3696 	*ptr = cp;
3697 	return(0);
3698 }
3699 
3700 static char *
3701 pci_describe_device(device_t dev)
3702 {
3703 	int	vendor, device;
3704 	char	*desc, *vp, *dp, *line;
3705 
3706 	desc = vp = dp = NULL;
3707 
3708 	/*
3709 	 * If we have no vendor data, we can't do anything.
3710 	 */
3711 	if (pci_vendordata == NULL)
3712 		goto out;
3713 
3714 	/*
3715 	 * Scan the vendor data looking for this device
3716 	 */
3717 	line = pci_vendordata;
3718 	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3719 		goto out;
3720 	for (;;) {
3721 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3722 			goto out;
3723 		if (vendor == pci_get_vendor(dev))
3724 			break;
3725 	}
3726 	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3727 		goto out;
3728 	for (;;) {
3729 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3730 			*dp = 0;
3731 			break;
3732 		}
3733 		if (vendor != -1) {
3734 			*dp = 0;
3735 			break;
3736 		}
3737 		if (device == pci_get_device(dev))
3738 			break;
3739 	}
3740 	if (dp[0] == '\0')
3741 		snprintf(dp, 80, "0x%x", pci_get_device(dev));
3742 	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3743 	    NULL)
3744 		sprintf(desc, "%s, %s", vp, dp);
3745  out:
3746 	if (vp != NULL)
3747 		free(vp, M_DEVBUF);
3748 	if (dp != NULL)
3749 		free(dp, M_DEVBUF);
3750 	return(desc);
3751 }
3752 
/*
 * Bus method: read a PCI instance variable of 'child' into *result.
 * Values are served from the cached config registers in the child's
 * devinfo.  Returns 0 on success, EINVAL for PCI_IVAR_ETHADDR (not
 * supported here), and ENOENT for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3835 
/*
 * Bus method: write a PCI instance variable of 'child'.  Only the
 * interrupt pin is writable; identity-related ivars are read-only and
 * return EINVAL.  Unknown ivars return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3868 
3869 
3870 #include "opt_ddb.h"
3871 #ifdef DDB
3872 #include <ddb/ddb.h>
3873 #include <sys/cons.h>
3874 
3875 /*
3876  * List resources based on pci map registers, used for within ddb
3877  */
3878 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line (selector, class, card, chip, revision, header
 * type) per known PCI device.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	/* NOTE(review): 'error' is initialized but never set non-zero. */
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without an attached driver print as "noneN". */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3918 #endif /* DDB */
3919 
/*
 * Lazily reserve the resource backing a BAR: size the BAR (or reuse a
 * previously recorded pci_map), allocate a suitably sized and aligned
 * range from the parent, record it in the child's resource list as
 * RLE_RESERVED, and program the BAR with the assigned address.
 * Returns the reserved (inactive) resource, or NULL on failure.
 */
static struct resource *
pci_reserve_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	struct pci_map *pm;
	pci_addr_t map, testval;
	int mapsize;

	res = NULL;
	pm = pci_find_bar(child, *rid);
	if (pm != NULL) {
		/* This is a BAR that we failed to allocate earlier. */
		mapsize = pm->pm_size;
		map = pm->pm_value;
	} else {
		/*
		 * Weed out the bogons, and figure out how large the
		 * BAR/map is.  BARs that read back 0 here are bogus
		 * and unimplemented.  Note: atapci in legacy mode are
		 * special and handled elsewhere in the code.  If you
		 * have a atapci device in legacy mode and it fails
		 * here, that other code is broken.
		 */
		pci_read_bar(child, *rid, &map, &testval);

		/*
		 * Determine the size of the BAR and ignore BARs with a size
		 * of 0.  Device ROM BARs use a different mask value.
		 */
		if (PCIR_IS_BIOS(&dinfo->cfg, *rid))
			mapsize = pci_romsize(testval);
		else
			mapsize = pci_mapsize(testval);
		if (mapsize == 0)
			goto out;
		pm = pci_add_bar(child, *rid, map, mapsize);
	}

	/* Reject requests whose type contradicts what the BAR decodes. */
	if (PCI_BAR_MEM(map) || PCIR_IS_BIOS(&dinfo->cfg, *rid)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}

	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	count = (pci_addr_t)1 << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(map) && (map & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate BAR for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags & ~RF_ACTIVE);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_reserve_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	rle->flags = RLE_RESERVED;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the address actually assigned. */
	map = rman_get_start(res);
	pci_write_bar(child, pm, map);
out:;
	return (res);
}
4025 
4026 
4027 struct resource *
4028 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4029 		   u_long start, u_long end, u_long count, u_int flags)
4030 {
4031 	struct pci_devinfo *dinfo = device_get_ivars(child);
4032 	struct resource_list *rl = &dinfo->resources;
4033 	struct resource_list_entry *rle;
4034 	struct resource *res;
4035 	pcicfgregs *cfg = &dinfo->cfg;
4036 
4037 	if (device_get_parent(child) != dev)
4038 		return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child,
4039 		    type, rid, start, end, count, flags));
4040 
4041 	/*
4042 	 * Perform lazy resource allocation
4043 	 */
4044 	switch (type) {
4045 	case SYS_RES_IRQ:
4046 		/*
4047 		 * Can't alloc legacy interrupt once MSI messages have
4048 		 * been allocated.
4049 		 */
4050 		if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4051 		    cfg->msix.msix_alloc > 0))
4052 			return (NULL);
4053 
4054 		/*
4055 		 * If the child device doesn't have an interrupt
4056 		 * routed and is deserving of an interrupt, try to
4057 		 * assign it one.
4058 		 */
4059 		if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4060 		    (cfg->intpin != 0))
4061 			pci_assign_interrupt(dev, child, 0);
4062 		break;
4063 	case SYS_RES_IOPORT:
4064 	case SYS_RES_MEMORY:
4065 #ifdef NEW_PCIB
4066 		/*
4067 		 * PCI-PCI bridge I/O window resources are not BARs.
4068 		 * For those allocations just pass the request up the
4069 		 * tree.
4070 		 */
4071 		if (cfg->hdrtype == PCIM_HDRTYPE_BRIDGE) {
4072 			switch (*rid) {
4073 			case PCIR_IOBASEL_1:
4074 			case PCIR_MEMBASE_1:
4075 			case PCIR_PMBASEL_1:
4076 				/*
4077 				 * XXX: Should we bother creating a resource
4078 				 * list entry?
4079 				 */
4080 				return (bus_generic_alloc_resource(dev, child,
4081 				    type, rid, start, end, count, flags));
4082 			}
4083 		}
4084 #endif
4085 		/* Reserve resources for this BAR if needed. */
4086 		rle = resource_list_find(rl, type, *rid);
4087 		if (rle == NULL) {
4088 			res = pci_reserve_map(dev, child, type, rid, start, end,
4089 			    count, flags);
4090 			if (res == NULL)
4091 				return (NULL);
4092 		}
4093 	}
4094 	return (resource_list_alloc(rl, dev, child, type, rid,
4095 	    start, end, count, flags));
4096 }
4097 
4098 int
4099 pci_activate_resource(device_t dev, device_t child, int type, int rid,
4100     struct resource *r)
4101 {
4102 	struct pci_devinfo *dinfo;
4103 	int error;
4104 
4105 	error = bus_generic_activate_resource(dev, child, type, rid, r);
4106 	if (error)
4107 		return (error);
4108 
4109 	/* Enable decoding in the command register when activating BARs. */
4110 	if (device_get_parent(child) == dev) {
4111 		/* Device ROMs need their decoding explicitly enabled. */
4112 		dinfo = device_get_ivars(child);
4113 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4114 			pci_write_bar(child, pci_find_bar(child, rid),
4115 			    rman_get_start(r) | PCIM_BIOS_ENABLE);
4116 		switch (type) {
4117 		case SYS_RES_IOPORT:
4118 		case SYS_RES_MEMORY:
4119 			error = PCI_ENABLE_IO(dev, child, type);
4120 			break;
4121 		}
4122 	}
4123 	return (error);
4124 }
4125 
4126 int
4127 pci_deactivate_resource(device_t dev, device_t child, int type,
4128     int rid, struct resource *r)
4129 {
4130 	struct pci_devinfo *dinfo;
4131 	int error;
4132 
4133 	error = bus_generic_deactivate_resource(dev, child, type, rid, r);
4134 	if (error)
4135 		return (error);
4136 
4137 	/* Disable decoding for device ROMs. */
4138 	if (device_get_parent(child) == dev) {
4139 		dinfo = device_get_ivars(child);
4140 		if (PCIR_IS_BIOS(&dinfo->cfg, rid))
4141 			pci_write_bar(child, pci_find_bar(child, rid),
4142 			    rman_get_start(r));
4143 	}
4144 	return (0);
4145 }
4146 
/*
 * Detach and destroy a PCI child device, releasing every resource it
 * holds.  The teardown order matters: detach the driver first, then
 * disable memory/port decoding so the hardware stops responding at the
 * addresses we are about to give back, then unreserve each resource,
 * and finally delete the device and free its config data.
 */
void
pci_delete_child(device_t dev, device_t child)
{
	struct resource_list_entry *rle;
	struct resource_list *rl;
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	if (device_is_attached(child))
		device_detach(child);

	/* Turn off access to resources we're about to free */
	pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
	    PCIR_COMMAND, 2) & ~(PCIM_CMD_MEMEN | PCIM_CMD_PORTEN), 2);

	/* Free all allocated resources */
	STAILQ_FOREACH(rle, rl, link) {
		if (rle->res) {
			/*
			 * An active or busy entry means something still
			 * owns the resource even after detach; complain
			 * and forcibly release it on the child's behalf
			 * so the unreserve below can succeed.
			 */
			if (rman_get_flags(rle->res) & RF_ACTIVE ||
			    resource_list_busy(rl, rle->type, rle->rid)) {
				pci_printf(&dinfo->cfg,
				    "Resource still owned, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				bus_release_resource(child, rle->type, rle->rid,
				    rle->res);
			}
			resource_list_unreserve(rl, dev, child, rle->type,
			    rle->rid);
		}
	}
	resource_list_free(rl);

	device_delete_child(dev, child);
	pci_freecfg(dinfo);
}
4186 
4187 void
4188 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4189 {
4190 	struct pci_devinfo *dinfo;
4191 	struct resource_list *rl;
4192 	struct resource_list_entry *rle;
4193 
4194 	if (device_get_parent(child) != dev)
4195 		return;
4196 
4197 	dinfo = device_get_ivars(child);
4198 	rl = &dinfo->resources;
4199 	rle = resource_list_find(rl, type, rid);
4200 	if (rle == NULL)
4201 		return;
4202 
4203 	if (rle->res) {
4204 		if (rman_get_flags(rle->res) & RF_ACTIVE ||
4205 		    resource_list_busy(rl, type, rid)) {
4206 			device_printf(dev, "delete_resource: "
4207 			    "Resource still owned by child, oops. "
4208 			    "(type=%d, rid=%d, addr=%lx)\n",
4209 			    type, rid, rman_get_start(rle->res));
4210 			return;
4211 		}
4212 
4213 #ifndef __PCI_BAR_ZERO_VALID
4214 		/*
4215 		 * If this is a BAR, clear the BAR so it stops
4216 		 * decoding before releasing the resource.
4217 		 */
4218 		switch (type) {
4219 		case SYS_RES_IOPORT:
4220 		case SYS_RES_MEMORY:
4221 			pci_write_bar(child, pci_find_bar(child, rid), 0);
4222 			break;
4223 		}
4224 #endif
4225 		resource_list_unreserve(rl, dev, child, type, rid);
4226 	}
4227 	resource_list_delete(rl, type, rid);
4228 }
4229 
4230 struct resource_list *
4231 pci_get_resource_list (device_t dev, device_t child)
4232 {
4233 	struct pci_devinfo *dinfo = device_get_ivars(child);
4234 
4235 	return (&dinfo->resources);
4236 }
4237 
4238 uint32_t
4239 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4240 {
4241 	struct pci_devinfo *dinfo = device_get_ivars(child);
4242 	pcicfgregs *cfg = &dinfo->cfg;
4243 
4244 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4245 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4246 }
4247 
4248 void
4249 pci_write_config_method(device_t dev, device_t child, int reg,
4250     uint32_t val, int width)
4251 {
4252 	struct pci_devinfo *dinfo = device_get_ivars(child);
4253 	pcicfgregs *cfg = &dinfo->cfg;
4254 
4255 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4256 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4257 }
4258 
4259 int
4260 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4261     size_t buflen)
4262 {
4263 
4264 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4265 	    pci_get_function(child));
4266 	return (0);
4267 }
4268 
4269 int
4270 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4271     size_t buflen)
4272 {
4273 	struct pci_devinfo *dinfo;
4274 	pcicfgregs *cfg;
4275 
4276 	dinfo = device_get_ivars(child);
4277 	cfg = &dinfo->cfg;
4278 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4279 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4280 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4281 	    cfg->progif);
4282 	return (0);
4283 }
4284 
4285 int
4286 pci_assign_interrupt_method(device_t dev, device_t child)
4287 {
4288 	struct pci_devinfo *dinfo = device_get_ivars(child);
4289 	pcicfgregs *cfg = &dinfo->cfg;
4290 
4291 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4292 	    cfg->intpin));
4293 }
4294 
4295 static int
4296 pci_modevent(module_t mod, int what, void *arg)
4297 {
4298 	static struct cdev *pci_cdev;
4299 
4300 	switch (what) {
4301 	case MOD_LOAD:
4302 		STAILQ_INIT(&pci_devq);
4303 		pci_generation = 0;
4304 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
4305 		    "pci");
4306 		pci_load_vendor_data();
4307 		break;
4308 
4309 	case MOD_UNLOAD:
4310 		destroy_dev(pci_cdev);
4311 		break;
4312 	}
4313 
4314 	return (0);
4315 }
4316 
/*
 * Restore a type 0 device's saved configuration (command register,
 * interrupt routing, timers, BARs) after a power transition or resume.
 * Relies on the register values cached in `dinfo` by pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	/* Rewrite the BARs and the cached writable type 0 registers. */
	pci_restore_bars(dev);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4358 
4359 void
4360 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4361 {
4362 	uint32_t cls;
4363 	int ps;
4364 
4365 	/*
4366 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4367 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4368 	 * which also require special treatment.  Other types are unknown, and
4369 	 * we err on the side of safety by ignoring them.  Powering down
4370 	 * bridges should not be undertaken lightly.
4371 	 */
4372 	if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
4373 		return;
4374 
4375 	/*
4376 	 * Some drivers apparently write to these registers w/o updating our
4377 	 * cached copy.  No harm happens if we update the copy, so do so here
4378 	 * so we can restore them.  The COMMAND register is modified by the
4379 	 * bus w/o updating the cache.  This should represent the normally
4380 	 * writable portion of the 'defined' part of type 0 headers.  In
4381 	 * theory we also need to save/restore the PCI capability structures
4382 	 * we know about, but apart from power we don't know any that are
4383 	 * writable.
4384 	 */
4385 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4386 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4387 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4388 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4389 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4390 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4391 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4392 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4393 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4394 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4395 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4396 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4397 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4398 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4399 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4400 
4401 	/*
4402 	 * don't set the state for display devices, base peripherals and
4403 	 * memory devices since bad things happen when they are powered down.
4404 	 * We should (a) have drivers that can easily detach and (b) use
4405 	 * generic drivers for these devices so that some device actually
4406 	 * attaches.  We need to make sure that when we implement (a) we don't
4407 	 * power the device down on a reattach.
4408 	 */
4409 	cls = pci_get_class(dev);
4410 	if (!setstate)
4411 		return;
4412 	switch (pci_do_power_nodriver)
4413 	{
4414 		case 0:		/* NO powerdown at all */
4415 			return;
4416 		case 1:		/* Conservative about what to power down */
4417 			if (cls == PCIC_STORAGE)
4418 				return;
4419 			/*FALLTHROUGH*/
4420 		case 2:		/* Agressive about what to power down */
4421 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4422 			    cls == PCIC_BASEPERIPH)
4423 				return;
4424 			/*FALLTHROUGH*/
4425 		case 3:		/* Power down everything */
4426 			break;
4427 	}
4428 	/*
4429 	 * PCI spec says we can only go into D3 state from D0 state.
4430 	 * Transition from D[12] into D0 before going to D3 state.
4431 	 */
4432 	ps = pci_get_powerstate(dev);
4433 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4434 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4435 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4436 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4437 }
4438