xref: /freebsd/sys/dev/pci/pci.c (revision 995dc984471c92c03daad19a1d35af46c086ef3e)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@freebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_bus.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 
46 #include <vm/vm.h>
47 #include <vm/pmap.h>
48 #include <vm/vm_extern.h>
49 
50 #include <sys/bus.h>
51 #include <machine/bus.h>
52 #include <sys/rman.h>
53 #include <machine/resource.h>
54 
55 #if defined(__i386__) || defined(__amd64__)
56 #include <machine/intr_machdep.h>
57 #endif
58 
59 #include <sys/pciio.h>
60 #include <dev/pci/pcireg.h>
61 #include <dev/pci/pcivar.h>
62 #include <dev/pci/pci_private.h>
63 
64 #include "pcib_if.h"
65 #include "pci_if.h"
66 
67 #ifdef __HAVE_ACPI
68 #include <contrib/dev/acpica/acpi.h>
69 #include "acpi_if.h"
70 #else
71 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
72 #endif
73 
/*
 * Forward declarations for the static helpers defined in this file,
 * grouped roughly by purpose: BAR decoding, device enumeration and
 * description, capability parsing, VPD access, and MSI/MSI-X support.
 */
static uint32_t		pci_mapbase(unsigned mapreg);
static const char	*pci_maptype(unsigned mapreg);
static int		pci_mapsize(unsigned testval);
static int		pci_maprange(unsigned mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_extcap(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
114 
/*
 * Method table wiring the pci bus driver into newbus: generic device
 * lifecycle hooks, bus resource/interrupt management, and the
 * PCI-specific kobj interface declared in "pci_if.h".
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }
};
166 
/* Glue the pci driver class into newbus and register it as a module. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
MODULE_VERSION(pci, 1);

/* Buffer and size for the PCI vendor data (see pci_load_vendor_data()). */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
175 
176 
/*
 * Quirk table entry: devid holds the device ID in the upper 16 bits
 * and the vendor ID in the lower 16 bits; arg1/arg2 are interpreted
 * per quirk type.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific (e.g. map register offset) */
	int	arg2;
};
185 
/*
 * Devices needing special treatment, matched on the combined
 * device/vendor ID.  Terminated by an all-zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
220 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* list of all enumerated PCI functions */
uint32_t pci_generation;	/* bumped when pci_devq changes (see pci_read_device) */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
static int pcie_chipset, pcix_chipset;	/* set while parsing capabilities */
230 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* Each knob below is a loader tunable paired with a sysctl. */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
agressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
271 
272 /* Find a device_t by bus/slot/function in domain 0 */
273 
274 device_t
275 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
276 {
277 
278 	return (pci_find_dbsf(0, bus, slot, func));
279 }
280 
281 /* Find a device_t by domain/bus/slot/function */
282 
283 device_t
284 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
285 {
286 	struct pci_devinfo *dinfo;
287 
288 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
289 		if ((dinfo->cfg.domain == domain) &&
290 		    (dinfo->cfg.bus == bus) &&
291 		    (dinfo->cfg.slot == slot) &&
292 		    (dinfo->cfg.func == func)) {
293 			return (dinfo->cfg.dev);
294 		}
295 	}
296 
297 	return (NULL);
298 }
299 
300 /* Find a device_t by vendor/device ID */
301 
302 device_t
303 pci_find_device(uint16_t vendor, uint16_t device)
304 {
305 	struct pci_devinfo *dinfo;
306 
307 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
308 		if ((dinfo->cfg.vendor == vendor) &&
309 		    (dinfo->cfg.device == device)) {
310 			return (dinfo->cfg.dev);
311 		}
312 	}
313 
314 	return (NULL);
315 }
316 
317 /* return base address of memory or port map */
318 
319 static uint32_t
320 pci_mapbase(uint32_t mapreg)
321 {
322 
323 	if (PCI_BAR_MEM(mapreg))
324 		return (mapreg & PCIM_BAR_MEM_BASE);
325 	else
326 		return (mapreg & PCIM_BAR_IO_BASE);
327 }
328 
329 /* return map type of memory or port map */
330 
331 static const char *
332 pci_maptype(unsigned mapreg)
333 {
334 
335 	if (PCI_BAR_IO(mapreg))
336 		return ("I/O Port");
337 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
338 		return ("Prefetchable Memory");
339 	return ("Memory");
340 }
341 
342 /* return log2 of map size decoded for memory or port map */
343 
344 static int
345 pci_mapsize(uint32_t testval)
346 {
347 	int ln2size;
348 
349 	testval = pci_mapbase(testval);
350 	ln2size = 0;
351 	if (testval != 0) {
352 		while ((testval & 1) == 0)
353 		{
354 			ln2size++;
355 			testval >>= 1;
356 		}
357 	}
358 	return (ln2size);
359 }
360 
361 /* return log2 of address range supported by map register */
362 
363 static int
364 pci_maprange(unsigned mapreg)
365 {
366 	int ln2range = 0;
367 
368 	if (PCI_BAR_IO(mapreg))
369 		ln2range = 32;
370 	else
371 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
372 		case PCIM_BAR_MEM_32:
373 			ln2range = 32;
374 			break;
375 		case PCIM_BAR_MEM_1MB:
376 			ln2range = 20;
377 			break;
378 		case PCIM_BAR_MEM_64:
379 			ln2range = 64;
380 			break;
381 		}
382 	return (ln2range);
383 }
384 
385 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
386 
387 static void
388 pci_fixancient(pcicfgregs *cfg)
389 {
390 	if (cfg->hdrtype != 0)
391 		return;
392 
393 	/* PCI to PCI bridges use header type 1 */
394 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
395 		cfg->hdrtype = 1;
396 }
397 
398 /* extract header type specific config data */
399 
400 static void
401 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
402 {
403 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
404 	switch (cfg->hdrtype) {
405 	case 0:
406 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
407 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
408 		cfg->nummaps	    = PCI_MAXMAPS_0;
409 		break;
410 	case 1:
411 		cfg->nummaps	    = PCI_MAXMAPS_1;
412 		break;
413 	case 2:
414 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
415 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
416 		cfg->nummaps	    = PCI_MAXMAPS_2;
417 		break;
418 	}
419 #undef REG
420 }
421 
/*
 * Read the configuration header of the function at domain d, bus b,
 * slot s, function f into a freshly allocated pci_devinfo ('size'
 * bytes, allowing callers to embed it in a larger structure).  The new
 * entry is linked onto the global device list.  Returns NULL if no
 * device responds at this address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* Reading all-ones means no device is present at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = malloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
		if (devlist_entry == NULL)
			return (NULL);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multifunction flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		/* Cache capability info (power management, MSI, VPD, ...). */
		if (REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT)
			pci_read_extcap(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the exported pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
496 
/*
 * Walk the device's PCI capability list, caching the location and
 * contents of the capabilities this driver cares about (power
 * management, MSI, MSI-X, VPD, ...) in *cfg.  Also enables MSI->HT
 * interrupt mapping on HyperTransport bridges as a side effect.
 */
static void
pci_read_extcap(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
#if defined(__i386__) || defined(__amd64__)
	uint64_t addr;
#endif
	uint32_t val;
	int	ptr, nextptr, ptrptr;

	/* The capability list head lives at a header-type specific offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;		/* no extended capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (nextptr != 0) {
		/* Sanity check */
		if (nextptr > 255) {
			printf("illegal PCI extended capability offset %d\n",
			    nextptr);
			return;
		}
		/* Find the next entry */
		ptr = nextptr;
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		switch (REG(ptr + PCICAP_ID, 1)) {
		case PCIY_PMG:		/* PCI power management */
			if (cfg->pp.pp_cap == 0) {
				cfg->pp.pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
				cfg->pp.pp_status = ptr + PCIR_POWER_STATUS;
				cfg->pp.pp_pmcsr = ptr + PCIR_POWER_PMCSR;
				if ((nextptr - ptr) > PCIR_POWER_DATA)
					cfg->pp.pp_data = ptr + PCIR_POWER_DATA;
			}
			break;
#if defined(__i386__) || defined(__amd64__)
		case PCIY_HT:		/* HyperTransport */
			/* Determine HT-specific capability type. */
			val = REG(ptr + PCIR_HT_COMMAND, 2);
			switch (val & PCIM_HTCMD_CAP_MASK) {
			case PCIM_HTCAP_MSI_MAPPING:
				if (!(val & PCIM_HTCMD_MSI_FIXED)) {
					/* Sanity check the mapping window. */
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI,
					    4);
					addr <<= 32;
					addr = REG(ptr + PCIR_HTMSI_ADDRESS_LO,
					    4);
					if (addr != MSI_INTEL_ADDR_BASE)
						device_printf(pcib,
	    "HT Bridge at pci%d:%d:%d:%d has non-default MSI window 0x%llx\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    (long long)addr);
				}

				/* Enable MSI -> HT mapping. */
				val |= PCIM_HTCMD_MSI_ENABLE;
				WREG(ptr + PCIR_HT_COMMAND, val, 2);
				break;
			}
			break;
#endif
		case PCIY_MSI:		/* PCI MSI */
			cfg->msi.msi_location = ptr;
			cfg->msi.msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
			/* MMC field encodes log2 of the message count. */
			cfg->msi.msi_msgnum = 1 << ((cfg->msi.msi_ctrl &
						     PCIM_MSICTRL_MMC_MASK)>>1);
			break;
		case PCIY_MSIX:		/* PCI MSI-X */
			cfg->msix.msix_location = ptr;
			cfg->msix.msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
			cfg->msix.msix_msgnum = (cfg->msix.msix_ctrl &
			    PCIM_MSIXCTRL_TABLE_SIZE) + 1;
			/* Table and PBA locations are BIR + offset pairs. */
			val = REG(ptr + PCIR_MSIX_TABLE, 4);
			cfg->msix.msix_table_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
			val = REG(ptr + PCIR_MSIX_PBA, 4);
			cfg->msix.msix_pba_bar = PCIR_BAR(val &
			    PCIM_MSIX_BIR_MASK);
			cfg->msix.msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
			break;
		case PCIY_VPD:		/* PCI Vital Product Data */
			cfg->vpd.vpd_reg = ptr;
			break;
		case PCIY_SUBVENDOR:
			/* Should always be true. */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
				val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
				cfg->subvendor = val & 0xffff;
				cfg->subdevice = val >> 16;
			}
			break;
		case PCIY_PCIX:		/* PCI-X */
			/*
			 * Assume we have a PCI-X chipset if we have
			 * at least one PCI-PCI bridge with a PCI-X
			 * capability.  Note that some systems with
			 * PCI-express or HT chipsets might match on
			 * this check as well.
			 */
			if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
				pcix_chipset = 1;
			break;
		case PCIY_EXPRESS:	/* PCI-express */
			/*
			 * Assume we have a PCI-express chipset if we have
			 * at least one PCI-express device.
			 */
			pcie_chipset = 1;
			break;
		default:
			break;
		}
	}
/* REG and WREG use carry through to next functions */
}
629 
630 /*
631  * PCI Vital Product Data
632  */
633 
634 #define	PCI_VPD_TIMEOUT		1000000
635 
636 static int
637 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
638 {
639 	int count = PCI_VPD_TIMEOUT;
640 
641 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
642 
643 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
644 
645 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
646 		if (--count < 0)
647 			return (ENXIO);
648 		DELAY(1);	/* limit looping */
649 	}
650 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
651 
652 	return (0);
653 }
654 
#if 0
/*
 * Write one aligned 32-bit word of VPD at offset 'reg'.  Currently
 * compiled out; kept for reference.  Returns 0 on success or ENXIO on
 * timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	/* Writing the address with bit 15 set starts a write cycle. */
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* The device clears bit 15 when the write has completed. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
676 
/*
 * State carried across vpd_nextbyte() calls while parsing a device's
 * VPD: the source device, a 32-bit read buffer drained one byte at a
 * time, and a running checksum of all bytes consumed so far.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word read from VPD */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD offset to read */
	uint8_t		cksum;		/* running sum of bytes read so far */
};
685 
686 static int
687 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
688 {
689 	uint32_t reg;
690 	uint8_t byte;
691 
692 	if (vrs->bytesinval == 0) {
693 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
694 			return (ENXIO);
695 		vrs->val = le32toh(reg);
696 		vrs->off += 4;
697 		byte = vrs->val & 0xff;
698 		vrs->bytesinval = 3;
699 	} else {
700 		vrs->val = vrs->val >> 8;
701 		byte = vrs->val & 0xff;
702 		vrs->bytesinval--;
703 	}
704 
705 	vrs->cksum += byte;
706 	*data = byte;
707 	return (0);
708 }
709 
/*
 * Parse the device's VPD into cfg->vpd: the identifier string, the
 * read-only (VPD-R) keyword/value pairs and the read/write (VPD-W)
 * keyword/value pairs.  Implemented as a byte-driven state machine;
 * a negative state terminates parsing (-1 = done or bad data,
 * -2 = I/O error).  On failure the partially built arrays are freed.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		printf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					printf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = malloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = malloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			if (off == alloc) {
				/* Array full: double its size. */
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				printf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = malloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = malloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword carries the checksum byte; the
			 * running sum over all bytes consumed so far must
			 * be zero at this point for the VPD to be valid.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						printf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the entries used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = reallocf(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/*
			 * Skip remaining bytes of a resource.  NOTE(review):
			 * no transition above appears to enter this state.
			 */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				/* Array full: double its size. */
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the VPD offset of this writable field. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = malloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Trim the array to the entries used. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = reallocf(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			printf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				free(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		printf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			free(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				free(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			free(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the VPD as parsed so this work is only done once. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
989 
990 int
991 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
992 {
993 	struct pci_devinfo *dinfo = device_get_ivars(child);
994 	pcicfgregs *cfg = &dinfo->cfg;
995 
996 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
997 		pci_read_vpd(device_get_parent(dev), cfg);
998 
999 	*identptr = cfg->vpd.vpd_ident;
1000 
1001 	if (*identptr == NULL)
1002 		return (ENXIO);
1003 
1004 	return (0);
1005 }
1006 
1007 int
1008 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1009 	const char **vptr)
1010 {
1011 	struct pci_devinfo *dinfo = device_get_ivars(child);
1012 	pcicfgregs *cfg = &dinfo->cfg;
1013 	int i;
1014 
1015 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1016 		pci_read_vpd(device_get_parent(dev), cfg);
1017 
1018 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1019 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1020 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1021 			*vptr = cfg->vpd.vpd_ros[i].value;
1022 		}
1023 
1024 	if (i != cfg->vpd.vpd_rocnt)
1025 		return (0);
1026 
1027 	*vptr = NULL;
1028 	return (ENXIO);
1029 }
1030 
1031 /*
1032  * Return the offset in configuration space of the requested extended
1033  * capability entry or 0 if the specified capability was not found.
1034  */
1035 int
1036 pci_find_extcap_method(device_t dev, device_t child, int capability,
1037     int *capreg)
1038 {
1039 	struct pci_devinfo *dinfo = device_get_ivars(child);
1040 	pcicfgregs *cfg = &dinfo->cfg;
1041 	u_int32_t status;
1042 	u_int8_t ptr;
1043 
1044 	/*
1045 	 * Check the CAP_LIST bit of the PCI status register first.
1046 	 */
1047 	status = pci_read_config(child, PCIR_STATUS, 2);
1048 	if (!(status & PCIM_STATUS_CAPPRESENT))
1049 		return (ENXIO);
1050 
1051 	/*
1052 	 * Determine the start pointer of the capabilities list.
1053 	 */
1054 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1055 	case 0:
1056 	case 1:
1057 		ptr = PCIR_CAP_PTR;
1058 		break;
1059 	case 2:
1060 		ptr = PCIR_CAP_PTR_2;
1061 		break;
1062 	default:
1063 		/* XXX: panic? */
1064 		return (ENXIO);		/* no extended capabilities support */
1065 	}
1066 	ptr = pci_read_config(child, ptr, 1);
1067 
1068 	/*
1069 	 * Traverse the capabilities list.
1070 	 */
1071 	while (ptr != 0) {
1072 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1073 			if (capreg != NULL)
1074 				*capreg = ptr;
1075 			return (0);
1076 		}
1077 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1078 	}
1079 
1080 	return (ENOENT);
1081 }
1082 
1083 /*
1084  * Support for MSI-X message interrupts.
1085  */
1086 void
1087 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1088 {
1089 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1090 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1091 	uint32_t offset;
1092 
1093 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1094 	offset = msix->msix_table_offset + index * 16;
1095 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1096 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1097 	bus_write_4(msix->msix_table_res, offset + 8, data);
1098 }
1099 
1100 void
1101 pci_mask_msix(device_t dev, u_int index)
1102 {
1103 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1104 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1105 	uint32_t offset, val;
1106 
1107 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1108 	offset = msix->msix_table_offset + index * 16 + 12;
1109 	val = bus_read_4(msix->msix_table_res, offset);
1110 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1111 		val |= PCIM_MSIX_VCTRL_MASK;
1112 		bus_write_4(msix->msix_table_res, offset, val);
1113 	}
1114 }
1115 
1116 void
1117 pci_unmask_msix(device_t dev, u_int index)
1118 {
1119 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1120 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1121 	uint32_t offset, val;
1122 
1123 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1124 	offset = msix->msix_table_offset + index * 16 + 12;
1125 	val = bus_read_4(msix->msix_table_res, offset);
1126 	if (val & PCIM_MSIX_VCTRL_MASK) {
1127 		val &= ~PCIM_MSIX_VCTRL_MASK;
1128 		bus_write_4(msix->msix_table_res, offset, val);
1129 	}
1130 }
1131 
1132 int
1133 pci_pending_msix(device_t dev, u_int index)
1134 {
1135 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1136 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1137 	uint32_t offset, bit;
1138 
1139 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1140 	offset = msix->msix_pba_offset + (index / 32) * 4;
1141 	bit = 1 << index % 32;
1142 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1143 }
1144 
1145 /*
1146  * Restore MSI-X registers and table during resume.  If MSI-X is
1147  * enabled then walk the virtual table to restore the actual MSI-X
1148  * table.
1149  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	int i;

	if (msix->msix_alloc > 0) {
		/* First, mask all vectors. */
		for (i = 0; i < msix->msix_msgnum; i++)
			pci_mask_msix(dev, i);

		/* Second, program any messages with at least one handler. */
		for (i = 0; i < msix->msix_table_len; i++) {
			mte = &msix->msix_table[i];
			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
				continue;
			/* mte_vector is a 1-based index into msix_vectors. */
			mv = &msix->msix_vectors[mte->mte_vector - 1];
			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
			pci_unmask_msix(dev, i);
		}
	}
	/* Finally, restore the saved MSI-X control register value. */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1177 
1178 /*
1179  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1180  * returned in *count.  After this function returns, each message will be
1181  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1182  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped: the driver must have
	 * activated the memory BAR(s) holding the vector table and the
	 * pending-bit array before asking for MSI-X vectors.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, 'rle' still points at it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		/* Each message becomes a SYS_RES_IRQ resource at rid i + 1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1);
	}
	/* 'actual' may be fewer than requested if allocation failed early. */
	actual = i;

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				printf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irq);
			printf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* Initially, message i uses the 1-based vector i + 1. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1314 
1315 /*
1316  * By default, pci_alloc_msix() will assign the allocated IRQ
1317  * resources consecutively to the first N messages in the MSI-X table.
1318  * However, device drivers may want to use different layouts if they
1319  * either receive fewer messages than they asked for, or they wish to
1320  * populate the MSI-X table sparsely.  This method allows the driver
1321  * to specify what layout it wants.  It must be called after a
1322  * successful pci_alloc_msix() but before any of the associated
1323  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1324  *
1325  * The 'vectors' array contains 'count' message vectors.  The array
1326  * maps directly to the MSI-X table in that index 0 in the array
1327  * specifies the vector for the first message in the MSI-X table, etc.
1328  * The vector value in each array index can either be 0 to indicate
1329  * that no vector should be assigned to a message slot, or it can be a
1330  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1332  * vector (IRQ) to be used for the corresponding message.
1333  *
1334  * On successful return, each message with a non-zero vector will have
1335  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1336  * 1.  Additionally, if any of the IRQs allocated via the previous
1337  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1338  * will be freed back to the system automatically.
1339  *
1340  * For example, suppose a driver has a MSI-X table with 6 messages and
1341  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1342  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1343  * C.  After the call to pci_alloc_msix(), the device will be setup to
1344  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1346  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1347  * be freed back to the system.  This device will also have valid
1348  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1349  *
1350  * In any case, the SYS_RES_IRQ rid X will always map to the message
1351  * at MSI-X table index X - 1 and will only be valid if a vector is
1352  * assigned to that table entry.
1353  */
1354 int
1355 pci_remap_msix_method(device_t dev, device_t child, int count,
1356     const u_int *vectors)
1357 {
1358 	struct pci_devinfo *dinfo = device_get_ivars(child);
1359 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1360 	struct resource_list_entry *rle;
1361 	int i, irq, j, *used;
1362 
1363 	/*
1364 	 * Have to have at least one message in the table but the
1365 	 * table can't be bigger than the actual MSI-X table in the
1366 	 * device.
1367 	 */
1368 	if (count == 0 || count > msix->msix_msgnum)
1369 		return (EINVAL);
1370 
1371 	/* Sanity check the vectors. */
1372 	for (i = 0; i < count; i++)
1373 		if (vectors[i] > msix->msix_alloc)
1374 			return (EINVAL);
1375 
1376 	/*
1377 	 * Make sure there aren't any holes in the vectors to be used.
1378 	 * It's a big pain to support it, and it doesn't really make
1379 	 * sense anyway.  Also, at least one vector must be used.
1380 	 */
1381 	used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1382 	    M_ZERO);
1383 	for (i = 0; i < count; i++)
1384 		if (vectors[i] != 0)
1385 			used[vectors[i] - 1] = 1;
1386 	for (i = 0; i < msix->msix_alloc - 1; i++)
1387 		if (used[i] == 0 && used[i + 1] == 1) {
1388 			free(used, M_DEVBUF);
1389 			return (EINVAL);
1390 		}
1391 	if (used[0] != 1) {
1392 		free(used, M_DEVBUF);
1393 		return (EINVAL);
1394 	}
1395 
1396 	/* Make sure none of the resources are allocated. */
1397 	for (i = 0; i < msix->msix_table_len; i++) {
1398 		if (msix->msix_table[i].mte_vector == 0)
1399 			continue;
1400 		if (msix->msix_table[i].mte_handlers > 0)
1401 			return (EBUSY);
1402 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1403 		KASSERT(rle != NULL, ("missing resource"));
1404 		if (rle->res != NULL)
1405 			return (EBUSY);
1406 	}
1407 
1408 	/* Free the existing resource list entries. */
1409 	for (i = 0; i < msix->msix_table_len; i++) {
1410 		if (msix->msix_table[i].mte_vector == 0)
1411 			continue;
1412 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1413 	}
1414 
1415 	/*
1416 	 * Build the new virtual table keeping track of which vectors are
1417 	 * used.
1418 	 */
1419 	free(msix->msix_table, M_DEVBUF);
1420 	msix->msix_table = malloc(sizeof(struct msix_table_entry) * count,
1421 	    M_DEVBUF, M_WAITOK | M_ZERO);
1422 	for (i = 0; i < count; i++)
1423 		msix->msix_table[i].mte_vector = vectors[i];
1424 	msix->msix_table_len = count;
1425 
1426 	/* Free any unused IRQs and resize the vectors array if necessary. */
1427 	j = msix->msix_alloc - 1;
1428 	if (used[j] == 0) {
1429 		struct msix_vector *vec;
1430 
1431 		while (used[j] == 0) {
1432 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1433 			    msix->msix_vectors[j].mv_irq);
1434 			j--;
1435 		}
1436 		vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1437 		    M_WAITOK);
1438 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1439 		    (j + 1));
1440 		free(msix->msix_vectors, M_DEVBUF);
1441 		msix->msix_vectors = vec;
1442 		msix->msix_alloc = j + 1;
1443 	}
1444 	free(used, M_DEVBUF);
1445 
1446 	/* Map the IRQs onto the rids. */
1447 	for (i = 0; i < count; i++) {
1448 		if (vectors[i] == 0)
1449 			continue;
1450 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1451 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1452 		    irq, 1);
1453 	}
1454 
1455 	if (bootverbose) {
1456 		device_printf(child, "Remapped MSI-X IRQs as: ");
1457 		for (i = 0; i < count; i++) {
1458 			if (i != 0)
1459 				printf(", ");
1460 			if (vectors[i] == 0)
1461 				printf("---");
1462 			else
1463 				printf("%d",
1464 				    msix->msix_vectors[vectors[i]].mv_irq);
1465 		}
1466 		printf("\n");
1467 	}
1468 
1469 	return (0);
1470 }
1471 
/*
 * Release all MSI-X messages allocated to 'child'.  Returns ENODEV if
 * none are allocated and EBUSY if any message still has an active
 * handler or an allocated SYS_RES_IRQ resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	free(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	free(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1518 
1519 /*
1520  * Return the max supported MSI-X messages this device supports.
1521  * Basically, assuming the MD code can alloc messages, this function
1522  * should return the maximum value that pci_alloc_msix() can return.
1523  * Thus, it is subject to the tunables, etc.
1524  */
1525 int
1526 pci_msix_count_method(device_t dev, device_t child)
1527 {
1528 	struct pci_devinfo *dinfo = device_get_ivars(child);
1529 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1530 
1531 	if (pci_do_msix && msix->msix_location != 0)
1532 		return (msix->msix_msgnum);
1533 	return (0);
1534 }
1535 
1536 /*
1537  * Support for MSI message signalled interrupts.
1538  */
1539 void
1540 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1541 {
1542 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1543 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1544 
1545 	/* Write data and address values. */
1546 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1547 	    address & 0xffffffff, 4);
1548 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1549 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1550 		    address >> 32, 4);
1551 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1552 		    data, 2);
1553 	} else
1554 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1555 		    2);
1556 
1557 	/* Enable MSI in the control register. */
1558 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1559 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1560 	    2);
1561 }
1562 
1563 void
1564 pci_disable_msi(device_t dev)
1565 {
1566 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1567 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1568 
1569 	/* Disable MSI in the control register. */
1570 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1571 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1572 	    2);
1573 }
1574 
1575 /*
1576  * Restore MSI registers during resume.  If MSI is enabled then
1577  * restore the data and address registers in addition to the control
1578  * register.
1579  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Reprogram the saved address/data before re-enabling. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control register value (enable bit included). */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1605 
1606 int
1607 pci_remap_msi_irq(device_t dev, u_int irq)
1608 {
1609 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1610 	pcicfgregs *cfg = &dinfo->cfg;
1611 	struct resource_list_entry *rle;
1612 	struct msix_table_entry *mte;
1613 	struct msix_vector *mv;
1614 	device_t bus;
1615 	uint64_t addr;
1616 	uint32_t data;
1617 	int error, i, j;
1618 
1619 	bus = device_get_parent(dev);
1620 
1621 	/*
1622 	 * Handle MSI first.  We try to find this IRQ among our list
1623 	 * of MSI IRQs.  If we find it, we request updated address and
1624 	 * data registers and apply the results.
1625 	 */
1626 	if (cfg->msi.msi_alloc > 0) {
1627 
1628 		/* If we don't have any active handlers, nothing to do. */
1629 		if (cfg->msi.msi_handlers == 0)
1630 			return (0);
1631 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1632 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1633 			    i + 1);
1634 			if (rle->start == irq) {
1635 				error = PCIB_MAP_MSI(device_get_parent(bus),
1636 				    dev, irq, &addr, &data);
1637 				if (error)
1638 					return (error);
1639 				pci_disable_msi(dev);
1640 				dinfo->cfg.msi.msi_addr = addr;
1641 				dinfo->cfg.msi.msi_data = data;
1642 				pci_enable_msi(dev, addr, data);
1643 				return (0);
1644 			}
1645 		}
1646 		return (ENOENT);
1647 	}
1648 
1649 	/*
1650 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1651 	 * we request the updated mapping info.  If that works, we go
1652 	 * through all the slots that use this IRQ and update them.
1653 	 */
1654 	if (cfg->msix.msix_alloc > 0) {
1655 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1656 			mv = &cfg->msix.msix_vectors[i];
1657 			if (mv->mv_irq == irq) {
1658 				error = PCIB_MAP_MSI(device_get_parent(bus),
1659 				    dev, irq, &addr, &data);
1660 				if (error)
1661 					return (error);
1662 				mv->mv_address = addr;
1663 				mv->mv_data = data;
1664 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1665 					mte = &cfg->msix.msix_table[j];
1666 					if (mte->mte_vector != i + 1)
1667 						continue;
1668 					if (mte->mte_handlers == 0)
1669 						continue;
1670 					pci_mask_msix(dev, j);
1671 					pci_enable_msix(dev, j, addr, data);
1672 					pci_unmask_msix(dev, j);
1673 				}
1674 			}
1675 		}
1676 		return (ENOENT);
1677 	}
1678 
1679 	return (ENOENT);
1680 }
1681 
1682 /*
1683  * Returns true if the specified device is blacklisted because MSI
1684  * doesn't work.
1685  */
1686 int
1687 pci_msi_device_blacklisted(device_t dev)
1688 {
1689 	struct pci_quirk *q;
1690 
1691 	if (!pci_honor_msi_blacklist)
1692 		return (0);
1693 
1694 	for (q = &pci_quirks[0]; q->devid; q++) {
1695 		if (q->devid == pci_get_devid(dev) &&
1696 		    q->type == PCI_QUIRK_DISABLE_MSI)
1697 			return (1);
1698 	}
1699 	return (0);
1700 }
1701 
1702 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1704  * we just check for blacklisted chipsets as represented by the
1705  * host-PCI bridge at device 0:0:0.  In the future, it may become
1706  * necessary to check other system attributes, such as the kenv values
1707  * that give the motherboard manufacturer and model number.
1708  */
1709 static int
1710 pci_msi_blacklisted(void)
1711 {
1712 	device_t dev;
1713 
1714 	if (!pci_honor_msi_blacklist)
1715 		return (0);
1716 
1717 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1718 	if (!(pcie_chipset || pcix_chipset))
1719 		return (1);
1720 
1721 	dev = pci_find_bsf(0, 0, 0);
1722 	if (dev != NULL)
1723 		return (pci_msi_device_blacklisted(dev));
1724 	return (0);
1725 }
1726 
1727 /*
1728  * Attempt to allocate *count MSI messages.  The actual number allocated is
1729  * returned in *count.  After this function returns, each message will be
1730  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
1731  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					printf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				printf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				printf("-%d", irqs[actual - 1]);
			printf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* Multiple Message Enable field holds log2 of the message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
1850 
1851 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Remember the IRQ so it can be handed back below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
1899 
1900 /*
1901  * Return the max supported MSI messages this device supports.
1902  * Basically, assuming the MD code can alloc messages, this function
1903  * should return the maximum value that pci_alloc_msi() can return.
1904  * Thus, it is subject to the tunables, etc.
1905  */
1906 int
1907 pci_msi_count_method(device_t dev, device_t child)
1908 {
1909 	struct pci_devinfo *dinfo = device_get_ivars(child);
1910 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1911 
1912 	if (pci_do_msi && msi->msi_location != 0)
1913 		return (msi->msi_msgnum);
1914 	return (0);
1915 }
1916 
/* free pcicfgregs structure and all dependent data structures */
1918 
int
pci_freecfg(struct pci_devinfo *dinfo)
{
	struct devlist *devlist_head;
	int i;

	devlist_head = &pci_devq;

	/* Only devices with a VPD capability ever allocate VPD storage. */
	if (dinfo->cfg.vpd.vpd_reg) {
		free(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
			free(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
			free(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
		free(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
	}
	/* Unlink from the global device list before freeing. */
	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
	free(dinfo, M_DEVBUF);

	/* increment the generation count */
	pci_generation++;

	/* we're losing one device */
	pci_numdevs--;
	return (0);
}
1946 
1947 /*
 * PCI power management
1949  */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int result, oldstate, highest, delay;

	/* No power-management capability means no state changes. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the non-state bits of the power status register. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	result = 0;	/* NOTE(review): set but never used below. */
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; advertised in the capability. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is likewise optional. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		printf(
		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
		    dinfo->cfg.func, oldstate, state);

	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2024 
2025 int
2026 pci_get_powerstate_method(device_t dev, device_t child)
2027 {
2028 	struct pci_devinfo *dinfo = device_get_ivars(child);
2029 	pcicfgregs *cfg = &dinfo->cfg;
2030 	uint16_t status;
2031 	int result;
2032 
2033 	if (cfg->pp.pp_cap != 0) {
2034 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2035 		switch (status & PCIM_PSTAT_DMASK) {
2036 		case PCIM_PSTAT_D0:
2037 			result = PCI_POWERSTATE_D0;
2038 			break;
2039 		case PCIM_PSTAT_D1:
2040 			result = PCI_POWERSTATE_D1;
2041 			break;
2042 		case PCIM_PSTAT_D2:
2043 			result = PCI_POWERSTATE_D2;
2044 			break;
2045 		case PCIM_PSTAT_D3:
2046 			result = PCI_POWERSTATE_D3;
2047 			break;
2048 		default:
2049 			result = PCI_POWERSTATE_UNKNOWN;
2050 			break;
2051 		}
2052 	} else {
2053 		/* No support, device is always at D0 */
2054 		result = PCI_POWERSTATE_D0;
2055 	}
2056 	return (result);
2057 }
2058 
2059 /*
2060  * Some convenience functions for PCI device drivers.
2061  */
2062 
2063 static __inline void
2064 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2065 {
2066 	uint16_t	command;
2067 
2068 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2069 	command |= bit;
2070 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2071 }
2072 
2073 static __inline void
2074 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2075 {
2076 	uint16_t	command;
2077 
2078 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2079 	command &= ~bit;
2080 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2081 }
2082 
/* Enable bus mastering for 'child' by setting PCIM_CMD_BUSMASTEREN. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2089 
/* Disable bus mastering for 'child' by clearing PCIM_CMD_BUSMASTEREN. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2096 
2097 int
2098 pci_enable_io_method(device_t dev, device_t child, int space)
2099 {
2100 	uint16_t command;
2101 	uint16_t bit;
2102 	char *error;
2103 
2104 	bit = 0;
2105 	error = NULL;
2106 
2107 	switch(space) {
2108 	case SYS_RES_IOPORT:
2109 		bit = PCIM_CMD_PORTEN;
2110 		error = "port";
2111 		break;
2112 	case SYS_RES_MEMORY:
2113 		bit = PCIM_CMD_MEMEN;
2114 		error = "memory";
2115 		break;
2116 	default:
2117 		return (EINVAL);
2118 	}
2119 	pci_set_command_bit(dev, child, bit);
2120 	/* Some devices seem to need a brief stall here, what do to? */
2121 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2122 	if (command & bit)
2123 		return (0);
2124 	device_printf(child, "failed to enable %s mapping!\n", error);
2125 	return (ENXIO);
2126 }
2127 
2128 int
2129 pci_disable_io_method(device_t dev, device_t child, int space)
2130 {
2131 	uint16_t command;
2132 	uint16_t bit;
2133 	char *error;
2134 
2135 	bit = 0;
2136 	error = NULL;
2137 
2138 	switch(space) {
2139 	case SYS_RES_IOPORT:
2140 		bit = PCIM_CMD_PORTEN;
2141 		error = "port";
2142 		break;
2143 	case SYS_RES_MEMORY:
2144 		bit = PCIM_CMD_MEMEN;
2145 		error = "memory";
2146 		break;
2147 	default:
2148 		return (EINVAL);
2149 	}
2150 	pci_clear_command_bit(dev, child, bit);
2151 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2152 	if (command & bit) {
2153 		device_printf(child, "failed to disable %s mapping!\n", error);
2154 		return (ENXIO);
2155 	}
2156 	return (0);
2157 }
2158 
2159 /*
2160  * New style pci driver.  Parent device is either a pci-host-bridge or a
2161  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2162  */
2163 
/*
 * If booted verbose, print a summary of a device's config-space header
 * plus its power-management, MSI and MSI-X capabilities (when present).
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		printf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		printf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		printf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		printf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		printf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* Intpin 1-4 maps to INTA-INTD; print as a letter. */
		if (cfg->intpin > 0)
			printf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Read the live PM status to show the current D-state. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			printf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			printf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			printf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share one BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				printf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				printf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
	}
}
2220 
2221 static int
2222 pci_porten(device_t pcib, int b, int s, int f)
2223 {
2224 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2225 		& PCIM_CMD_PORTEN) != 0;
2226 }
2227 
2228 static int
2229 pci_memen(device_t pcib, int b, int s, int f)
2230 {
2231 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2232 		& PCIM_CMD_MEMEN) != 0;
2233 }
2234 
2235 /*
2236  * Add a resource based on a pci map register. Return 1 if the map
2237  * register is a 32bit map register or 2 if it is a 64bit register.
2238  */
2239 static int
2240 pci_add_map(device_t pcib, device_t bus, device_t dev,
2241     int b, int s, int f, int reg, struct resource_list *rl, int force,
2242     int prefetch)
2243 {
2244 	uint32_t map;
2245 	pci_addr_t base;
2246 	pci_addr_t start, end, count;
2247 	uint8_t ln2size;
2248 	uint8_t ln2range;
2249 	uint32_t testval;
2250 	uint16_t cmd;
2251 	int type;
2252 	int barlen;
2253 	struct resource *res;
2254 
2255 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2256 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2257 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2258 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2259 
2260 	if (PCI_BAR_MEM(map))
2261 		type = SYS_RES_MEMORY;
2262 	else
2263 		type = SYS_RES_IOPORT;
2264 	ln2size = pci_mapsize(testval);
2265 	ln2range = pci_maprange(testval);
2266 	base = pci_mapbase(map);
2267 	barlen = ln2range == 64 ? 2 : 1;
2268 
2269 	/*
2270 	 * For I/O registers, if bottom bit is set, and the next bit up
2271 	 * isn't clear, we know we have a BAR that doesn't conform to the
2272 	 * spec, so ignore it.  Also, sanity check the size of the data
2273 	 * areas to the type of memory involved.  Memory must be at least
2274 	 * 16 bytes in size, while I/O ranges must be at least 4.
2275 	 */
2276 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2277 		return (barlen);
2278 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2279 	    (type == SYS_RES_IOPORT && ln2size < 2))
2280 		return (barlen);
2281 
2282 	if (ln2range == 64)
2283 		/* Read the other half of a 64bit map register */
2284 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2285 	if (bootverbose) {
2286 		printf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2287 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2288 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2289 			printf(", port disabled\n");
2290 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2291 			printf(", memory disabled\n");
2292 		else
2293 			printf(", enabled\n");
2294 	}
2295 
2296 	/*
2297 	 * If base is 0, then we have problems.  It is best to ignore
2298 	 * such entries for the moment.  These will be allocated later if
2299 	 * the driver specifically requests them.  However, some
2300 	 * removable busses look better when all resources are allocated,
2301 	 * so allow '0' to be overriden.
2302 	 *
2303 	 * Similarly treat maps whose values is the same as the test value
2304 	 * read back.  These maps have had all f's written to them by the
2305 	 * BIOS in an attempt to disable the resources.
2306 	 */
2307 	if (!force && (base == 0 || map == testval))
2308 		return (barlen);
2309 	if ((u_long)base != base) {
2310 		device_printf(bus,
2311 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2312 		    pci_get_domain(dev), b, s, f, reg);
2313 		return (barlen);
2314 	}
2315 
2316 	/*
2317 	 * This code theoretically does the right thing, but has
2318 	 * undesirable side effects in some cases where peripherals
2319 	 * respond oddly to having these bits enabled.  Let the user
2320 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2321 	 * default).
2322 	 */
2323 	if (pci_enable_io_modes) {
2324 		/* Turn on resources that have been left off by a lazy BIOS */
2325 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2326 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2327 			cmd |= PCIM_CMD_PORTEN;
2328 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2329 		}
2330 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2331 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2332 			cmd |= PCIM_CMD_MEMEN;
2333 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2334 		}
2335 	} else {
2336 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2337 			return (barlen);
2338 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2339 			return (barlen);
2340 	}
2341 
2342 	count = 1 << ln2size;
2343 	if (base == 0 || base == pci_mapbase(testval)) {
2344 		start = 0;	/* Let the parent deside */
2345 		end = ~0ULL;
2346 	} else {
2347 		start = base;
2348 		end = base + (1 << ln2size) - 1;
2349 	}
2350 	resource_list_add(rl, type, reg, start, end, count);
2351 
2352 	/*
2353 	 * Not quite sure what to do on failure of allocating the resource
2354 	 * since I can postulate several right answers.
2355 	 */
2356 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2357 	    prefetch ? RF_PREFETCHABLE : 0);
2358 	if (res == NULL)
2359 		return (barlen);
2360 	start = rman_get_start(res);
2361 	if ((u_long)start != start) {
2362 		/* Wait a minute!  this platform can't do this address. */
2363 		device_printf(bus,
2364 		    "pci%d:%d.%d.%x bar %#x start %#jx, too many bits.",
2365 		    pci_get_domain(dev), b, s, f, reg, (uintmax_t)start);
2366 		resource_list_release(rl, bus, dev, type, reg, res);
2367 		return (barlen);
2368 	}
2369 	pci_write_config(dev, reg, start, 4);
2370 	if (ln2range == 64)
2371 		pci_write_config(dev, reg + 4, start >> 32, 4);
2372 	return (barlen);
2373 }
2374 
2375 /*
2376  * For ATA devices we need to decide early what addressing mode to use.
2377  * Legacy demands that the primary and secondary ATA ports sits on the
2378  * same addresses that old ISA hardware did. This dictates that we use
2379  * those addresses and ignore the BAR's if we cannot set PCI native
2380  * addressing mode.
2381  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			printf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/* Primary channel: native mode uses BAR(0)/BAR(1)... */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* ...compatibility mode uses the legacy ISA addresses. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	/* Secondary channel: same split between native and legacy. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/* BAR(4)/BAR(5) (e.g. bus-master DMA) are mapped normally. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2435 
/*
 * Determine the IRQ for a device's INTx pin — from a user tunable, the
 * intline register, or bus routing — and register it as rid 0 in the
 * device's resource list.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	snprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Reject out-of-range tunable values (valid IRQs are 1-254). */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2483 
/*
 * Populate a device's resource list from its BARs (with special-case
 * handling for legacy ATA), quirk table entries, and INTx interrupt.
 */
void
pci_add_resources(device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	device_t pcib;
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	pcib = device_get_parent(bus);

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 (64-bit BARs span two regs). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
#ifdef __PCI_REROUTE_INTERRUPT
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
#else
		pci_assign_interrupt(bus, dev, 0);
#endif
	}
}
2536 
/*
 * Scan every slot/function on the given bus, reading config headers and
 * adding a child device for each function that responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Probe function 0's header to see if the slot is populated. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices get all functions scanned. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2569 
/*
 * Create a new-bus child for a probed PCI function and set up its ivars,
 * resource list, saved config state, and BAR resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save config before restoring so we have a known-good snapshot. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
}
2581 
/* Generic PCI bus probe; low priority so subclasses can take over. */
static int
pci_probe(device_t dev)
{

	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2591 
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2613 
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, then (when ACPI power management is enabled) put attached
 * type-0 devices into a low-power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		free(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	free(devlist, M_TEMP);
	return (0);
}
2661 
/*
 * Bus resume method: return each child to D0 (via ACPI if present),
 * restore its saved config space, then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	free(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2696 
2697 static void
2698 pci_load_vendor_data(void)
2699 {
2700 	caddr_t vendordata, info;
2701 
2702 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2703 		info = preload_search_info(vendordata, MODINFO_ADDR);
2704 		pci_vendordata = *(char **)info;
2705 		info = preload_search_info(vendordata, MODINFO_SIZE);
2706 		pci_vendordata_size = *(size_t *)info;
2707 		/* terminate the database */
2708 		pci_vendordata[pci_vendordata_size] = '\n';
2709 	}
2710 }
2711 
/*
 * Bus driver_added method: when a new driver registers, give it a chance
 * to identify and attach to any children that are still unattached.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			printf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		/* Power the device up before probing; save state if no match. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	free(devlist, M_TEMP);
}
2741 
/*
 * Bus setup_intr method: install the handler via the generic code, then,
 * for a direct child using MSI/MSI-X (rid > 0), ask the parent bridge to
 * map the message and program the device's address/data registers,
 * tracking handler counts per message.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep)
{
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	void *cookie;
	int error, rid;

	error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr,
	    arg, &cookie);
	if (error)
		return (error);

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, ask our parent to map the MSI and give
	 * us the address and data register values.  If we fail for some
	 * reason, teardown the interrupt handler.
	 */
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* MSI: map lazily on the first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* MSI-X: rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry for its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}
	bad:
		/* On mapping failure, undo the generic handler setup. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
2818 
/*
 * Bus teardown_intr method: for a direct child using MSI/MSI-X, drop the
 * per-message handler count and disable/mask the message when it reaches
 * zero, then tear down via the generic code.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	struct msix_table_entry *mte;
	struct resource_list_entry *rle;
	struct pci_devinfo *dinfo;
	int error, rid;

	/*
	 * If this is a direct child, check to see if the interrupt is
	 * MSI or MSI-X.  If so, decrement the appropriate handlers
	 * count and mask the MSI-X message, or disable MSI messages
	 * if the count drops to 0.
	 */
	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);
	rid = rman_get_rid(irq);
	if (device_get_parent(child) == dev && rid > 0) {
		dinfo = device_get_ivars(child);
		/*
		 * NOTE(review): resource_list_find() can return NULL; the
		 * code relies on rid > 0 implying an allocated MSI/MSI-X
		 * entry exists — confirm that invariant holds for all
		 * callers before depending on it.
		 */
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		if (rle->res != irq)
			return (EINVAL);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
			    ("MSI-X index too high"));
			if (dinfo->cfg.msi.msi_handlers == 0)
				return (EINVAL);
			dinfo->cfg.msi.msi_handlers--;
			/* Last MSI handler gone: disable MSI generation. */
			if (dinfo->cfg.msi.msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			if (mte->mte_handlers == 0)
				return (EINVAL);
			mte->mte_handlers--;
			/* Last handler for this entry: mask the message. */
			if (mte->mte_handlers == 0)
				pci_mask_msix(child, rid - 1);
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	if (device_get_parent(child) == dev && rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
2869 
/*
 * Bus print_child method: print the child's name, its port/memory/IRQ
 * resources, flags, and slot.function location.  Returns the number of
 * characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += printf(" flags %#x", device_get_flags(dev));

	retval += printf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
2895 
/*
 * Table of generic PCI class/subclass descriptions, used by
 * pci_probe_nomatch() to describe a device when no driver attaches and
 * the loaded vendor database has no entry for it.  A subclass value of
 * -1 marks the generic description for the whole class; a later row
 * with a matching subclass supplies the more specific name.  The table
 * is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
2982 
2983 void
2984 pci_probe_nomatch(device_t dev, device_t child)
2985 {
2986 	int	i;
2987 	char	*cp, *scp, *device;
2988 
2989 	/*
2990 	 * Look for a listing for this device in a loaded device database.
2991 	 */
2992 	if ((device = pci_describe_device(child)) != NULL) {
2993 		device_printf(dev, "<%s>", device);
2994 		free(device, M_DEVBUF);
2995 	} else {
2996 		/*
2997 		 * Scan the class/subclass descriptions for a general
2998 		 * description.
2999 		 */
3000 		cp = "unknown";
3001 		scp = NULL;
3002 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3003 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3004 				if (pci_nomatch_tab[i].subclass == -1) {
3005 					cp = pci_nomatch_tab[i].desc;
3006 				} else if (pci_nomatch_tab[i].subclass ==
3007 				    pci_get_subclass(child)) {
3008 					scp = pci_nomatch_tab[i].desc;
3009 				}
3010 			}
3011 		}
3012 		device_printf(dev, "<%s%s%s>",
3013 		    cp ? cp : "",
3014 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3015 		    scp ? scp : "");
3016 	}
3017 	printf(" at device %d.%d (no driver attached)\n",
3018 	    pci_get_slot(child), pci_get_function(child));
3019 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3020 	return;
3021 }
3022 
3023 /*
3024  * Parse the PCI device database, if loaded, and return a pointer to a
3025  * description of the device.
3026  *
3027  * The database is flat text formatted as follows:
3028  *
3029  * Any line not in a valid format is ignored.
3030  * Lines are terminated with newline '\n' characters.
3031  *
3032  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3033  * the vendor name.
3034  *
3035  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3036  * - devices cannot be listed without a corresponding VENDOR line.
3037  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3038  * another TAB, then the device name.
3039  */
3040 
3041 /*
3042  * Assuming (ptr) points to the beginning of a line in the database,
3043  * return the vendor or device and description of the next entry.
3044  * The value of (vendor) or (device) inappropriate for the entry type
3045  * is set to -1.  Returns nonzero at the end of the database.
3046  *
3047  * Note that this is slightly unrobust in the face of corrupt data;
3048  * we attempt to safeguard against this by spamming the end of the
3049  * database with a newline when we initialise.
3050  */
3051 static int
3052 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3053 {
3054 	char	*cp = *ptr;
3055 	int	left;
3056 
3057 	*device = -1;
3058 	*vendor = -1;
3059 	**desc = '\0';
3060 	for (;;) {
3061 		left = pci_vendordata_size - (cp - pci_vendordata);
3062 		if (left <= 0) {
3063 			*ptr = cp;
3064 			return(1);
3065 		}
3066 
3067 		/* vendor entry? */
3068 		if (*cp != '\t' &&
3069 		    sscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3070 			break;
3071 		/* device entry? */
3072 		if (*cp == '\t' &&
3073 		    sscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3074 			break;
3075 
3076 		/* skip to next line */
3077 		while (*cp != '\n' && left > 0) {
3078 			cp++;
3079 			left--;
3080 		}
3081 		if (*cp == '\n') {
3082 			cp++;
3083 			left--;
3084 		}
3085 	}
3086 	/* skip to next line */
3087 	while (*cp != '\n' && left > 0) {
3088 		cp++;
3089 		left--;
3090 	}
3091 	if (*cp == '\n' && left > 0)
3092 		cp++;
3093 	*ptr = cp;
3094 	return(0);
3095 }
3096 
/*
 * Return a malloc'd (M_DEVBUF) "vendor, device" description of 'dev'
 * from the loaded vendor database, or NULL if no database is loaded,
 * the vendor is unknown, or allocation fails.  The caller is
 * responsible for freeing the returned string.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers; pci_describe_parse_line fills them. */
	if ((vp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = malloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device match, use numeric fallback. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: device not listed. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	if (dp[0] == '\0')
		snprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3 for ", " separator and the terminating NUL. */
	if ((desc = malloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		sprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		free(vp, M_DEVBUF);
	if (dp != NULL)
		free(dp, M_DEVBUF);
	return(desc);
}
3149 
/*
 * Bus method: read a PCI instance variable of a child device.  All
 * values are served from the cached config registers in the child's
 * pci_devinfo; no config space access happens here.  Returns 0 on
 * success, EINVAL for PCI_IVAR_ETHADDR (not supported by generic PCI)
 * and ENOENT for an unknown ivar.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3232 
/*
 * Bus method: write a PCI instance variable of a child device.  Only
 * the interrupt pin is writable; identification registers are
 * deliberately rejected with EINVAL, and unknown ivars with ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		/* Updates only the cached copy, not the hardware register. */
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3265 
3266 
3267 #include "opt_ddb.h"
3268 #ifdef DDB
3269 #include <ddb/ddb.h>
3270 #include <sys/cons.h>
3271 
3272 /*
3273  * List resources based on pci map registers, used for within ddb
3274  */
3275 
/*
 * DDB "show pciregs" command: walk the global PCI device list and
 * print one identification line per device in pciconf(8)-like format.
 * Honors db_pager_quit so the user can abort long listings.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Counter used to number devices that have no attached driver. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* "none<N>" is printed for devices without a driver. */
		db_printf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3315 #endif /* DDB */
3316 
/*
 * Lazily allocate a resource for a BAR that has no entry in the
 * child's resource list yet.  The BAR is sized by writing all-ones
 * and reading back the size mask, a suitably sized and aligned range
 * is allocated from the parent bus, recorded in the resource list,
 * and finally programmed into the BAR.  Returns NULL if the BAR is
 * unimplemented, of the wrong type, or the allocation fails.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* 64-bit memory BARs occupy two dwords; fold in the upper half. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* On success, program the BAR with the newly assigned base. */
	map = rman_get_start(res);
out:;
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3413 
3414 
/*
 * Bus method: allocate a resource for a child.  For direct children
 * this performs lazy allocation: IRQs may be routed on first use,
 * MSI/MSI-X conflicts are rejected, BAR-backed port/memory ranges are
 * sized and reserved on demand via pci_alloc_map(), and previously
 * reserved entries are handed back (activating them if requested).
 * Everything else falls through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids within the standard BAR range apply. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should free the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3491 
/*
 * Bus method: delete a resource from a direct child's resource list.
 * Refuses to delete a resource that is still owned by the child or
 * active; otherwise releases it, removes the list entry, clears the
 * corresponding BAR and propagates the deletion to the parent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only direct children of this bus are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3527 
3528 struct resource_list *
3529 pci_get_resource_list (device_t dev, device_t child)
3530 {
3531 	struct pci_devinfo *dinfo = device_get_ivars(child);
3532 
3533 	return (&dinfo->resources);
3534 }
3535 
3536 uint32_t
3537 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3538 {
3539 	struct pci_devinfo *dinfo = device_get_ivars(child);
3540 	pcicfgregs *cfg = &dinfo->cfg;
3541 
3542 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3543 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3544 }
3545 
3546 void
3547 pci_write_config_method(device_t dev, device_t child, int reg,
3548     uint32_t val, int width)
3549 {
3550 	struct pci_devinfo *dinfo = device_get_ivars(child);
3551 	pcicfgregs *cfg = &dinfo->cfg;
3552 
3553 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3554 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3555 }
3556 
3557 int
3558 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3559     size_t buflen)
3560 {
3561 
3562 	snprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3563 	    pci_get_function(child));
3564 	return (0);
3565 }
3566 
3567 int
3568 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3569     size_t buflen)
3570 {
3571 	struct pci_devinfo *dinfo;
3572 	pcicfgregs *cfg;
3573 
3574 	dinfo = device_get_ivars(child);
3575 	cfg = &dinfo->cfg;
3576 	snprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3577 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3578 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3579 	    cfg->progif);
3580 	return (0);
3581 }
3582 
3583 int
3584 pci_assign_interrupt_method(device_t dev, device_t child)
3585 {
3586 	struct pci_devinfo *dinfo = device_get_ivars(child);
3587 	pcicfgregs *cfg = &dinfo->cfg;
3588 
3589 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3590 	    cfg->intpin));
3591 }
3592 
3593 static int
3594 pci_modevent(module_t mod, int what, void *arg)
3595 {
3596 	static struct cdev *pci_cdev;
3597 
3598 	switch (what) {
3599 	case MOD_LOAD:
3600 		STAILQ_INIT(&pci_devq);
3601 		pci_generation = 0;
3602 		pci_cdev = make_dev(&pcicdev, 0, UID_ROOT, GID_WHEEL, 0644,
3603 		    "pci");
3604 		pci_load_vendor_data();
3605 		break;
3606 
3607 	case MOD_UNLOAD:
3608 		destroy_dev(pci_cdev);
3609 		break;
3610 	}
3611 
3612 	return (0);
3613 }
3614 
/*
 * Restore a device's saved type-0 configuration state (BARs, command
 * register, interrupt routing, timers and MSI/MSI-X setup), e.g.
 * after resume or after the device was powered back up.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3660 
/*
 * Save a device's type-0 configuration state into its pci_devinfo so
 * pci_cfg_restore() can reinstate it later.  If 'setstate' is
 * non-zero, also power the device down to D3 subject to the
 * pci_do_power_nodriver policy and the class-based exemptions below.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
3744