xref: /illumos-gate/usr/src/uts/common/io/pciex/pcieb.c (revision b8052df9f609edb713f6828c9eecc3d7be19dfb3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  * Copyright 2022 Oxide Computer Company
28  */
29 
30 /*
31  * Common x86 and SPARC PCI-E to PCI bus bridge nexus driver
32  *
33  * Background
34  * ----------
35  *
36  * The PCI Express (PCIe) specification defines that all of the PCIe devices in
37  * the system are connected together in a series of different fabrics. A way to
38  * think of these fabrics is that they are small networks where there are links
39  * between different devices and switches that allow fan out or fan in of the
40  * fabric. The entry point to that fabric is called a root complex and the
 * fabric terminates at what is called an endpoint, which is really just PCIe
42  * terminology for the common cards that are inserted into the system (HBAs,
43  * NICs, USB, NVMe, etc.).
44  *
45  * The PCIe specification states that every link on the system has a virtual
46  * PCI-to-PCI bridge. This allows PCIe devices to still be configured the same
47  * way traditional PCI devices are to the operating system and allows them to
48  * have a traditional PCI bus, device, and function associated with them, even
49  * though there is no actual shared bus. In addition, bridges are also used to
50  * connect traditional PCI and PCI-X devices into them.
51  *
 * The PCIe specification refers to upstream and downstream ports. Upstream
 * ports are considered closer to the root complex and downstream ports are
 * closer to the endpoint. We can divide the devices that the bridge driver
 * attaches to into two groups: those that are considered upstream ports, which
 * include root complexes and parts of PCIe switches, and downstream ports,
 * which are the other half of PCIe switches and endpoints (which this driver
 * does not attach to; normal hardware-specific or class-specific drivers
 * attach to those).
59  *
60  * Interrupt Management
61  * --------------------
62  *
63  * Upstream ports of bridges have additional things that we care about.
64  * Specifically they're the means through which we find out about:
65  *
66  *  - Advanced Error Reporting (AERs)
67  *  - Hotplug events
68  *  - Link Bandwidth Events
69  *  - Power Management Events (PME)
70  *
71  * Each of these features is an optional feature (though ones we hope are
72  * implemented). The features above are grouped into two different buckets based
73  * on which PCI capability they appear in. AER management is done through a PCI
74  * Express extended configuration header (it lives in extended PCI configuration
75  * space) called the 'Advanced Error Reporting Extended Capability'. The other
76  * events are all managed as part of the 'PCI Express Capability Structure'.
77  * This structure is found in traditional PCI configuration space.
78  *
79  * The way that the interrupts are programmed for these types of events differs
80  * a bit from the way one might expect a normal device to operate. For most
81  * devices, one allocates a number of interrupts based on a combination of what
82  * the device supports, what the OS supports per device, and the number the
83  * driver needs. Then the driver programs the device in a device-specific manner
84  * to indicate which events should trigger a specific interrupt vector.
85  *
86  * However, for both the AER and PCI capabilities, the driver has to do
87  * something different. The driver first allocates interrupts by programming the
88  * MSI or MSI-X table and then asks the device which interrupts have been
89  * assigned to these purposes. Because these events are only supported in
90  * 'upstream' devices, this does not interfere with the traditional management
91  * of MSI and MSI-X interrupts. At this time, the pcieb driver only supports the
92  * use of MSI interrupts.
93  *
94  * Once the interrupts have been allocated, we read back which vectors have been
95  * nominated by the device to cover the corresponding capability. The interrupt
96  * is allocated on a per-capability basis. Therefore, one interrupt would cover
97  * AERs, while another interrupt would cover the rest of the desired functions.
98  *
99  * To track which interrupts cover which behaviors, each driver state
100  * (pcieb_devstate_t) has a member called 'pcieb_isr_tab'. Each index represents
101  * an interrupt vector and there are a series of flags that represent the
102  * different possible interrupt sources: PCIEB_INTR_SRC_HP (hotplug),
 * PCIEB_INTR_SRC_PME (power management event), PCIEB_INTR_SRC_AER (error
104  * reporting), PCIEB_INTR_SRC_LBW (link bandwidth).
105  *
106  * Because the hotplug, link bandwidth, and power management events all share
107  * the same vector, if an interrupt comes in, we must check all of the enabled
108  * sources that might generate this interrupt. It is highly likely that more
109  * than one will fire at the same time, for example, a hotplug event that fires
110  * because a device has been inserted or removed, will likely trigger a link
111  * bandwidth event.
112  *
113  * The pcieb driver itself does not actually have much logic to deal with and
114  * clear the interrupts in question. It generally speaking will vector most
115  * events back to the more general pcie driver or, in the case of AERs, initiate
116  * a scan of the fabric itself (also part of the pcie driver).
117  *
118  * Link Management
119  * ---------------
120  *
121  * The pcieb driver is used to take care of two different aspects of link
122  * management. The first of these, as described briefly above, is to monitor for
123  * changes to the negotiated link bandwidth. These events are managed by
124  * enabling support for the interrupts in the PCI Express Capability Structure.
125  * This is all taken care of by the pcie driver through functions like
126  * pcie_link_bw_enable().
127  *
128  * The second aspect of link management the pcieb driver enables is the ability
129  * to retrain the link and optionally limit the speed. This is enabled through a
130  * series of private ioctls that are driven through a private userland utility,
131  * /usr/lib/pci/pcieb. Eventually, this should be more fleshed out and a more
132  * uniform interface based around the devctls that can be leveraged across
133  * different classes of devices should be used.
134  *
135  * Under the hood this basically leverages the ability of the upstream port to
136  * retrain a link by writing a bit to the PCIe link control register. See
137  * pcieb_ioctl_retrain(). From there, if the driver ever receives a request to
138  * change the maximum speed, that is updated in the card; however, it does not
139  * immediately retrain the link. A separate ioctl request is required to do so.
140  * Once the speed has been changed, regardless of whether or not it has been
141  * retrained, that fact will always be noted.
142  */
143 
144 #include <sys/sysmacros.h>
145 #include <sys/conf.h>
146 #include <sys/kmem.h>
147 #include <sys/debug.h>
148 #include <sys/modctl.h>
149 #include <sys/autoconf.h>
150 #include <sys/ddi_impldefs.h>
151 #include <sys/pci.h>
152 #include <sys/ddi.h>
153 #include <sys/sunddi.h>
154 #include <sys/sunndi.h>
155 #include <sys/fm/util.h>
156 #include <sys/pci_cap.h>
157 #include <sys/pci_impl.h>
158 #include <sys/pcie_impl.h>
159 #include <sys/open.h>
160 #include <sys/stat.h>
161 #include <sys/file.h>
162 #include <sys/promif.h>		/* prom_printf */
163 #include <sys/disp.h>
164 #include <sys/pcie_pwr.h>
165 #include <sys/hotplug/pci/pcie_hp.h>
166 #include "pcieb.h"
167 #include "pcieb_ioctl.h"
168 #ifdef PX_PLX
169 #include <io/pciex/pcieb_plx.h>
170 #endif /* PX_PLX */
171 
172 /*LINTLIBRARY*/
173 
/*
 * Driver-wide tunables. They are non-static file-scope variables, each given
 * its default value here.
 */

/* panic flag; defaults to PF_ERR_FATAL_FLAGS */
int pcieb_die = PF_ERR_FATAL_FLAGS;
/*
 * While this is 0, pcieb_attach() runs pcieb_41210_mps_wkrnd() for Intel
 * 41210 bridges; any other value skips that workaround.
 */
int pcieb_disable_41210_wkarnd = 0;

/* flag to turn on MSI support */
int pcieb_enable_msi = 1;

#if defined(DEBUG)
/*
 * DEBUG-only state. NOTE(review): presumably consumed by the PCIEB_DEBUG()
 * machinery -- confirm against its implementation.
 */
uint_t pcieb_dbg_print = 0;

static char *pcieb_debug_sym [] = {	/* same sequence as pcieb_debug_bit */
	/*  0 */ "attach",
	/*  1 */ "pwr",
	/*  2 */ "intr"
};
#endif /* DEBUG */
190 
/*
 * Forward declarations for the static bus nexus entry points referenced by
 * pcieb_bus_ops below, plus the FM init/fini helpers that pcieb_attach() and
 * pcieb_detach() call directly.
 */
static int pcieb_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *, off_t,
	off_t, caddr_t *);
static int pcieb_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
	void *);
static int pcieb_fm_init(pcieb_devstate_t *pcieb_p);
static void pcieb_fm_fini(pcieb_devstate_t *pcieb_p);
static int pcieb_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
    ddi_iblock_cookie_t *ibc_p);
static int pcieb_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
	ddi_dma_attr_t *attr_p, int (*waitfp)(caddr_t), caddr_t arg,
	ddi_dma_handle_t *handlep);
static int pcieb_dma_mctl(dev_info_t *dip, dev_info_t *rdip,
	ddi_dma_handle_t handle, enum ddi_dma_ctlops cmd, off_t *offp,
	size_t *lenp, caddr_t *objp, uint_t cache_flags);
static int pcieb_intr_ops(dev_info_t *dip, dev_info_t *rdip,
	ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);

/*
 * Bus operations vector for the bridge nexus. The initializers are
 * positional, so their order must match the member order of struct bus_ops.
 * Slots initialized to 0 or NULL are not provided by this driver; entries
 * named pcieb_* are the static functions declared above.
 */
static struct bus_ops pcieb_bus_ops = {
	BUSO_REV,
	pcieb_bus_map,
	0,
	0,
	0,
	i_ddi_map_fault,
	0,
	pcieb_dma_allochdl,
	ddi_dma_freehdl,
	ddi_dma_bindhdl,
	ddi_dma_unbindhdl,
	ddi_dma_flush,
	ddi_dma_win,
	pcieb_dma_mctl,
	pcieb_ctlops,
	ddi_bus_prop_op,
	ndi_busop_get_eventcookie,	/* (*bus_get_eventcookie)();	*/
	ndi_busop_add_eventcall,	/* (*bus_add_eventcall)();	*/
	ndi_busop_remove_eventcall,	/* (*bus_remove_eventcall)();	*/
	ndi_post_event,			/* (*bus_post_event)();		*/
	NULL,				/* (*bus_intr_ctl)();		*/
	NULL,				/* (*bus_config)();		*/
	NULL,				/* (*bus_unconfig)();		*/
	pcieb_fm_init_child,		/* (*bus_fm_init)();		*/
	NULL,				/* (*bus_fm_fini)();		*/
	i_ndi_busop_access_enter,	/* (*bus_fm_access_enter)();	*/
	i_ndi_busop_access_exit,	/* (*bus_fm_access_exit)();	*/
	pcie_bus_power,			/* (*bus_power)();		*/
	pcieb_intr_ops,			/* (*bus_intr_op)();		*/
	pcie_hp_common_ops		/* (*bus_hp_op)();		*/
};
240 
/* Driver entry points and the interrupt handler */
static int	pcieb_open(dev_t *, int, int, cred_t *);
static int	pcieb_close(dev_t, int, int, cred_t *);
static int	pcieb_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int	pcieb_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static uint_t	pcieb_intr_handler(caddr_t arg1, caddr_t arg2);

/* PM related functions */
static int	pcieb_pwr_setup(dev_info_t *dip);
static int	pcieb_pwr_init_and_raise(dev_info_t *dip, pcie_pwr_t *pwr_p);
static void	pcieb_pwr_teardown(dev_info_t *dip);
static int	pcieb_pwr_disable(dev_info_t *dip);

/* Hotplug related functions */
static void pcieb_id_props(pcieb_devstate_t *pcieb);

/*
 * soft state pointer
 *
 * Initialized by ddi_soft_state_init() in _init() with elements of size
 * sizeof (pcieb_devstate_t), and passed to ddi_get_soft_state() to find the
 * pcieb_devstate_t for a given instance number.
 */
void *pcieb_state;

/*
 * Character device entry points. Only open, close, ioctl and prop_op are
 * supplied; every other entry point is nodev (nochpoll for poll) and there
 * is no streamtab.
 */
static struct cb_ops pcieb_cb_ops = {
	pcieb_open,			/* open */
	pcieb_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	pcieb_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	pcie_prop_op,			/* cb_prop_op */
	NULL,				/* streamtab */
	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};
281 
/* Autoconfiguration entry points referenced by pcieb_ops */
static int	pcieb_probe(dev_info_t *);
static int	pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int	pcieb_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

/*
 * Device operations vector. It ties together the autoconfiguration entry
 * points above, the character device ops (pcieb_cb_ops) and the nexus bus ops
 * (pcieb_bus_ops). The power entry point is pcie_power rather than a
 * pcieb-specific routine, and quiesce is ddi_quiesce_not_needed.
 */
static struct dev_ops pcieb_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt  */
	pcieb_info,		/* info */
	nulldev,		/* identify */
	pcieb_probe,		/* probe */
	pcieb_attach,		/* attach */
	pcieb_detach,		/* detach */
	nulldev,		/* reset */
	&pcieb_cb_ops,		/* driver operations */
	&pcieb_bus_ops,		/* bus operations */
	pcie_power,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
300 
/*
 * Module linkage information for the kernel.
 *
 * modldrv describes this module as a device driver using pcieb_ops;
 * modlinkage is the NULL-terminated list wrapping it that _init(), _fini()
 * and _info() hand to mod_install(), mod_remove() and mod_info().
 */

static struct modldrv modldrv = {
	&mod_driverops, /* Type of module */
	"PCIe bridge/switch driver",
	&pcieb_ops,	/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};
316 
/*
 * forward function declarations:
 *
 * Child node setup/teardown helpers (used by pcieb_ctlops() for
 * DDI_CTLOPS_INITCHILD and DDI_CTLOPS_UNINITCHILD) and helpers used by
 * pcieb_attach().
 */
static void	pcieb_uninitchild(dev_info_t *);
static int	pcieb_initchild(dev_info_t *child);
static void	pcieb_create_ranges_prop(dev_info_t *, ddi_acc_handle_t);
static boolean_t pcieb_is_pcie_device_type(dev_info_t *dip);

/* interrupt related declarations */
static int	pcieb_msi_supported(dev_info_t *);
static int	pcieb_intr_attach(pcieb_devstate_t *pcieb);
static int	pcieb_intr_init(pcieb_devstate_t *pcieb_p, int intr_type);
static void	pcieb_intr_fini(pcieb_devstate_t *pcieb_p);
330 
331 int
332 _init(void)
333 {
334 	int e;
335 
336 	if ((e = ddi_soft_state_init(&pcieb_state, sizeof (pcieb_devstate_t),
337 	    1)) == 0 && (e = mod_install(&modlinkage)) != 0)
338 		ddi_soft_state_fini(&pcieb_state);
339 	return (e);
340 }
341 
342 int
343 _fini(void)
344 {
345 	int e;
346 
347 	if ((e = mod_remove(&modlinkage)) == 0) {
348 		ddi_soft_state_fini(&pcieb_state);
349 	}
350 	return (e);
351 }
352 
353 int
354 _info(struct modinfo *modinfop)
355 {
356 	return (mod_info(&modlinkage, modinfop));
357 }
358 
359 /* ARGSUSED */
360 static int
361 pcieb_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
362 {
363 	minor_t		minor = getminor((dev_t)arg);
364 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
365 	pcieb_devstate_t *pcieb = ddi_get_soft_state(pcieb_state, instance);
366 	int		ret = DDI_SUCCESS;
367 
368 	switch (infocmd) {
369 	case DDI_INFO_DEVT2INSTANCE:
370 		*result = (void *)(intptr_t)instance;
371 		break;
372 	case DDI_INFO_DEVT2DEVINFO:
373 		if (pcieb == NULL) {
374 			ret = DDI_FAILURE;
375 			break;
376 		}
377 
378 		*result = (void *)pcieb->pcieb_dip;
379 		break;
380 	default:
381 		ret = DDI_FAILURE;
382 		break;
383 	}
384 
385 	return (ret);
386 }
387 
388 
389 /*ARGSUSED*/
390 static int
391 pcieb_probe(dev_info_t *devi)
392 {
393 	return (DDI_PROBE_SUCCESS);
394 }
395 
396 /*
397  * This is a workaround for an undocumented HW erratum with the
398  * multi-function, F0 and F2, Intel 41210 PCIe-to-PCI bridge. When
399  * Fn (cdip) attaches, this workaround is called to initialize Fn's
400  * sibling (sdip) with MPS/MRRS if it isn't already configured.
401  * Doing so prevents a malformed TLP panic.
402  */
403 static void
404 pcieb_41210_mps_wkrnd(dev_info_t *cdip)
405 {
406 	dev_info_t *sdip;
407 	ddi_acc_handle_t cfg_hdl;
408 	uint16_t cdip_dev_ctrl, cdip_mrrs_mps;
409 	pcie_bus_t *cdip_bus_p = PCIE_DIP2BUS(cdip);
410 
411 	/* Get cdip's MPS/MRRS already setup by pcie_initchild_mps() */
412 	ASSERT(cdip_bus_p);
413 	cdip_dev_ctrl  = PCIE_CAP_GET(16, cdip_bus_p, PCIE_DEVCTL);
414 	cdip_mrrs_mps  = cdip_dev_ctrl &
415 	    (PCIE_DEVCTL_MAX_READ_REQ_MASK | PCIE_DEVCTL_MAX_PAYLOAD_MASK);
416 
417 	/* Locate sdip and set its MPS/MRRS when applicable */
418 	for (sdip = ddi_get_child(ddi_get_parent(cdip)); sdip;
419 	    sdip = ddi_get_next_sibling(sdip)) {
420 		uint16_t sdip_dev_ctrl, sdip_mrrs_mps, cap_ptr;
421 		uint32_t bus_dev_ven_id;
422 
423 		if (sdip == cdip || pci_config_setup(sdip, &cfg_hdl)
424 		    != DDI_SUCCESS)
425 			continue;
426 
427 		/* must be an Intel 41210 bridge */
428 		bus_dev_ven_id = pci_config_get32(cfg_hdl, PCI_CONF_VENID);
429 		if (!PCIEB_IS_41210_BRIDGE(bus_dev_ven_id)) {
430 			pci_config_teardown(&cfg_hdl);
431 			continue;
432 		}
433 
434 		if (PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)
435 		    != DDI_SUCCESS) {
436 			pci_config_teardown(&cfg_hdl);
437 			continue;
438 		}
439 
440 		/* get sdip's MPS/MRRS to compare to cdip's */
441 		sdip_dev_ctrl = PCI_CAP_GET16(cfg_hdl, 0, cap_ptr,
442 		    PCIE_DEVCTL);
443 		sdip_mrrs_mps = sdip_dev_ctrl &
444 		    (PCIE_DEVCTL_MAX_READ_REQ_MASK |
445 		    PCIE_DEVCTL_MAX_PAYLOAD_MASK);
446 
447 		/* if sdip already attached then its MPS/MRRS is configured */
448 		if (i_ddi_devi_attached(sdip)) {
449 			ASSERT(sdip_mrrs_mps == cdip_mrrs_mps);
450 			pci_config_teardown(&cfg_hdl);
451 			continue;
452 		}
453 
454 		/* otherwise, update sdip's MPS/MRRS if different from cdip's */
455 		if (sdip_mrrs_mps != cdip_mrrs_mps) {
456 			sdip_dev_ctrl = (sdip_dev_ctrl &
457 			    ~(PCIE_DEVCTL_MAX_READ_REQ_MASK |
458 			    PCIE_DEVCTL_MAX_PAYLOAD_MASK)) | cdip_mrrs_mps;
459 
460 			(void) PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL,
461 			    sdip_dev_ctrl);
462 		}
463 
464 		/*
465 		 * note: sdip's bus_mps will be updated by
466 		 * pcie_initchild_mps()
467 		 */
468 
469 		pci_config_teardown(&cfg_hdl);
470 
471 		break;
472 	}
473 }
474 
/*
 * Attach entry point.
 *
 * DDI_RESUME: calls pcie_pwr_resume() and reports success regardless of its
 * return value.
 * DDI_ATTACH: performs the full bring-up below.
 * Any other command fails.
 *
 * Once the soft state has been allocated, every failure unwinds through the
 * "fail" label, which calls pcieb_detach(DDI_DETACH) no matter how far the
 * attach had progressed.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int			instance;
	char			device_type[8];
	pcieb_devstate_t	*pcieb;
	/* Note: bus_p is dereferenced here for every cmd, before the switch */
	pcie_bus_t		*bus_p = PCIE_DIP2UPBUS(devi);
	ddi_acc_handle_t	config_handle = bus_p->bus_cfg_hdl;

	switch (cmd) {
	case DDI_RESUME:
		(void) pcie_pwr_resume(devi);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);

	case DDI_ATTACH:
		break;
	}

	/* Nothing has been allocated yet, so these early exits just return */
	if (!(PCIE_IS_BDG(bus_p))) {
		PCIEB_DEBUG(DBG_ATTACH, devi, "This is not a switch or"
		" bridge\n");
		return (DDI_FAILURE);
	}

	/*
	 * If PCIE_LINKCTL_LINK_DISABLE bit in the PCIe Config
	 * Space (PCIe Capability Link Control Register) is set,
	 * then do not bind the driver.
	 */
	if (PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL) & PCIE_LINKCTL_LINK_DISABLE)
		return (DDI_FAILURE);

	/*
	 * Allocate and get soft state structure.
	 */
	instance = ddi_get_instance(devi);
	if (ddi_soft_state_zalloc(pcieb_state, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);
	pcieb = ddi_get_soft_state(pcieb_state, instance);
	pcieb->pcieb_dip = devi;

	/*
	 * PCIEB_INIT_FM records that FM was set up, so that pcieb_detach()
	 * only calls pcieb_fm_fini() when there is something to undo.
	 */
	if ((pcieb_fm_init(pcieb)) != DDI_SUCCESS) {
		PCIEB_DEBUG(DBG_ATTACH, devi, "Failed in pcieb_fm_init\n");
		goto fail;
	}
	pcieb->pcieb_init_flags |= PCIEB_INIT_FM;

	/*
	 * The error and peek/poke mutexes are initialized with the FM
	 * interrupt block cookie (pcieb_fm_ibc); pcieb_mutex is not.
	 */
	mutex_init(&pcieb->pcieb_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&pcieb->pcieb_err_mutex, NULL, MUTEX_DRIVER,
	    (void *)pcieb->pcieb_fm_ibc);
	mutex_init(&pcieb->pcieb_peek_poke_mutex, NULL, MUTEX_DRIVER,
	    (void *)pcieb->pcieb_fm_ibc);

	/* create special properties for device identification */
	pcieb_id_props(pcieb);

	/*
	 * Power management setup. This also makes sure that switch/bridge
	 * is at D0 during attach.
	 */
	if (pwr_common_setup(devi) != DDI_SUCCESS) {
		PCIEB_DEBUG(DBG_PWR, devi, "pwr_common_setup failed\n");
		goto fail;
	}

	if (pcieb_pwr_setup(devi) != DDI_SUCCESS) {
		PCIEB_DEBUG(DBG_PWR, devi, "pxb_pwr_setup failed \n");
		goto fail;
	}

	/*
	 * Make sure the "device_type" property exists.
	 * Both candidate strings fit in device_type[8] with their terminator.
	 */
	if (pcieb_is_pcie_device_type(devi))
		(void) strcpy(device_type, "pciex");
	else
		(void) strcpy(device_type, "pci");

	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
	    "device_type", device_type);

	/*
	 * Check whether the "ranges" property is present.
	 * Otherwise create the ranges property by reading
	 * the configuration registers
	 */
	if (ddi_prop_exists(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
	    "ranges") == 0) {
		pcieb_create_ranges_prop(devi, config_handle);
	}

	/* PCIe-to-PCI bridges additionally get PCI performance parameters */
	if (PCIE_IS_PCI_BDG(bus_p))
		pcieb_set_pci_perf_parameters(devi, config_handle);

#ifdef PX_PLX
	pcieb_attach_plx_workarounds(pcieb);
#endif /* PX_PLX */

	if (pcie_init(devi, NULL) != DDI_SUCCESS)
		goto fail;

	/* Intel PCIe-to-PCI 41210 bridge workaround -- if applicable */
	if (pcieb_disable_41210_wkarnd == 0 &&
	    PCIEB_IS_41210_BRIDGE(bus_p->bus_dev_ven_id))
		pcieb_41210_mps_wkrnd(devi);

	/*
	 * Initialize interrupt handlers. Ignore return value.
	 */
	(void) pcieb_intr_attach(pcieb);

	/* Failures to enable these events are likewise not fatal to attach */
	(void) pcie_hpintr_enable(devi);

	(void) pcie_link_bw_enable(devi);

	/* Do any platform specific workarounds needed at this time */
	pcieb_plat_attach_workaround(devi);

	/*
	 * If this is a root port, we need to go through and at this point in
	 * time set up and initialize all fabric-wide settings such as the max
	 * packet size, tagging, etc. Since this will involve scanning the
	 * fabric, all error enabling and sw workarounds should be in place
	 * before doing this. For hotplug-capable bridges, this will happen
	 * again when a hotplug event occurs. See the pcie theory statement in
	 * uts/common/io/pciex/pcie.c for more information.
	 */
	if (PCIE_IS_RP(bus_p))
		pcie_fabric_setup(devi);

	ddi_report_dev(devi);
	return (DDI_SUCCESS);

fail:
	/*
	 * Common unwind: pcieb_detach() is run in full regardless of which
	 * step failed, and its return value is ignored.
	 */
	(void) pcieb_detach(devi, DDI_DETACH);
	return (DDI_FAILURE);
}
615 
616 static int
617 pcieb_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
618 {
619 	pcieb_devstate_t *pcieb;
620 	int error = DDI_SUCCESS;
621 
622 	switch (cmd) {
623 	case DDI_SUSPEND:
624 		error = pcie_pwr_suspend(devi);
625 		return (error);
626 
627 	case DDI_DETACH:
628 		break;
629 
630 	default:
631 		return (DDI_FAILURE);
632 	}
633 
634 	pcieb = ddi_get_soft_state(pcieb_state, ddi_get_instance(devi));
635 
636 	/* disable hotplug interrupt */
637 	(void) pcie_hpintr_disable(devi);
638 
639 	/* remove interrupt handlers */
640 	pcieb_intr_fini(pcieb);
641 
642 	/* uninitialize inband PCI-E HPC if present */
643 	(void) pcie_uninit(devi);
644 
645 	(void) ddi_prop_remove(DDI_DEV_T_NONE, devi, "device_type");
646 
647 	(void) ndi_prop_remove(DDI_DEV_T_NONE, pcieb->pcieb_dip,
648 	    "pcie_ce_mask");
649 
650 	if (pcieb->pcieb_init_flags & PCIEB_INIT_FM)
651 		pcieb_fm_fini(pcieb);
652 
653 	pcieb_pwr_teardown(devi);
654 	pwr_common_teardown(devi);
655 
656 	mutex_destroy(&pcieb->pcieb_peek_poke_mutex);
657 	mutex_destroy(&pcieb->pcieb_err_mutex);
658 	mutex_destroy(&pcieb->pcieb_mutex);
659 
660 	/*
661 	 * And finally free the per-pci soft state.
662 	 */
663 	ddi_soft_state_free(pcieb_state, ddi_get_instance(devi));
664 
665 	return (DDI_SUCCESS);
666 }
667 
668 static int
669 pcieb_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
670     off_t offset, off_t len, caddr_t *vaddrp)
671 {
672 	dev_info_t *pdip;
673 
674 	if (PCIE_IS_RP(PCIE_DIP2BUS(dip)) && mp->map_handlep != NULL) {
675 		ddi_acc_impl_t *hdlp =
676 		    (ddi_acc_impl_t *)(mp->map_handlep)->ah_platform_private;
677 
678 		pcieb_set_prot_scan(dip, hdlp);
679 	}
680 	pdip = (dev_info_t *)DEVI(dip)->devi_parent;
681 	return ((DEVI(pdip)->devi_ops->devo_bus_ops->bus_map)(pdip, rdip, mp,
682 	    offset, len, vaddrp));
683 }
684 
685 static int
686 pcieb_ctlops(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
687     void *arg, void *result)
688 {
689 	pci_regspec_t *drv_regp;
690 	int	reglen;
691 	int	rn;
692 	int	totreg;
693 	pcieb_devstate_t *pcieb = ddi_get_soft_state(pcieb_state,
694 	    ddi_get_instance(dip));
695 	struct detachspec *ds;
696 	struct attachspec *as;
697 
698 	switch (ctlop) {
699 	case DDI_CTLOPS_REPORTDEV:
700 		if (rdip == (dev_info_t *)0)
701 			return (DDI_FAILURE);
702 
703 		if (ddi_get_parent(rdip) == dip) {
704 			cmn_err(CE_CONT, "?PCIE-device: %s@%s, %s%d\n",
705 			    ddi_node_name(rdip), ddi_get_name_addr(rdip),
706 			    ddi_driver_name(rdip), ddi_get_instance(rdip));
707 		}
708 
709 		/* Pass it up for fabric sync */
710 		(void) ddi_ctlops(dip, rdip, ctlop, arg, result);
711 		return (DDI_SUCCESS);
712 
713 	case DDI_CTLOPS_INITCHILD:
714 		return (pcieb_initchild((dev_info_t *)arg));
715 
716 	case DDI_CTLOPS_UNINITCHILD:
717 		pcieb_uninitchild((dev_info_t *)arg);
718 		return (DDI_SUCCESS);
719 
720 	case DDI_CTLOPS_SIDDEV:
721 		return (DDI_SUCCESS);
722 
723 	case DDI_CTLOPS_REGSIZE:
724 	case DDI_CTLOPS_NREGS:
725 		if (rdip == (dev_info_t *)0)
726 			return (DDI_FAILURE);
727 		break;
728 
729 	case DDI_CTLOPS_PEEK:
730 	case DDI_CTLOPS_POKE:
731 		return (pcieb_plat_peekpoke(dip, rdip, ctlop, arg, result));
732 	case DDI_CTLOPS_ATTACH:
733 		if (!pcie_is_child(dip, rdip))
734 			return (DDI_SUCCESS);
735 
736 		as = (struct attachspec *)arg;
737 		switch (as->when) {
738 		case DDI_PRE:
739 			if (as->cmd == DDI_RESUME) {
740 				pcie_clear_errors(rdip);
741 				if (pcieb_plat_ctlops(rdip, ctlop, arg) !=
742 				    DDI_SUCCESS)
743 					return (DDI_FAILURE);
744 			}
745 
746 			if (as->cmd == DDI_ATTACH)
747 				return (pcie_pm_hold(dip));
748 
749 			return (DDI_SUCCESS);
750 
751 		case DDI_POST:
752 			if (as->cmd == DDI_ATTACH &&
753 			    as->result != DDI_SUCCESS) {
754 				/*
755 				 * Attach failed for the child device. The child
756 				 * driver may have made PM calls before the
757 				 * attach failed. pcie_pm_remove_child() should
758 				 * cleanup PM state and holds (if any)
759 				 * associated with the child device.
760 				 */
761 				return (pcie_pm_remove_child(dip, rdip));
762 			}
763 
764 			if (as->result == DDI_SUCCESS) {
765 				pf_init(rdip, (void *)pcieb->pcieb_fm_ibc,
766 				    as->cmd);
767 
768 				(void) pcieb_plat_ctlops(rdip, ctlop, arg);
769 			}
770 
771 			/*
772 			 * For empty hotplug-capable slots, we should explicitly
773 			 * disable the errors, so that we won't panic upon
774 			 * unsupported hotplug messages.
775 			 */
776 			if ((!ddi_prop_exists(DDI_DEV_T_ANY, rdip,
777 			    DDI_PROP_DONTPASS, "hotplug-capable")) ||
778 			    ddi_get_child(rdip)) {
779 				(void) pcie_postattach_child(rdip);
780 				return (DDI_SUCCESS);
781 			}
782 
783 			pcie_disable_errors(rdip);
784 
785 			return (DDI_SUCCESS);
786 		default:
787 			break;
788 		}
789 		return (DDI_SUCCESS);
790 
791 	case DDI_CTLOPS_DETACH:
792 		if (!pcie_is_child(dip, rdip))
793 			return (DDI_SUCCESS);
794 
795 		ds = (struct detachspec *)arg;
796 		switch (ds->when) {
797 		case DDI_PRE:
798 			pf_fini(rdip, ds->cmd);
799 			return (DDI_SUCCESS);
800 
801 		case DDI_POST:
802 			if (pcieb_plat_ctlops(rdip, ctlop, arg) != DDI_SUCCESS)
803 				return (DDI_FAILURE);
804 			if (ds->cmd == DDI_DETACH &&
805 			    ds->result == DDI_SUCCESS) {
806 				return (pcie_pm_remove_child(dip, rdip));
807 			}
808 			return (DDI_SUCCESS);
809 		default:
810 			break;
811 		}
812 		return (DDI_SUCCESS);
813 	default:
814 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
815 	}
816 
817 	*(int *)result = 0;
818 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip,
819 	    DDI_PROP_DONTPASS | DDI_PROP_CANSLEEP, "reg", (caddr_t)&drv_regp,
820 	    &reglen) != DDI_SUCCESS)
821 		return (DDI_FAILURE);
822 
823 	totreg = reglen / sizeof (pci_regspec_t);
824 	if (ctlop == DDI_CTLOPS_NREGS)
825 		*(int *)result = totreg;
826 	else if (ctlop == DDI_CTLOPS_REGSIZE) {
827 		rn = *(int *)arg;
828 		if (rn >= totreg) {
829 			kmem_free(drv_regp, reglen);
830 			return (DDI_FAILURE);
831 		}
832 
833 		*(off_t *)result = drv_regp[rn].pci_size_low |
834 		    ((uint64_t)drv_regp[rn].pci_size_hi << 32);
835 	}
836 
837 	kmem_free(drv_regp, reglen);
838 	return (DDI_SUCCESS);
839 }
840 
841 /*
842  * name_child
843  *
844  * This function is called from init_child to name a node. It is
845  * also passed as a callback for node merging functions.
846  *
847  * return value: DDI_SUCCESS, DDI_FAILURE
848  */
849 static int
850 pcieb_name_child(dev_info_t *child, char *name, int namelen)
851 {
852 	pci_regspec_t *pci_rp;
853 	uint_t device, func;
854 	char **unit_addr;
855 	uint_t n;
856 
857 	/*
858 	 * For .conf nodes, use unit-address property as name
859 	 */
860 	if (ndi_dev_is_persistent_node(child) == 0) {
861 		if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, child,
862 		    DDI_PROP_DONTPASS, "unit-address", &unit_addr, &n) !=
863 		    DDI_PROP_SUCCESS) {
864 			cmn_err(CE_WARN,
865 			    "cannot find unit-address in %s.conf",
866 			    ddi_driver_name(child));
867 			return (DDI_FAILURE);
868 		}
869 		if (n != 1 || *unit_addr == NULL || **unit_addr == 0) {
870 			cmn_err(CE_WARN, "unit-address property in %s.conf"
871 			    " not well-formed", ddi_driver_name(child));
872 			ddi_prop_free(unit_addr);
873 			return (DDI_FAILURE);
874 		}
875 		(void) snprintf(name, namelen, "%s", *unit_addr);
876 		ddi_prop_free(unit_addr);
877 		return (DDI_SUCCESS);
878 	}
879 
880 	/*
881 	 * Get the address portion of the node name based on
882 	 * the function and device number.
883 	 */
884 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, child,
885 	    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp, &n) != DDI_SUCCESS) {
886 		return (DDI_FAILURE);
887 	}
888 
889 	/* copy the device identifications */
890 	device = PCI_REG_DEV_G(pci_rp[0].pci_phys_hi);
891 	func = PCI_REG_FUNC_G(pci_rp[0].pci_phys_hi);
892 
893 	if (pcie_ari_is_enabled(ddi_get_parent(child))
894 	    == PCIE_ARI_FORW_ENABLED) {
895 		func = (device << 3) | func;
896 		device = 0;
897 	}
898 
899 	if (func != 0)
900 		(void) snprintf(name, namelen, "%x,%x", device, func);
901 	else
902 		(void) snprintf(name, namelen, "%x", device);
903 
904 	ddi_prop_free(pci_rp);
905 	return (DDI_SUCCESS);
906 }
907 
908 static int
909 pcieb_initchild(dev_info_t *child)
910 {
911 	char name[MAXNAMELEN];
912 	int result = DDI_FAILURE;
913 	pcieb_devstate_t *pcieb =
914 	    (pcieb_devstate_t *)ddi_get_soft_state(pcieb_state,
915 	    ddi_get_instance(ddi_get_parent(child)));
916 
917 	/*
918 	 * Name the child
919 	 */
920 	if (pcieb_name_child(child, name, MAXNAMELEN) != DDI_SUCCESS) {
921 		result = DDI_FAILURE;
922 		goto done;
923 	}
924 	ddi_set_name_addr(child, name);
925 
926 	/*
927 	 * Pseudo nodes indicate a prototype node with per-instance
928 	 * properties to be merged into the real h/w device node.
929 	 * The interpretation of the unit-address is DD[,F]
930 	 * where DD is the device id and F is the function.
931 	 */
932 	if (ndi_dev_is_persistent_node(child) == 0) {
933 		extern int pci_allow_pseudo_children;
934 
935 		/*
936 		 * Try to merge the properties from this prototype
937 		 * node into real h/w nodes.
938 		 */
939 		if (ndi_merge_node(child, pcieb_name_child) == DDI_SUCCESS) {
940 			/*
941 			 * Merged ok - return failure to remove the node.
942 			 */
943 			ddi_set_name_addr(child, NULL);
944 			result = DDI_FAILURE;
945 			goto done;
946 		}
947 
948 		/* workaround for ddivs to run under PCI-E */
949 		if (pci_allow_pseudo_children) {
950 			result = DDI_SUCCESS;
951 			goto done;
952 		}
953 
954 		/*
955 		 * The child was not merged into a h/w node,
956 		 * but there's not much we can do with it other
957 		 * than return failure to cause the node to be removed.
958 		 */
959 		cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged",
960 		    ddi_driver_name(child), ddi_get_name_addr(child),
961 		    ddi_driver_name(child));
962 		ddi_set_name_addr(child, NULL);
963 		result = DDI_NOT_WELL_FORMED;
964 		goto done;
965 	}
966 
967 	/* platform specific initchild */
968 	pcieb_plat_initchild(child);
969 
970 	if (pcie_pm_hold(pcieb->pcieb_dip) != DDI_SUCCESS) {
971 		PCIEB_DEBUG(DBG_PWR, pcieb->pcieb_dip,
972 		    "INITCHILD: px_pm_hold failed\n");
973 		result = DDI_FAILURE;
974 		goto done;
975 	}
976 	/* Any return from here must call pcie_pm_release */
977 
978 	/*
979 	 * If configuration registers were previously saved by
980 	 * child (before it entered D3), then let the child do the
981 	 * restore to set up the config regs as it'll first need to
982 	 * power the device out of D3.
983 	 */
984 	if (ddi_prop_exists(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
985 	    "config-regs-saved-by-child") == 1) {
986 		PCIEB_DEBUG(DBG_PWR, ddi_get_parent(child),
987 		    "INITCHILD: config regs to be restored by child"
988 		    " for %s@%s\n", ddi_node_name(child),
989 		    ddi_get_name_addr(child));
990 
991 		result = DDI_SUCCESS;
992 		goto cleanup;
993 	}
994 
995 	PCIEB_DEBUG(DBG_PWR, ddi_get_parent(child),
996 	    "INITCHILD: config regs setup for %s@%s\n",
997 	    ddi_node_name(child), ddi_get_name_addr(child));
998 
999 	pcie_init_dom(child);
1000 
1001 	if (pcie_initchild(child) != DDI_SUCCESS) {
1002 		result = DDI_FAILURE;
1003 		pcie_fini_dom(child);
1004 		goto cleanup;
1005 	}
1006 
1007 #ifdef PX_PLX
1008 	if (pcieb_init_plx_workarounds(pcieb, child) == DDI_FAILURE) {
1009 		result = DDI_FAILURE;
1010 		pcie_fini_dom(child);
1011 		goto cleanup;
1012 	}
1013 #endif /* PX_PLX */
1014 
1015 	result = DDI_SUCCESS;
1016 cleanup:
1017 	pcie_pm_release(pcieb->pcieb_dip);
1018 done:
1019 	return (result);
1020 }
1021 
1022 static void
1023 pcieb_uninitchild(dev_info_t *dip)
1024 {
1025 
1026 	pcie_uninitchild(dip);
1027 
1028 	pcieb_plat_uninitchild(dip);
1029 
1030 	ddi_set_name_addr(dip, NULL);
1031 
1032 	/*
1033 	 * Strip the node to properly convert it back to prototype form
1034 	 */
1035 	ddi_remove_minor_node(dip, NULL);
1036 
1037 	ddi_prop_remove_all(dip);
1038 }
1039 
1040 static boolean_t
1041 pcieb_is_pcie_device_type(dev_info_t *dip)
1042 {
1043 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
1044 
1045 	if (PCIE_IS_SW(bus_p) || PCIE_IS_RP(bus_p) || PCIE_IS_PCI2PCIE(bus_p))
1046 		return (B_TRUE);
1047 
1048 	return (B_FALSE);
1049 }
1050 
1051 static int
1052 pcieb_intr_attach(pcieb_devstate_t *pcieb)
1053 {
1054 	int			intr_types;
1055 	dev_info_t		*dip = pcieb->pcieb_dip;
1056 
1057 	/* Allow platform specific code to do any initialization first */
1058 	pcieb_plat_intr_attach(pcieb);
1059 
1060 	/*
1061 	 * Initialize interrupt handlers.
1062 	 * If both MSI and FIXED are supported, try to attach MSI first.
1063 	 * If MSI fails for any reason, then try FIXED, but only allow one
1064 	 * type to be attached.
1065 	 */
1066 	if (ddi_intr_get_supported_types(dip, &intr_types) != DDI_SUCCESS) {
1067 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_supported_types"
1068 		    " failed\n");
1069 		goto FAIL;
1070 	}
1071 
1072 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
1073 	    (pcieb_msi_supported(dip) == DDI_SUCCESS)) {
1074 		if (pcieb_intr_init(pcieb, DDI_INTR_TYPE_MSI) == DDI_SUCCESS)
1075 			intr_types = DDI_INTR_TYPE_MSI;
1076 		else {
1077 			PCIEB_DEBUG(DBG_ATTACH, dip, "Unable to attach MSI"
1078 			    " handler\n");
1079 		}
1080 	}
1081 
1082 	if (intr_types != DDI_INTR_TYPE_MSI) {
1083 		/*
1084 		 * MSIs are not supported or MSI initialization failed. For Root
1085 		 * Ports mark this so error handling might try to fallback to
1086 		 * some other mechanism if available (machinecheck etc.).
1087 		 */
1088 		if (PCIE_IS_RP(PCIE_DIP2UPBUS(dip)))
1089 			pcieb->pcieb_no_aer_msi = B_TRUE;
1090 	}
1091 
1092 	if (intr_types & DDI_INTR_TYPE_FIXED) {
1093 		if (pcieb_intr_init(pcieb, DDI_INTR_TYPE_FIXED) !=
1094 		    DDI_SUCCESS) {
1095 			PCIEB_DEBUG(DBG_ATTACH, dip,
1096 			    "Unable to attach INTx handler\n");
1097 			goto FAIL;
1098 		}
1099 	}
1100 	return (DDI_SUCCESS);
1101 
1102 FAIL:
1103 	return (DDI_FAILURE);
1104 }
1105 
1106 /*
1107  * This function initializes internally generated interrupts only.
1108  * It does not affect any interrupts generated by downstream devices
1109  * or the forwarding of them.
1110  *
1111  * Enable Device Specific Interrupts or Hotplug features here.
1112  * Enabling features may change how many interrupts are requested
1113  * by the device.  If features are not enabled first, the
1114  * device might not ask for any interrupts.
1115  */
1116 static int
1117 pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type)
1118 {
1119 	dev_info_t	*dip = pcieb->pcieb_dip;
1120 	int		nintrs, request, count, x;
1121 	int		intr_cap = 0;
1122 	int		inum = 0;
1123 	int		ret;
1124 	pcie_bus_t	*bus_p = PCIE_DIP2UPBUS(dip);
1125 	uint16_t	vendorid = bus_p->bus_dev_ven_id & 0xFFFF;
1126 	boolean_t	is_hp = B_FALSE;
1127 	boolean_t	is_pme = B_FALSE;
1128 	boolean_t	is_lbw = B_FALSE;
1129 
1130 	PCIEB_DEBUG(DBG_ATTACH, dip, "pcieb_intr_init: Attaching %s handler\n",
1131 	    (intr_type == DDI_INTR_TYPE_MSI) ? "MSI" : "INTx");
1132 
1133 	request = 0;
1134 	if (PCIE_IS_HOTPLUG_ENABLED(dip)) {
1135 		is_hp = B_TRUE;
1136 	}
1137 
1138 	if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p) &&
1139 	    (vendorid == NVIDIA_VENDOR_ID)) {
1140 		is_pme = B_TRUE;
1141 	}
1142 
1143 	if (intr_type == DDI_INTR_TYPE_MSI && pcie_link_bw_supported(dip)) {
1144 		is_lbw = B_TRUE;
1145 	}
1146 
1147 	/*
1148 	 * The hot-plug, link bandwidth, and power management events all are
1149 	 * based on the PCI Express capability. Therefore, they all share their
1150 	 * own interrupt.
1151 	 */
1152 	if (is_hp || is_pme || is_lbw) {
1153 		request++;
1154 	}
1155 
1156 	/*
1157 	 * If this device is a root port, which means it can have MSI interrupts
1158 	 * enabled for AERs, then we need to request one.
1159 	 */
1160 	if (intr_type == DDI_INTR_TYPE_MSI) {
1161 		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
1162 			request++;
1163 		}
1164 	}
1165 
1166 	if (request == 0)
1167 		return (DDI_SUCCESS);
1168 
1169 	/*
1170 	 * Get number of supported interrupts.
1171 	 *
1172 	 * Several Bridges/Switches will not have this property set, resulting
1173 	 * in a FAILURE, if the device is not configured in a way that
1174 	 * interrupts are needed. (eg. hotplugging)
1175 	 */
1176 	ret = ddi_intr_get_nintrs(dip, intr_type, &nintrs);
1177 	if ((ret != DDI_SUCCESS) || (nintrs == 0)) {
1178 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_nintrs ret:%d"
1179 		    " req:%d\n", ret, nintrs);
1180 		return (DDI_FAILURE);
1181 	}
1182 
1183 	PCIEB_DEBUG(DBG_ATTACH, dip, "bdf 0x%x: ddi_intr_get_nintrs: nintrs %d",
1184 	    " request %d\n", bus_p->bus_bdf, nintrs, request);
1185 
1186 	if (request > nintrs)
1187 		request = nintrs;
1188 
1189 	/* Allocate an array of interrupt handlers */
1190 	pcieb->pcieb_htable_size = sizeof (ddi_intr_handle_t) * request;
1191 	pcieb->pcieb_htable = kmem_zalloc(pcieb->pcieb_htable_size,
1192 	    KM_SLEEP);
1193 	pcieb->pcieb_init_flags |= PCIEB_INIT_HTABLE;
1194 
1195 	ret = ddi_intr_alloc(dip, pcieb->pcieb_htable, intr_type, inum,
1196 	    request, &count, DDI_INTR_ALLOC_NORMAL);
1197 	if ((ret != DDI_SUCCESS) || (count == 0)) {
1198 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_alloc() ret: %d ask: %d"
1199 		    " actual: %d\n", ret, request, count);
1200 		goto FAIL;
1201 	}
1202 	pcieb->pcieb_init_flags |= PCIEB_INIT_ALLOC;
1203 
1204 	/* Save the actual number of interrupts allocated */
1205 	pcieb->pcieb_intr_count = count;
1206 	if (count < request) {
1207 		PCIEB_DEBUG(DBG_ATTACH, dip, "bdf 0%x: Requested Intr: %d"
1208 		    " Received: %d\n", bus_p->bus_bdf, request, count);
1209 	}
1210 
1211 	/*
1212 	 * NVidia (MCP55 and other) chipsets have a errata that if the number
1213 	 * of requested MSI intrs is not allocated we have to fall back to INTx.
1214 	 */
1215 	if (intr_type == DDI_INTR_TYPE_MSI) {
1216 		if (PCIE_IS_RP(bus_p) && (vendorid == NVIDIA_VENDOR_ID)) {
1217 			if (request != count)
1218 				goto FAIL;
1219 		}
1220 	}
1221 
1222 	/* Get interrupt priority */
1223 	ret = ddi_intr_get_pri(pcieb->pcieb_htable[0],
1224 	    &pcieb->pcieb_intr_priority);
1225 	if (ret != DDI_SUCCESS) {
1226 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_pri() ret: %d\n",
1227 		    ret);
1228 		goto FAIL;
1229 	}
1230 
1231 	if (pcieb->pcieb_intr_priority >= LOCK_LEVEL) {
1232 		pcieb->pcieb_intr_priority = LOCK_LEVEL - 1;
1233 		ret = ddi_intr_set_pri(pcieb->pcieb_htable[0],
1234 		    pcieb->pcieb_intr_priority);
1235 		if (ret != DDI_SUCCESS) {
1236 			PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_set_pri() ret:"
1237 			" %d\n", ret);
1238 
1239 			goto FAIL;
1240 		}
1241 	}
1242 
1243 	mutex_init(&pcieb->pcieb_intr_mutex, NULL, MUTEX_DRIVER, NULL);
1244 
1245 	pcieb->pcieb_init_flags |= PCIEB_INIT_MUTEX;
1246 
1247 	for (count = 0; count < pcieb->pcieb_intr_count; count++) {
1248 		ret = ddi_intr_add_handler(pcieb->pcieb_htable[count],
1249 		    pcieb_intr_handler, (caddr_t)pcieb,
1250 		    (caddr_t)(uintptr_t)(inum + count));
1251 
1252 		if (ret != DDI_SUCCESS) {
1253 			PCIEB_DEBUG(DBG_ATTACH, dip, "Cannot add "
1254 			    "interrupt(%d)\n", ret);
1255 			break;
1256 		}
1257 	}
1258 
1259 	/* If unsucessful, remove the added handlers */
1260 	if (ret != DDI_SUCCESS) {
1261 		for (x = 0; x < count; x++) {
1262 			(void) ddi_intr_remove_handler(pcieb->pcieb_htable[x]);
1263 		}
1264 		goto FAIL;
1265 	}
1266 
1267 	pcieb->pcieb_init_flags |= PCIEB_INIT_HANDLER;
1268 
1269 	(void) ddi_intr_get_cap(pcieb->pcieb_htable[0], &intr_cap);
1270 
1271 	/*
1272 	 * Get this intr lock because we are not quite ready to handle
1273 	 * interrupts immediately after enabling it. The MSI multi register
1274 	 * gets programmed in ddi_intr_enable after which we need to get the
1275 	 * MSI offsets for Hotplug/AER.
1276 	 */
1277 	mutex_enter(&pcieb->pcieb_intr_mutex);
1278 
1279 	if (intr_cap & DDI_INTR_FLAG_BLOCK) {
1280 		(void) ddi_intr_block_enable(pcieb->pcieb_htable,
1281 		    pcieb->pcieb_intr_count);
1282 		pcieb->pcieb_init_flags |= PCIEB_INIT_BLOCK;
1283 	} else {
1284 		for (count = 0; count < pcieb->pcieb_intr_count; count++) {
1285 			(void) ddi_intr_enable(pcieb->pcieb_htable[count]);
1286 		}
1287 	}
1288 	pcieb->pcieb_init_flags |= PCIEB_INIT_ENABLE;
1289 
1290 	/* Save the interrupt type */
1291 	pcieb->pcieb_intr_type = intr_type;
1292 
1293 	/* Get the MSI offset for hotplug/PME from the PCIe cap reg */
1294 	if (intr_type == DDI_INTR_TYPE_MSI) {
1295 		uint16_t pcie_msi_off;
1296 		pcie_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0,
1297 		    bus_p->bus_pcie_off, PCIE_PCIECAP) &
1298 		    PCIE_PCIECAP_INT_MSG_NUM;
1299 
1300 		if (pcie_msi_off >= count) {
1301 			PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %u in PCIe "
1302 			    "cap > max allocated %d\n", pcie_msi_off, count);
1303 			mutex_exit(&pcieb->pcieb_intr_mutex);
1304 			goto FAIL;
1305 		}
1306 
1307 		if (is_hp) {
1308 			pcieb->pcieb_isr_tab[pcie_msi_off] |= PCIEB_INTR_SRC_HP;
1309 		}
1310 
1311 		if (is_pme) {
1312 			pcieb->pcieb_isr_tab[pcie_msi_off] |=
1313 			    PCIEB_INTR_SRC_PME;
1314 		}
1315 
1316 		if (is_lbw) {
1317 			pcieb->pcieb_isr_tab[pcie_msi_off] |=
1318 			    PCIEB_INTR_SRC_LBW;
1319 		}
1320 	} else {
1321 		/* INTx handles only Hotplug interrupts */
1322 		if (is_hp)
1323 			pcieb->pcieb_isr_tab[0] |= PCIEB_INTR_SRC_HP;
1324 	}
1325 
1326 
1327 	/*
1328 	 * Get the MSI offset for errors from the AER Root Error status
1329 	 * register.
1330 	 */
1331 	if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p)) {
1332 		if (PCIE_HAS_AER(bus_p)) {
1333 			int aer_msi_off;
1334 			aer_msi_off = (PCI_XCAP_GET32(bus_p->bus_cfg_hdl, 0,
1335 			    bus_p->bus_aer_off, PCIE_AER_RE_STS) >>
1336 			    PCIE_AER_RE_STS_MSG_NUM_SHIFT) &
1337 			    PCIE_AER_RE_STS_MSG_NUM_MASK;
1338 
1339 			if (aer_msi_off >= count) {
1340 				PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %d in"
1341 				    " AER cap > max allocated %d\n",
1342 				    aer_msi_off, count);
1343 				mutex_exit(&pcieb->pcieb_intr_mutex);
1344 				goto FAIL;
1345 			}
1346 			pcieb->pcieb_isr_tab[aer_msi_off] |= PCIEB_INTR_SRC_AER;
1347 		} else {
1348 			/*
1349 			 * This RP does not have AER. Fallback to the
1350 			 * SERR+Machinecheck approach if available.
1351 			 */
1352 			pcieb->pcieb_no_aer_msi = B_TRUE;
1353 		}
1354 	}
1355 
1356 	mutex_exit(&pcieb->pcieb_intr_mutex);
1357 	return (DDI_SUCCESS);
1358 
1359 FAIL:
1360 	pcieb_intr_fini(pcieb);
1361 	return (DDI_FAILURE);
1362 }
1363 
1364 static void
1365 pcieb_intr_fini(pcieb_devstate_t *pcieb)
1366 {
1367 	int x;
1368 	int count = pcieb->pcieb_intr_count;
1369 	int flags = pcieb->pcieb_init_flags;
1370 
1371 	if ((flags & PCIEB_INIT_ENABLE) &&
1372 	    (flags & PCIEB_INIT_BLOCK)) {
1373 		(void) ddi_intr_block_disable(pcieb->pcieb_htable, count);
1374 		flags &= ~(PCIEB_INIT_ENABLE |
1375 		    PCIEB_INIT_BLOCK);
1376 	}
1377 
1378 	if (flags & PCIEB_INIT_MUTEX)
1379 		mutex_destroy(&pcieb->pcieb_intr_mutex);
1380 
1381 	for (x = 0; x < count; x++) {
1382 		if (flags & PCIEB_INIT_ENABLE)
1383 			(void) ddi_intr_disable(pcieb->pcieb_htable[x]);
1384 
1385 		if (flags & PCIEB_INIT_HANDLER)
1386 			(void) ddi_intr_remove_handler(pcieb->pcieb_htable[x]);
1387 
1388 		if (flags & PCIEB_INIT_ALLOC)
1389 			(void) ddi_intr_free(pcieb->pcieb_htable[x]);
1390 	}
1391 
1392 	flags &= ~(PCIEB_INIT_ENABLE | PCIEB_INIT_HANDLER | PCIEB_INIT_ALLOC |
1393 	    PCIEB_INIT_MUTEX);
1394 
1395 	if (flags & PCIEB_INIT_HTABLE)
1396 		kmem_free(pcieb->pcieb_htable, pcieb->pcieb_htable_size);
1397 
1398 	flags &= ~PCIEB_INIT_HTABLE;
1399 
1400 	pcieb->pcieb_init_flags &= flags;
1401 }
1402 
1403 /*
1404  * Checks if this device needs MSIs enabled or not.
1405  */
1406 /*ARGSUSED*/
1407 static int
1408 pcieb_msi_supported(dev_info_t *dip)
1409 {
1410 	return ((pcieb_enable_msi && pcieb_plat_msi_supported(dip)) ?
1411 	    DDI_SUCCESS: DDI_FAILURE);
1412 }
1413 
1414 /*ARGSUSED*/
1415 static int
1416 pcieb_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap,
1417     ddi_iblock_cookie_t *ibc)
1418 {
1419 	pcieb_devstate_t  *pcieb = ddi_get_soft_state(pcieb_state,
1420 	    ddi_get_instance(dip));
1421 
1422 	ASSERT(ibc != NULL);
1423 	*ibc = pcieb->pcieb_fm_ibc;
1424 
1425 	return (DEVI(dip)->devi_fmhdl->fh_cap | DDI_FM_ACCCHK_CAPABLE |
1426 	    DDI_FM_DMACHK_CAPABLE);
1427 }
1428 
1429 static int
1430 pcieb_fm_init(pcieb_devstate_t *pcieb_p)
1431 {
1432 	dev_info_t	*dip = pcieb_p->pcieb_dip;
1433 	int		fm_cap = DDI_FM_EREPORT_CAPABLE;
1434 
1435 	/*
1436 	 * Request our capability level and get our parents capability
1437 	 * and ibc.
1438 	 */
1439 	ddi_fm_init(dip, &fm_cap, &pcieb_p->pcieb_fm_ibc);
1440 
1441 	return (DDI_SUCCESS);
1442 }
1443 
1444 /*
1445  * Breakdown our FMA resources
1446  */
1447 static void
1448 pcieb_fm_fini(pcieb_devstate_t *pcieb_p)
1449 {
1450 	/*
1451 	 * Clean up allocated fm structures
1452 	 */
1453 	ddi_fm_fini(pcieb_p->pcieb_dip);
1454 }
1455 
1456 static int
1457 pcieb_open(dev_t *devp, int flags, int otyp, cred_t *credp)
1458 {
1459 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(*devp));
1460 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1461 	int	rv;
1462 
1463 	if (pcieb == NULL)
1464 		return (ENXIO);
1465 
1466 	mutex_enter(&pcieb->pcieb_mutex);
1467 	rv = pcie_open(pcieb->pcieb_dip, devp, flags, otyp, credp);
1468 	mutex_exit(&pcieb->pcieb_mutex);
1469 
1470 	return (rv);
1471 }
1472 
1473 static int
1474 pcieb_close(dev_t dev, int flags, int otyp, cred_t *credp)
1475 {
1476 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(dev));
1477 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1478 	int	rv;
1479 
1480 	if (pcieb == NULL)
1481 		return (ENXIO);
1482 
1483 	mutex_enter(&pcieb->pcieb_mutex);
1484 	rv = pcie_close(pcieb->pcieb_dip, dev, flags, otyp, credp);
1485 	mutex_exit(&pcieb->pcieb_mutex);
1486 
1487 	return (rv);
1488 }
1489 
1490 static int
1491 pcieb_ioctl_retrain(pcieb_devstate_t *pcieb, cred_t *credp)
1492 {
1493 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1494 
1495 	if (drv_priv(credp) != 0) {
1496 		return (EPERM);
1497 	}
1498 
1499 	if (!PCIE_IS_PCIE(bus_p)) {
1500 		return (ENOTSUP);
1501 	}
1502 
1503 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1504 		return (ENOTSUP);
1505 	}
1506 
1507 	return (pcie_link_retrain(pcieb->pcieb_dip));
1508 }
1509 
1510 static int
1511 pcieb_ioctl_get_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
1512     cred_t *credp)
1513 {
1514 	pcie_bus_t			*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1515 	pcieb_ioctl_target_speed_t	pits;
1516 
1517 	if (drv_priv(credp) != 0) {
1518 		return (EPERM);
1519 	}
1520 
1521 	if (!PCIE_IS_PCIE(bus_p)) {
1522 		return (ENOTSUP);
1523 	}
1524 
1525 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1526 		return (ENOTSUP);
1527 	}
1528 
1529 	pits.pits_flags = 0;
1530 	pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
1531 
1532 	mutex_enter(&bus_p->bus_speed_mutex);
1533 	if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
1534 		pits.pits_flags |= PCIEB_FLAGS_ADMIN_SET;
1535 	}
1536 	switch (bus_p->bus_target_speed) {
1537 	case PCIE_LINK_SPEED_2_5:
1538 		pits.pits_speed = PCIEB_LINK_SPEED_GEN1;
1539 		break;
1540 	case PCIE_LINK_SPEED_5:
1541 		pits.pits_speed = PCIEB_LINK_SPEED_GEN2;
1542 		break;
1543 	case PCIE_LINK_SPEED_8:
1544 		pits.pits_speed = PCIEB_LINK_SPEED_GEN3;
1545 		break;
1546 	case PCIE_LINK_SPEED_16:
1547 		pits.pits_speed = PCIEB_LINK_SPEED_GEN4;
1548 		break;
1549 	case PCIE_LINK_SPEED_32:
1550 		pits.pits_speed = PCIEB_LINK_SPEED_GEN5;
1551 		break;
1552 	case PCIE_LINK_SPEED_64:
1553 		pits.pits_speed = PCIEB_LINK_SPEED_GEN6;
1554 		break;
1555 	default:
1556 		pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
1557 		break;
1558 	}
1559 	mutex_exit(&bus_p->bus_speed_mutex);
1560 
1561 	if (ddi_copyout(&pits, (void *)arg, sizeof (pits),
1562 	    mode & FKIOCTL) != 0) {
1563 		return (EFAULT);
1564 	}
1565 
1566 	return (0);
1567 }
1568 
1569 static int
1570 pcieb_ioctl_set_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
1571     cred_t *credp)
1572 {
1573 	pcie_bus_t			*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1574 	pcieb_ioctl_target_speed_t	pits;
1575 	pcie_link_speed_t		speed;
1576 
1577 	if (drv_priv(credp) != 0) {
1578 		return (EPERM);
1579 	}
1580 
1581 	if (!PCIE_IS_PCIE(bus_p)) {
1582 		return (ENOTSUP);
1583 	}
1584 
1585 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1586 		return (ENOTSUP);
1587 	}
1588 
1589 	if (ddi_copyin((void *)arg, &pits, sizeof (pits),
1590 	    mode & FKIOCTL) != 0) {
1591 		return (EFAULT);
1592 	}
1593 
1594 	if (pits.pits_flags != 0) {
1595 		return (EINVAL);
1596 	}
1597 
1598 	switch (pits.pits_speed) {
1599 	case PCIEB_LINK_SPEED_GEN1:
1600 		speed = PCIE_LINK_SPEED_2_5;
1601 		break;
1602 	case PCIEB_LINK_SPEED_GEN2:
1603 		speed = PCIE_LINK_SPEED_5;
1604 		break;
1605 	case PCIEB_LINK_SPEED_GEN3:
1606 		speed = PCIE_LINK_SPEED_8;
1607 		break;
1608 	case PCIEB_LINK_SPEED_GEN4:
1609 		speed = PCIE_LINK_SPEED_16;
1610 		break;
1611 	case PCIEB_LINK_SPEED_GEN5:
1612 		speed = PCIE_LINK_SPEED_32;
1613 		break;
1614 	case PCIEB_LINK_SPEED_GEN6:
1615 		speed = PCIE_LINK_SPEED_64;
1616 		break;
1617 	default:
1618 		return (EINVAL);
1619 	}
1620 
1621 	return (pcie_link_set_target(pcieb->pcieb_dip, speed));
1622 }
1623 
1624 static int
1625 pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1626     int *rvalp)
1627 {
1628 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(dev));
1629 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1630 	int		rv;
1631 
1632 	if (pcieb == NULL)
1633 		return (ENXIO);
1634 
1635 	/*
1636 	 * Check if this is one of the commands that the bridge driver natively
1637 	 * understands. There are only a handful of such private ioctls defined
1638 	 * in pcieb_ioctl.h. Otherwise, this ioctl should be handled by the
1639 	 * general pcie driver.
1640 	 */
1641 	switch (cmd) {
1642 	case PCIEB_IOCTL_RETRAIN:
1643 		rv = pcieb_ioctl_retrain(pcieb, credp);
1644 		break;
1645 	case PCIEB_IOCTL_GET_TARGET_SPEED:
1646 		rv = pcieb_ioctl_get_speed(pcieb, arg, mode, credp);
1647 		break;
1648 	case PCIEB_IOCTL_SET_TARGET_SPEED:
1649 		rv = pcieb_ioctl_set_speed(pcieb, arg, mode, credp);
1650 		break;
1651 	default:
1652 		/* To handle devctl and hotplug related ioctls */
1653 		rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp,
1654 		    rvalp);
1655 		break;
1656 	}
1657 
1658 	return (rv);
1659 }
1660 
1661 /*
1662  * Common interrupt handler for hotplug, PME and errors.
1663  */
1664 static uint_t
1665 pcieb_intr_handler(caddr_t arg1, caddr_t arg2)
1666 {
1667 	pcieb_devstate_t *pcieb_p = (pcieb_devstate_t *)arg1;
1668 	dev_info_t	*dip = pcieb_p->pcieb_dip;
1669 	ddi_fm_error_t	derr;
1670 	int		sts = 0;
1671 	int		ret = DDI_INTR_UNCLAIMED;
1672 	int		isrc;
1673 
1674 	if (!(pcieb_p->pcieb_init_flags & PCIEB_INIT_ENABLE))
1675 		goto FAIL;
1676 
1677 	mutex_enter(&pcieb_p->pcieb_intr_mutex);
1678 	isrc = pcieb_p->pcieb_isr_tab[(int)(uintptr_t)arg2];
1679 	mutex_exit(&pcieb_p->pcieb_intr_mutex);
1680 
1681 	PCIEB_DEBUG(DBG_INTR, dip, "Received intr number %d\n",
1682 	    (int)(uintptr_t)arg2);
1683 
1684 	if (isrc == PCIEB_INTR_SRC_UNKNOWN)
1685 		goto FAIL;
1686 
1687 	if (isrc & (PCIEB_INTR_SRC_HP | PCIEB_INTR_SRC_LBW))
1688 		ret = pcie_intr(dip);
1689 
1690 	if (isrc & PCIEB_INTR_SRC_PME)
1691 		ret = DDI_INTR_CLAIMED;
1692 
1693 	/* AER Error */
1694 	if (isrc & PCIEB_INTR_SRC_AER) {
1695 		/*
1696 		 *  If MSI is shared with PME/hotplug then check Root Error
1697 		 *  Status Reg before claiming it. For now it's ok since
1698 		 *  we know we get 2 MSIs.
1699 		 */
1700 		ret = DDI_INTR_CLAIMED;
1701 		bzero(&derr, sizeof (ddi_fm_error_t));
1702 		derr.fme_version = DDI_FME_VERSION;
1703 		mutex_enter(&pcieb_p->pcieb_peek_poke_mutex);
1704 		mutex_enter(&pcieb_p->pcieb_err_mutex);
1705 
1706 		pf_eh_enter(PCIE_DIP2BUS(dip));
1707 		PCIE_ROOT_EH_SRC(PCIE_DIP2PFD(dip))->intr_type =
1708 		    PF_INTR_TYPE_AER;
1709 
1710 		if ((DEVI(dip)->devi_fmhdl->fh_cap) & DDI_FM_EREPORT_CAPABLE)
1711 			sts = pf_scan_fabric(dip, &derr, NULL);
1712 		pf_eh_exit(PCIE_DIP2BUS(dip));
1713 
1714 		mutex_exit(&pcieb_p->pcieb_err_mutex);
1715 		mutex_exit(&pcieb_p->pcieb_peek_poke_mutex);
1716 		if (pcieb_die & sts)
1717 			fm_panic("%s-%d: PCI(-X) Express Fatal Error. (0x%x)",
1718 			    ddi_driver_name(dip), ddi_get_instance(dip), sts);
1719 	}
1720 FAIL:
1721 	return (ret);
1722 }
1723 
1724 /*
1725  * Some PCI-X to PCI-E bridges do not support full 64-bit addressing on the
1726  * PCI-X side of the bridge.  We build a special version of this driver for
1727  * those bridges, which uses PCIEB_ADDR_LIMIT_LO and/or PCIEB_ADDR_LIMIT_HI
1728  * to define the range of values which the chip can handle.  The code below
1729  * then clamps the DMA address range supplied by the driver, preventing the
1730  * PCI-E nexus driver from allocating any memory the bridge can't deal
1731  * with.
1732  */
1733 static int
1734 pcieb_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
1735     ddi_dma_attr_t *attr_p, int (*waitfp)(caddr_t), caddr_t arg,
1736     ddi_dma_handle_t *handlep)
1737 {
1738 	int		ret;
1739 #ifdef	PCIEB_BCM
1740 	uint64_t	lim;
1741 
1742 	/*
1743 	 * If the leaf device's limits are outside than what the Broadcom
1744 	 * bridge can handle, we need to clip the values passed up the chain.
1745 	 */
1746 	lim = attr_p->dma_attr_addr_lo;
1747 	attr_p->dma_attr_addr_lo = MAX(lim, PCIEB_ADDR_LIMIT_LO);
1748 
1749 	lim = attr_p->dma_attr_addr_hi;
1750 	attr_p->dma_attr_addr_hi = MIN(lim, PCIEB_ADDR_LIMIT_HI);
1751 
1752 #endif	/* PCIEB_BCM */
1753 
1754 	/*
1755 	 * This is a software workaround to fix the Broadcom 5714/5715 PCIe-PCI
1756 	 * bridge prefetch bug. Intercept the DMA alloc handle request and set
1757 	 * PX_DMAI_FLAGS_MAP_BUFZONE flag in the handle. If this flag is set,
1758 	 * the px nexus driver will allocate an extra page & make it valid one,
1759 	 * for any DVMA request that comes from any of the Broadcom bridge child
1760 	 * devices.
1761 	 */
1762 	if ((ret = ddi_dma_allochdl(dip, rdip, attr_p, waitfp, arg,
1763 	    handlep)) == DDI_SUCCESS) {
1764 		ddi_dma_impl_t	*mp = (ddi_dma_impl_t *)*handlep;
1765 #ifdef	PCIEB_BCM
1766 		mp->dmai_inuse |= PX_DMAI_FLAGS_MAP_BUFZONE;
1767 #endif	/* PCIEB_BCM */
1768 		/*
1769 		 * For a given rdip, update mp->dmai_bdf with the bdf value
1770 		 * of pcieb's immediate child or secondary bus-id of the
1771 		 * PCIe2PCI bridge.
1772 		 */
1773 		mp->dmai_minxfer = pcie_get_bdf_for_dma_xfer(dip, rdip);
1774 	}
1775 
1776 	return (ret);
1777 }
1778 
1779 /*
1780  * FDVMA feature is not supported for any child device of Broadcom 5714/5715
1781  * PCIe-PCI bridge due to prefetch bug. Return failure immediately, so that
1782  * these drivers will switch to regular DVMA path.
1783  */
1784 /*ARGSUSED*/
1785 static int
1786 pcieb_dma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
1787     enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1788     uint_t cache_flags)
1789 {
1790 	int	ret;
1791 
1792 #ifdef	PCIEB_BCM
1793 	if (cmd == DDI_DMA_RESERVE)
1794 		return (DDI_FAILURE);
1795 #endif	/* PCIEB_BCM */
1796 
1797 	if (((ret = ddi_dma_mctl(dip, rdip, handle, cmd, offp, lenp, objp,
1798 	    cache_flags)) == DDI_SUCCESS) && (cmd == DDI_DMA_RESERVE)) {
1799 		ddi_dma_impl_t	*mp = (ddi_dma_impl_t *)*objp;
1800 
1801 		/*
1802 		 * For a given rdip, update mp->dmai_bdf with the bdf value
1803 		 * of pcieb's immediate child or secondary bus-id of the
1804 		 * PCIe2PCI bridge.
1805 		 */
1806 		mp->dmai_minxfer = pcie_get_bdf_for_dma_xfer(dip, rdip);
1807 	}
1808 
1809 	return (ret);
1810 }
1811 
1812 static int
1813 pcieb_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
1814     ddi_intr_handle_impl_t *hdlp, void *result)
1815 {
1816 	return (pcieb_plat_intr_ops(dip, rdip, intr_op, hdlp, result));
1817 
1818 }
1819 
1820 /*
1821  * Power management related initialization specific to pcieb.
1822  * Called by pcieb_attach()
1823  */
1824 static int
1825 pcieb_pwr_setup(dev_info_t *dip)
1826 {
1827 	char *comp_array[5];
1828 	int i;
1829 	ddi_acc_handle_t conf_hdl;
1830 	uint16_t pmcap, cap_ptr;
1831 	pcie_pwr_t *pwr_p;
1832 
1833 	/* Some platforms/devices may choose to disable PM */
1834 	if (pcieb_plat_pwr_disable(dip)) {
1835 		(void) pcieb_pwr_disable(dip);
1836 		return (DDI_SUCCESS);
1837 	}
1838 
1839 	ASSERT(PCIE_PMINFO(dip));
1840 	pwr_p = PCIE_NEXUS_PMINFO(dip);
1841 	ASSERT(pwr_p);
1842 
1843 	/* Code taken from pci_pci driver */
1844 	if (pci_config_setup(dip, &pwr_p->pwr_conf_hdl) != DDI_SUCCESS) {
1845 		PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_setup: pci_config_setup "
1846 		    "failed\n");
1847 		return (DDI_FAILURE);
1848 	}
1849 	conf_hdl = pwr_p->pwr_conf_hdl;
1850 
1851 	/*
1852 	 * Walk the capabilities searching for a PM entry.
1853 	 */
1854 	if ((PCI_CAP_LOCATE(conf_hdl, PCI_CAP_ID_PM, &cap_ptr)) ==
1855 	    DDI_FAILURE) {
1856 		PCIEB_DEBUG(DBG_PWR, dip, "switch/bridge does not support PM. "
1857 		    " PCI PM data structure not found in config header\n");
1858 		pci_config_teardown(&conf_hdl);
1859 		return (DDI_SUCCESS);
1860 	}
1861 	/*
1862 	 * Save offset to pmcsr for future references.
1863 	 */
1864 	pwr_p->pwr_pmcsr_offset = cap_ptr + PCI_PMCSR;
1865 	pmcap = PCI_CAP_GET16(conf_hdl, 0, cap_ptr, PCI_PMCAP);
1866 	if (pmcap & PCI_PMCAP_D1) {
1867 		PCIEB_DEBUG(DBG_PWR, dip, "D1 state supported\n");
1868 		pwr_p->pwr_pmcaps |= PCIE_SUPPORTS_D1;
1869 	}
1870 	if (pmcap & PCI_PMCAP_D2) {
1871 		PCIEB_DEBUG(DBG_PWR, dip, "D2 state supported\n");
1872 		pwr_p->pwr_pmcaps |= PCIE_SUPPORTS_D2;
1873 	}
1874 
1875 	i = 0;
1876 	comp_array[i++] = "NAME=PCIe switch/bridge PM";
1877 	comp_array[i++] = "0=Power Off (D3)";
1878 	if (pwr_p->pwr_pmcaps & PCIE_SUPPORTS_D2)
1879 		comp_array[i++] = "1=D2";
1880 	if (pwr_p->pwr_pmcaps & PCIE_SUPPORTS_D1)
1881 		comp_array[i++] = "2=D1";
1882 	comp_array[i++] = "3=Full Power D0";
1883 
1884 	/*
1885 	 * Create pm-components property, if it does not exist already.
1886 	 */
1887 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, dip,
1888 	    "pm-components", comp_array, i) != DDI_PROP_SUCCESS) {
1889 		PCIEB_DEBUG(DBG_PWR, dip, "could not create pm-components "
1890 		    " prop\n");
1891 		pci_config_teardown(&conf_hdl);
1892 		return (DDI_FAILURE);
1893 	}
1894 	return (pcieb_pwr_init_and_raise(dip, pwr_p));
1895 }
1896 
1897 /*
1898  * undo whatever is done in pcieb_pwr_setup. called by pcieb_detach()
1899  */
1900 static void
1901 pcieb_pwr_teardown(dev_info_t *dip)
1902 {
1903 	pcie_pwr_t	*pwr_p;
1904 
1905 	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
1906 		return;
1907 
1908 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "pm-components");
1909 	if (pwr_p->pwr_conf_hdl)
1910 		pci_config_teardown(&pwr_p->pwr_conf_hdl);
1911 }
1912 
1913 /*
1914  * Initializes the power level and raise the power to D0, if it is
1915  * not at D0.
1916  */
1917 static int
1918 pcieb_pwr_init_and_raise(dev_info_t *dip, pcie_pwr_t *pwr_p)
1919 {
1920 	uint16_t pmcsr;
1921 	int ret = DDI_SUCCESS;
1922 
1923 	/*
1924 	 * Intialize our power level from PMCSR. The common code initializes
1925 	 * this to UNKNOWN. There is no guarantee that we will be at full
1926 	 * power at attach. If we are not at D0, raise the power.
1927 	 */
1928 	pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
1929 	pmcsr &= PCI_PMCSR_STATE_MASK;
1930 	switch (pmcsr) {
1931 	case PCI_PMCSR_D0:
1932 		pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1933 		break;
1934 
1935 	case PCI_PMCSR_D1:
1936 		pwr_p->pwr_func_lvl = PM_LEVEL_D1;
1937 		break;
1938 
1939 	case PCI_PMCSR_D2:
1940 		pwr_p->pwr_func_lvl = PM_LEVEL_D2;
1941 		break;
1942 
1943 	case PCI_PMCSR_D3HOT:
1944 		pwr_p->pwr_func_lvl = PM_LEVEL_D3;
1945 		break;
1946 
1947 	default:
1948 		break;
1949 	}
1950 
1951 	/* Raise the power to D0. */
1952 	if (pwr_p->pwr_func_lvl != PM_LEVEL_D0 &&
1953 	    ((ret = pm_raise_power(dip, 0, PM_LEVEL_D0)) != DDI_SUCCESS)) {
1954 		/*
1955 		 * Read PMCSR again. If it is at D0, ignore the return
1956 		 * value from pm_raise_power.
1957 		 */
1958 		pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl,
1959 		    pwr_p->pwr_pmcsr_offset);
1960 		if ((pmcsr & PCI_PMCSR_STATE_MASK) == PCI_PMCSR_D0)
1961 			ret = DDI_SUCCESS;
1962 		else {
1963 			PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_setup: could not "
1964 			    "raise power to D0 \n");
1965 		}
1966 	}
1967 	if (ret == DDI_SUCCESS)
1968 		pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1969 	return (ret);
1970 }
1971 
1972 /*
1973  * Disable PM for x86 and PLX 8532 switch.
1974  * For PLX Transitioning one port on this switch to low power causes links
1975  * on other ports on the same station to die. Due to PLX erratum #34, we
1976  * can't allow the downstream device go to non-D0 state.
1977  */
1978 static int
1979 pcieb_pwr_disable(dev_info_t *dip)
1980 {
1981 	pcie_pwr_t *pwr_p;
1982 
1983 	ASSERT(PCIE_PMINFO(dip));
1984 	pwr_p = PCIE_NEXUS_PMINFO(dip);
1985 	ASSERT(pwr_p);
1986 	PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_disable: disabling PM\n");
1987 	pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1988 	pwr_p->pwr_flags = PCIE_NO_CHILD_PM;
1989 	return (DDI_SUCCESS);
1990 }
1991 
#ifdef DEBUG
/* When non-zero, pcieb_dbg() also prints while servicing an interrupt. */
int pcieb_dbg_intr_print = 0;

/*
 * Debug print routine (DEBUG builds only).  Emits nothing unless
 * pcieb_dbg_print is set.  When dip is given, the message is prefixed with
 * the driver name, instance and the pcieb_debug_sym[] entry for bit.  The
 * formatted message itself is suppressed in interrupt context unless
 * pcieb_dbg_intr_print is set.
 */
void
pcieb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...)
{
	va_list args;

	if (!pcieb_dbg_print)
		return;

	if (dip) {
		prom_printf("%s(%d): %s", ddi_driver_name(dip),
		    ddi_get_instance(dip), pcieb_debug_sym[bit]);
	}

	va_start(args, fmt);
	if (!servicing_interrupt() || pcieb_dbg_intr_print)
		prom_vprintf(fmt, args);
	va_end(args);
}
#endif
2017 
2018 static void
2019 pcieb_id_props(pcieb_devstate_t *pcieb)
2020 {
2021 	uint64_t serialid = 0;	/* 40b field of EUI-64 serial no. register */
2022 	uint16_t cap_ptr;
2023 	uint8_t fic = 0;	/* 1 = first in chassis device */
2024 	pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
2025 	ddi_acc_handle_t config_handle = bus_p->bus_cfg_hdl;
2026 
2027 	/*
2028 	 * Identify first in chassis.  In the special case of a Sun branded
2029 	 * PLX device, it obviously is first in chassis.  Otherwise, in the
2030 	 * general case, look for an Expansion Slot Register and check its
2031 	 * first-in-chassis bit.
2032 	 */
2033 #ifdef	PX_PLX
2034 	uint16_t vendor_id = bus_p->bus_dev_ven_id & 0xFFFF;
2035 	uint16_t device_id = bus_p->bus_dev_ven_id >> 16;
2036 	if ((vendor_id == PXB_VENDOR_SUN) &&
2037 	    ((device_id == PXB_DEVICE_PLX_PCIX) ||
2038 	    (device_id == PXB_DEVICE_PLX_PCIE))) {
2039 		fic = 1;
2040 	}
2041 #endif	/* PX_PLX */
2042 	if ((fic == 0) && ((PCI_CAP_LOCATE(config_handle,
2043 	    PCI_CAP_ID_SLOT_ID, &cap_ptr)) != DDI_FAILURE)) {
2044 		uint8_t esr = PCI_CAP_GET8(config_handle, 0,
2045 		    cap_ptr, PCI_CAP_ID_REGS_OFF);
2046 		if (PCI_CAPSLOT_FIC(esr))
2047 			fic = 1;
2048 	}
2049 
2050 	if ((PCI_CAP_LOCATE(config_handle,
2051 	    PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_SER), &cap_ptr)) != DDI_FAILURE) {
2052 		/* Serialid can be 0 thru a full 40b number */
2053 		serialid = PCI_XCAP_GET32(config_handle, 0,
2054 		    cap_ptr, PCIE_SER_SID_UPPER_DW);
2055 		serialid <<= 32;
2056 		serialid |= PCI_XCAP_GET32(config_handle, 0,
2057 		    cap_ptr, PCIE_SER_SID_LOWER_DW);
2058 	}
2059 
2060 	if (fic)
2061 		(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, pcieb->pcieb_dip,
2062 		    "first-in-chassis");
2063 	if (serialid)
2064 		(void) ddi_prop_update_int64(DDI_DEV_T_NONE, pcieb->pcieb_dip,
2065 		    "serialid#", serialid);
2066 }
2067 
2068 static void
2069 pcieb_create_ranges_prop(dev_info_t *dip,
2070     ddi_acc_handle_t config_handle)
2071 {
2072 	uint32_t base, limit;
2073 	ppb_ranges_t	ranges[PCIEB_RANGE_LEN];
2074 	uint8_t io_base_lo, io_limit_lo;
2075 	uint16_t io_base_hi, io_limit_hi, mem_base, mem_limit;
2076 	int i = 0, rangelen = sizeof (ppb_ranges_t)/sizeof (int);
2077 
2078 	io_base_lo = pci_config_get8(config_handle, PCI_BCNF_IO_BASE_LOW);
2079 	io_limit_lo = pci_config_get8(config_handle, PCI_BCNF_IO_LIMIT_LOW);
2080 	io_base_hi = pci_config_get16(config_handle, PCI_BCNF_IO_BASE_HI);
2081 	io_limit_hi = pci_config_get16(config_handle, PCI_BCNF_IO_LIMIT_HI);
2082 	mem_base = pci_config_get16(config_handle, PCI_BCNF_MEM_BASE);
2083 	mem_limit = pci_config_get16(config_handle, PCI_BCNF_MEM_LIMIT);
2084 
2085 	/*
2086 	 * Create ranges for IO space
2087 	 */
2088 	ranges[i].size_low = ranges[i].size_high = 0;
2089 	ranges[i].parent_mid = ranges[i].child_mid = ranges[i].parent_high = 0;
2090 	ranges[i].child_high = ranges[i].parent_high |=
2091 	    (PCI_REG_REL_M | PCI_ADDR_IO);
2092 	base = PCIEB_16bit_IOADDR(io_base_lo);
2093 	limit = PCIEB_16bit_IOADDR(io_limit_lo);
2094 
2095 	if ((io_base_lo & 0xf) == PCIEB_32BIT_IO) {
2096 		base = PCIEB_LADDR(base, io_base_hi);
2097 	}
2098 	if ((io_limit_lo & 0xf) == PCIEB_32BIT_IO) {
2099 		limit = PCIEB_LADDR(limit, io_limit_hi);
2100 	}
2101 
2102 	if ((io_base_lo & PCIEB_32BIT_IO) && (io_limit_hi > 0)) {
2103 		base = PCIEB_LADDR(base, io_base_hi);
2104 		limit = PCIEB_LADDR(limit, io_limit_hi);
2105 	}
2106 
2107 	/*
2108 	 * Create ranges for 32bit memory space
2109 	 */
2110 	base = PCIEB_32bit_MEMADDR(mem_base);
2111 	limit = PCIEB_32bit_MEMADDR(mem_limit);
2112 	ranges[i].size_low = ranges[i].size_high = 0;
2113 	ranges[i].parent_mid = ranges[i].child_mid = ranges[i].parent_high = 0;
2114 	ranges[i].child_high = ranges[i].parent_high |=
2115 	    (PCI_REG_REL_M | PCI_ADDR_MEM32);
2116 	ranges[i].child_low = ranges[i].parent_low = base;
2117 	if (limit >= base) {
2118 		ranges[i].size_low = limit - base + PCIEB_MEMGRAIN;
2119 		i++;
2120 	}
2121 
2122 	if (i) {
2123 		(void) ndi_prop_update_int_array(DDI_DEV_T_NONE, dip, "ranges",
2124 		    (int *)ranges, i * rangelen);
2125 	}
2126 }
2127 
2128 /*
2129  * For PCI and PCI-X devices including PCIe2PCI bridge, initialize
2130  * cache-line-size and latency timer configuration registers.
2131  */
2132 void
2133 pcieb_set_pci_perf_parameters(dev_info_t *dip, ddi_acc_handle_t cfg_hdl)
2134 {
2135 	uint_t	n;
2136 
2137 	/* Initialize cache-line-size configuration register if needed */
2138 	if (ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2139 	    "cache-line-size", 0) == 0) {
2140 		pci_config_put8(cfg_hdl, PCI_CONF_CACHE_LINESZ,
2141 		    PCIEB_CACHE_LINE_SIZE);
2142 		n = pci_config_get8(cfg_hdl, PCI_CONF_CACHE_LINESZ);
2143 		if (n != 0) {
2144 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
2145 			    "cache-line-size", n);
2146 		}
2147 	}
2148 
2149 	/* Initialize latency timer configuration registers if needed */
2150 	if (ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2151 	    "latency-timer", 0) == 0) {
2152 		uchar_t	min_gnt, latency_timer;
2153 		uchar_t header_type;
2154 
2155 		/* Determine the configuration header type */
2156 		header_type = pci_config_get8(cfg_hdl, PCI_CONF_HEADER);
2157 
2158 		if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) {
2159 			latency_timer = PCIEB_LATENCY_TIMER;
2160 			pci_config_put8(cfg_hdl, PCI_BCNF_LATENCY_TIMER,
2161 			    latency_timer);
2162 		} else {
2163 			min_gnt = pci_config_get8(cfg_hdl, PCI_CONF_MIN_G);
2164 			latency_timer = min_gnt * 8;
2165 		}
2166 
2167 		pci_config_put8(cfg_hdl, PCI_CONF_LATENCY_TIMER,
2168 		    latency_timer);
2169 		n = pci_config_get8(cfg_hdl, PCI_CONF_LATENCY_TIMER);
2170 		if (n != 0) {
2171 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
2172 			    "latency-timer", n);
2173 		}
2174 	}
2175 }
2176