xref: /illumos-gate/usr/src/uts/common/io/pciex/pcieb.c (revision d6beba26494f4877120c99b5931876f56ba5dee5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
26  * Copyright 2019 Joyent, Inc.
27  */
28 
29 /*
30  * Common x86 and SPARC PCI-E to PCI bus bridge nexus driver
31  *
32  * Background
33  * ----------
34  *
35  * The PCI Express (PCIe) specification defines that all of the PCIe devices in
36  * the system are connected together in a series of different fabrics. A way to
37  * think of these fabrics is that they are small networks where there are links
38  * between different devices and switches that allow fan out or fan in of the
39  * fabric. The entry point to that fabric is called a root complex and the
40  * fabric terminates at what is called an endpoint, which is really just PCIe
41  * terminology for the common cards that are inserted into the system (HBAs,
42  * NICs, USB, NVMe, etc.).
43  *
44  * The PCIe specification states that every link on the system has a virtual
45  * PCI-to-PCI bridge. This allows PCIe devices to still be configured the same
46  * way traditional PCI devices are to the operating system and allows them to
47  * have a traditional PCI bus, device, and function associated with them, even
48  * though there is no actual shared bus. In addition, bridges are also used to
49  * connect traditional PCI and PCI-X devices into them.
50  *
51  * The PCIe specification refers to upstream and downstream ports. Upstream
52  * ports are considered closer to the root complex and downstream ports are closer
53  * to the endpoint. We can divide the devices that the bridge driver attaches to
54  * into two groups. Those that are considered upstream ports, these include root
55  * complexes and parts of PCIe switches. And downstream ports, which are the
56  * other half of PCIe switches and endpoints (which this driver does not attach
57  * to, normal hardware-specific or class-specific drivers attach to those).
58  *
59  * Interrupt Management
60  * --------------------
61  *
62  * Upstream ports of bridges have additional things that we care about.
63  * Specifically they're the means through which we find out about:
64  *
65  *  - Advanced Error Reporting (AERs)
66  *  - Hotplug events
67  *  - Link Bandwidth Events
68  *  - Power Management Events (PME)
69  *
70  * Each of these features is an optional feature (though ones we hope are
71  * implemented). The features above are grouped into two different buckets based
72  * on which PCI capability they appear in. AER management is done through a PCI
73  * Express extended configuration header (it lives in extended PCI configuration
74  * space) called the 'Advanced Error Reporting Extended Capability'. The other
75  * events are all managed as part of the 'PCI Express Capability Structure'.
76  * This structure is found in traditional PCI configuration space.
77  *
78  * The way that the interrupts are programmed for these types of events differs
79  * a bit from the way one might expect a normal device to operate. For most
80  * devices, one allocates a number of interrupts based on a combination of what
81  * the device supports, what the OS supports per device, and the number the
82  * driver needs. Then the driver programs the device in a device-specific manner
83  * to indicate which events should trigger a specific interrupt vector.
84  *
85  * However, for both the AER and PCI capabilities, the driver has to do
86  * something different. The driver first allocates interrupts by programming the
87  * MSI or MSI-X table and then asks the device which interrupts have been
88  * assigned to these purposes. Because these events are only supported in
89  * 'upstream' devices, this does not interfere with the traditional management
90  * of MSI and MSI-X interrupts. At this time, the pcieb driver only supports the
91  * use of MSI interrupts.
92  *
93  * Once the interrupts have been allocated, we read back which vectors have been
94  * nominated by the device to cover the corresponding capability. The interrupt
95  * is allocated on a per-capability basis. Therefore, one interrupt would cover
96  * AERs, while another interrupt would cover the rest of the desired functions.
97  *
98  * To track which interrupts cover which behaviors, each driver state
99  * (pcieb_devstate_t) has a member called 'pcieb_isr_tab'. Each index represents
100  * an interrupt vector and there are a series of flags that represent the
101  * different possible interrupt sources: PCIEB_INTR_SRC_HP (hotplug),
102  * PCIEB_INTR_SRC_PME (power management event), PCIEB_INTR_SRC_AER (error
103  * reporting), PCIEB_INTR_SRC_LBW (link bandwidth).
104  *
105  * Because the hotplug, link bandwidth, and power management events all share
106  * the same vector, if an interrupt comes in, we must check all of the enabled
107  * sources that might generate this interrupt. It is highly likely that more
108  * than one will fire at the same time, for example, a hotplug event that fires
109  * because a device has been inserted or removed, will likely trigger a link
110  * bandwidth event.
111  *
112  * The pcieb driver itself does not actually have much logic to deal with and
113  * clear the interrupts in question. It generally speaking will vector most
114  * events back to the more general pcie driver or, in the case of AERs, initiate
115  * a scan of the fabric itself (also part of the pcie driver).
116  *
117  * Link Management
118  * ---------------
119  *
120  * The pcieb driver is used to take care of two different aspects of link
121  * management. The first of these, as described briefly above, is to monitor for
122  * changes to the negotiated link bandwidth. These events are managed by
123  * enabling support for the interrupts in the PCI Express Capability Structure.
124  * This is all taken care of by the pcie driver through functions like
125  * pcie_link_bw_enable().
126  *
127  * The second aspect of link management the pcieb driver enables is the ability
128  * to retrain the link and optionally limit the speed. This is enabled through a
129  * series of private ioctls that are driven through a private userland utility,
130  * /usr/lib/pci/pcieb. Eventually, this should be more fleshed out and a more
131  * uniform interface based around the devctls that can be leveraged across
132  * different classes of devices should be used.
133  *
134  * Under the hood this basically leverages the ability of the upstream port to
135  * retrain a link by writing a bit to the PCIe link control register. See
136  * pcieb_ioctl_retrain(). From there, if the driver ever receives a request to
137  * change the maximum speed, that is updated in the card; however, it does not
138  * immediately retrain the link. A separate ioctl request is required to do so.
139  * Once the speed has been changed, regardless of whether or not it has been
140  * retrained, that fact will always be noted.
141  */
142 
143 #include <sys/sysmacros.h>
144 #include <sys/conf.h>
145 #include <sys/kmem.h>
146 #include <sys/debug.h>
147 #include <sys/modctl.h>
148 #include <sys/autoconf.h>
149 #include <sys/ddi_impldefs.h>
150 #include <sys/pci.h>
151 #include <sys/ddi.h>
152 #include <sys/sunddi.h>
153 #include <sys/sunndi.h>
154 #include <sys/fm/util.h>
155 #include <sys/pci_cap.h>
156 #include <sys/pci_impl.h>
157 #include <sys/pcie_impl.h>
158 #include <sys/open.h>
159 #include <sys/stat.h>
160 #include <sys/file.h>
161 #include <sys/promif.h>		/* prom_printf */
162 #include <sys/disp.h>
163 #include <sys/pcie_pwr.h>
164 #include <sys/hotplug/pci/pcie_hp.h>
165 #include "pcieb.h"
166 #include "pcieb_ioctl.h"
167 #ifdef PX_PLX
168 #include <io/pciex/pcieb_plx.h>
169 #endif /* PX_PLX */
170 
171 /*LINTLIBRARY*/
172 
173 /* panic flag */
174 int pcieb_die = PF_ERR_FATAL_FLAGS;
175 int pcieb_disable_41210_wkarnd = 0;
176 
177 /* flag to turn on MSI support */
178 int pcieb_enable_msi = 1;
179 
#if defined(DEBUG)
/* Debug print control word; zero at load time. */
uint_t pcieb_dbg_print = 0;

/*
 * Printable names of the debug categories.  The entries must stay in the
 * same sequence as pcieb_debug_bit.
 */
static char *pcieb_debug_sym[] = {
	"attach",	/* 0 */
	"pwr",		/* 1 */
	"intr"		/* 2 */
};
#endif /* DEBUG */
189 
190 static int pcieb_bus_map(dev_info_t *, dev_info_t *, ddi_map_req_t *, off_t,
191 	off_t, caddr_t *);
192 static int pcieb_ctlops(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *,
193 	void *);
194 static int pcieb_fm_init(pcieb_devstate_t *pcieb_p);
195 static void pcieb_fm_fini(pcieb_devstate_t *pcieb_p);
196 static int pcieb_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap,
197     ddi_iblock_cookie_t *ibc_p);
198 static int pcieb_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
199 	ddi_dma_attr_t *attr_p, int (*waitfp)(caddr_t), caddr_t arg,
200 	ddi_dma_handle_t *handlep);
201 static int pcieb_dma_mctl(dev_info_t *dip, dev_info_t *rdip,
202 	ddi_dma_handle_t handle, enum ddi_dma_ctlops cmd, off_t *offp,
203 	size_t *lenp, caddr_t *objp, uint_t cache_flags);
204 static int pcieb_intr_ops(dev_info_t *dip, dev_info_t *rdip,
205 	ddi_intr_op_t intr_op, ddi_intr_handle_impl_t *hdlp, void *result);
206 
207 static struct bus_ops pcieb_bus_ops = {
208 	BUSO_REV,
209 	pcieb_bus_map,
210 	0,
211 	0,
212 	0,
213 	i_ddi_map_fault,
214 	0,
215 	pcieb_dma_allochdl,
216 	ddi_dma_freehdl,
217 	ddi_dma_bindhdl,
218 	ddi_dma_unbindhdl,
219 	ddi_dma_flush,
220 	ddi_dma_win,
221 	pcieb_dma_mctl,
222 	pcieb_ctlops,
223 	ddi_bus_prop_op,
224 	ndi_busop_get_eventcookie,	/* (*bus_get_eventcookie)();	*/
225 	ndi_busop_add_eventcall,	/* (*bus_add_eventcall)();	*/
226 	ndi_busop_remove_eventcall,	/* (*bus_remove_eventcall)();	*/
227 	ndi_post_event,			/* (*bus_post_event)();		*/
228 	NULL,				/* (*bus_intr_ctl)();		*/
229 	NULL,				/* (*bus_config)();		*/
230 	NULL,				/* (*bus_unconfig)();		*/
231 	pcieb_fm_init_child,		/* (*bus_fm_init)();		*/
232 	NULL,				/* (*bus_fm_fini)();		*/
233 	i_ndi_busop_access_enter,	/* (*bus_fm_access_enter)();	*/
234 	i_ndi_busop_access_exit,	/* (*bus_fm_access_exit)();	*/
235 	pcie_bus_power,			/* (*bus_power)();		*/
236 	pcieb_intr_ops,			/* (*bus_intr_op)();		*/
237 	pcie_hp_common_ops		/* (*bus_hp_op)();		*/
238 };
239 
240 static int	pcieb_open(dev_t *, int, int, cred_t *);
241 static int	pcieb_close(dev_t, int, int, cred_t *);
242 static int	pcieb_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
243 static int	pcieb_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
244 static uint_t	pcieb_intr_handler(caddr_t arg1, caddr_t arg2);
245 
246 /* PM related functions */
247 static int	pcieb_pwr_setup(dev_info_t *dip);
248 static int	pcieb_pwr_init_and_raise(dev_info_t *dip, pcie_pwr_t *pwr_p);
249 static void	pcieb_pwr_teardown(dev_info_t *dip);
250 static int	pcieb_pwr_disable(dev_info_t *dip);
251 
252 /* Hotplug related functions */
253 static void pcieb_id_props(pcieb_devstate_t *pcieb);
254 
255 /*
256  * soft state pointer
257  */
258 void *pcieb_state;
259 
260 static struct cb_ops pcieb_cb_ops = {
261 	pcieb_open,			/* open */
262 	pcieb_close,			/* close */
263 	nodev,				/* strategy */
264 	nodev,				/* print */
265 	nodev,				/* dump */
266 	nodev,				/* read */
267 	nodev,				/* write */
268 	pcieb_ioctl,			/* ioctl */
269 	nodev,				/* devmap */
270 	nodev,				/* mmap */
271 	nodev,				/* segmap */
272 	nochpoll,			/* poll */
273 	pcie_prop_op,			/* cb_prop_op */
274 	NULL,				/* streamtab */
275 	D_NEW | D_MP | D_HOTPLUG,	/* Driver compatibility flag */
276 	CB_REV,				/* rev */
277 	nodev,				/* int (*cb_aread)() */
278 	nodev				/* int (*cb_awrite)() */
279 };
280 
281 static int	pcieb_probe(dev_info_t *);
282 static int	pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
283 static int	pcieb_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
284 
285 static struct dev_ops pcieb_ops = {
286 	DEVO_REV,		/* devo_rev */
287 	0,			/* refcnt  */
288 	pcieb_info,		/* info */
289 	nulldev,		/* identify */
290 	pcieb_probe,		/* probe */
291 	pcieb_attach,		/* attach */
292 	pcieb_detach,		/* detach */
293 	nulldev,		/* reset */
294 	&pcieb_cb_ops,		/* driver operations */
295 	&pcieb_bus_ops,		/* bus operations */
296 	pcie_power,		/* power */
297 	ddi_quiesce_not_needed,		/* quiesce */
298 };
299 
300 /*
301  * Module linkage information for the kernel.
302  */
303 
304 static struct modldrv modldrv = {
305 	&mod_driverops, /* Type of module */
306 	"PCIe bridge/switch driver",
307 	&pcieb_ops,	/* driver ops */
308 };
309 
310 static struct modlinkage modlinkage = {
311 	MODREV_1,
312 	(void *)&modldrv,
313 	NULL
314 };
315 
316 /*
317  * forward function declarations:
318  */
319 static void	pcieb_uninitchild(dev_info_t *);
320 static int	pcieb_initchild(dev_info_t *child);
321 static void	pcieb_create_ranges_prop(dev_info_t *, ddi_acc_handle_t);
322 static boolean_t pcieb_is_pcie_device_type(dev_info_t *dip);
323 
324 /* interrupt related declarations */
325 static int	pcieb_msi_supported(dev_info_t *);
326 static int	pcieb_intr_attach(pcieb_devstate_t *pcieb);
327 static int	pcieb_intr_init(pcieb_devstate_t *pcieb_p, int intr_type);
328 static void	pcieb_intr_fini(pcieb_devstate_t *pcieb_p);
329 
330 int
331 _init(void)
332 {
333 	int e;
334 
335 	if ((e = ddi_soft_state_init(&pcieb_state, sizeof (pcieb_devstate_t),
336 	    1)) == 0 && (e = mod_install(&modlinkage)) != 0)
337 		ddi_soft_state_fini(&pcieb_state);
338 	return (e);
339 }
340 
341 int
342 _fini(void)
343 {
344 	int e;
345 
346 	if ((e = mod_remove(&modlinkage)) == 0) {
347 		ddi_soft_state_fini(&pcieb_state);
348 	}
349 	return (e);
350 }
351 
352 int
353 _info(struct modinfo *modinfop)
354 {
355 	return (mod_info(&modlinkage, modinfop));
356 }
357 
358 /* ARGSUSED */
359 static int
360 pcieb_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
361 {
362 	minor_t		minor = getminor((dev_t)arg);
363 	int		instance = PCI_MINOR_NUM_TO_INSTANCE(minor);
364 	pcieb_devstate_t *pcieb = ddi_get_soft_state(pcieb_state, instance);
365 	int		ret = DDI_SUCCESS;
366 
367 	switch (infocmd) {
368 	case DDI_INFO_DEVT2INSTANCE:
369 		*result = (void *)(intptr_t)instance;
370 		break;
371 	case DDI_INFO_DEVT2DEVINFO:
372 		if (pcieb == NULL) {
373 			ret = DDI_FAILURE;
374 			break;
375 		}
376 
377 		*result = (void *)pcieb->pcieb_dip;
378 		break;
379 	default:
380 		ret = DDI_FAILURE;
381 		break;
382 	}
383 
384 	return (ret);
385 }
386 
387 
388 /*ARGSUSED*/
389 static int
390 pcieb_probe(dev_info_t *devi)
391 {
392 	return (DDI_PROBE_SUCCESS);
393 }
394 
395 /*
396  * This is a workaround for an undocumented HW erratum with the
397  * multi-function, F0 and F2, Intel 41210 PCIe-to-PCI bridge. When
398  * Fn (cdip) attaches, this workaround is called to initialize Fn's
399  * sibling (sdip) with MPS/MRRS if it isn't already configured.
400  * Doing so prevents a malformed TLP panic.
401  */
402 static void
403 pcieb_41210_mps_wkrnd(dev_info_t *cdip)
404 {
405 	dev_info_t *sdip;
406 	ddi_acc_handle_t cfg_hdl;
407 	uint16_t cdip_dev_ctrl, cdip_mrrs_mps;
408 	pcie_bus_t *cdip_bus_p = PCIE_DIP2BUS(cdip);
409 
410 	/* Get cdip's MPS/MRRS already setup by pcie_initchild_mps() */
411 	ASSERT(cdip_bus_p);
412 	cdip_dev_ctrl  = PCIE_CAP_GET(16, cdip_bus_p, PCIE_DEVCTL);
413 	cdip_mrrs_mps  = cdip_dev_ctrl &
414 	    (PCIE_DEVCTL_MAX_READ_REQ_MASK | PCIE_DEVCTL_MAX_PAYLOAD_MASK);
415 
416 	/* Locate sdip and set its MPS/MRRS when applicable */
417 	for (sdip = ddi_get_child(ddi_get_parent(cdip)); sdip;
418 	    sdip = ddi_get_next_sibling(sdip)) {
419 		uint16_t sdip_dev_ctrl, sdip_mrrs_mps, cap_ptr;
420 		uint32_t bus_dev_ven_id;
421 
422 		if (sdip == cdip || pci_config_setup(sdip, &cfg_hdl)
423 		    != DDI_SUCCESS)
424 			continue;
425 
426 		/* must be an Intel 41210 bridge */
427 		bus_dev_ven_id = pci_config_get32(cfg_hdl, PCI_CONF_VENID);
428 		if (!PCIEB_IS_41210_BRIDGE(bus_dev_ven_id)) {
429 			pci_config_teardown(&cfg_hdl);
430 			continue;
431 		}
432 
433 		if (PCI_CAP_LOCATE(cfg_hdl, PCI_CAP_ID_PCI_E, &cap_ptr)
434 		    != DDI_SUCCESS) {
435 			pci_config_teardown(&cfg_hdl);
436 			continue;
437 		}
438 
439 		/* get sdip's MPS/MRRS to compare to cdip's */
440 		sdip_dev_ctrl = PCI_CAP_GET16(cfg_hdl, 0, cap_ptr,
441 		    PCIE_DEVCTL);
442 		sdip_mrrs_mps = sdip_dev_ctrl &
443 		    (PCIE_DEVCTL_MAX_READ_REQ_MASK |
444 		    PCIE_DEVCTL_MAX_PAYLOAD_MASK);
445 
446 		/* if sdip already attached then its MPS/MRRS is configured */
447 		if (i_ddi_devi_attached(sdip)) {
448 			ASSERT(sdip_mrrs_mps == cdip_mrrs_mps);
449 			pci_config_teardown(&cfg_hdl);
450 			continue;
451 		}
452 
453 		/* otherwise, update sdip's MPS/MRRS if different from cdip's */
454 		if (sdip_mrrs_mps != cdip_mrrs_mps) {
455 			sdip_dev_ctrl = (sdip_dev_ctrl &
456 			    ~(PCIE_DEVCTL_MAX_READ_REQ_MASK |
457 			    PCIE_DEVCTL_MAX_PAYLOAD_MASK)) | cdip_mrrs_mps;
458 
459 			(void) PCI_CAP_PUT16(cfg_hdl, 0, cap_ptr, PCIE_DEVCTL,
460 			    sdip_dev_ctrl);
461 		}
462 
463 		/*
464 		 * note: sdip's bus_mps will be updated by
465 		 * pcie_initchild_mps()
466 		 */
467 
468 		pci_config_teardown(&cfg_hdl);
469 
470 		break;
471 	}
472 }
473 
474 static int
475 pcieb_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
476 {
477 	int			instance;
478 	char			device_type[8];
479 	pcieb_devstate_t	*pcieb;
480 	pcie_bus_t		*bus_p = PCIE_DIP2UPBUS(devi);
481 	ddi_acc_handle_t	config_handle = bus_p->bus_cfg_hdl;
482 
483 	switch (cmd) {
484 	case DDI_RESUME:
485 		(void) pcie_pwr_resume(devi);
486 		return (DDI_SUCCESS);
487 
488 	default:
489 		return (DDI_FAILURE);
490 
491 	case DDI_ATTACH:
492 		break;
493 	}
494 
495 	if (!(PCIE_IS_BDG(bus_p))) {
496 		PCIEB_DEBUG(DBG_ATTACH, devi, "This is not a switch or"
497 		" bridge\n");
498 		return (DDI_FAILURE);
499 	}
500 
501 	/*
502 	 * If PCIE_LINKCTL_LINK_DISABLE bit in the PCIe Config
503 	 * Space (PCIe Capability Link Control Register) is set,
504 	 * then do not bind the driver.
505 	 */
506 	if (PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL) & PCIE_LINKCTL_LINK_DISABLE)
507 		return (DDI_FAILURE);
508 
509 	/*
510 	 * Allocate and get soft state structure.
511 	 */
512 	instance = ddi_get_instance(devi);
513 	if (ddi_soft_state_zalloc(pcieb_state, instance) != DDI_SUCCESS)
514 		return (DDI_FAILURE);
515 	pcieb = ddi_get_soft_state(pcieb_state, instance);
516 	pcieb->pcieb_dip = devi;
517 
518 	if ((pcieb_fm_init(pcieb)) != DDI_SUCCESS) {
519 		PCIEB_DEBUG(DBG_ATTACH, devi, "Failed in pcieb_fm_init\n");
520 		goto fail;
521 	}
522 	pcieb->pcieb_init_flags |= PCIEB_INIT_FM;
523 
524 	mutex_init(&pcieb->pcieb_mutex, NULL, MUTEX_DRIVER, NULL);
525 	mutex_init(&pcieb->pcieb_err_mutex, NULL, MUTEX_DRIVER,
526 	    (void *)pcieb->pcieb_fm_ibc);
527 	mutex_init(&pcieb->pcieb_peek_poke_mutex, NULL, MUTEX_DRIVER,
528 	    (void *)pcieb->pcieb_fm_ibc);
529 
530 	/* create special properties for device identification */
531 	pcieb_id_props(pcieb);
532 
533 	/*
534 	 * Power management setup. This also makes sure that switch/bridge
535 	 * is at D0 during attach.
536 	 */
537 	if (pwr_common_setup(devi) != DDI_SUCCESS) {
538 		PCIEB_DEBUG(DBG_PWR, devi, "pwr_common_setup failed\n");
539 		goto fail;
540 	}
541 
542 	if (pcieb_pwr_setup(devi) != DDI_SUCCESS) {
543 		PCIEB_DEBUG(DBG_PWR, devi, "pxb_pwr_setup failed \n");
544 		goto fail;
545 	}
546 
547 	/*
548 	 * Make sure the "device_type" property exists.
549 	 */
550 	if (pcieb_is_pcie_device_type(devi))
551 		(void) strcpy(device_type, "pciex");
552 	else
553 		(void) strcpy(device_type, "pci");
554 
555 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
556 	    "device_type", device_type);
557 
558 	/*
559 	 * Check whether the "ranges" property is present.
560 	 * Otherwise create the ranges property by reading
561 	 * the configuration registers
562 	 */
563 	if (ddi_prop_exists(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
564 	    "ranges") == 0) {
565 		pcieb_create_ranges_prop(devi, config_handle);
566 	}
567 
568 	if (PCIE_IS_PCI_BDG(bus_p))
569 		pcieb_set_pci_perf_parameters(devi, config_handle);
570 
571 #ifdef PX_PLX
572 	pcieb_attach_plx_workarounds(pcieb);
573 #endif /* PX_PLX */
574 
575 	if (pcie_init(devi, NULL) != DDI_SUCCESS)
576 		goto fail;
577 
578 	/* Intel PCIe-to-PCI 41210 bridge workaround -- if applicable */
579 	if (pcieb_disable_41210_wkarnd == 0 &&
580 	    PCIEB_IS_41210_BRIDGE(bus_p->bus_dev_ven_id))
581 		pcieb_41210_mps_wkrnd(devi);
582 
583 	/*
584 	 * Initialize interrupt handlers. Ignore return value.
585 	 */
586 	(void) pcieb_intr_attach(pcieb);
587 
588 	(void) pcie_hpintr_enable(devi);
589 
590 	(void) pcie_link_bw_enable(devi);
591 
592 	/* Do any platform specific workarounds needed at this time */
593 	pcieb_plat_attach_workaround(devi);
594 
595 	/*
596 	 * If this is a root port, determine and set the max payload size.
597 	 * Since this will involve scanning the fabric, all error enabling
598 	 * and sw workarounds should be in place before doing this.
599 	 */
600 	if (PCIE_IS_RP(bus_p))
601 		pcie_init_root_port_mps(devi);
602 
603 	ddi_report_dev(devi);
604 	return (DDI_SUCCESS);
605 
606 fail:
607 	(void) pcieb_detach(devi, DDI_DETACH);
608 	return (DDI_FAILURE);
609 }
610 
611 static int
612 pcieb_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
613 {
614 	pcieb_devstate_t *pcieb;
615 	int error = DDI_SUCCESS;
616 
617 	switch (cmd) {
618 	case DDI_SUSPEND:
619 		error = pcie_pwr_suspend(devi);
620 		return (error);
621 
622 	case DDI_DETACH:
623 		break;
624 
625 	default:
626 		return (DDI_FAILURE);
627 	}
628 
629 	pcieb = ddi_get_soft_state(pcieb_state, ddi_get_instance(devi));
630 
631 	/* disable hotplug interrupt */
632 	(void) pcie_hpintr_disable(devi);
633 
634 	/* remove interrupt handlers */
635 	pcieb_intr_fini(pcieb);
636 
637 	/* uninitialize inband PCI-E HPC if present */
638 	(void) pcie_uninit(devi);
639 
640 	(void) ddi_prop_remove(DDI_DEV_T_NONE, devi, "device_type");
641 
642 	(void) ndi_prop_remove(DDI_DEV_T_NONE, pcieb->pcieb_dip,
643 	    "pcie_ce_mask");
644 
645 	if (pcieb->pcieb_init_flags & PCIEB_INIT_FM)
646 		pcieb_fm_fini(pcieb);
647 
648 	pcieb_pwr_teardown(devi);
649 	pwr_common_teardown(devi);
650 
651 	mutex_destroy(&pcieb->pcieb_peek_poke_mutex);
652 	mutex_destroy(&pcieb->pcieb_err_mutex);
653 	mutex_destroy(&pcieb->pcieb_mutex);
654 
655 	/*
656 	 * And finally free the per-pci soft state.
657 	 */
658 	ddi_soft_state_free(pcieb_state, ddi_get_instance(devi));
659 
660 	return (DDI_SUCCESS);
661 }
662 
663 static int
664 pcieb_bus_map(dev_info_t *dip, dev_info_t *rdip, ddi_map_req_t *mp,
665     off_t offset, off_t len, caddr_t *vaddrp)
666 {
667 	dev_info_t *pdip;
668 
669 	if (PCIE_IS_RP(PCIE_DIP2BUS(dip)) && mp->map_handlep != NULL) {
670 		ddi_acc_impl_t *hdlp =
671 		    (ddi_acc_impl_t *)(mp->map_handlep)->ah_platform_private;
672 
673 		pcieb_set_prot_scan(dip, hdlp);
674 	}
675 	pdip = (dev_info_t *)DEVI(dip)->devi_parent;
676 	return ((DEVI(pdip)->devi_ops->devo_bus_ops->bus_map)(pdip, rdip, mp,
677 	    offset, len, vaddrp));
678 }
679 
680 static int
681 pcieb_ctlops(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
682     void *arg, void *result)
683 {
684 	pci_regspec_t *drv_regp;
685 	int	reglen;
686 	int	rn;
687 	int	totreg;
688 	pcieb_devstate_t *pcieb = ddi_get_soft_state(pcieb_state,
689 	    ddi_get_instance(dip));
690 	struct detachspec *ds;
691 	struct attachspec *as;
692 
693 	switch (ctlop) {
694 	case DDI_CTLOPS_REPORTDEV:
695 		if (rdip == (dev_info_t *)0)
696 			return (DDI_FAILURE);
697 
698 		if (ddi_get_parent(rdip) == dip) {
699 			cmn_err(CE_CONT, "?PCIE-device: %s@%s, %s%d\n",
700 			    ddi_node_name(rdip), ddi_get_name_addr(rdip),
701 			    ddi_driver_name(rdip), ddi_get_instance(rdip));
702 		}
703 
704 		/* Pass it up for fabric sync */
705 		(void) ddi_ctlops(dip, rdip, ctlop, arg, result);
706 		return (DDI_SUCCESS);
707 
708 	case DDI_CTLOPS_INITCHILD:
709 		return (pcieb_initchild((dev_info_t *)arg));
710 
711 	case DDI_CTLOPS_UNINITCHILD:
712 		pcieb_uninitchild((dev_info_t *)arg);
713 		return (DDI_SUCCESS);
714 
715 	case DDI_CTLOPS_SIDDEV:
716 		return (DDI_SUCCESS);
717 
718 	case DDI_CTLOPS_REGSIZE:
719 	case DDI_CTLOPS_NREGS:
720 		if (rdip == (dev_info_t *)0)
721 			return (DDI_FAILURE);
722 		break;
723 
724 	case DDI_CTLOPS_PEEK:
725 	case DDI_CTLOPS_POKE:
726 		return (pcieb_plat_peekpoke(dip, rdip, ctlop, arg, result));
727 	case DDI_CTLOPS_ATTACH:
728 		if (!pcie_is_child(dip, rdip))
729 			return (DDI_SUCCESS);
730 
731 		as = (struct attachspec *)arg;
732 		switch (as->when) {
733 		case DDI_PRE:
734 			if (as->cmd == DDI_RESUME) {
735 				pcie_clear_errors(rdip);
736 				if (pcieb_plat_ctlops(rdip, ctlop, arg) !=
737 				    DDI_SUCCESS)
738 					return (DDI_FAILURE);
739 			}
740 
741 			if (as->cmd == DDI_ATTACH)
742 				return (pcie_pm_hold(dip));
743 
744 			return (DDI_SUCCESS);
745 
746 		case DDI_POST:
747 			if (as->cmd == DDI_ATTACH &&
748 			    as->result != DDI_SUCCESS) {
749 				/*
750 				 * Attach failed for the child device. The child
751 				 * driver may have made PM calls before the
752 				 * attach failed. pcie_pm_remove_child() should
753 				 * cleanup PM state and holds (if any)
754 				 * associated with the child device.
755 				 */
756 				return (pcie_pm_remove_child(dip, rdip));
757 			}
758 
759 			if (as->result == DDI_SUCCESS) {
760 				pf_init(rdip, (void *)pcieb->pcieb_fm_ibc,
761 				    as->cmd);
762 
763 				(void) pcieb_plat_ctlops(rdip, ctlop, arg);
764 			}
765 
766 			/*
767 			 * For empty hotplug-capable slots, we should explicitly
768 			 * disable the errors, so that we won't panic upon
769 			 * unsupported hotplug messages.
770 			 */
771 			if ((!ddi_prop_exists(DDI_DEV_T_ANY, rdip,
772 			    DDI_PROP_DONTPASS, "hotplug-capable")) ||
773 			    ddi_get_child(rdip)) {
774 				(void) pcie_postattach_child(rdip);
775 				return (DDI_SUCCESS);
776 			}
777 
778 			pcie_disable_errors(rdip);
779 
780 			return (DDI_SUCCESS);
781 		default:
782 			break;
783 		}
784 		return (DDI_SUCCESS);
785 
786 	case DDI_CTLOPS_DETACH:
787 		if (!pcie_is_child(dip, rdip))
788 			return (DDI_SUCCESS);
789 
790 		ds = (struct detachspec *)arg;
791 		switch (ds->when) {
792 		case DDI_PRE:
793 			pf_fini(rdip, ds->cmd);
794 			return (DDI_SUCCESS);
795 
796 		case DDI_POST:
797 			if (pcieb_plat_ctlops(rdip, ctlop, arg) != DDI_SUCCESS)
798 				return (DDI_FAILURE);
799 			if (ds->cmd == DDI_DETACH &&
800 			    ds->result == DDI_SUCCESS) {
801 				return (pcie_pm_remove_child(dip, rdip));
802 			}
803 			return (DDI_SUCCESS);
804 		default:
805 			break;
806 		}
807 		return (DDI_SUCCESS);
808 	default:
809 		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
810 	}
811 
812 	*(int *)result = 0;
813 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip,
814 	    DDI_PROP_DONTPASS | DDI_PROP_CANSLEEP, "reg", (caddr_t)&drv_regp,
815 	    &reglen) != DDI_SUCCESS)
816 		return (DDI_FAILURE);
817 
818 	totreg = reglen / sizeof (pci_regspec_t);
819 	if (ctlop == DDI_CTLOPS_NREGS)
820 		*(int *)result = totreg;
821 	else if (ctlop == DDI_CTLOPS_REGSIZE) {
822 		rn = *(int *)arg;
823 		if (rn >= totreg) {
824 			kmem_free(drv_regp, reglen);
825 			return (DDI_FAILURE);
826 		}
827 
828 		*(off_t *)result = drv_regp[rn].pci_size_low |
829 		    ((uint64_t)drv_regp[rn].pci_size_hi << 32);
830 	}
831 
832 	kmem_free(drv_regp, reglen);
833 	return (DDI_SUCCESS);
834 }
835 
836 /*
837  * name_child
838  *
839  * This function is called from init_child to name a node. It is
840  * also passed as a callback for node merging functions.
841  *
842  * return value: DDI_SUCCESS, DDI_FAILURE
843  */
844 static int
845 pcieb_name_child(dev_info_t *child, char *name, int namelen)
846 {
847 	pci_regspec_t *pci_rp;
848 	uint_t device, func;
849 	char **unit_addr;
850 	uint_t n;
851 
852 	/*
853 	 * For .conf nodes, use unit-address property as name
854 	 */
855 	if (ndi_dev_is_persistent_node(child) == 0) {
856 		if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, child,
857 		    DDI_PROP_DONTPASS, "unit-address", &unit_addr, &n) !=
858 		    DDI_PROP_SUCCESS) {
859 			cmn_err(CE_WARN,
860 			    "cannot find unit-address in %s.conf",
861 			    ddi_driver_name(child));
862 			return (DDI_FAILURE);
863 		}
864 		if (n != 1 || *unit_addr == NULL || **unit_addr == 0) {
865 			cmn_err(CE_WARN, "unit-address property in %s.conf"
866 			    " not well-formed", ddi_driver_name(child));
867 			ddi_prop_free(unit_addr);
868 			return (DDI_FAILURE);
869 		}
870 		(void) snprintf(name, namelen, "%s", *unit_addr);
871 		ddi_prop_free(unit_addr);
872 		return (DDI_SUCCESS);
873 	}
874 
875 	/*
876 	 * Get the address portion of the node name based on
877 	 * the function and device number.
878 	 */
879 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, child,
880 	    DDI_PROP_DONTPASS, "reg", (int **)&pci_rp, &n) != DDI_SUCCESS) {
881 		return (DDI_FAILURE);
882 	}
883 
884 	/* copy the device identifications */
885 	device = PCI_REG_DEV_G(pci_rp[0].pci_phys_hi);
886 	func = PCI_REG_FUNC_G(pci_rp[0].pci_phys_hi);
887 
888 	if (pcie_ari_is_enabled(ddi_get_parent(child))
889 	    == PCIE_ARI_FORW_ENABLED) {
890 		func = (device << 3) | func;
891 		device = 0;
892 	}
893 
894 	if (func != 0)
895 		(void) snprintf(name, namelen, "%x,%x", device, func);
896 	else
897 		(void) snprintf(name, namelen, "%x", device);
898 
899 	ddi_prop_free(pci_rp);
900 	return (DDI_SUCCESS);
901 }
902 
903 static int
904 pcieb_initchild(dev_info_t *child)
905 {
906 	char name[MAXNAMELEN];
907 	int result = DDI_FAILURE;
908 	pcieb_devstate_t *pcieb =
909 	    (pcieb_devstate_t *)ddi_get_soft_state(pcieb_state,
910 	    ddi_get_instance(ddi_get_parent(child)));
911 
912 	/*
913 	 * Name the child
914 	 */
915 	if (pcieb_name_child(child, name, MAXNAMELEN) != DDI_SUCCESS) {
916 		result = DDI_FAILURE;
917 		goto done;
918 	}
919 	ddi_set_name_addr(child, name);
920 
921 	/*
922 	 * Pseudo nodes indicate a prototype node with per-instance
923 	 * properties to be merged into the real h/w device node.
924 	 * The interpretation of the unit-address is DD[,F]
925 	 * where DD is the device id and F is the function.
926 	 */
927 	if (ndi_dev_is_persistent_node(child) == 0) {
928 		extern int pci_allow_pseudo_children;
929 
930 		/*
931 		 * Try to merge the properties from this prototype
932 		 * node into real h/w nodes.
933 		 */
934 		if (ndi_merge_node(child, pcieb_name_child) == DDI_SUCCESS) {
935 			/*
936 			 * Merged ok - return failure to remove the node.
937 			 */
938 			ddi_set_name_addr(child, NULL);
939 			result = DDI_FAILURE;
940 			goto done;
941 		}
942 
943 		/* workaround for ddivs to run under PCI-E */
944 		if (pci_allow_pseudo_children) {
945 			result = DDI_SUCCESS;
946 			goto done;
947 		}
948 
949 		/*
950 		 * The child was not merged into a h/w node,
951 		 * but there's not much we can do with it other
952 		 * than return failure to cause the node to be removed.
953 		 */
954 		cmn_err(CE_WARN, "!%s@%s: %s.conf properties not merged",
955 		    ddi_driver_name(child), ddi_get_name_addr(child),
956 		    ddi_driver_name(child));
957 		ddi_set_name_addr(child, NULL);
958 		result = DDI_NOT_WELL_FORMED;
959 		goto done;
960 	}
961 
962 	/* platform specific initchild */
963 	pcieb_plat_initchild(child);
964 
965 	if (pcie_pm_hold(pcieb->pcieb_dip) != DDI_SUCCESS) {
966 		PCIEB_DEBUG(DBG_PWR, pcieb->pcieb_dip,
967 		    "INITCHILD: px_pm_hold failed\n");
968 		result = DDI_FAILURE;
969 		goto done;
970 	}
971 	/* Any return from here must call pcie_pm_release */
972 
973 	/*
974 	 * If configuration registers were previously saved by
975 	 * child (before it entered D3), then let the child do the
976 	 * restore to set up the config regs as it'll first need to
977 	 * power the device out of D3.
978 	 */
979 	if (ddi_prop_exists(DDI_DEV_T_ANY, child, DDI_PROP_DONTPASS,
980 	    "config-regs-saved-by-child") == 1) {
981 		PCIEB_DEBUG(DBG_PWR, ddi_get_parent(child),
982 		    "INITCHILD: config regs to be restored by child"
983 		    " for %s@%s\n", ddi_node_name(child),
984 		    ddi_get_name_addr(child));
985 
986 		result = DDI_SUCCESS;
987 		goto cleanup;
988 	}
989 
990 	PCIEB_DEBUG(DBG_PWR, ddi_get_parent(child),
991 	    "INITCHILD: config regs setup for %s@%s\n",
992 	    ddi_node_name(child), ddi_get_name_addr(child));
993 
994 	pcie_init_dom(child);
995 
996 	if (pcie_initchild(child) != DDI_SUCCESS) {
997 		result = DDI_FAILURE;
998 		pcie_fini_dom(child);
999 		goto cleanup;
1000 	}
1001 
1002 #ifdef PX_PLX
1003 	if (pcieb_init_plx_workarounds(pcieb, child) == DDI_FAILURE) {
1004 		result = DDI_FAILURE;
1005 		pcie_fini_dom(child);
1006 		goto cleanup;
1007 	}
1008 #endif /* PX_PLX */
1009 
1010 	result = DDI_SUCCESS;
1011 cleanup:
1012 	pcie_pm_release(pcieb->pcieb_dip);
1013 done:
1014 	return (result);
1015 }
1016 
1017 static void
1018 pcieb_uninitchild(dev_info_t *dip)
1019 {
1020 
1021 	pcie_uninitchild(dip);
1022 
1023 	pcieb_plat_uninitchild(dip);
1024 
1025 	ddi_set_name_addr(dip, NULL);
1026 
1027 	/*
1028 	 * Strip the node to properly convert it back to prototype form
1029 	 */
1030 	ddi_remove_minor_node(dip, NULL);
1031 
1032 	ddi_prop_remove_all(dip);
1033 }
1034 
1035 static boolean_t
1036 pcieb_is_pcie_device_type(dev_info_t *dip)
1037 {
1038 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
1039 
1040 	if (PCIE_IS_SW(bus_p) || PCIE_IS_RP(bus_p) || PCIE_IS_PCI2PCIE(bus_p))
1041 		return (B_TRUE);
1042 
1043 	return (B_FALSE);
1044 }
1045 
1046 static int
1047 pcieb_intr_attach(pcieb_devstate_t *pcieb)
1048 {
1049 	int			intr_types;
1050 	dev_info_t		*dip = pcieb->pcieb_dip;
1051 
1052 	/* Allow platform specific code to do any initialization first */
1053 	pcieb_plat_intr_attach(pcieb);
1054 
1055 	/*
1056 	 * Initialize interrupt handlers.
1057 	 * If both MSI and FIXED are supported, try to attach MSI first.
1058 	 * If MSI fails for any reason, then try FIXED, but only allow one
1059 	 * type to be attached.
1060 	 */
1061 	if (ddi_intr_get_supported_types(dip, &intr_types) != DDI_SUCCESS) {
1062 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_supported_types"
1063 		    " failed\n");
1064 		goto FAIL;
1065 	}
1066 
1067 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
1068 	    (pcieb_msi_supported(dip) == DDI_SUCCESS)) {
1069 		if (pcieb_intr_init(pcieb, DDI_INTR_TYPE_MSI) == DDI_SUCCESS)
1070 			intr_types = DDI_INTR_TYPE_MSI;
1071 		else {
1072 			PCIEB_DEBUG(DBG_ATTACH, dip, "Unable to attach MSI"
1073 			    " handler\n");
1074 		}
1075 	}
1076 
1077 	if (intr_types != DDI_INTR_TYPE_MSI) {
1078 		/*
1079 		 * MSIs are not supported or MSI initialization failed. For Root
1080 		 * Ports mark this so error handling might try to fallback to
1081 		 * some other mechanism if available (machinecheck etc.).
1082 		 */
1083 		if (PCIE_IS_RP(PCIE_DIP2UPBUS(dip)))
1084 			pcieb->pcieb_no_aer_msi = B_TRUE;
1085 	}
1086 
1087 	if (intr_types & DDI_INTR_TYPE_FIXED) {
1088 		if (pcieb_intr_init(pcieb, DDI_INTR_TYPE_FIXED) !=
1089 		    DDI_SUCCESS) {
1090 			PCIEB_DEBUG(DBG_ATTACH, dip,
1091 			    "Unable to attach INTx handler\n");
1092 			goto FAIL;
1093 		}
1094 	}
1095 	return (DDI_SUCCESS);
1096 
1097 FAIL:
1098 	return (DDI_FAILURE);
1099 }
1100 
1101 /*
1102  * This function initializes internally generated interrupts only.
1103  * It does not affect any interrupts generated by downstream devices
1104  * or the forwarding of them.
1105  *
1106  * Enable Device Specific Interrupts or Hotplug features here.
1107  * Enabling features may change how many interrupts are requested
1108  * by the device.  If features are not enabled first, the
1109  * device might not ask for any interrupts.
1110  */
1111 static int
1112 pcieb_intr_init(pcieb_devstate_t *pcieb, int intr_type)
1113 {
1114 	dev_info_t	*dip = pcieb->pcieb_dip;
1115 	int		nintrs, request, count, x;
1116 	int		intr_cap = 0;
1117 	int		inum = 0;
1118 	int		ret;
1119 	pcie_bus_t	*bus_p = PCIE_DIP2UPBUS(dip);
1120 	uint16_t	vendorid = bus_p->bus_dev_ven_id & 0xFFFF;
1121 	boolean_t	is_hp = B_FALSE;
1122 	boolean_t	is_pme = B_FALSE;
1123 	boolean_t	is_lbw = B_FALSE;
1124 
1125 	PCIEB_DEBUG(DBG_ATTACH, dip, "pcieb_intr_init: Attaching %s handler\n",
1126 	    (intr_type == DDI_INTR_TYPE_MSI) ? "MSI" : "INTx");
1127 
1128 	request = 0;
1129 	if (PCIE_IS_HOTPLUG_ENABLED(dip)) {
1130 		is_hp = B_TRUE;
1131 	}
1132 
1133 	if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p) &&
1134 	    (vendorid == NVIDIA_VENDOR_ID)) {
1135 		is_pme = B_TRUE;
1136 	}
1137 
1138 	if (intr_type == DDI_INTR_TYPE_MSI && pcie_link_bw_supported(dip)) {
1139 		is_lbw = B_TRUE;
1140 	}
1141 
1142 	/*
1143 	 * The hot-plug, link bandwidth, and power management events all are
1144 	 * based on the PCI Express capability. Therefore, they all share their
1145 	 * own interrupt.
1146 	 */
1147 	if (is_hp || is_pme || is_lbw) {
1148 		request++;
1149 	}
1150 
1151 	/*
1152 	 * If this device is a root port, which means it can have MSI interrupts
1153 	 * enabled for AERs, then we need to request one.
1154 	 */
1155 	if (intr_type == DDI_INTR_TYPE_MSI) {
1156 		if (PCIE_IS_RP(bus_p) && PCIE_HAS_AER(bus_p)) {
1157 			request++;
1158 		}
1159 	}
1160 
1161 	if (request == 0)
1162 		return (DDI_SUCCESS);
1163 
1164 	/*
1165 	 * Get number of supported interrupts.
1166 	 *
1167 	 * Several Bridges/Switches will not have this property set, resulting
1168 	 * in a FAILURE, if the device is not configured in a way that
1169 	 * interrupts are needed. (eg. hotplugging)
1170 	 */
1171 	ret = ddi_intr_get_nintrs(dip, intr_type, &nintrs);
1172 	if ((ret != DDI_SUCCESS) || (nintrs == 0)) {
1173 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_nintrs ret:%d"
1174 		    " req:%d\n", ret, nintrs);
1175 		return (DDI_FAILURE);
1176 	}
1177 
1178 	PCIEB_DEBUG(DBG_ATTACH, dip, "bdf 0x%x: ddi_intr_get_nintrs: nintrs %d",
1179 	    " request %d\n", bus_p->bus_bdf, nintrs, request);
1180 
1181 	if (request > nintrs)
1182 		request = nintrs;
1183 
1184 	/* Allocate an array of interrupt handlers */
1185 	pcieb->pcieb_htable_size = sizeof (ddi_intr_handle_t) * request;
1186 	pcieb->pcieb_htable = kmem_zalloc(pcieb->pcieb_htable_size,
1187 	    KM_SLEEP);
1188 	pcieb->pcieb_init_flags |= PCIEB_INIT_HTABLE;
1189 
1190 	ret = ddi_intr_alloc(dip, pcieb->pcieb_htable, intr_type, inum,
1191 	    request, &count, DDI_INTR_ALLOC_NORMAL);
1192 	if ((ret != DDI_SUCCESS) || (count == 0)) {
1193 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_alloc() ret: %d ask: %d"
1194 		    " actual: %d\n", ret, request, count);
1195 		goto FAIL;
1196 	}
1197 	pcieb->pcieb_init_flags |= PCIEB_INIT_ALLOC;
1198 
1199 	/* Save the actual number of interrupts allocated */
1200 	pcieb->pcieb_intr_count = count;
1201 	if (count < request) {
1202 		PCIEB_DEBUG(DBG_ATTACH, dip, "bdf 0%x: Requested Intr: %d"
1203 		    " Received: %d\n", bus_p->bus_bdf, request, count);
1204 	}
1205 
1206 	/*
1207 	 * NVidia (MCP55 and other) chipsets have a errata that if the number
1208 	 * of requested MSI intrs is not allocated we have to fall back to INTx.
1209 	 */
1210 	if (intr_type == DDI_INTR_TYPE_MSI) {
1211 		if (PCIE_IS_RP(bus_p) && (vendorid == NVIDIA_VENDOR_ID)) {
1212 			if (request != count)
1213 				goto FAIL;
1214 		}
1215 	}
1216 
1217 	/* Get interrupt priority */
1218 	ret = ddi_intr_get_pri(pcieb->pcieb_htable[0],
1219 	    &pcieb->pcieb_intr_priority);
1220 	if (ret != DDI_SUCCESS) {
1221 		PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_get_pri() ret: %d\n",
1222 		    ret);
1223 		goto FAIL;
1224 	}
1225 
1226 	if (pcieb->pcieb_intr_priority >= LOCK_LEVEL) {
1227 		pcieb->pcieb_intr_priority = LOCK_LEVEL - 1;
1228 		ret = ddi_intr_set_pri(pcieb->pcieb_htable[0],
1229 		    pcieb->pcieb_intr_priority);
1230 		if (ret != DDI_SUCCESS) {
1231 			PCIEB_DEBUG(DBG_ATTACH, dip, "ddi_intr_set_pri() ret:"
1232 			" %d\n", ret);
1233 
1234 			goto FAIL;
1235 		}
1236 	}
1237 
1238 	mutex_init(&pcieb->pcieb_intr_mutex, NULL, MUTEX_DRIVER, NULL);
1239 
1240 	pcieb->pcieb_init_flags |= PCIEB_INIT_MUTEX;
1241 
1242 	for (count = 0; count < pcieb->pcieb_intr_count; count++) {
1243 		ret = ddi_intr_add_handler(pcieb->pcieb_htable[count],
1244 		    pcieb_intr_handler, (caddr_t)pcieb,
1245 		    (caddr_t)(uintptr_t)(inum + count));
1246 
1247 		if (ret != DDI_SUCCESS) {
1248 			PCIEB_DEBUG(DBG_ATTACH, dip, "Cannot add "
1249 			    "interrupt(%d)\n", ret);
1250 			break;
1251 		}
1252 	}
1253 
1254 	/* If unsucessful, remove the added handlers */
1255 	if (ret != DDI_SUCCESS) {
1256 		for (x = 0; x < count; x++) {
1257 			(void) ddi_intr_remove_handler(pcieb->pcieb_htable[x]);
1258 		}
1259 		goto FAIL;
1260 	}
1261 
1262 	pcieb->pcieb_init_flags |= PCIEB_INIT_HANDLER;
1263 
1264 	(void) ddi_intr_get_cap(pcieb->pcieb_htable[0], &intr_cap);
1265 
1266 	/*
1267 	 * Get this intr lock because we are not quite ready to handle
1268 	 * interrupts immediately after enabling it. The MSI multi register
1269 	 * gets programmed in ddi_intr_enable after which we need to get the
1270 	 * MSI offsets for Hotplug/AER.
1271 	 */
1272 	mutex_enter(&pcieb->pcieb_intr_mutex);
1273 
1274 	if (intr_cap & DDI_INTR_FLAG_BLOCK) {
1275 		(void) ddi_intr_block_enable(pcieb->pcieb_htable,
1276 		    pcieb->pcieb_intr_count);
1277 		pcieb->pcieb_init_flags |= PCIEB_INIT_BLOCK;
1278 	} else {
1279 		for (count = 0; count < pcieb->pcieb_intr_count; count++) {
1280 			(void) ddi_intr_enable(pcieb->pcieb_htable[count]);
1281 		}
1282 	}
1283 	pcieb->pcieb_init_flags |= PCIEB_INIT_ENABLE;
1284 
1285 	/* Save the interrupt type */
1286 	pcieb->pcieb_intr_type = intr_type;
1287 
1288 	/* Get the MSI offset for hotplug/PME from the PCIe cap reg */
1289 	if (intr_type == DDI_INTR_TYPE_MSI) {
1290 		uint16_t pcie_msi_off;
1291 		pcie_msi_off = PCI_CAP_GET16(bus_p->bus_cfg_hdl, 0,
1292 		    bus_p->bus_pcie_off, PCIE_PCIECAP) &
1293 		    PCIE_PCIECAP_INT_MSG_NUM;
1294 
1295 		if (pcie_msi_off >= count) {
1296 			PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %u in PCIe "
1297 			    "cap > max allocated %d\n", pcie_msi_off, count);
1298 			mutex_exit(&pcieb->pcieb_intr_mutex);
1299 			goto FAIL;
1300 		}
1301 
1302 		if (is_hp) {
1303 			pcieb->pcieb_isr_tab[pcie_msi_off] |= PCIEB_INTR_SRC_HP;
1304 		}
1305 
1306 		if (is_pme) {
1307 			pcieb->pcieb_isr_tab[pcie_msi_off] |=
1308 			    PCIEB_INTR_SRC_PME;
1309 		}
1310 
1311 		if (is_lbw) {
1312 			pcieb->pcieb_isr_tab[pcie_msi_off] |=
1313 			    PCIEB_INTR_SRC_LBW;
1314 		}
1315 	} else {
1316 		/* INTx handles only Hotplug interrupts */
1317 		if (is_hp)
1318 			pcieb->pcieb_isr_tab[0] |= PCIEB_INTR_SRC_HP;
1319 	}
1320 
1321 
1322 	/*
1323 	 * Get the MSI offset for errors from the AER Root Error status
1324 	 * register.
1325 	 */
1326 	if ((intr_type == DDI_INTR_TYPE_MSI) && PCIE_IS_RP(bus_p)) {
1327 		if (PCIE_HAS_AER(bus_p)) {
1328 			int aer_msi_off;
1329 			aer_msi_off = (PCI_XCAP_GET32(bus_p->bus_cfg_hdl, 0,
1330 			    bus_p->bus_aer_off, PCIE_AER_RE_STS) >>
1331 			    PCIE_AER_RE_STS_MSG_NUM_SHIFT) &
1332 			    PCIE_AER_RE_STS_MSG_NUM_MASK;
1333 
1334 			if (aer_msi_off >= count) {
1335 				PCIEB_DEBUG(DBG_ATTACH, dip, "MSI number %d in"
1336 				    " AER cap > max allocated %d\n",
1337 				    aer_msi_off, count);
1338 				mutex_exit(&pcieb->pcieb_intr_mutex);
1339 				goto FAIL;
1340 			}
1341 			pcieb->pcieb_isr_tab[aer_msi_off] |= PCIEB_INTR_SRC_AER;
1342 		} else {
1343 			/*
1344 			 * This RP does not have AER. Fallback to the
1345 			 * SERR+Machinecheck approach if available.
1346 			 */
1347 			pcieb->pcieb_no_aer_msi = B_TRUE;
1348 		}
1349 	}
1350 
1351 	mutex_exit(&pcieb->pcieb_intr_mutex);
1352 	return (DDI_SUCCESS);
1353 
1354 FAIL:
1355 	pcieb_intr_fini(pcieb);
1356 	return (DDI_FAILURE);
1357 }
1358 
1359 static void
1360 pcieb_intr_fini(pcieb_devstate_t *pcieb)
1361 {
1362 	int x;
1363 	int count = pcieb->pcieb_intr_count;
1364 	int flags = pcieb->pcieb_init_flags;
1365 
1366 	if ((flags & PCIEB_INIT_ENABLE) &&
1367 	    (flags & PCIEB_INIT_BLOCK)) {
1368 		(void) ddi_intr_block_disable(pcieb->pcieb_htable, count);
1369 		flags &= ~(PCIEB_INIT_ENABLE |
1370 		    PCIEB_INIT_BLOCK);
1371 	}
1372 
1373 	if (flags & PCIEB_INIT_MUTEX)
1374 		mutex_destroy(&pcieb->pcieb_intr_mutex);
1375 
1376 	for (x = 0; x < count; x++) {
1377 		if (flags & PCIEB_INIT_ENABLE)
1378 			(void) ddi_intr_disable(pcieb->pcieb_htable[x]);
1379 
1380 		if (flags & PCIEB_INIT_HANDLER)
1381 			(void) ddi_intr_remove_handler(pcieb->pcieb_htable[x]);
1382 
1383 		if (flags & PCIEB_INIT_ALLOC)
1384 			(void) ddi_intr_free(pcieb->pcieb_htable[x]);
1385 	}
1386 
1387 	flags &= ~(PCIEB_INIT_ENABLE | PCIEB_INIT_HANDLER | PCIEB_INIT_ALLOC |
1388 	    PCIEB_INIT_MUTEX);
1389 
1390 	if (flags & PCIEB_INIT_HTABLE)
1391 		kmem_free(pcieb->pcieb_htable, pcieb->pcieb_htable_size);
1392 
1393 	flags &= ~PCIEB_INIT_HTABLE;
1394 
1395 	pcieb->pcieb_init_flags &= flags;
1396 }
1397 
1398 /*
1399  * Checks if this device needs MSIs enabled or not.
1400  */
1401 /*ARGSUSED*/
1402 static int
1403 pcieb_msi_supported(dev_info_t *dip)
1404 {
1405 	return ((pcieb_enable_msi && pcieb_plat_msi_supported(dip)) ?
1406 	    DDI_SUCCESS: DDI_FAILURE);
1407 }
1408 
1409 /*ARGSUSED*/
1410 static int
1411 pcieb_fm_init_child(dev_info_t *dip, dev_info_t *tdip, int cap,
1412     ddi_iblock_cookie_t *ibc)
1413 {
1414 	pcieb_devstate_t  *pcieb = ddi_get_soft_state(pcieb_state,
1415 	    ddi_get_instance(dip));
1416 
1417 	ASSERT(ibc != NULL);
1418 	*ibc = pcieb->pcieb_fm_ibc;
1419 
1420 	return (DEVI(dip)->devi_fmhdl->fh_cap | DDI_FM_ACCCHK_CAPABLE |
1421 	    DDI_FM_DMACHK_CAPABLE);
1422 }
1423 
1424 static int
1425 pcieb_fm_init(pcieb_devstate_t *pcieb_p)
1426 {
1427 	dev_info_t	*dip = pcieb_p->pcieb_dip;
1428 	int		fm_cap = DDI_FM_EREPORT_CAPABLE;
1429 
1430 	/*
1431 	 * Request our capability level and get our parents capability
1432 	 * and ibc.
1433 	 */
1434 	ddi_fm_init(dip, &fm_cap, &pcieb_p->pcieb_fm_ibc);
1435 
1436 	return (DDI_SUCCESS);
1437 }
1438 
1439 /*
1440  * Breakdown our FMA resources
1441  */
1442 static void
1443 pcieb_fm_fini(pcieb_devstate_t *pcieb_p)
1444 {
1445 	/*
1446 	 * Clean up allocated fm structures
1447 	 */
1448 	ddi_fm_fini(pcieb_p->pcieb_dip);
1449 }
1450 
1451 static int
1452 pcieb_open(dev_t *devp, int flags, int otyp, cred_t *credp)
1453 {
1454 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(*devp));
1455 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1456 	int	rv;
1457 
1458 	if (pcieb == NULL)
1459 		return (ENXIO);
1460 
1461 	mutex_enter(&pcieb->pcieb_mutex);
1462 	rv = pcie_open(pcieb->pcieb_dip, devp, flags, otyp, credp);
1463 	mutex_exit(&pcieb->pcieb_mutex);
1464 
1465 	return (rv);
1466 }
1467 
1468 static int
1469 pcieb_close(dev_t dev, int flags, int otyp, cred_t *credp)
1470 {
1471 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(dev));
1472 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1473 	int	rv;
1474 
1475 	if (pcieb == NULL)
1476 		return (ENXIO);
1477 
1478 	mutex_enter(&pcieb->pcieb_mutex);
1479 	rv = pcie_close(pcieb->pcieb_dip, dev, flags, otyp, credp);
1480 	mutex_exit(&pcieb->pcieb_mutex);
1481 
1482 	return (rv);
1483 }
1484 
1485 static int
1486 pcieb_ioctl_retrain(pcieb_devstate_t *pcieb, cred_t *credp)
1487 {
1488 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1489 
1490 	if (drv_priv(credp) != 0) {
1491 		return (EPERM);
1492 	}
1493 
1494 	if (!PCIE_IS_PCIE(bus_p)) {
1495 		return (ENOTSUP);
1496 	}
1497 
1498 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1499 		return (ENOTSUP);
1500 	}
1501 
1502 	return (pcie_link_retrain(pcieb->pcieb_dip));
1503 }
1504 
1505 static int
1506 pcieb_ioctl_get_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
1507     cred_t *credp)
1508 {
1509 	pcie_bus_t			*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1510 	pcieb_ioctl_target_speed_t	pits;
1511 
1512 	if (drv_priv(credp) != 0) {
1513 		return (EPERM);
1514 	}
1515 
1516 	if (!PCIE_IS_PCIE(bus_p)) {
1517 		return (ENOTSUP);
1518 	}
1519 
1520 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1521 		return (ENOTSUP);
1522 	}
1523 
1524 	pits.pits_flags = 0;
1525 	pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
1526 
1527 	mutex_enter(&bus_p->bus_speed_mutex);
1528 	if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
1529 		pits.pits_flags |= PCIEB_FLAGS_ADMIN_SET;
1530 	}
1531 	switch (bus_p->bus_target_speed) {
1532 	case PCIE_LINK_SPEED_2_5:
1533 		pits.pits_speed = PCIEB_LINK_SPEED_GEN1;
1534 		break;
1535 	case PCIE_LINK_SPEED_5:
1536 		pits.pits_speed = PCIEB_LINK_SPEED_GEN2;
1537 		break;
1538 	case PCIE_LINK_SPEED_8:
1539 		pits.pits_speed = PCIEB_LINK_SPEED_GEN3;
1540 		break;
1541 	case PCIE_LINK_SPEED_16:
1542 		pits.pits_speed = PCIEB_LINK_SPEED_GEN4;
1543 		break;
1544 	default:
1545 		pits.pits_speed = PCIEB_LINK_SPEED_UNKNOWN;
1546 		break;
1547 	}
1548 	mutex_exit(&bus_p->bus_speed_mutex);
1549 
1550 	if (ddi_copyout(&pits, (void *)arg, sizeof (pits),
1551 	    mode & FKIOCTL) != 0) {
1552 		return (EFAULT);
1553 	}
1554 
1555 	return (0);
1556 }
1557 
1558 static int
1559 pcieb_ioctl_set_speed(pcieb_devstate_t *pcieb, intptr_t arg, int mode,
1560     cred_t *credp)
1561 {
1562 	pcie_bus_t			*bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
1563 	pcieb_ioctl_target_speed_t	pits;
1564 	pcie_link_speed_t		speed;
1565 
1566 	if (drv_priv(credp) != 0) {
1567 		return (EPERM);
1568 	}
1569 
1570 	if (!PCIE_IS_PCIE(bus_p)) {
1571 		return (ENOTSUP);
1572 	}
1573 
1574 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
1575 		return (ENOTSUP);
1576 	}
1577 
1578 	if (ddi_copyin((void *)arg, &pits, sizeof (pits),
1579 	    mode & FKIOCTL) != 0) {
1580 		return (EFAULT);
1581 	}
1582 
1583 	if (pits.pits_flags != 0) {
1584 		return (EINVAL);
1585 	}
1586 
1587 	switch (pits.pits_speed) {
1588 	case PCIEB_LINK_SPEED_GEN1:
1589 		speed = PCIE_LINK_SPEED_2_5;
1590 		break;
1591 	case PCIEB_LINK_SPEED_GEN2:
1592 		speed = PCIE_LINK_SPEED_5;
1593 		break;
1594 	case PCIEB_LINK_SPEED_GEN3:
1595 		speed = PCIE_LINK_SPEED_8;
1596 		break;
1597 	case PCIEB_LINK_SPEED_GEN4:
1598 		speed = PCIE_LINK_SPEED_16;
1599 		break;
1600 	default:
1601 		return (EINVAL);
1602 	}
1603 
1604 	return (pcie_link_set_target(pcieb->pcieb_dip, speed));
1605 }
1606 
1607 static int
1608 pcieb_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1609     int *rvalp)
1610 {
1611 	int		inst = PCI_MINOR_NUM_TO_INSTANCE(getminor(dev));
1612 	pcieb_devstate_t	*pcieb = ddi_get_soft_state(pcieb_state, inst);
1613 	int		rv;
1614 
1615 	if (pcieb == NULL)
1616 		return (ENXIO);
1617 
1618 	/*
1619 	 * Check if this is one of the commands that the bridge driver natively
1620 	 * understands. There are only a handful of such private ioctls defined
1621 	 * in pcieb_ioctl.h. Otherwise, this ioctl should be handled by the
1622 	 * general pcie driver.
1623 	 */
1624 	switch (cmd) {
1625 	case PCIEB_IOCTL_RETRAIN:
1626 		rv = pcieb_ioctl_retrain(pcieb, credp);
1627 		break;
1628 	case PCIEB_IOCTL_GET_TARGET_SPEED:
1629 		rv = pcieb_ioctl_get_speed(pcieb, arg, mode, credp);
1630 		break;
1631 	case PCIEB_IOCTL_SET_TARGET_SPEED:
1632 		rv = pcieb_ioctl_set_speed(pcieb, arg, mode, credp);
1633 		break;
1634 	default:
1635 		/* To handle devctl and hotplug related ioctls */
1636 		rv = pcie_ioctl(pcieb->pcieb_dip, dev, cmd, arg, mode, credp,
1637 		    rvalp);
1638 		break;
1639 	}
1640 
1641 	return (rv);
1642 }
1643 
1644 /*
1645  * Common interrupt handler for hotplug, PME and errors.
1646  */
1647 static uint_t
1648 pcieb_intr_handler(caddr_t arg1, caddr_t arg2)
1649 {
1650 	pcieb_devstate_t *pcieb_p = (pcieb_devstate_t *)arg1;
1651 	dev_info_t	*dip = pcieb_p->pcieb_dip;
1652 	ddi_fm_error_t	derr;
1653 	int		sts = 0;
1654 	int		ret = DDI_INTR_UNCLAIMED;
1655 	int		isrc;
1656 
1657 	if (!(pcieb_p->pcieb_init_flags & PCIEB_INIT_ENABLE))
1658 		goto FAIL;
1659 
1660 	mutex_enter(&pcieb_p->pcieb_intr_mutex);
1661 	isrc = pcieb_p->pcieb_isr_tab[(int)(uintptr_t)arg2];
1662 	mutex_exit(&pcieb_p->pcieb_intr_mutex);
1663 
1664 	PCIEB_DEBUG(DBG_INTR, dip, "Received intr number %d\n",
1665 	    (int)(uintptr_t)arg2);
1666 
1667 	if (isrc == PCIEB_INTR_SRC_UNKNOWN)
1668 		goto FAIL;
1669 
1670 	if (isrc & (PCIEB_INTR_SRC_HP | PCIEB_INTR_SRC_LBW))
1671 		ret = pcie_intr(dip);
1672 
1673 	if (isrc & PCIEB_INTR_SRC_PME)
1674 		ret = DDI_INTR_CLAIMED;
1675 
1676 	/* AER Error */
1677 	if (isrc & PCIEB_INTR_SRC_AER) {
1678 		/*
1679 		 *  If MSI is shared with PME/hotplug then check Root Error
1680 		 *  Status Reg before claiming it. For now it's ok since
1681 		 *  we know we get 2 MSIs.
1682 		 */
1683 		ret = DDI_INTR_CLAIMED;
1684 		bzero(&derr, sizeof (ddi_fm_error_t));
1685 		derr.fme_version = DDI_FME_VERSION;
1686 		mutex_enter(&pcieb_p->pcieb_peek_poke_mutex);
1687 		mutex_enter(&pcieb_p->pcieb_err_mutex);
1688 
1689 		pf_eh_enter(PCIE_DIP2BUS(dip));
1690 		PCIE_ROOT_EH_SRC(PCIE_DIP2PFD(dip))->intr_type =
1691 		    PF_INTR_TYPE_AER;
1692 
1693 		if ((DEVI(dip)->devi_fmhdl->fh_cap) & DDI_FM_EREPORT_CAPABLE)
1694 			sts = pf_scan_fabric(dip, &derr, NULL);
1695 		pf_eh_exit(PCIE_DIP2BUS(dip));
1696 
1697 		mutex_exit(&pcieb_p->pcieb_err_mutex);
1698 		mutex_exit(&pcieb_p->pcieb_peek_poke_mutex);
1699 		if (pcieb_die & sts)
1700 			fm_panic("%s-%d: PCI(-X) Express Fatal Error. (0x%x)",
1701 			    ddi_driver_name(dip), ddi_get_instance(dip), sts);
1702 	}
1703 FAIL:
1704 	return (ret);
1705 }
1706 
1707 /*
1708  * Some PCI-X to PCI-E bridges do not support full 64-bit addressing on the
1709  * PCI-X side of the bridge.  We build a special version of this driver for
1710  * those bridges, which uses PCIEB_ADDR_LIMIT_LO and/or PCIEB_ADDR_LIMIT_HI
1711  * to define the range of values which the chip can handle.  The code below
1712  * then clamps the DMA address range supplied by the driver, preventing the
1713  * PCI-E nexus driver from allocating any memory the bridge can't deal
1714  * with.
1715  */
1716 static int
1717 pcieb_dma_allochdl(dev_info_t *dip, dev_info_t *rdip,
1718     ddi_dma_attr_t *attr_p, int (*waitfp)(caddr_t), caddr_t arg,
1719     ddi_dma_handle_t *handlep)
1720 {
1721 	int		ret;
1722 #ifdef	PCIEB_BCM
1723 	uint64_t	lim;
1724 
1725 	/*
1726 	 * If the leaf device's limits are outside than what the Broadcom
1727 	 * bridge can handle, we need to clip the values passed up the chain.
1728 	 */
1729 	lim = attr_p->dma_attr_addr_lo;
1730 	attr_p->dma_attr_addr_lo = MAX(lim, PCIEB_ADDR_LIMIT_LO);
1731 
1732 	lim = attr_p->dma_attr_addr_hi;
1733 	attr_p->dma_attr_addr_hi = MIN(lim, PCIEB_ADDR_LIMIT_HI);
1734 
1735 #endif	/* PCIEB_BCM */
1736 
1737 	/*
1738 	 * This is a software workaround to fix the Broadcom 5714/5715 PCIe-PCI
1739 	 * bridge prefetch bug. Intercept the DMA alloc handle request and set
1740 	 * PX_DMAI_FLAGS_MAP_BUFZONE flag in the handle. If this flag is set,
1741 	 * the px nexus driver will allocate an extra page & make it valid one,
1742 	 * for any DVMA request that comes from any of the Broadcom bridge child
1743 	 * devices.
1744 	 */
1745 	if ((ret = ddi_dma_allochdl(dip, rdip, attr_p, waitfp, arg,
1746 	    handlep)) == DDI_SUCCESS) {
1747 		ddi_dma_impl_t	*mp = (ddi_dma_impl_t *)*handlep;
1748 #ifdef	PCIEB_BCM
1749 		mp->dmai_inuse |= PX_DMAI_FLAGS_MAP_BUFZONE;
1750 #endif	/* PCIEB_BCM */
1751 		/*
1752 		 * For a given rdip, update mp->dmai_bdf with the bdf value
1753 		 * of pcieb's immediate child or secondary bus-id of the
1754 		 * PCIe2PCI bridge.
1755 		 */
1756 		mp->dmai_minxfer = pcie_get_bdf_for_dma_xfer(dip, rdip);
1757 	}
1758 
1759 	return (ret);
1760 }
1761 
1762 /*
1763  * FDVMA feature is not supported for any child device of Broadcom 5714/5715
1764  * PCIe-PCI bridge due to prefetch bug. Return failure immediately, so that
1765  * these drivers will switch to regular DVMA path.
1766  */
1767 /*ARGSUSED*/
1768 static int
1769 pcieb_dma_mctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
1770     enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
1771     uint_t cache_flags)
1772 {
1773 	int	ret;
1774 
1775 #ifdef	PCIEB_BCM
1776 	if (cmd == DDI_DMA_RESERVE)
1777 		return (DDI_FAILURE);
1778 #endif	/* PCIEB_BCM */
1779 
1780 	if (((ret = ddi_dma_mctl(dip, rdip, handle, cmd, offp, lenp, objp,
1781 	    cache_flags)) == DDI_SUCCESS) && (cmd == DDI_DMA_RESERVE)) {
1782 		ddi_dma_impl_t	*mp = (ddi_dma_impl_t *)*objp;
1783 
1784 		/*
1785 		 * For a given rdip, update mp->dmai_bdf with the bdf value
1786 		 * of pcieb's immediate child or secondary bus-id of the
1787 		 * PCIe2PCI bridge.
1788 		 */
1789 		mp->dmai_minxfer = pcie_get_bdf_for_dma_xfer(dip, rdip);
1790 	}
1791 
1792 	return (ret);
1793 }
1794 
1795 static int
1796 pcieb_intr_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
1797     ddi_intr_handle_impl_t *hdlp, void *result)
1798 {
1799 	return (pcieb_plat_intr_ops(dip, rdip, intr_op, hdlp, result));
1800 
1801 }
1802 
1803 /*
1804  * Power management related initialization specific to pcieb.
1805  * Called by pcieb_attach()
1806  */
1807 static int
1808 pcieb_pwr_setup(dev_info_t *dip)
1809 {
1810 	char *comp_array[5];
1811 	int i;
1812 	ddi_acc_handle_t conf_hdl;
1813 	uint16_t pmcap, cap_ptr;
1814 	pcie_pwr_t *pwr_p;
1815 
1816 	/* Some platforms/devices may choose to disable PM */
1817 	if (pcieb_plat_pwr_disable(dip)) {
1818 		(void) pcieb_pwr_disable(dip);
1819 		return (DDI_SUCCESS);
1820 	}
1821 
1822 	ASSERT(PCIE_PMINFO(dip));
1823 	pwr_p = PCIE_NEXUS_PMINFO(dip);
1824 	ASSERT(pwr_p);
1825 
1826 	/* Code taken from pci_pci driver */
1827 	if (pci_config_setup(dip, &pwr_p->pwr_conf_hdl) != DDI_SUCCESS) {
1828 		PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_setup: pci_config_setup "
1829 		    "failed\n");
1830 		return (DDI_FAILURE);
1831 	}
1832 	conf_hdl = pwr_p->pwr_conf_hdl;
1833 
1834 	/*
1835 	 * Walk the capabilities searching for a PM entry.
1836 	 */
1837 	if ((PCI_CAP_LOCATE(conf_hdl, PCI_CAP_ID_PM, &cap_ptr)) ==
1838 	    DDI_FAILURE) {
1839 		PCIEB_DEBUG(DBG_PWR, dip, "switch/bridge does not support PM. "
1840 		    " PCI PM data structure not found in config header\n");
1841 		pci_config_teardown(&conf_hdl);
1842 		return (DDI_SUCCESS);
1843 	}
1844 	/*
1845 	 * Save offset to pmcsr for future references.
1846 	 */
1847 	pwr_p->pwr_pmcsr_offset = cap_ptr + PCI_PMCSR;
1848 	pmcap = PCI_CAP_GET16(conf_hdl, 0, cap_ptr, PCI_PMCAP);
1849 	if (pmcap & PCI_PMCAP_D1) {
1850 		PCIEB_DEBUG(DBG_PWR, dip, "D1 state supported\n");
1851 		pwr_p->pwr_pmcaps |= PCIE_SUPPORTS_D1;
1852 	}
1853 	if (pmcap & PCI_PMCAP_D2) {
1854 		PCIEB_DEBUG(DBG_PWR, dip, "D2 state supported\n");
1855 		pwr_p->pwr_pmcaps |= PCIE_SUPPORTS_D2;
1856 	}
1857 
1858 	i = 0;
1859 	comp_array[i++] = "NAME=PCIe switch/bridge PM";
1860 	comp_array[i++] = "0=Power Off (D3)";
1861 	if (pwr_p->pwr_pmcaps & PCIE_SUPPORTS_D2)
1862 		comp_array[i++] = "1=D2";
1863 	if (pwr_p->pwr_pmcaps & PCIE_SUPPORTS_D1)
1864 		comp_array[i++] = "2=D1";
1865 	comp_array[i++] = "3=Full Power D0";
1866 
1867 	/*
1868 	 * Create pm-components property, if it does not exist already.
1869 	 */
1870 	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, dip,
1871 	    "pm-components", comp_array, i) != DDI_PROP_SUCCESS) {
1872 		PCIEB_DEBUG(DBG_PWR, dip, "could not create pm-components "
1873 		    " prop\n");
1874 		pci_config_teardown(&conf_hdl);
1875 		return (DDI_FAILURE);
1876 	}
1877 	return (pcieb_pwr_init_and_raise(dip, pwr_p));
1878 }
1879 
1880 /*
1881  * undo whatever is done in pcieb_pwr_setup. called by pcieb_detach()
1882  */
1883 static void
1884 pcieb_pwr_teardown(dev_info_t *dip)
1885 {
1886 	pcie_pwr_t	*pwr_p;
1887 
1888 	if (!PCIE_PMINFO(dip) || !(pwr_p = PCIE_NEXUS_PMINFO(dip)))
1889 		return;
1890 
1891 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "pm-components");
1892 	if (pwr_p->pwr_conf_hdl)
1893 		pci_config_teardown(&pwr_p->pwr_conf_hdl);
1894 }
1895 
1896 /*
1897  * Initializes the power level and raise the power to D0, if it is
1898  * not at D0.
1899  */
1900 static int
1901 pcieb_pwr_init_and_raise(dev_info_t *dip, pcie_pwr_t *pwr_p)
1902 {
1903 	uint16_t pmcsr;
1904 	int ret = DDI_SUCCESS;
1905 
1906 	/*
1907 	 * Intialize our power level from PMCSR. The common code initializes
1908 	 * this to UNKNOWN. There is no guarantee that we will be at full
1909 	 * power at attach. If we are not at D0, raise the power.
1910 	 */
1911 	pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl, pwr_p->pwr_pmcsr_offset);
1912 	pmcsr &= PCI_PMCSR_STATE_MASK;
1913 	switch (pmcsr) {
1914 	case PCI_PMCSR_D0:
1915 		pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1916 		break;
1917 
1918 	case PCI_PMCSR_D1:
1919 		pwr_p->pwr_func_lvl = PM_LEVEL_D1;
1920 		break;
1921 
1922 	case PCI_PMCSR_D2:
1923 		pwr_p->pwr_func_lvl = PM_LEVEL_D2;
1924 		break;
1925 
1926 	case PCI_PMCSR_D3HOT:
1927 		pwr_p->pwr_func_lvl = PM_LEVEL_D3;
1928 		break;
1929 
1930 	default:
1931 		break;
1932 	}
1933 
1934 	/* Raise the power to D0. */
1935 	if (pwr_p->pwr_func_lvl != PM_LEVEL_D0 &&
1936 	    ((ret = pm_raise_power(dip, 0, PM_LEVEL_D0)) != DDI_SUCCESS)) {
1937 		/*
1938 		 * Read PMCSR again. If it is at D0, ignore the return
1939 		 * value from pm_raise_power.
1940 		 */
1941 		pmcsr = pci_config_get16(pwr_p->pwr_conf_hdl,
1942 		    pwr_p->pwr_pmcsr_offset);
1943 		if ((pmcsr & PCI_PMCSR_STATE_MASK) == PCI_PMCSR_D0)
1944 			ret = DDI_SUCCESS;
1945 		else {
1946 			PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_setup: could not "
1947 			    "raise power to D0 \n");
1948 		}
1949 	}
1950 	if (ret == DDI_SUCCESS)
1951 		pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1952 	return (ret);
1953 }
1954 
1955 /*
1956  * Disable PM for x86 and PLX 8532 switch.
1957  * For PLX Transitioning one port on this switch to low power causes links
1958  * on other ports on the same station to die. Due to PLX erratum #34, we
1959  * can't allow the downstream device go to non-D0 state.
1960  */
1961 static int
1962 pcieb_pwr_disable(dev_info_t *dip)
1963 {
1964 	pcie_pwr_t *pwr_p;
1965 
1966 	ASSERT(PCIE_PMINFO(dip));
1967 	pwr_p = PCIE_NEXUS_PMINFO(dip);
1968 	ASSERT(pwr_p);
1969 	PCIEB_DEBUG(DBG_PWR, dip, "pcieb_pwr_disable: disabling PM\n");
1970 	pwr_p->pwr_func_lvl = PM_LEVEL_D0;
1971 	pwr_p->pwr_flags = PCIE_NO_CHILD_PM;
1972 	return (DDI_SUCCESS);
1973 }
1974 
#ifdef DEBUG
/* When non-zero, debug messages are also printed from interrupt context */
int pcieb_dbg_intr_print = 0;

/*
 * Debug print routine, compiled in DEBUG builds only.
 *
 * Nothing is printed unless pcieb_dbg_print is set.  When dip is given, a
 * prefix with the driver name, instance and the symbol for bit is printed
 * first.  The message itself is suppressed while servicing an interrupt
 * unless pcieb_dbg_intr_print is set.
 */
void
pcieb_dbg(uint_t bit, dev_info_t *dip, char *fmt, ...)
{
	va_list args;

	if (!pcieb_dbg_print)
		return;

	if (dip) {
		prom_printf("%s(%d): %s", ddi_driver_name(dip),
		    ddi_get_instance(dip), pcieb_debug_sym[bit]);
	}

	/* The prefix above is printed even when the message is dropped */
	if (servicing_interrupt() && !pcieb_dbg_intr_print)
		return;

	va_start(args, fmt);
	prom_vprintf(fmt, args);
	va_end(args);
}
#endif
2000 
2001 static void
2002 pcieb_id_props(pcieb_devstate_t *pcieb)
2003 {
2004 	uint64_t serialid = 0;	/* 40b field of EUI-64 serial no. register */
2005 	uint16_t cap_ptr;
2006 	uint8_t fic = 0;	/* 1 = first in chassis device */
2007 	pcie_bus_t *bus_p = PCIE_DIP2BUS(pcieb->pcieb_dip);
2008 	ddi_acc_handle_t config_handle = bus_p->bus_cfg_hdl;
2009 
2010 	/*
2011 	 * Identify first in chassis.  In the special case of a Sun branded
2012 	 * PLX device, it obviously is first in chassis.  Otherwise, in the
2013 	 * general case, look for an Expansion Slot Register and check its
2014 	 * first-in-chassis bit.
2015 	 */
2016 #ifdef	PX_PLX
2017 	uint16_t vendor_id = bus_p->bus_dev_ven_id & 0xFFFF;
2018 	uint16_t device_id = bus_p->bus_dev_ven_id >> 16;
2019 	if ((vendor_id == PXB_VENDOR_SUN) &&
2020 	    ((device_id == PXB_DEVICE_PLX_PCIX) ||
2021 	    (device_id == PXB_DEVICE_PLX_PCIE))) {
2022 		fic = 1;
2023 	}
2024 #endif	/* PX_PLX */
2025 	if ((fic == 0) && ((PCI_CAP_LOCATE(config_handle,
2026 	    PCI_CAP_ID_SLOT_ID, &cap_ptr)) != DDI_FAILURE)) {
2027 		uint8_t esr = PCI_CAP_GET8(config_handle, 0,
2028 		    cap_ptr, PCI_CAP_ID_REGS_OFF);
2029 		if (PCI_CAPSLOT_FIC(esr))
2030 			fic = 1;
2031 	}
2032 
2033 	if ((PCI_CAP_LOCATE(config_handle,
2034 	    PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_SER), &cap_ptr)) != DDI_FAILURE) {
2035 		/* Serialid can be 0 thru a full 40b number */
2036 		serialid = PCI_XCAP_GET32(config_handle, 0,
2037 		    cap_ptr, PCIE_SER_SID_UPPER_DW);
2038 		serialid <<= 32;
2039 		serialid |= PCI_XCAP_GET32(config_handle, 0,
2040 		    cap_ptr, PCIE_SER_SID_LOWER_DW);
2041 	}
2042 
2043 	if (fic)
2044 		(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, pcieb->pcieb_dip,
2045 		    "first-in-chassis");
2046 	if (serialid)
2047 		(void) ddi_prop_update_int64(DDI_DEV_T_NONE, pcieb->pcieb_dip,
2048 		    "serialid#", serialid);
2049 }
2050 
2051 static void
2052 pcieb_create_ranges_prop(dev_info_t *dip,
2053     ddi_acc_handle_t config_handle)
2054 {
2055 	uint32_t base, limit;
2056 	ppb_ranges_t	ranges[PCIEB_RANGE_LEN];
2057 	uint8_t io_base_lo, io_limit_lo;
2058 	uint16_t io_base_hi, io_limit_hi, mem_base, mem_limit;
2059 	int i = 0, rangelen = sizeof (ppb_ranges_t)/sizeof (int);
2060 
2061 	io_base_lo = pci_config_get8(config_handle, PCI_BCNF_IO_BASE_LOW);
2062 	io_limit_lo = pci_config_get8(config_handle, PCI_BCNF_IO_LIMIT_LOW);
2063 	io_base_hi = pci_config_get16(config_handle, PCI_BCNF_IO_BASE_HI);
2064 	io_limit_hi = pci_config_get16(config_handle, PCI_BCNF_IO_LIMIT_HI);
2065 	mem_base = pci_config_get16(config_handle, PCI_BCNF_MEM_BASE);
2066 	mem_limit = pci_config_get16(config_handle, PCI_BCNF_MEM_LIMIT);
2067 
2068 	/*
2069 	 * Create ranges for IO space
2070 	 */
2071 	ranges[i].size_low = ranges[i].size_high = 0;
2072 	ranges[i].parent_mid = ranges[i].child_mid = ranges[i].parent_high = 0;
2073 	ranges[i].child_high = ranges[i].parent_high |=
2074 	    (PCI_REG_REL_M | PCI_ADDR_IO);
2075 	base = PCIEB_16bit_IOADDR(io_base_lo);
2076 	limit = PCIEB_16bit_IOADDR(io_limit_lo);
2077 
2078 	if ((io_base_lo & 0xf) == PCIEB_32BIT_IO) {
2079 		base = PCIEB_LADDR(base, io_base_hi);
2080 	}
2081 	if ((io_limit_lo & 0xf) == PCIEB_32BIT_IO) {
2082 		limit = PCIEB_LADDR(limit, io_limit_hi);
2083 	}
2084 
2085 	if ((io_base_lo & PCIEB_32BIT_IO) && (io_limit_hi > 0)) {
2086 		base = PCIEB_LADDR(base, io_base_hi);
2087 		limit = PCIEB_LADDR(limit, io_limit_hi);
2088 	}
2089 
2090 	/*
2091 	 * Create ranges for 32bit memory space
2092 	 */
2093 	base = PCIEB_32bit_MEMADDR(mem_base);
2094 	limit = PCIEB_32bit_MEMADDR(mem_limit);
2095 	ranges[i].size_low = ranges[i].size_high = 0;
2096 	ranges[i].parent_mid = ranges[i].child_mid = ranges[i].parent_high = 0;
2097 	ranges[i].child_high = ranges[i].parent_high |=
2098 	    (PCI_REG_REL_M | PCI_ADDR_MEM32);
2099 	ranges[i].child_low = ranges[i].parent_low = base;
2100 	if (limit >= base) {
2101 		ranges[i].size_low = limit - base + PCIEB_MEMGRAIN;
2102 		i++;
2103 	}
2104 
2105 	if (i) {
2106 		(void) ndi_prop_update_int_array(DDI_DEV_T_NONE, dip, "ranges",
2107 		    (int *)ranges, i * rangelen);
2108 	}
2109 }
2110 
2111 /*
2112  * For PCI and PCI-X devices including PCIe2PCI bridge, initialize
2113  * cache-line-size and latency timer configuration registers.
2114  */
2115 void
2116 pcieb_set_pci_perf_parameters(dev_info_t *dip, ddi_acc_handle_t cfg_hdl)
2117 {
2118 	uint_t	n;
2119 
2120 	/* Initialize cache-line-size configuration register if needed */
2121 	if (ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2122 	    "cache-line-size", 0) == 0) {
2123 		pci_config_put8(cfg_hdl, PCI_CONF_CACHE_LINESZ,
2124 		    PCIEB_CACHE_LINE_SIZE);
2125 		n = pci_config_get8(cfg_hdl, PCI_CONF_CACHE_LINESZ);
2126 		if (n != 0) {
2127 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
2128 			    "cache-line-size", n);
2129 		}
2130 	}
2131 
2132 	/* Initialize latency timer configuration registers if needed */
2133 	if (ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2134 	    "latency-timer", 0) == 0) {
2135 		uchar_t	min_gnt, latency_timer;
2136 		uchar_t header_type;
2137 
2138 		/* Determine the configuration header type */
2139 		header_type = pci_config_get8(cfg_hdl, PCI_CONF_HEADER);
2140 
2141 		if ((header_type & PCI_HEADER_TYPE_M) == PCI_HEADER_ONE) {
2142 			latency_timer = PCIEB_LATENCY_TIMER;
2143 			pci_config_put8(cfg_hdl, PCI_BCNF_LATENCY_TIMER,
2144 			    latency_timer);
2145 		} else {
2146 			min_gnt = pci_config_get8(cfg_hdl, PCI_CONF_MIN_G);
2147 			latency_timer = min_gnt * 8;
2148 		}
2149 
2150 		pci_config_put8(cfg_hdl, PCI_CONF_LATENCY_TIMER,
2151 		    latency_timer);
2152 		n = pci_config_get8(cfg_hdl, PCI_CONF_LATENCY_TIMER);
2153 		if (n != 0) {
2154 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
2155 			    "latency-timer", n);
2156 		}
2157 	}
2158 }
2159