xref: /titanic_52/usr/src/uts/sun4u/io/px/px_lib4u.c (revision f936286c99fb83153e4bfd870eb2830a990a82c1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/kmem.h>
27 #include <sys/conf.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/sunndi.h>
31 #include <sys/fm/protocol.h>
32 #include <sys/fm/util.h>
33 #include <sys/modctl.h>
34 #include <sys/disp.h>
35 #include <sys/stat.h>
36 #include <sys/ddi_impldefs.h>
37 #include <sys/vmem.h>
38 #include <sys/iommutsb.h>
39 #include <sys/cpuvar.h>
40 #include <sys/ivintr.h>
41 #include <sys/byteorder.h>
42 #include <sys/spl.h>
43 #include <px_obj.h>
44 #include <sys/pcie_pwr.h>
45 #include "px_tools_var.h"
46 #include <px_regs.h>
47 #include <px_csr.h>
48 #include <sys/machsystm.h>
49 #include "px_lib4u.h"
50 #include "px_err.h"
51 #include "oberon_regs.h"
52 #include <sys/hotplug/pci/pcie_hp.h>
53 
54 #pragma weak jbus_stst_order
55 
56 extern void jbus_stst_order();
57 
58 ulong_t px_mmu_dvma_end = 0xfffffffful;
59 uint_t px_ranges_phi_mask = 0xfffffffful;
60 uint64_t *px_oberon_ubc_scratch_regs;
61 uint64_t px_paddr_mask;
62 
63 static int px_goto_l23ready(px_t *px_p);
64 static int px_goto_l0(px_t *px_p);
65 static int px_pre_pwron_check(px_t *px_p);
66 static uint32_t px_identity_init(px_t *px_p);
67 static boolean_t px_cpr_callb(void *arg, int code);
68 static uint_t px_cb_intr(caddr_t arg);
69 
70 /*
71  * ACKNAK Latency Threshold Table.
72  * See Fire PRM 2.0 section 1.2.12.2, table 1-17.
73  */
74 int px_acknak_timer_table[LINK_MAX_PKT_ARR_SIZE][LINK_WIDTH_ARR_SIZE] = {
75 	{0xED,   0x49,  0x43,  0x30},
76 	{0x1A0,  0x76,  0x6B,  0x48},
77 	{0x22F,  0x9A,  0x56,  0x56},
78 	{0x42F,  0x11A, 0x96,  0x96},
79 	{0x82F,  0x21A, 0x116, 0x116},
80 	{0x102F, 0x41A, 0x216, 0x216}
81 };
82 
83 /*
84  * TxLink Replay Timer Latency Table
85  * See Fire PRM 2.0 sections 1.2.12.3, table 1-18.
86  */
87 int px_replay_timer_table[LINK_MAX_PKT_ARR_SIZE][LINK_WIDTH_ARR_SIZE] = {
88 	{0x379,  0x112, 0xFC,  0xB4},
89 	{0x618,  0x1BA, 0x192, 0x10E},
90 	{0x831,  0x242, 0x143, 0x143},
91 	{0xFB1,  0x422, 0x233, 0x233},
92 	{0x1EB0, 0x7E1, 0x412, 0x412},
93 	{0x3CB0, 0xF61, 0x7D2, 0x7D2}
94 };
95 /*
96  * px_lib_map_registers
97  *
98  * This function is called from the attach routine to map the registers
99  * accessed by this driver.
100  *
101  * used by: px_attach()
102  *
103  * return value: DDI_FAILURE on failure
104  */
105 int
106 px_lib_map_regs(pxu_t *pxu_p, dev_info_t *dip)
107 {
108 	ddi_device_acc_attr_t	attr;
109 	px_reg_bank_t		reg_bank = PX_REG_CSR;
110 
111 	DBG(DBG_ATTACH, dip, "px_lib_map_regs: pxu_p:0x%p, dip 0x%p\n",
112 	    pxu_p, dip);
113 
114 	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
115 	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
116 	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
117 
118 	/*
119 	 * PCI CSR Base
120 	 */
121 	if (ddi_regs_map_setup(dip, reg_bank, &pxu_p->px_address[reg_bank],
122 	    0, 0, &attr, &pxu_p->px_ac[reg_bank]) != DDI_SUCCESS) {
123 		goto fail;
124 	}
125 
126 	reg_bank++;
127 
128 	/*
129 	 * XBUS CSR Base
130 	 */
131 	if (ddi_regs_map_setup(dip, reg_bank, &pxu_p->px_address[reg_bank],
132 	    0, 0, &attr, &pxu_p->px_ac[reg_bank]) != DDI_SUCCESS) {
133 		goto fail;
134 	}
135 
136 	pxu_p->px_address[reg_bank] -= FIRE_CONTROL_STATUS;
137 
138 done:
139 	for (; reg_bank >= PX_REG_CSR; reg_bank--) {
140 		DBG(DBG_ATTACH, dip, "reg_bank 0x%x address 0x%p\n",
141 		    reg_bank, pxu_p->px_address[reg_bank]);
142 	}
143 
144 	return (DDI_SUCCESS);
145 
146 fail:
147 	cmn_err(CE_WARN, "%s%d: unable to map reg entry %d\n",
148 	    ddi_driver_name(dip), ddi_get_instance(dip), reg_bank);
149 
150 	for (reg_bank--; reg_bank >= PX_REG_CSR; reg_bank--) {
151 		pxu_p->px_address[reg_bank] = NULL;
152 		ddi_regs_map_free(&pxu_p->px_ac[reg_bank]);
153 	}
154 
155 	return (DDI_FAILURE);
156 }
157 
158 /*
159  * px_lib_unmap_regs:
160  *
161  * This routine unmaps the registers mapped by map_px_registers.
162  *
163  * used by: px_detach(), and error conditions in px_attach()
164  *
165  * return value: none
166  */
167 void
168 px_lib_unmap_regs(pxu_t *pxu_p)
169 {
170 	int i;
171 
172 	for (i = 0; i < PX_REG_MAX; i++) {
173 		if (pxu_p->px_ac[i])
174 			ddi_regs_map_free(&pxu_p->px_ac[i]);
175 	}
176 }
177 
178 int
179 px_lib_dev_init(dev_info_t *dip, devhandle_t *dev_hdl)
180 {
181 
182 	caddr_t			xbc_csr_base, csr_base;
183 	px_dvma_range_prop_t	px_dvma_range;
184 	pxu_t			*pxu_p;
185 	uint8_t			chip_mask;
186 	px_t			*px_p = DIP_TO_STATE(dip);
187 	px_chip_type_t		chip_type = px_identity_init(px_p);
188 
189 	DBG(DBG_ATTACH, dip, "px_lib_dev_init: dip 0x%p", dip);
190 
191 	if (chip_type == PX_CHIP_UNIDENTIFIED) {
192 		cmn_err(CE_WARN, "%s%d: Unrecognized Hardware Version\n",
193 		    NAMEINST(dip));
194 		return (DDI_FAILURE);
195 	}
196 
197 	chip_mask = BITMASK(chip_type);
198 	px_paddr_mask = (chip_type == PX_CHIP_FIRE) ? MMU_FIRE_PADDR_MASK :
199 	    MMU_OBERON_PADDR_MASK;
200 
201 	/*
202 	 * Allocate platform specific structure and link it to
203 	 * the px state structure.
204 	 */
205 	pxu_p = kmem_zalloc(sizeof (pxu_t), KM_SLEEP);
206 	pxu_p->chip_type = chip_type;
207 	pxu_p->portid  = ddi_getprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
208 	    "portid", -1);
209 
210 	/* Map in the registers */
211 	if (px_lib_map_regs(pxu_p, dip) == DDI_FAILURE) {
212 		kmem_free(pxu_p, sizeof (pxu_t));
213 
214 		return (DDI_FAILURE);
215 	}
216 
217 	xbc_csr_base = (caddr_t)pxu_p->px_address[PX_REG_XBC];
218 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
219 
220 	pxu_p->tsb_cookie = iommu_tsb_alloc(pxu_p->portid);
221 	pxu_p->tsb_size = iommu_tsb_cookie_to_size(pxu_p->tsb_cookie);
222 	pxu_p->tsb_vaddr = iommu_tsb_cookie_to_va(pxu_p->tsb_cookie);
223 
224 	pxu_p->tsb_paddr = va_to_pa(pxu_p->tsb_vaddr);
225 
226 	/*
227 	 * Create "virtual-dma" property to support child devices
228 	 * needing to know DVMA range.
229 	 */
230 	px_dvma_range.dvma_base = (uint32_t)px_mmu_dvma_end + 1
231 	    - ((pxu_p->tsb_size >> 3) << MMU_PAGE_SHIFT);
232 	px_dvma_range.dvma_len = (uint32_t)
233 	    px_mmu_dvma_end - px_dvma_range.dvma_base + 1;
234 
235 	(void) ddi_prop_update_int_array(DDI_DEV_T_NONE, dip,
236 	    "virtual-dma", (int *)&px_dvma_range,
237 	    sizeof (px_dvma_range_prop_t) / sizeof (int));
238 	/*
239 	 * Initilize all fire hardware specific blocks.
240 	 */
241 	hvio_cb_init(xbc_csr_base, pxu_p);
242 	hvio_ib_init(csr_base, pxu_p);
243 	hvio_pec_init(csr_base, pxu_p);
244 	hvio_mmu_init(csr_base, pxu_p);
245 
246 	px_p->px_plat_p = (void *)pxu_p;
247 
248 	/*
249 	 * Initialize all the interrupt handlers
250 	 */
251 	switch (PX_CHIP_TYPE(pxu_p)) {
252 	case PX_CHIP_OBERON:
253 		/*
254 		 * Oberon hotplug uses SPARE3 field in ILU Error Log Enable
255 		 * register to indicate the status of leaf reset,
256 		 * we need to preserve the value of this bit, and keep it in
257 		 * px_ilu_log_mask to reflect the state of the bit
258 		 */
259 		if (CSR_BR(csr_base, ILU_ERROR_LOG_ENABLE, SPARE3))
260 			px_ilu_log_mask |= (1ull <<
261 			    ILU_ERROR_LOG_ENABLE_SPARE3);
262 		else
263 			px_ilu_log_mask &= ~(1ull <<
264 			    ILU_ERROR_LOG_ENABLE_SPARE3);
265 
266 		px_err_reg_setup_pcie(chip_mask, csr_base, PX_ERR_ENABLE);
267 		break;
268 
269 	case PX_CHIP_FIRE:
270 		px_err_reg_setup_pcie(chip_mask, csr_base, PX_ERR_ENABLE);
271 		break;
272 
273 	default:
274 		cmn_err(CE_WARN, "%s%d: PX primary bus Unknown\n",
275 		    ddi_driver_name(dip), ddi_get_instance(dip));
276 		return (DDI_FAILURE);
277 	}
278 
279 	/* Initilize device handle */
280 	*dev_hdl = (devhandle_t)csr_base;
281 
282 	DBG(DBG_ATTACH, dip, "px_lib_dev_init: dev_hdl 0x%llx\n", *dev_hdl);
283 
284 	/* Sun4u always support fixed interrupt */
285 	px_p->px_supp_intr_types |= DDI_INTR_TYPE_FIXED;
286 
287 	return (DDI_SUCCESS);
288 }
289 
290 int
291 px_lib_dev_fini(dev_info_t *dip)
292 {
293 	caddr_t			csr_base;
294 	uint8_t			chip_mask;
295 	px_t			*px_p = DIP_TO_STATE(dip);
296 	pxu_t			*pxu_p = (pxu_t *)px_p->px_plat_p;
297 
298 	DBG(DBG_DETACH, dip, "px_lib_dev_fini: dip 0x%p\n", dip);
299 
300 	/*
301 	 * Deinitialize all the interrupt handlers
302 	 */
303 	switch (PX_CHIP_TYPE(pxu_p)) {
304 	case PX_CHIP_OBERON:
305 	case PX_CHIP_FIRE:
306 		chip_mask = BITMASK(PX_CHIP_TYPE(pxu_p));
307 		csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
308 		px_err_reg_setup_pcie(chip_mask, csr_base, PX_ERR_DISABLE);
309 		break;
310 
311 	default:
312 		cmn_err(CE_WARN, "%s%d: PX primary bus Unknown\n",
313 		    ddi_driver_name(dip), ddi_get_instance(dip));
314 		return (DDI_FAILURE);
315 	}
316 
317 	iommu_tsb_free(pxu_p->tsb_cookie);
318 
319 	px_lib_unmap_regs((pxu_t *)px_p->px_plat_p);
320 	kmem_free(px_p->px_plat_p, sizeof (pxu_t));
321 	px_p->px_plat_p = NULL;
322 	(void) ddi_prop_remove(DDI_DEV_T_NONE, dip, "virtual-dma");
323 
324 	return (DDI_SUCCESS);
325 }
326 
327 /*ARGSUSED*/
328 int
329 px_lib_intr_devino_to_sysino(dev_info_t *dip, devino_t devino,
330     sysino_t *sysino)
331 {
332 	px_t	*px_p = DIP_TO_STATE(dip);
333 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
334 	uint64_t	ret;
335 
336 	DBG(DBG_LIB_INT, dip, "px_lib_intr_devino_to_sysino: dip 0x%p "
337 	    "devino 0x%x\n", dip, devino);
338 
339 	if ((ret = hvio_intr_devino_to_sysino(DIP_TO_HANDLE(dip),
340 	    pxu_p, devino, sysino)) != H_EOK) {
341 		DBG(DBG_LIB_INT, dip,
342 		    "hvio_intr_devino_to_sysino failed, ret 0x%lx\n", ret);
343 		return (DDI_FAILURE);
344 	}
345 
346 	DBG(DBG_LIB_INT, dip, "px_lib_intr_devino_to_sysino: sysino 0x%llx\n",
347 	    *sysino);
348 
349 	return (DDI_SUCCESS);
350 }
351 
352 /*ARGSUSED*/
353 int
354 px_lib_intr_getvalid(dev_info_t *dip, sysino_t sysino,
355     intr_valid_state_t *intr_valid_state)
356 {
357 	uint64_t	ret;
358 
359 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getvalid: dip 0x%p sysino 0x%llx\n",
360 	    dip, sysino);
361 
362 	if ((ret = hvio_intr_getvalid(DIP_TO_HANDLE(dip),
363 	    sysino, intr_valid_state)) != H_EOK) {
364 		DBG(DBG_LIB_INT, dip, "hvio_intr_getvalid failed, ret 0x%lx\n",
365 		    ret);
366 		return (DDI_FAILURE);
367 	}
368 
369 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getvalid: intr_valid_state 0x%x\n",
370 	    *intr_valid_state);
371 
372 	return (DDI_SUCCESS);
373 }
374 
375 /*ARGSUSED*/
376 int
377 px_lib_intr_setvalid(dev_info_t *dip, sysino_t sysino,
378     intr_valid_state_t intr_valid_state)
379 {
380 	uint64_t	ret;
381 
382 	DBG(DBG_LIB_INT, dip, "px_lib_intr_setvalid: dip 0x%p sysino 0x%llx "
383 	    "intr_valid_state 0x%x\n", dip, sysino, intr_valid_state);
384 
385 	if ((ret = hvio_intr_setvalid(DIP_TO_HANDLE(dip),
386 	    sysino, intr_valid_state)) != H_EOK) {
387 		DBG(DBG_LIB_INT, dip, "hvio_intr_setvalid failed, ret 0x%lx\n",
388 		    ret);
389 		return (DDI_FAILURE);
390 	}
391 
392 	return (DDI_SUCCESS);
393 }
394 
395 /*ARGSUSED*/
396 int
397 px_lib_intr_getstate(dev_info_t *dip, sysino_t sysino,
398     intr_state_t *intr_state)
399 {
400 	uint64_t	ret;
401 
402 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getstate: dip 0x%p sysino 0x%llx\n",
403 	    dip, sysino);
404 
405 	if ((ret = hvio_intr_getstate(DIP_TO_HANDLE(dip),
406 	    sysino, intr_state)) != H_EOK) {
407 		DBG(DBG_LIB_INT, dip, "hvio_intr_getstate failed, ret 0x%lx\n",
408 		    ret);
409 		return (DDI_FAILURE);
410 	}
411 
412 	DBG(DBG_LIB_INT, dip, "px_lib_intr_getstate: intr_state 0x%x\n",
413 	    *intr_state);
414 
415 	return (DDI_SUCCESS);
416 }
417 
418 /*ARGSUSED*/
419 int
420 px_lib_intr_setstate(dev_info_t *dip, sysino_t sysino,
421     intr_state_t intr_state)
422 {
423 	uint64_t	ret;
424 
425 	DBG(DBG_LIB_INT, dip, "px_lib_intr_setstate: dip 0x%p sysino 0x%llx "
426 	    "intr_state 0x%x\n", dip, sysino, intr_state);
427 
428 	if ((ret = hvio_intr_setstate(DIP_TO_HANDLE(dip),
429 	    sysino, intr_state)) != H_EOK) {
430 		DBG(DBG_LIB_INT, dip, "hvio_intr_setstate failed, ret 0x%lx\n",
431 		    ret);
432 		return (DDI_FAILURE);
433 	}
434 
435 	return (DDI_SUCCESS);
436 }
437 
438 /*ARGSUSED*/
439 int
440 px_lib_intr_gettarget(dev_info_t *dip, sysino_t sysino, cpuid_t *cpuid)
441 {
442 	px_t		*px_p = DIP_TO_STATE(dip);
443 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
444 	uint64_t	ret;
445 
446 	DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: dip 0x%p sysino 0x%llx\n",
447 	    dip, sysino);
448 
449 	if ((ret = hvio_intr_gettarget(DIP_TO_HANDLE(dip), pxu_p,
450 	    sysino, cpuid)) != H_EOK) {
451 		DBG(DBG_LIB_INT, dip, "hvio_intr_gettarget failed, ret 0x%lx\n",
452 		    ret);
453 		return (DDI_FAILURE);
454 	}
455 
456 	DBG(DBG_LIB_INT, dip, "px_lib_intr_gettarget: cpuid 0x%x\n", cpuid);
457 
458 	return (DDI_SUCCESS);
459 }
460 
461 /*ARGSUSED*/
462 int
463 px_lib_intr_settarget(dev_info_t *dip, sysino_t sysino, cpuid_t cpuid)
464 {
465 	px_t		*px_p = DIP_TO_STATE(dip);
466 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
467 	uint64_t	ret;
468 
469 	DBG(DBG_LIB_INT, dip, "px_lib_intr_settarget: dip 0x%p sysino 0x%llx "
470 	    "cpuid 0x%x\n", dip, sysino, cpuid);
471 
472 	if ((ret = hvio_intr_settarget(DIP_TO_HANDLE(dip), pxu_p,
473 	    sysino, cpuid)) != H_EOK) {
474 		DBG(DBG_LIB_INT, dip, "hvio_intr_settarget failed, ret 0x%lx\n",
475 		    ret);
476 		return (DDI_FAILURE);
477 	}
478 
479 	return (DDI_SUCCESS);
480 }
481 
482 /*ARGSUSED*/
483 int
484 px_lib_intr_reset(dev_info_t *dip)
485 {
486 	devino_t	ino;
487 	sysino_t	sysino;
488 
489 	DBG(DBG_LIB_INT, dip, "px_lib_intr_reset: dip 0x%p\n", dip);
490 
491 	/* Reset all Interrupts */
492 	for (ino = 0; ino < INTERRUPT_MAPPING_ENTRIES; ino++) {
493 		if (px_lib_intr_devino_to_sysino(dip, ino,
494 		    &sysino) != DDI_SUCCESS)
495 			return (BF_FATAL);
496 
497 		if (px_lib_intr_setstate(dip, sysino,
498 		    INTR_IDLE_STATE) != DDI_SUCCESS)
499 			return (BF_FATAL);
500 	}
501 
502 	return (BF_NONE);
503 }
504 
505 /*ARGSUSED*/
506 int
507 px_lib_iommu_map(dev_info_t *dip, tsbid_t tsbid, pages_t pages,
508     io_attributes_t attr, void *addr, size_t pfn_index, int flags)
509 {
510 	px_t		*px_p = DIP_TO_STATE(dip);
511 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
512 	uint64_t	ret;
513 
514 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_map: dip 0x%p tsbid 0x%llx "
515 	    "pages 0x%x attr 0x%llx addr 0x%p pfn_index 0x%llx flags 0x%x\n",
516 	    dip, tsbid, pages, attr, addr, pfn_index, flags);
517 
518 	if ((ret = hvio_iommu_map(px_p->px_dev_hdl, pxu_p, tsbid, pages,
519 	    attr, addr, pfn_index, flags)) != H_EOK) {
520 		DBG(DBG_LIB_DMA, dip,
521 		    "px_lib_iommu_map failed, ret 0x%lx\n", ret);
522 		return (DDI_FAILURE);
523 	}
524 
525 	return (DDI_SUCCESS);
526 }
527 
528 /*ARGSUSED*/
529 int
530 px_lib_iommu_demap(dev_info_t *dip, tsbid_t tsbid, pages_t pages)
531 {
532 	px_t		*px_p = DIP_TO_STATE(dip);
533 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
534 	uint64_t	ret;
535 
536 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_demap: dip 0x%p tsbid 0x%llx "
537 	    "pages 0x%x\n", dip, tsbid, pages);
538 
539 	if ((ret = hvio_iommu_demap(px_p->px_dev_hdl, pxu_p, tsbid, pages))
540 	    != H_EOK) {
541 		DBG(DBG_LIB_DMA, dip,
542 		    "px_lib_iommu_demap failed, ret 0x%lx\n", ret);
543 
544 		return (DDI_FAILURE);
545 	}
546 
547 	return (DDI_SUCCESS);
548 }
549 
550 /*ARGSUSED*/
551 int
552 px_lib_iommu_getmap(dev_info_t *dip, tsbid_t tsbid, io_attributes_t *attr_p,
553     r_addr_t *r_addr_p)
554 {
555 	px_t	*px_p = DIP_TO_STATE(dip);
556 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
557 	uint64_t	ret;
558 
559 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getmap: dip 0x%p tsbid 0x%llx\n",
560 	    dip, tsbid);
561 
562 	if ((ret = hvio_iommu_getmap(DIP_TO_HANDLE(dip), pxu_p, tsbid,
563 	    attr_p, r_addr_p)) != H_EOK) {
564 		DBG(DBG_LIB_DMA, dip,
565 		    "hvio_iommu_getmap failed, ret 0x%lx\n", ret);
566 
567 		return ((ret == H_ENOMAP) ? DDI_DMA_NOMAPPING:DDI_FAILURE);
568 	}
569 
570 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getmap: attr 0x%llx "
571 	    "r_addr 0x%llx\n", *attr_p, *r_addr_p);
572 
573 	return (DDI_SUCCESS);
574 }
575 
576 int
577 px_lib_iommu_detach(px_t *px_p)
578 {
579 	/*
580 	 * Deallocate DVMA addr space that was reserved for OBP TTE's
581 	 * during Attach.
582 	 */
583 	hvio_obptsb_detach(px_p);
584 
585 	return (DDI_SUCCESS);
586 }
587 
588 /*
589  * Checks dma attributes against system bypass ranges
590  * The bypass range is determined by the hardware. Return them so the
591  * common code can do generic checking against them.
592  */
593 /*ARGSUSED*/
594 int
595 px_lib_dma_bypass_rngchk(dev_info_t *dip, ddi_dma_attr_t *attr_p,
596     uint64_t *lo_p, uint64_t *hi_p)
597 {
598 	px_t	*px_p = DIP_TO_STATE(dip);
599 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
600 
601 	*lo_p = hvio_get_bypass_base(pxu_p);
602 	*hi_p = hvio_get_bypass_end(pxu_p);
603 
604 	return (DDI_SUCCESS);
605 }
606 
607 
608 /*ARGSUSED*/
609 int
610 px_lib_iommu_getbypass(dev_info_t *dip, r_addr_t ra, io_attributes_t attr,
611     io_addr_t *io_addr_p)
612 {
613 	uint64_t	ret;
614 	px_t	*px_p = DIP_TO_STATE(dip);
615 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
616 
617 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getbypass: dip 0x%p ra 0x%llx "
618 	    "attr 0x%llx\n", dip, ra, attr);
619 
620 	if ((ret = hvio_iommu_getbypass(DIP_TO_HANDLE(dip), pxu_p, ra,
621 	    attr, io_addr_p)) != H_EOK) {
622 		DBG(DBG_LIB_DMA, dip,
623 		    "hvio_iommu_getbypass failed, ret 0x%lx\n", ret);
624 		return (DDI_FAILURE);
625 	}
626 
627 	DBG(DBG_LIB_DMA, dip, "px_lib_iommu_getbypass: io_addr 0x%llx\n",
628 	    *io_addr_p);
629 
630 	return (DDI_SUCCESS);
631 }
632 
633 /*
634  * Returns any needed IO address bit(s) for relaxed ordering in IOMMU
635  * bypass mode.
636  */
637 uint64_t
638 px_lib_ro_bypass(dev_info_t *dip, io_attributes_t attr, uint64_t ioaddr)
639 {
640 	px_t	*px_p = DIP_TO_STATE(dip);
641 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
642 
643 	if ((PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON) && (attr & PCI_MAP_ATTR_RO))
644 		return (MMU_OBERON_BYPASS_RO | ioaddr);
645 	else
646 		return (ioaddr);
647 }
648 
649 /*
650  * bus dma sync entry point.
651  */
652 /*ARGSUSED*/
653 int
654 px_lib_dma_sync(dev_info_t *dip, dev_info_t *rdip, ddi_dma_handle_t handle,
655     off_t off, size_t len, uint_t cache_flags)
656 {
657 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle;
658 	px_t	*px_p = DIP_TO_STATE(dip);
659 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
660 
661 	DBG(DBG_LIB_DMA, dip, "px_lib_dma_sync: dip 0x%p rdip 0x%p "
662 	    "handle 0x%llx off 0x%x len 0x%x flags 0x%x\n",
663 	    dip, rdip, handle, off, len, cache_flags);
664 
665 	/*
666 	 * No flush needed for Oberon
667 	 */
668 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON)
669 		return (DDI_SUCCESS);
670 
671 	/*
672 	 * jbus_stst_order is found only in certain cpu modules.
673 	 * Just return success if not present.
674 	 */
675 	if (&jbus_stst_order == NULL)
676 		return (DDI_SUCCESS);
677 
678 	if (!(mp->dmai_flags & PX_DMAI_FLAGS_INUSE)) {
679 		cmn_err(CE_WARN, "%s%d: Unbound dma handle %p.",
680 		    ddi_driver_name(rdip), ddi_get_instance(rdip), (void *)mp);
681 
682 		return (DDI_FAILURE);
683 	}
684 
685 	if (mp->dmai_flags & PX_DMAI_FLAGS_NOSYNC)
686 		return (DDI_SUCCESS);
687 
688 	/*
689 	 * No flush needed when sending data from memory to device.
690 	 * Nothing to do to "sync" memory to what device would already see.
691 	 */
692 	if (!(mp->dmai_rflags & DDI_DMA_READ) ||
693 	    ((cache_flags & PX_DMA_SYNC_DDI_FLAGS) == DDI_DMA_SYNC_FORDEV))
694 		return (DDI_SUCCESS);
695 
696 	/*
697 	 * Perform necessary cpu workaround to ensure jbus ordering.
698 	 * CPU's internal "invalidate FIFOs" are flushed.
699 	 */
700 
701 #if !defined(lint)
702 	kpreempt_disable();
703 #endif
704 	jbus_stst_order();
705 #if !defined(lint)
706 	kpreempt_enable();
707 #endif
708 	return (DDI_SUCCESS);
709 }
710 
711 /*
712  * MSIQ Functions:
713  */
714 /*ARGSUSED*/
715 int
716 px_lib_msiq_init(dev_info_t *dip)
717 {
718 	px_t		*px_p = DIP_TO_STATE(dip);
719 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
720 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
721 	px_dvma_addr_t	pg_index;
722 	size_t		q_sz = msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t);
723 	size_t		size;
724 	int		i, ret;
725 
726 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_init: dip 0x%p\n", dip);
727 
728 	/* must aligned on q_sz (happens to be !!! page) boundary */
729 	ASSERT(q_sz == 8 * 1024);
730 
731 	/*
732 	 * Map the EQ memory into the Fire MMU (has to be 512KB aligned)
733 	 * and then initialize the base address register.
734 	 *
735 	 * Allocate entries from Fire IOMMU so that the resulting address
736 	 * is properly aligned.  Calculate the index of the first allocated
737 	 * entry.  Note: The size of the mapping is assumed to be a multiple
738 	 * of the page size.
739 	 */
740 	size = msiq_state_p->msiq_cnt * q_sz;
741 
742 	msiq_state_p->msiq_buf_p = kmem_zalloc(size, KM_SLEEP);
743 
744 	for (i = 0; i < msiq_state_p->msiq_cnt; i++)
745 		msiq_state_p->msiq_p[i].msiq_base_p = (msiqhead_t *)
746 		    ((caddr_t)msiq_state_p->msiq_buf_p + (i * q_sz));
747 
748 	pxu_p->msiq_mapped_p = vmem_xalloc(px_p->px_mmu_p->mmu_dvma_map,
749 	    size, (512 * 1024), 0, 0, NULL, NULL, VM_NOSLEEP | VM_BESTFIT);
750 
751 	if (pxu_p->msiq_mapped_p == NULL)
752 		return (DDI_FAILURE);
753 
754 	pg_index = MMU_PAGE_INDEX(px_p->px_mmu_p,
755 	    MMU_BTOP((ulong_t)pxu_p->msiq_mapped_p));
756 
757 	if ((ret = px_lib_iommu_map(px_p->px_dip, PCI_TSBID(0, pg_index),
758 	    MMU_BTOP(size), PCI_MAP_ATTR_WRITE, msiq_state_p->msiq_buf_p,
759 	    0, MMU_MAP_BUF)) != DDI_SUCCESS) {
760 		DBG(DBG_LIB_MSIQ, dip,
761 		    "px_lib_msiq_init: px_lib_iommu_map failed, "
762 		    "ret 0x%lx\n", ret);
763 
764 		(void) px_lib_msiq_fini(dip);
765 		return (DDI_FAILURE);
766 	}
767 
768 	if ((ret = hvio_msiq_init(DIP_TO_HANDLE(dip),
769 	    pxu_p)) != H_EOK) {
770 		DBG(DBG_LIB_MSIQ, dip,
771 		    "hvio_msiq_init failed, ret 0x%lx\n", ret);
772 
773 		(void) px_lib_msiq_fini(dip);
774 		return (DDI_FAILURE);
775 	}
776 
777 	return (DDI_SUCCESS);
778 }
779 
780 /*ARGSUSED*/
781 int
782 px_lib_msiq_fini(dev_info_t *dip)
783 {
784 	px_t		*px_p = DIP_TO_STATE(dip);
785 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
786 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
787 	px_dvma_addr_t	pg_index;
788 	size_t		size;
789 
790 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_fini: dip 0x%p\n", dip);
791 
792 	/*
793 	 * Unmap and free the EQ memory that had been mapped
794 	 * into the Fire IOMMU.
795 	 */
796 	size = msiq_state_p->msiq_cnt *
797 	    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t);
798 
799 	pg_index = MMU_PAGE_INDEX(px_p->px_mmu_p,
800 	    MMU_BTOP((ulong_t)pxu_p->msiq_mapped_p));
801 
802 	(void) px_lib_iommu_demap(px_p->px_dip,
803 	    PCI_TSBID(0, pg_index), MMU_BTOP(size));
804 
805 	/* Free the entries from the Fire MMU */
806 	vmem_xfree(px_p->px_mmu_p->mmu_dvma_map,
807 	    (void *)pxu_p->msiq_mapped_p, size);
808 
809 	kmem_free(msiq_state_p->msiq_buf_p, msiq_state_p->msiq_cnt *
810 	    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t));
811 
812 	return (DDI_SUCCESS);
813 }
814 
815 /*ARGSUSED*/
816 int
817 px_lib_msiq_info(dev_info_t *dip, msiqid_t msiq_id, r_addr_t *ra_p,
818     uint_t *msiq_rec_cnt_p)
819 {
820 	px_t		*px_p = DIP_TO_STATE(dip);
821 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
822 	size_t		msiq_size;
823 
824 	DBG(DBG_LIB_MSIQ, dip, "px_msiq_info: dip 0x%p msiq_id 0x%x\n",
825 	    dip, msiq_id);
826 
827 	msiq_size = msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t);
828 	ra_p = (r_addr_t *)((caddr_t)msiq_state_p->msiq_buf_p +
829 	    (msiq_id * msiq_size));
830 
831 	*msiq_rec_cnt_p = msiq_state_p->msiq_rec_cnt;
832 
833 	DBG(DBG_LIB_MSIQ, dip, "px_msiq_info: ra_p 0x%p msiq_rec_cnt 0x%x\n",
834 	    ra_p, *msiq_rec_cnt_p);
835 
836 	return (DDI_SUCCESS);
837 }
838 
839 /*ARGSUSED*/
840 int
841 px_lib_msiq_getvalid(dev_info_t *dip, msiqid_t msiq_id,
842     pci_msiq_valid_state_t *msiq_valid_state)
843 {
844 	uint64_t	ret;
845 
846 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getvalid: dip 0x%p msiq_id 0x%x\n",
847 	    dip, msiq_id);
848 
849 	if ((ret = hvio_msiq_getvalid(DIP_TO_HANDLE(dip),
850 	    msiq_id, msiq_valid_state)) != H_EOK) {
851 		DBG(DBG_LIB_MSIQ, dip,
852 		    "hvio_msiq_getvalid failed, ret 0x%lx\n", ret);
853 		return (DDI_FAILURE);
854 	}
855 
856 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getvalid: msiq_valid_state 0x%x\n",
857 	    *msiq_valid_state);
858 
859 	return (DDI_SUCCESS);
860 }
861 
862 /*ARGSUSED*/
863 int
864 px_lib_msiq_setvalid(dev_info_t *dip, msiqid_t msiq_id,
865     pci_msiq_valid_state_t msiq_valid_state)
866 {
867 	uint64_t	ret;
868 
869 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_setvalid: dip 0x%p msiq_id 0x%x "
870 	    "msiq_valid_state 0x%x\n", dip, msiq_id, msiq_valid_state);
871 
872 	if ((ret = hvio_msiq_setvalid(DIP_TO_HANDLE(dip),
873 	    msiq_id, msiq_valid_state)) != H_EOK) {
874 		DBG(DBG_LIB_MSIQ, dip,
875 		    "hvio_msiq_setvalid failed, ret 0x%lx\n", ret);
876 		return (DDI_FAILURE);
877 	}
878 
879 	return (DDI_SUCCESS);
880 }
881 
882 /*ARGSUSED*/
883 int
884 px_lib_msiq_getstate(dev_info_t *dip, msiqid_t msiq_id,
885     pci_msiq_state_t *msiq_state)
886 {
887 	uint64_t	ret;
888 
889 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getstate: dip 0x%p msiq_id 0x%x\n",
890 	    dip, msiq_id);
891 
892 	if ((ret = hvio_msiq_getstate(DIP_TO_HANDLE(dip),
893 	    msiq_id, msiq_state)) != H_EOK) {
894 		DBG(DBG_LIB_MSIQ, dip,
895 		    "hvio_msiq_getstate failed, ret 0x%lx\n", ret);
896 		return (DDI_FAILURE);
897 	}
898 
899 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_getstate: msiq_state 0x%x\n",
900 	    *msiq_state);
901 
902 	return (DDI_SUCCESS);
903 }
904 
905 /*ARGSUSED*/
906 int
907 px_lib_msiq_setstate(dev_info_t *dip, msiqid_t msiq_id,
908     pci_msiq_state_t msiq_state)
909 {
910 	uint64_t	ret;
911 
912 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_setstate: dip 0x%p msiq_id 0x%x "
913 	    "msiq_state 0x%x\n", dip, msiq_id, msiq_state);
914 
915 	if ((ret = hvio_msiq_setstate(DIP_TO_HANDLE(dip),
916 	    msiq_id, msiq_state)) != H_EOK) {
917 		DBG(DBG_LIB_MSIQ, dip,
918 		    "hvio_msiq_setstate failed, ret 0x%lx\n", ret);
919 		return (DDI_FAILURE);
920 	}
921 
922 	return (DDI_SUCCESS);
923 }
924 
925 /*ARGSUSED*/
926 int
927 px_lib_msiq_gethead(dev_info_t *dip, msiqid_t msiq_id,
928     msiqhead_t *msiq_head)
929 {
930 	uint64_t	ret;
931 
932 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gethead: dip 0x%p msiq_id 0x%x\n",
933 	    dip, msiq_id);
934 
935 	if ((ret = hvio_msiq_gethead(DIP_TO_HANDLE(dip),
936 	    msiq_id, msiq_head)) != H_EOK) {
937 		DBG(DBG_LIB_MSIQ, dip,
938 		    "hvio_msiq_gethead failed, ret 0x%lx\n", ret);
939 		return (DDI_FAILURE);
940 	}
941 
942 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gethead: msiq_head 0x%x\n",
943 	    *msiq_head);
944 
945 	return (DDI_SUCCESS);
946 }
947 
948 /*ARGSUSED*/
949 int
950 px_lib_msiq_sethead(dev_info_t *dip, msiqid_t msiq_id,
951     msiqhead_t msiq_head)
952 {
953 	uint64_t	ret;
954 
955 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_sethead: dip 0x%p msiq_id 0x%x "
956 	    "msiq_head 0x%x\n", dip, msiq_id, msiq_head);
957 
958 	if ((ret = hvio_msiq_sethead(DIP_TO_HANDLE(dip),
959 	    msiq_id, msiq_head)) != H_EOK) {
960 		DBG(DBG_LIB_MSIQ, dip,
961 		    "hvio_msiq_sethead failed, ret 0x%lx\n", ret);
962 		return (DDI_FAILURE);
963 	}
964 
965 	return (DDI_SUCCESS);
966 }
967 
968 /*ARGSUSED*/
969 int
970 px_lib_msiq_gettail(dev_info_t *dip, msiqid_t msiq_id,
971     msiqtail_t *msiq_tail)
972 {
973 	uint64_t	ret;
974 
975 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gettail: dip 0x%p msiq_id 0x%x\n",
976 	    dip, msiq_id);
977 
978 	if ((ret = hvio_msiq_gettail(DIP_TO_HANDLE(dip),
979 	    msiq_id, msiq_tail)) != H_EOK) {
980 		DBG(DBG_LIB_MSIQ, dip,
981 		    "hvio_msiq_gettail failed, ret 0x%lx\n", ret);
982 		return (DDI_FAILURE);
983 	}
984 
985 	DBG(DBG_LIB_MSIQ, dip, "px_lib_msiq_gettail: msiq_tail 0x%x\n",
986 	    *msiq_tail);
987 
988 	return (DDI_SUCCESS);
989 }
990 
991 /*ARGSUSED*/
992 void
993 px_lib_get_msiq_rec(dev_info_t *dip, msiqhead_t *msiq_head_p,
994     msiq_rec_t *msiq_rec_p)
995 {
996 	eq_rec_t	*eq_rec_p = (eq_rec_t *)msiq_head_p;
997 
998 	DBG(DBG_LIB_MSIQ, dip, "px_lib_get_msiq_rec: dip 0x%p eq_rec_p 0x%p\n",
999 	    dip, eq_rec_p);
1000 
1001 	if (!eq_rec_p->eq_rec_fmt_type) {
1002 		/* Set msiq_rec_type to zero */
1003 		msiq_rec_p->msiq_rec_type = 0;
1004 
1005 		return;
1006 	}
1007 
1008 	DBG(DBG_LIB_MSIQ, dip, "px_lib_get_msiq_rec: EQ RECORD, "
1009 	    "eq_rec_rid 0x%llx eq_rec_fmt_type 0x%llx "
1010 	    "eq_rec_len 0x%llx eq_rec_addr0 0x%llx "
1011 	    "eq_rec_addr1 0x%llx eq_rec_data0 0x%llx "
1012 	    "eq_rec_data1 0x%llx\n", eq_rec_p->eq_rec_rid,
1013 	    eq_rec_p->eq_rec_fmt_type, eq_rec_p->eq_rec_len,
1014 	    eq_rec_p->eq_rec_addr0, eq_rec_p->eq_rec_addr1,
1015 	    eq_rec_p->eq_rec_data0, eq_rec_p->eq_rec_data1);
1016 
1017 	/*
1018 	 * Only upper 4 bits of eq_rec_fmt_type is used
1019 	 * to identify the EQ record type.
1020 	 */
1021 	switch (eq_rec_p->eq_rec_fmt_type >> 3) {
1022 	case EQ_REC_MSI32:
1023 		msiq_rec_p->msiq_rec_type = MSI32_REC;
1024 
1025 		msiq_rec_p->msiq_rec_data.msi.msi_data =
1026 		    eq_rec_p->eq_rec_data0;
1027 		break;
1028 	case EQ_REC_MSI64:
1029 		msiq_rec_p->msiq_rec_type = MSI64_REC;
1030 
1031 		msiq_rec_p->msiq_rec_data.msi.msi_data =
1032 		    eq_rec_p->eq_rec_data0;
1033 		break;
1034 	case EQ_REC_MSG:
1035 		msiq_rec_p->msiq_rec_type = MSG_REC;
1036 
1037 		msiq_rec_p->msiq_rec_data.msg.msg_route =
1038 		    eq_rec_p->eq_rec_fmt_type & 7;
1039 		msiq_rec_p->msiq_rec_data.msg.msg_targ = eq_rec_p->eq_rec_rid;
1040 		msiq_rec_p->msiq_rec_data.msg.msg_code = eq_rec_p->eq_rec_data0;
1041 		break;
1042 	default:
1043 		cmn_err(CE_WARN, "%s%d: px_lib_get_msiq_rec: "
1044 		    "0x%x is an unknown EQ record type",
1045 		    ddi_driver_name(dip), ddi_get_instance(dip),
1046 		    (int)eq_rec_p->eq_rec_fmt_type);
1047 		break;
1048 	}
1049 
1050 	msiq_rec_p->msiq_rec_rid = eq_rec_p->eq_rec_rid;
1051 	msiq_rec_p->msiq_rec_msi_addr = ((eq_rec_p->eq_rec_addr1 << 16) |
1052 	    (eq_rec_p->eq_rec_addr0 << 2));
1053 }
1054 
1055 /*ARGSUSED*/
1056 void
1057 px_lib_clr_msiq_rec(dev_info_t *dip, msiqhead_t *msiq_head_p)
1058 {
1059 	eq_rec_t	*eq_rec_p = (eq_rec_t *)msiq_head_p;
1060 
1061 	DBG(DBG_LIB_MSIQ, dip, "px_lib_clr_msiq_rec: dip 0x%p eq_rec_p 0x%p\n",
1062 	    dip, eq_rec_p);
1063 
1064 	if (eq_rec_p->eq_rec_fmt_type) {
1065 		/* Zero out eq_rec_fmt_type field */
1066 		eq_rec_p->eq_rec_fmt_type = 0;
1067 	}
1068 }
1069 
1070 /*
1071  * MSI Functions:
1072  */
1073 /*ARGSUSED*/
1074 int
1075 px_lib_msi_init(dev_info_t *dip)
1076 {
1077 	px_t		*px_p = DIP_TO_STATE(dip);
1078 	px_msi_state_t	*msi_state_p = &px_p->px_ib_p->ib_msi_state;
1079 	uint64_t	ret;
1080 
1081 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_init: dip 0x%p\n", dip);
1082 
1083 	if ((ret = hvio_msi_init(DIP_TO_HANDLE(dip),
1084 	    msi_state_p->msi_addr32, msi_state_p->msi_addr64)) != H_EOK) {
1085 		DBG(DBG_LIB_MSIQ, dip, "px_lib_msi_init failed, ret 0x%lx\n",
1086 		    ret);
1087 		return (DDI_FAILURE);
1088 	}
1089 
1090 	return (DDI_SUCCESS);
1091 }
1092 
1093 /*ARGSUSED*/
1094 int
1095 px_lib_msi_getmsiq(dev_info_t *dip, msinum_t msi_num,
1096     msiqid_t *msiq_id)
1097 {
1098 	uint64_t	ret;
1099 
1100 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getmsiq: dip 0x%p msi_num 0x%x\n",
1101 	    dip, msi_num);
1102 
1103 	if ((ret = hvio_msi_getmsiq(DIP_TO_HANDLE(dip),
1104 	    msi_num, msiq_id)) != H_EOK) {
1105 		DBG(DBG_LIB_MSI, dip,
1106 		    "hvio_msi_getmsiq failed, ret 0x%lx\n", ret);
1107 		return (DDI_FAILURE);
1108 	}
1109 
1110 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getmsiq: msiq_id 0x%x\n",
1111 	    *msiq_id);
1112 
1113 	return (DDI_SUCCESS);
1114 }
1115 
1116 /*ARGSUSED*/
1117 int
1118 px_lib_msi_setmsiq(dev_info_t *dip, msinum_t msi_num,
1119     msiqid_t msiq_id, msi_type_t msitype)
1120 {
1121 	uint64_t	ret;
1122 
1123 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_setmsiq: dip 0x%p msi_num 0x%x "
1124 	    "msq_id 0x%x\n", dip, msi_num, msiq_id);
1125 
1126 	if ((ret = hvio_msi_setmsiq(DIP_TO_HANDLE(dip),
1127 	    msi_num, msiq_id)) != H_EOK) {
1128 		DBG(DBG_LIB_MSI, dip,
1129 		    "hvio_msi_setmsiq failed, ret 0x%lx\n", ret);
1130 		return (DDI_FAILURE);
1131 	}
1132 
1133 	return (DDI_SUCCESS);
1134 }
1135 
1136 /*ARGSUSED*/
1137 int
1138 px_lib_msi_getvalid(dev_info_t *dip, msinum_t msi_num,
1139     pci_msi_valid_state_t *msi_valid_state)
1140 {
1141 	uint64_t	ret;
1142 
1143 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getvalid: dip 0x%p msi_num 0x%x\n",
1144 	    dip, msi_num);
1145 
1146 	if ((ret = hvio_msi_getvalid(DIP_TO_HANDLE(dip),
1147 	    msi_num, msi_valid_state)) != H_EOK) {
1148 		DBG(DBG_LIB_MSI, dip,
1149 		    "hvio_msi_getvalid failed, ret 0x%lx\n", ret);
1150 		return (DDI_FAILURE);
1151 	}
1152 
1153 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getvalid: msiq_id 0x%x\n",
1154 	    *msi_valid_state);
1155 
1156 	return (DDI_SUCCESS);
1157 }
1158 
1159 /*ARGSUSED*/
1160 int
1161 px_lib_msi_setvalid(dev_info_t *dip, msinum_t msi_num,
1162     pci_msi_valid_state_t msi_valid_state)
1163 {
1164 	uint64_t	ret;
1165 
1166 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_setvalid: dip 0x%p msi_num 0x%x "
1167 	    "msi_valid_state 0x%x\n", dip, msi_num, msi_valid_state);
1168 
1169 	if ((ret = hvio_msi_setvalid(DIP_TO_HANDLE(dip),
1170 	    msi_num, msi_valid_state)) != H_EOK) {
1171 		DBG(DBG_LIB_MSI, dip,
1172 		    "hvio_msi_setvalid failed, ret 0x%lx\n", ret);
1173 		return (DDI_FAILURE);
1174 	}
1175 
1176 	return (DDI_SUCCESS);
1177 }
1178 
1179 /*ARGSUSED*/
1180 int
1181 px_lib_msi_getstate(dev_info_t *dip, msinum_t msi_num,
1182     pci_msi_state_t *msi_state)
1183 {
1184 	uint64_t	ret;
1185 
1186 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getstate: dip 0x%p msi_num 0x%x\n",
1187 	    dip, msi_num);
1188 
1189 	if ((ret = hvio_msi_getstate(DIP_TO_HANDLE(dip),
1190 	    msi_num, msi_state)) != H_EOK) {
1191 		DBG(DBG_LIB_MSI, dip,
1192 		    "hvio_msi_getstate failed, ret 0x%lx\n", ret);
1193 		return (DDI_FAILURE);
1194 	}
1195 
1196 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_getstate: msi_state 0x%x\n",
1197 	    *msi_state);
1198 
1199 	return (DDI_SUCCESS);
1200 }
1201 
1202 /*ARGSUSED*/
1203 int
1204 px_lib_msi_setstate(dev_info_t *dip, msinum_t msi_num,
1205     pci_msi_state_t msi_state)
1206 {
1207 	uint64_t	ret;
1208 
1209 	DBG(DBG_LIB_MSI, dip, "px_lib_msi_setstate: dip 0x%p msi_num 0x%x "
1210 	    "msi_state 0x%x\n", dip, msi_num, msi_state);
1211 
1212 	if ((ret = hvio_msi_setstate(DIP_TO_HANDLE(dip),
1213 	    msi_num, msi_state)) != H_EOK) {
1214 		DBG(DBG_LIB_MSI, dip,
1215 		    "hvio_msi_setstate failed, ret 0x%lx\n", ret);
1216 		return (DDI_FAILURE);
1217 	}
1218 
1219 	return (DDI_SUCCESS);
1220 }
1221 
1222 /*
1223  * MSG Functions:
1224  */
1225 /*ARGSUSED*/
1226 int
1227 px_lib_msg_getmsiq(dev_info_t *dip, pcie_msg_type_t msg_type,
1228     msiqid_t *msiq_id)
1229 {
1230 	uint64_t	ret;
1231 
1232 	DBG(DBG_LIB_MSG, dip, "px_lib_msg_getmsiq: dip 0x%p msg_type 0x%x\n",
1233 	    dip, msg_type);
1234 
1235 	if ((ret = hvio_msg_getmsiq(DIP_TO_HANDLE(dip),
1236 	    msg_type, msiq_id)) != H_EOK) {
1237 		DBG(DBG_LIB_MSG, dip,
1238 		    "hvio_msg_getmsiq failed, ret 0x%lx\n", ret);
1239 		return (DDI_FAILURE);
1240 	}
1241 
1242 	DBG(DBG_LIB_MSI, dip, "px_lib_msg_getmsiq: msiq_id 0x%x\n",
1243 	    *msiq_id);
1244 
1245 	return (DDI_SUCCESS);
1246 }
1247 
1248 /*ARGSUSED*/
1249 int
1250 px_lib_msg_setmsiq(dev_info_t *dip, pcie_msg_type_t msg_type,
1251     msiqid_t msiq_id)
1252 {
1253 	uint64_t	ret;
1254 
1255 	DBG(DBG_LIB_MSG, dip, "px_lib_msi_setstate: dip 0x%p msg_type 0x%x "
1256 	    "msiq_id 0x%x\n", dip, msg_type, msiq_id);
1257 
1258 	if ((ret = hvio_msg_setmsiq(DIP_TO_HANDLE(dip),
1259 	    msg_type, msiq_id)) != H_EOK) {
1260 		DBG(DBG_LIB_MSG, dip,
1261 		    "hvio_msg_setmsiq failed, ret 0x%lx\n", ret);
1262 		return (DDI_FAILURE);
1263 	}
1264 
1265 	return (DDI_SUCCESS);
1266 }
1267 
1268 /*ARGSUSED*/
1269 int
1270 px_lib_msg_getvalid(dev_info_t *dip, pcie_msg_type_t msg_type,
1271     pcie_msg_valid_state_t *msg_valid_state)
1272 {
1273 	uint64_t	ret;
1274 
1275 	DBG(DBG_LIB_MSG, dip, "px_lib_msg_getvalid: dip 0x%p msg_type 0x%x\n",
1276 	    dip, msg_type);
1277 
1278 	if ((ret = hvio_msg_getvalid(DIP_TO_HANDLE(dip), msg_type,
1279 	    msg_valid_state)) != H_EOK) {
1280 		DBG(DBG_LIB_MSG, dip,
1281 		    "hvio_msg_getvalid failed, ret 0x%lx\n", ret);
1282 		return (DDI_FAILURE);
1283 	}
1284 
1285 	DBG(DBG_LIB_MSI, dip, "px_lib_msg_getvalid: msg_valid_state 0x%x\n",
1286 	    *msg_valid_state);
1287 
1288 	return (DDI_SUCCESS);
1289 }
1290 
1291 /*ARGSUSED*/
1292 int
1293 px_lib_msg_setvalid(dev_info_t *dip, pcie_msg_type_t msg_type,
1294     pcie_msg_valid_state_t msg_valid_state)
1295 {
1296 	uint64_t	ret;
1297 
1298 	DBG(DBG_LIB_MSG, dip, "px_lib_msg_setvalid: dip 0x%p msg_type 0x%x "
1299 	    "msg_valid_state 0x%x\n", dip, msg_type, msg_valid_state);
1300 
1301 	if ((ret = hvio_msg_setvalid(DIP_TO_HANDLE(dip), msg_type,
1302 	    msg_valid_state)) != H_EOK) {
1303 		DBG(DBG_LIB_MSG, dip,
1304 		    "hvio_msg_setvalid failed, ret 0x%lx\n", ret);
1305 		return (DDI_FAILURE);
1306 	}
1307 
1308 	return (DDI_SUCCESS);
1309 }
1310 
/*
 * Intentionally empty in this file: both arguments are accepted and
 * ignored, and nothing is done.
 */
/*ARGSUSED*/
void
px_panic_domain(px_t *px_p, pcie_req_id_t bdf)
{
}
1316 
1317 /*
1318  * Suspend/Resume Functions:
1319  * Currently unsupported by hypervisor
1320  */
1321 int
1322 px_lib_suspend(dev_info_t *dip)
1323 {
1324 	px_t		*px_p = DIP_TO_STATE(dip);
1325 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
1326 	px_cb_t		*cb_p = PX2CB(px_p);
1327 	devhandle_t	dev_hdl, xbus_dev_hdl;
1328 	uint64_t	ret = H_EOK;
1329 
1330 	DBG(DBG_DETACH, dip, "px_lib_suspend: dip 0x%p\n", dip);
1331 
1332 	dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_CSR];
1333 	xbus_dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_XBC];
1334 
1335 	if ((ret = hvio_suspend(dev_hdl, pxu_p)) != H_EOK)
1336 		goto fail;
1337 
1338 	if (--cb_p->attachcnt == 0) {
1339 		ret = hvio_cb_suspend(xbus_dev_hdl, pxu_p);
1340 		if (ret != H_EOK)
1341 			cb_p->attachcnt++;
1342 	}
1343 	pxu_p->cpr_flag = PX_ENTERED_CPR;
1344 
1345 fail:
1346 	return ((ret != H_EOK) ? DDI_FAILURE: DDI_SUCCESS);
1347 }
1348 
1349 void
1350 px_lib_resume(dev_info_t *dip)
1351 {
1352 	px_t		*px_p = DIP_TO_STATE(dip);
1353 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
1354 	px_cb_t		*cb_p = PX2CB(px_p);
1355 	devhandle_t	dev_hdl, xbus_dev_hdl;
1356 	devino_t	pec_ino = px_p->px_inos[PX_INTR_PEC];
1357 	devino_t	xbc_ino = px_p->px_inos[PX_INTR_XBC];
1358 
1359 	DBG(DBG_ATTACH, dip, "px_lib_resume: dip 0x%p\n", dip);
1360 
1361 	dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_CSR];
1362 	xbus_dev_hdl = (devhandle_t)pxu_p->px_address[PX_REG_XBC];
1363 
1364 	if (++cb_p->attachcnt == 1)
1365 		hvio_cb_resume(dev_hdl, xbus_dev_hdl, xbc_ino, pxu_p);
1366 
1367 	hvio_resume(dev_hdl, pec_ino, pxu_p);
1368 }
1369 
1370 /*
1371  * Generate a unique Oberon UBC ID based on the Logicial System Board and
1372  * the IO Channel from the portid property field.
1373  */
1374 static uint64_t
1375 oberon_get_ubc_id(dev_info_t *dip)
1376 {
1377 	px_t	*px_p = DIP_TO_STATE(dip);
1378 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1379 	uint64_t	ubc_id;
1380 
1381 	/*
1382 	 * Generate a unique 6 bit UBC ID using the 2 IO_Channel#[1:0] bits and
1383 	 * the 4 LSB_ID[3:0] bits from the Oberon's portid property.
1384 	 */
1385 	ubc_id = (((pxu_p->portid >> OBERON_PORT_ID_IOC) &
1386 	    OBERON_PORT_ID_IOC_MASK) | (((pxu_p->portid >>
1387 	    OBERON_PORT_ID_LSB) & OBERON_PORT_ID_LSB_MASK)
1388 	    << OBERON_UBC_ID_LSB));
1389 
1390 	return (ubc_id);
1391 }
1392 
1393 /*
1394  * Oberon does not have a UBC scratch register, so alloc an array of scratch
1395  * registers when needed and use a unique UBC ID as an index. This code
1396  * can be simplified if we use a pre-allocated array. They are currently
1397  * being dynamically allocated because it's only needed by the Oberon.
1398  */
1399 static void
1400 oberon_set_cb(dev_info_t *dip, uint64_t val)
1401 {
1402 	uint64_t	ubc_id;
1403 
1404 	if (px_oberon_ubc_scratch_regs == NULL)
1405 		px_oberon_ubc_scratch_regs =
1406 		    (uint64_t *)kmem_zalloc(sizeof (uint64_t)*
1407 		    OBERON_UBC_ID_MAX, KM_SLEEP);
1408 
1409 	ubc_id = oberon_get_ubc_id(dip);
1410 
1411 	px_oberon_ubc_scratch_regs[ubc_id] = val;
1412 
1413 	/*
1414 	 * Check if any scratch registers are still in use. If all scratch
1415 	 * registers are currently set to zero, then deallocate the scratch
1416 	 * register array.
1417 	 */
1418 	for (ubc_id = 0; ubc_id < OBERON_UBC_ID_MAX; ubc_id++) {
1419 		if (px_oberon_ubc_scratch_regs[ubc_id] != NULL)
1420 			return;
1421 	}
1422 
1423 	/*
1424 	 * All scratch registers are set to zero so deallocate the scratch
1425 	 * register array and set the pointer to NULL.
1426 	 */
1427 	kmem_free(px_oberon_ubc_scratch_regs,
1428 	    (sizeof (uint64_t)*OBERON_UBC_ID_MAX));
1429 
1430 	px_oberon_ubc_scratch_regs = NULL;
1431 }
1432 
1433 /*
1434  * Oberon does not have a UBC scratch register, so use an allocated array of
1435  * scratch registers and use the unique UBC ID as an index into that array.
1436  */
1437 static uint64_t
1438 oberon_get_cb(dev_info_t *dip)
1439 {
1440 	uint64_t	ubc_id;
1441 
1442 	if (px_oberon_ubc_scratch_regs == NULL)
1443 		return (0);
1444 
1445 	ubc_id = oberon_get_ubc_id(dip);
1446 
1447 	return (px_oberon_ubc_scratch_regs[ubc_id]);
1448 }
1449 
1450 /*
1451  * Misc Functions:
1452  * Currently unsupported by hypervisor
1453  */
1454 static uint64_t
1455 px_get_cb(dev_info_t *dip)
1456 {
1457 	px_t	*px_p = DIP_TO_STATE(dip);
1458 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1459 
1460 	/*
1461 	 * Oberon does not currently have Scratchpad registers.
1462 	 */
1463 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON)
1464 		return (oberon_get_cb(dip));
1465 
1466 	return (CSR_XR((caddr_t)pxu_p->px_address[PX_REG_XBC], JBUS_SCRATCH_1));
1467 }
1468 
1469 static void
1470 px_set_cb(dev_info_t *dip, uint64_t val)
1471 {
1472 	px_t	*px_p = DIP_TO_STATE(dip);
1473 	pxu_t	*pxu_p = (pxu_t *)px_p->px_plat_p;
1474 
1475 	/*
1476 	 * Oberon does not currently have Scratchpad registers.
1477 	 */
1478 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON) {
1479 		oberon_set_cb(dip, val);
1480 		return;
1481 	}
1482 
1483 	CSR_XS((caddr_t)pxu_p->px_address[PX_REG_XBC], JBUS_SCRATCH_1, val);
1484 }
1485 
1486 /*ARGSUSED*/
1487 int
1488 px_lib_map_vconfig(dev_info_t *dip,
1489 	ddi_map_req_t *mp, pci_config_offset_t off,
1490 		pci_regspec_t *rp, caddr_t *addrp)
1491 {
1492 	/*
1493 	 * No special config space access services in this layer.
1494 	 */
1495 	return (DDI_FAILURE);
1496 }
1497 
1498 void
1499 px_lib_map_attr_check(ddi_map_req_t *mp)
1500 {
1501 	ddi_acc_hdl_t *hp = mp->map_handlep;
1502 
1503 	/* fire does not accept byte masks from PIO store merge */
1504 	if (hp->ah_acc.devacc_attr_dataorder == DDI_STORECACHING_OK_ACC)
1505 		hp->ah_acc.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1506 }
1507 
1508 /* This function is called only by poke, caut put and pxtool poke. */
1509 void
1510 px_lib_clr_errs(px_t *px_p, dev_info_t *rdip, uint64_t addr)
1511 {
1512 	px_pec_t	*pec_p = px_p->px_pec_p;
1513 	dev_info_t	*rpdip = px_p->px_dip;
1514 	int		rc_err, fab_err, i;
1515 	int		acctype = pec_p->pec_safeacc_type;
1516 	ddi_fm_error_t	derr;
1517 	pci_ranges_t	*ranges_p;
1518 	int		range_len;
1519 	uint32_t	addr_high, addr_low;
1520 	pcie_req_id_t	bdf = PCIE_INVALID_BDF;
1521 
1522 	/* Create the derr */
1523 	bzero(&derr, sizeof (ddi_fm_error_t));
1524 	derr.fme_version = DDI_FME_VERSION;
1525 	derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1);
1526 	derr.fme_flag = acctype;
1527 
1528 	if (acctype == DDI_FM_ERR_EXPECTED) {
1529 		derr.fme_status = DDI_FM_NONFATAL;
1530 		ndi_fm_acc_err_set(pec_p->pec_acc_hdl, &derr);
1531 	}
1532 
1533 	if (px_fm_enter(px_p) != DDI_SUCCESS)
1534 		return;
1535 
1536 	/* send ereport/handle/clear fire registers */
1537 	rc_err = px_err_cmn_intr(px_p, &derr, PX_LIB_CALL, PX_FM_BLOCK_ALL);
1538 
1539 	/* Figure out if this is a cfg or mem32 access */
1540 	addr_high = (uint32_t)(addr >> 32);
1541 	addr_low = (uint32_t)addr;
1542 	range_len = px_p->px_ranges_length / sizeof (pci_ranges_t);
1543 	i = 0;
1544 	for (ranges_p = px_p->px_ranges_p; i < range_len; i++, ranges_p++) {
1545 		if (ranges_p->parent_high == addr_high) {
1546 			switch (ranges_p->child_high & PCI_ADDR_MASK) {
1547 			case PCI_ADDR_CONFIG:
1548 				bdf = (pcie_req_id_t)(addr_low >> 12);
1549 				addr_low = 0;
1550 				break;
1551 			case PCI_ADDR_MEM32:
1552 				if (rdip)
1553 					bdf = PCI_GET_BDF(rdip);
1554 				else
1555 					bdf = PCIE_INVALID_BDF;
1556 				break;
1557 			}
1558 			break;
1559 		}
1560 	}
1561 
1562 	(void) px_rp_en_q(px_p, bdf, addr_low, NULL);
1563 
1564 	/*
1565 	 * XXX - Current code scans the fabric for all px_tool accesses.
1566 	 * In future, do not scan fabric for px_tool access to IO Root Nexus
1567 	 */
1568 	fab_err = px_scan_fabric(px_p, rpdip, &derr);
1569 
1570 	px_err_panic(rc_err, PX_RC, fab_err, B_TRUE);
1571 	px_fm_exit(px_p);
1572 	px_err_panic(rc_err, PX_RC, fab_err, B_FALSE);
1573 }
1574 
/*
 * DEBUG-only counters: the peek and poke routines below increment these
 * each time a protected access fails.
 */
#ifdef  DEBUG
int	px_peekfault_cnt = 0;
int	px_pokefault_cnt = 0;
#endif  /* DEBUG */
1579 
/*
 * Perform a single protected write ("poke") of in_args->size bytes from
 * in_args->host_addr to in_args->dev_addr.
 *
 * The whole operation runs under pec_pokefault_mutex.  While it is in
 * progress the PEC publishes the local on_trap data and marks the
 * safe-access type as DDI_FM_ERR_POKE; both are reset before the mutex
 * is dropped.
 *
 * Returns the do_poke() result, overridden to DDI_FAILURE when on_trap()
 * reports a trap or when OT_DATA_ACCESS is set in otd.ot_trap after the
 * error sweep.
 */
/*ARGSUSED*/
static int
px_lib_do_poke(dev_info_t *dip, dev_info_t *rdip,
    peekpoke_ctlops_t *in_args)
{
	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;
	on_trap_data_t otd;

	mutex_enter(&pec_p->pec_pokefault_mutex);
	pec_p->pec_ontrap_data = &otd;
	pec_p->pec_safeacc_type = DDI_FM_ERR_POKE;

	/* Set up protected environment. */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		/* Swap in poke_fault as trampoline just for the access. */
		uintptr_t tramp = otd.ot_trampoline;

		otd.ot_trampoline = (uintptr_t)&poke_fault;
		err = do_poke(in_args->size, (void *)in_args->dev_addr,
		    (void *)in_args->host_addr);
		otd.ot_trampoline = tramp;
	} else
		err = DDI_FAILURE;

	/* Sweep errors for the poked address in both outcomes. */
	px_lib_clr_errs(px_p, rdip, in_args->dev_addr);

	/* A data access trap recorded in otd also counts as failure. */
	if (otd.ot_trap & OT_DATA_ACCESS)
		err = DDI_FAILURE;

	/* Take down protected environment. */
	no_trap();

	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	mutex_exit(&pec_p->pec_pokefault_mutex);

#ifdef  DEBUG
	if (err == DDI_FAILURE)
		px_pokefault_cnt++;
#endif
	return (err);
}
1623 
/*
 * Cautious put: write repcount items of the given size from host_addr
 * to dev_addr through the access handle carried in the ctlops argument,
 * running the error sweep after every item.
 *
 * The host address always advances by size per item; the device address
 * advances only when flags == DDI_DEV_AUTOINCR.  The loop stops at the
 * first item after which OT_DATA_ACCESS is set in the handle's on_trap
 * data.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if such a trap was observed.
 */
/*ARGSUSED*/
static int
px_lib_do_caut_put(dev_info_t *dip, dev_info_t *rdip,
    peekpoke_ctlops_t *cautacc_ctlops_arg)
{
	size_t size = cautacc_ctlops_arg->size;
	uintptr_t dev_addr = cautacc_ctlops_arg->dev_addr;
	uintptr_t host_addr = cautacc_ctlops_arg->host_addr;
	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)cautacc_ctlops_arg->handle;
	size_t repcount = cautacc_ctlops_arg->repcount;
	uint_t flags = cautacc_ctlops_arg->flags;

	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;

	/*
	 * Note that i_ndi_busop_access_enter ends up grabbing the pokefault
	 * mutex.
	 */
	i_ndi_busop_access_enter(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);

	/* Publish the handle's on_trap data and mark errors as expected. */
	pec_p->pec_ontrap_data = (on_trap_data_t *)hp->ahi_err->err_ontrap;
	pec_p->pec_safeacc_type = DDI_FM_ERR_EXPECTED;
	hp->ahi_err->err_expected = DDI_FM_ERR_EXPECTED;

	if (!i_ddi_ontrap((ddi_acc_handle_t)hp)) {
		for (; repcount; repcount--) {
			/*
			 * Sizes other than 1, 2, 4 or 8 bytes write nothing,
			 * but the addresses below still advance.
			 */
			switch (size) {

			case sizeof (uint8_t):
				i_ddi_put8(hp, (uint8_t *)dev_addr,
				    *(uint8_t *)host_addr);
				break;

			case sizeof (uint16_t):
				i_ddi_put16(hp, (uint16_t *)dev_addr,
				    *(uint16_t *)host_addr);
				break;

			case sizeof (uint32_t):
				i_ddi_put32(hp, (uint32_t *)dev_addr,
				    *(uint32_t *)host_addr);
				break;

			case sizeof (uint64_t):
				i_ddi_put64(hp, (uint64_t *)dev_addr,
				    *(uint64_t *)host_addr);
				break;
			}

			host_addr += size;

			if (flags == DDI_DEV_AUTOINCR)
				dev_addr += size;

			/*
			 * NOTE(review): with DDI_DEV_AUTOINCR dev_addr has
			 * already been advanced here, so the sweep is given
			 * the address after the one just written - confirm
			 * this is intended.
			 */
			px_lib_clr_errs(px_p, rdip, dev_addr);

			if (pec_p->pec_ontrap_data->ot_trap & OT_DATA_ACCESS) {
				err = DDI_FAILURE;
#ifdef  DEBUG
				px_pokefault_cnt++;
#endif
				break;
			}
		}
	}

	/* Tear down in reverse; err_expected is reset after access_exit. */
	i_ddi_notrap((ddi_acc_handle_t)hp);
	pec_p->pec_ontrap_data = NULL;
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	i_ndi_busop_access_exit(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);
	hp->ahi_err->err_expected = DDI_FM_ERR_UNEXPECTED;

	return (err);
}
1700 
1701 
1702 int
1703 px_lib_ctlops_poke(dev_info_t *dip, dev_info_t *rdip,
1704     peekpoke_ctlops_t *in_args)
1705 {
1706 	return (in_args->handle ? px_lib_do_caut_put(dip, rdip, in_args) :
1707 	    px_lib_do_poke(dip, rdip, in_args));
1708 }
1709 
1710 
/*
 * Perform a single protected read ("peek") of in_args->size bytes from
 * in_args->dev_addr into in_args->host_addr.
 *
 * The operation runs under pec_pokefault_mutex with the PEC's
 * safe-access type set to DDI_FM_ERR_PEEK; the type is reset to
 * DDI_FM_ERR_UNEXPECTED before the mutex is dropped.
 *
 * Returns the do_peek() result, DDI_FAILURE if px_fm_enter() fails, or
 * DDI_FAILURE if on_trap() reports a trap.
 */
/*ARGSUSED*/
static int
px_lib_do_peek(dev_info_t *dip, peekpoke_ctlops_t *in_args)
{
	px_t *px_p = DIP_TO_STATE(dip);
	px_pec_t *pec_p = px_p->px_pec_p;
	int err = DDI_SUCCESS;
	on_trap_data_t otd;

	mutex_enter(&pec_p->pec_pokefault_mutex);
	/* The safe-access type is switched between fm_enter and fm_exit. */
	if (px_fm_enter(px_p) != DDI_SUCCESS) {
		mutex_exit(&pec_p->pec_pokefault_mutex);
		return (DDI_FAILURE);
	}
	pec_p->pec_safeacc_type = DDI_FM_ERR_PEEK;
	px_fm_exit(px_p);

	/* Set up protected environment. */
	if (!on_trap(&otd, OT_DATA_ACCESS)) {
		/* Swap in peek_fault as trampoline just for the access. */
		uintptr_t tramp = otd.ot_trampoline;

		otd.ot_trampoline = (uintptr_t)&peek_fault;
		err = do_peek(in_args->size, (void *)in_args->dev_addr,
		    (void *)in_args->host_addr);
		otd.ot_trampoline = tramp;
	} else
		err = DDI_FAILURE;

	/* Take down protected environment. */
	no_trap();
	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
	mutex_exit(&pec_p->pec_pokefault_mutex);

#ifdef  DEBUG
	if (err == DDI_FAILURE)
		px_peekfault_cnt++;
#endif
	return (err);
}
1748 
1749 
1750 static int
1751 px_lib_do_caut_get(dev_info_t *dip, peekpoke_ctlops_t *cautacc_ctlops_arg)
1752 {
1753 	size_t size = cautacc_ctlops_arg->size;
1754 	uintptr_t dev_addr = cautacc_ctlops_arg->dev_addr;
1755 	uintptr_t host_addr = cautacc_ctlops_arg->host_addr;
1756 	ddi_acc_impl_t *hp = (ddi_acc_impl_t *)cautacc_ctlops_arg->handle;
1757 	size_t repcount = cautacc_ctlops_arg->repcount;
1758 	uint_t flags = cautacc_ctlops_arg->flags;
1759 
1760 	px_t *px_p = DIP_TO_STATE(dip);
1761 	px_pec_t *pec_p = px_p->px_pec_p;
1762 	int err = DDI_SUCCESS;
1763 
1764 	/*
1765 	 * Note that i_ndi_busop_access_enter ends up grabbing the pokefault
1766 	 * mutex.
1767 	 */
1768 	i_ndi_busop_access_enter(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);
1769 
1770 	pec_p->pec_ontrap_data = (on_trap_data_t *)hp->ahi_err->err_ontrap;
1771 	pec_p->pec_safeacc_type = DDI_FM_ERR_EXPECTED;
1772 	hp->ahi_err->err_expected = DDI_FM_ERR_EXPECTED;
1773 
1774 	if (repcount == 1) {
1775 		if (!i_ddi_ontrap((ddi_acc_handle_t)hp)) {
1776 			i_ddi_caut_get(size, (void *)dev_addr,
1777 			    (void *)host_addr);
1778 		} else {
1779 			int i;
1780 			uint8_t *ff_addr = (uint8_t *)host_addr;
1781 			for (i = 0; i < size; i++)
1782 				*ff_addr++ = 0xff;
1783 
1784 			err = DDI_FAILURE;
1785 #ifdef  DEBUG
1786 			px_peekfault_cnt++;
1787 #endif
1788 		}
1789 	} else {
1790 		if (!i_ddi_ontrap((ddi_acc_handle_t)hp)) {
1791 			for (; repcount; repcount--) {
1792 				i_ddi_caut_get(size, (void *)dev_addr,
1793 				    (void *)host_addr);
1794 
1795 				host_addr += size;
1796 
1797 				if (flags == DDI_DEV_AUTOINCR)
1798 					dev_addr += size;
1799 			}
1800 		} else {
1801 			err = DDI_FAILURE;
1802 #ifdef  DEBUG
1803 			px_peekfault_cnt++;
1804 #endif
1805 		}
1806 	}
1807 
1808 	i_ddi_notrap((ddi_acc_handle_t)hp);
1809 	pec_p->pec_ontrap_data = NULL;
1810 	pec_p->pec_safeacc_type = DDI_FM_ERR_UNEXPECTED;
1811 	i_ndi_busop_access_exit(hp->ahi_common.ah_dip, (ddi_acc_handle_t)hp);
1812 	hp->ahi_err->err_expected = DDI_FM_ERR_UNEXPECTED;
1813 
1814 	return (err);
1815 }
1816 
1817 /*ARGSUSED*/
1818 int
1819 px_lib_ctlops_peek(dev_info_t *dip, dev_info_t *rdip,
1820     peekpoke_ctlops_t *in_args, void *result)
1821 {
1822 	result = (void *)in_args->host_addr;
1823 	return (in_args->handle ? px_lib_do_caut_get(dip, in_args) :
1824 	    px_lib_do_peek(dip, in_args));
1825 }
1826 
1827 /*
1828  * implements PPM interface
1829  */
1830 int
1831 px_lib_pmctl(int cmd, px_t *px_p)
1832 {
1833 	ASSERT((cmd & ~PPMREQ_MASK) == PPMREQ);
1834 	switch (cmd) {
1835 	case PPMREQ_PRE_PWR_OFF:
1836 		/*
1837 		 * Currently there is no device power management for
1838 		 * the root complex (fire). When there is we need to make
1839 		 * sure that it is at full power before trying to send the
1840 		 * PME_Turn_Off message.
1841 		 */
1842 		DBG(DBG_PWR, px_p->px_dip,
1843 		    "ioctl: request to send PME_Turn_Off\n");
1844 		return (px_goto_l23ready(px_p));
1845 
1846 	case PPMREQ_PRE_PWR_ON:
1847 		DBG(DBG_PWR, px_p->px_dip, "ioctl: PRE_PWR_ON request\n");
1848 		return (px_pre_pwron_check(px_p));
1849 
1850 	case PPMREQ_POST_PWR_ON:
1851 		DBG(DBG_PWR, px_p->px_dip, "ioctl: POST_PWR_ON request\n");
1852 		return (px_goto_l0(px_p));
1853 
1854 	default:
1855 		return (DDI_FAILURE);
1856 	}
1857 }
1858 
/*
 * Sends the PME_Turn_Off message to put the link in the L2/L3 ready
 * state.  Reached through px_lib_pmctl() for PPMREQ_PRE_PWR_OFF.
 * Returns DDI_SUCCESS or DDI_FAILURE.
 *
 * 1. Fail immediately if the nexus has no PM info.
 * 2. Under pwr_lock and px_l23ready_lock, clear the PME_To_ACK received
 *    flag, mark link down as expected and send PME_Turn_Off.
 * 3. Wait on px_l23ready_cv until PME_To_ACK is flagged or the
 *    px_pme_to_ack_timeout deadline passes.
 * 4. If the flag is still clear, drop px_l23ready_lock, delay and check
 *    once more; fail if it is still clear.
 * 5. On success wait for the link to reach L1 idle (with an extra delay
 *    as a workaround when it does not) and record link level
 *    PM_LEVEL_L3.
 */
static int
px_goto_l23ready(px_t *px_p)
{
	pcie_pwr_t	*pwr_p;
	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
	caddr_t	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
	int		ret = DDI_SUCCESS;
	clock_t		end, timeleft;
	int		mutex_held = 1;

	/* If no PM info, return failure */
	if (!PCIE_PMINFO(px_p->px_dip) ||
	    !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)))
		return (DDI_FAILURE);

	mutex_enter(&pwr_p->pwr_lock);
	mutex_enter(&px_p->px_l23ready_lock);
	/* Clear the PME_To_ACK received flag */
	px_p->px_pm_flags &= ~PX_PMETOACK_RECVD;
	/*
	 * When P25 is the downstream device, after receiving
	 * PME_To_ACK, fire will go to Detect state, which causes
	 * the link down event. Inform FMA that this is expected.
	 * In case of all other cards compliant with the pci express
	 * spec, this will happen when the power is re-applied. FMA
	 * code will clear this flag after one instance of LDN. Since
	 * there will not be a LDN event for the spec compliant cards,
	 * we need to clear the flag after receiving PME_To_ACK.
	 */
	px_p->px_pm_flags |= PX_LDN_EXPECTED;
	if (px_send_pme_turnoff(csr_base) != DDI_SUCCESS) {
		ret = DDI_FAILURE;
		goto l23ready_done;
	}
	px_p->px_pm_flags |= PX_PME_TURNOFF_PENDING;

	end = ddi_get_lbolt() + drv_usectohz(px_pme_to_ack_timeout);
	while (!(px_p->px_pm_flags & PX_PMETOACK_RECVD)) {
		timeleft = cv_timedwait(&px_p->px_l23ready_cv,
		    &px_p->px_l23ready_lock, end);
		/*
		 * if cv_timedwait returns -1, it is either
		 * 1) timed out or
		 * 2) there was a premature wakeup but by the time
		 * cv_timedwait is called again end < lbolt i.e.
		 * end is in the past.
		 * 3) By the time we make first cv_timedwait call,
		 * end < lbolt is true.
		 */
		if (timeleft == -1)
			break;
	}
	if (!(px_p->px_pm_flags & PX_PMETOACK_RECVD)) {
		/*
		 * Either timed out or the interrupt didn't get a
		 * chance to grab the mutex and set the flag.
		 * Release the mutex and delay for some time.
		 * This will 1) give the interrupt a chance to
		 * set the flag 2) create a delay between two
		 * consecutive requests.
		 */
		mutex_exit(&px_p->px_l23ready_lock);
		delay(drv_usectohz(50 * PX_MSEC_TO_USEC));
		mutex_held = 0;
		if (!(px_p->px_pm_flags & PX_PMETOACK_RECVD)) {
			ret = DDI_FAILURE;
			DBG(DBG_PWR, px_p->px_dip, " Timed out while waiting"
			    " for PME_TO_ACK\n");
		}
	}
	/*
	 * NOTE(review): when the branch above was taken, px_l23ready_lock
	 * is no longer held here (only pwr_lock is), yet px_pm_flags is
	 * read above and modified below - confirm this is safe against
	 * px_pmeq_intr().
	 */
	px_p->px_pm_flags &=
	    ~(PX_PME_TURNOFF_PENDING | PX_PMETOACK_RECVD | PX_LDN_EXPECTED);

l23ready_done:
	if (mutex_held)
		mutex_exit(&px_p->px_l23ready_lock);
	/*
	 * Wait till link is in L1 idle, if sending PME_Turn_Off
	 * was successful.
	 */
	if (ret == DDI_SUCCESS) {
		if (px_link_wait4l1idle(csr_base) != DDI_SUCCESS) {
			DBG(DBG_PWR, px_p->px_dip, " Link is not at L1"
			    " even though we received PME_To_ACK.\n");
			/*
			 * Workaround for hardware bug with P25.
			 * Due to a hardware bug with P25, link state
			 * will be Detect state rather than L1 after
			 * link is transitioned to L23Ready state. Since
			 * we don't know whether link is L23ready state
			 * without Fire's state being L1_idle, we delay
			 * here just to make sure that we wait till link
			 * is transitioned to L23Ready state.
			 */
			delay(drv_usectohz(100 * PX_MSEC_TO_USEC));
		}
		pwr_p->pwr_link_lvl = PM_LEVEL_L3;

	}
	mutex_exit(&pwr_p->pwr_lock);
	return (ret);
}
1971 
1972 /*
1973  * Message interrupt handler intended to be shared for both
1974  * PME and PME_TO_ACK msg handling, currently only handles
1975  * PME_To_ACK message.
1976  */
1977 uint_t
1978 px_pmeq_intr(caddr_t arg)
1979 {
1980 	px_t	*px_p = (px_t *)arg;
1981 
1982 	DBG(DBG_PWR, px_p->px_dip, " PME_To_ACK received \n");
1983 	mutex_enter(&px_p->px_l23ready_lock);
1984 	cv_broadcast(&px_p->px_l23ready_cv);
1985 	if (px_p->px_pm_flags & PX_PME_TURNOFF_PENDING) {
1986 		px_p->px_pm_flags |= PX_PMETOACK_RECVD;
1987 	} else {
1988 		/*
1989 		 * This maybe the second ack received. If so then,
1990 		 * we should be receiving it during wait4L1 stage.
1991 		 */
1992 		px_p->px_pmetoack_ignored++;
1993 	}
1994 	mutex_exit(&px_p->px_l23ready_lock);
1995 	return (DDI_INTR_CLAIMED);
1996 }
1997 
1998 static int
1999 px_pre_pwron_check(px_t *px_p)
2000 {
2001 	pcie_pwr_t	*pwr_p;
2002 
2003 	/* If no PM info, return failure */
2004 	if (!PCIE_PMINFO(px_p->px_dip) ||
2005 	    !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)))
2006 		return (DDI_FAILURE);
2007 
2008 	/*
2009 	 * For the spec compliant downstream cards link down
2010 	 * is expected when the device is powered on.
2011 	 */
2012 	px_p->px_pm_flags |= PX_LDN_EXPECTED;
2013 	return (pwr_p->pwr_link_lvl == PM_LEVEL_L3 ? DDI_SUCCESS : DDI_FAILURE);
2014 }
2015 
2016 static int
2017 px_goto_l0(px_t *px_p)
2018 {
2019 	pcie_pwr_t	*pwr_p;
2020 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2021 	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2022 	int		ret = DDI_SUCCESS;
2023 	uint64_t	time_spent = 0;
2024 
2025 	/* If no PM info, return failure */
2026 	if (!PCIE_PMINFO(px_p->px_dip) ||
2027 	    !(pwr_p = PCIE_NEXUS_PMINFO(px_p->px_dip)))
2028 		return (DDI_FAILURE);
2029 
2030 	mutex_enter(&pwr_p->pwr_lock);
2031 	/*
2032 	 * The following link retrain activity will cause LDN and LUP event.
2033 	 * Receiving LDN prior to receiving LUP is expected, not an error in
2034 	 * this case.  Receiving LUP indicates link is fully up to support
2035 	 * powering up down stream device, and of course any further LDN and
2036 	 * LUP outside this context will be error.
2037 	 */
2038 	px_p->px_lup_pending = 1;
2039 	if (px_link_retrain(csr_base) != DDI_SUCCESS) {
2040 		ret = DDI_FAILURE;
2041 		goto l0_done;
2042 	}
2043 
2044 	/* LUP event takes the order of 15ms amount of time to occur */
2045 	for (; px_p->px_lup_pending && (time_spent < px_lup_poll_to);
2046 	    time_spent += px_lup_poll_interval)
2047 		drv_usecwait(px_lup_poll_interval);
2048 	if (px_p->px_lup_pending)
2049 		ret = DDI_FAILURE;
2050 l0_done:
2051 	px_enable_detect_quiet(csr_base);
2052 	if (ret == DDI_SUCCESS)
2053 		pwr_p->pwr_link_lvl = PM_LEVEL_L0;
2054 	mutex_exit(&pwr_p->pwr_lock);
2055 	return (ret);
2056 }
2057 
2058 /*
2059  * Extract the drivers binding name to identify which chip we're binding to.
2060  * Whenever a new bus bridge is created, the driver alias entry should be
2061  * added here to identify the device if needed.  If a device isn't added,
2062  * the identity defaults to PX_CHIP_UNIDENTIFIED.
2063  */
2064 static uint32_t
2065 px_identity_init(px_t *px_p)
2066 {
2067 	dev_info_t	*dip = px_p->px_dip;
2068 	char		*name = ddi_binding_name(dip);
2069 	uint32_t	revision = 0;
2070 
2071 	revision = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2072 	    "module-revision#", 0);
2073 
2074 	/* Check for Fire driver binding name */
2075 	if (strcmp(name, "pciex108e,80f0") == 0) {
2076 		DBG(DBG_ATTACH, dip, "px_identity_init: %s%d: "
2077 		    "(FIRE), module-revision %d\n", NAMEINST(dip),
2078 		    revision);
2079 
2080 		return ((revision >= FIRE_MOD_REV_20) ?
2081 		    PX_CHIP_FIRE : PX_CHIP_UNIDENTIFIED);
2082 	}
2083 
2084 	/* Check for Oberon driver binding name */
2085 	if (strcmp(name, "pciex108e,80f8") == 0) {
2086 		DBG(DBG_ATTACH, dip, "px_identity_init: %s%d: "
2087 		    "(OBERON), module-revision %d\n", NAMEINST(dip),
2088 		    revision);
2089 
2090 		return (PX_CHIP_OBERON);
2091 	}
2092 
2093 	DBG(DBG_ATTACH, dip, "%s%d: Unknown PCI Express Host bridge %s %x\n",
2094 	    ddi_driver_name(dip), ddi_get_instance(dip), name, revision);
2095 
2096 	return (PX_CHIP_UNIDENTIFIED);
2097 }
2098 
2099 int
2100 px_err_add_intr(px_fault_t *px_fault_p)
2101 {
2102 	dev_info_t	*dip = px_fault_p->px_fh_dip;
2103 	px_t		*px_p = DIP_TO_STATE(dip);
2104 
2105 	VERIFY(add_ivintr(px_fault_p->px_fh_sysino, PX_ERR_PIL,
2106 	    (intrfunc)px_fault_p->px_err_func, (caddr_t)px_fault_p,
2107 	    NULL, NULL) == 0);
2108 
2109 	px_ib_intr_enable(px_p, intr_dist_cpuid(), px_fault_p->px_intr_ino);
2110 
2111 	return (DDI_SUCCESS);
2112 }
2113 
2114 void
2115 px_err_rem_intr(px_fault_t *px_fault_p)
2116 {
2117 	dev_info_t	*dip = px_fault_p->px_fh_dip;
2118 	px_t		*px_p = DIP_TO_STATE(dip);
2119 
2120 	px_ib_intr_disable(px_p->px_ib_p, px_fault_p->px_intr_ino,
2121 	    IB_INTR_WAIT);
2122 
2123 	VERIFY(rem_ivintr(px_fault_p->px_fh_sysino, PX_ERR_PIL) == 0);
2124 }
2125 
2126 /*
2127  * px_cb_intr_redist() - sun4u only, CB interrupt redistribution
2128  */
2129 void
2130 px_cb_intr_redist(void *arg)
2131 {
2132 	px_cb_t		*cb_p = (px_cb_t *)arg;
2133 	px_cb_list_t	*pxl;
2134 	px_t		*pxp = NULL;
2135 	px_fault_t	*f_p = NULL;
2136 	uint32_t	new_cpuid;
2137 	intr_valid_state_t	enabled = 0;
2138 
2139 	mutex_enter(&cb_p->cb_mutex);
2140 
2141 	pxl = cb_p->pxl;
2142 	if (!pxl)
2143 		goto cb_done;
2144 
2145 	pxp = pxl->pxp;
2146 	f_p = &pxp->px_cb_fault;
2147 	for (; pxl && (f_p->px_fh_sysino != cb_p->sysino); ) {
2148 		pxl = pxl->next;
2149 		pxp = pxl->pxp;
2150 		f_p = &pxp->px_cb_fault;
2151 	}
2152 	if (pxl == NULL)
2153 		goto cb_done;
2154 
2155 	new_cpuid =  intr_dist_cpuid();
2156 	if (new_cpuid == cb_p->cpuid)
2157 		goto cb_done;
2158 
2159 	if ((px_lib_intr_getvalid(pxp->px_dip, f_p->px_fh_sysino, &enabled)
2160 	    != DDI_SUCCESS) || !enabled) {
2161 		DBG(DBG_IB, pxp->px_dip, "px_cb_intr_redist: CB not enabled, "
2162 		    "sysino(0x%x)\n", f_p->px_fh_sysino);
2163 		goto cb_done;
2164 	}
2165 
2166 	PX_INTR_DISABLE(pxp->px_dip, f_p->px_fh_sysino);
2167 
2168 	cb_p->cpuid = new_cpuid;
2169 	cb_p->sysino = f_p->px_fh_sysino;
2170 	PX_INTR_ENABLE(pxp->px_dip, cb_p->sysino, cb_p->cpuid);
2171 
2172 cb_done:
2173 	mutex_exit(&cb_p->cb_mutex);
2174 }
2175 
2176 /*
2177  * px_cb_add_intr() - Called from attach(9E) to create CB if not yet
2178  * created, to add CB interrupt vector always, but enable only once.
2179  */
2180 int
2181 px_cb_add_intr(px_fault_t *fault_p)
2182 {
2183 	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip);
2184 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2185 	px_cb_t		*cb_p = (px_cb_t *)px_get_cb(fault_p->px_fh_dip);
2186 	px_cb_list_t	*pxl, *pxl_new;
2187 	boolean_t	is_proxy = B_FALSE;
2188 
2189 	/* create cb */
2190 	if (cb_p == NULL) {
2191 		cb_p = kmem_zalloc(sizeof (px_cb_t), KM_SLEEP);
2192 
2193 		mutex_init(&cb_p->cb_mutex, NULL, MUTEX_DRIVER,
2194 		    (void *) ipltospl(FM_ERR_PIL));
2195 
2196 		cb_p->px_cb_func = px_cb_intr;
2197 		pxu_p->px_cb_p = cb_p;
2198 		px_set_cb(fault_p->px_fh_dip, (uint64_t)cb_p);
2199 
2200 		/* px_lib_dev_init allows only FIRE and OBERON */
2201 		px_err_reg_enable(
2202 		    (pxu_p->chip_type == PX_CHIP_FIRE) ?
2203 		    PX_ERR_JBC : PX_ERR_UBC,
2204 		    pxu_p->px_address[PX_REG_XBC]);
2205 	} else
2206 		pxu_p->px_cb_p = cb_p;
2207 
2208 	/* register cb interrupt */
2209 	VERIFY(add_ivintr(fault_p->px_fh_sysino, PX_ERR_PIL,
2210 	    (intrfunc)cb_p->px_cb_func, (caddr_t)cb_p, NULL, NULL) == 0);
2211 
2212 
2213 	/* update cb list */
2214 	mutex_enter(&cb_p->cb_mutex);
2215 	if (cb_p->pxl == NULL) {
2216 		is_proxy = B_TRUE;
2217 		pxl = kmem_zalloc(sizeof (px_cb_list_t), KM_SLEEP);
2218 		pxl->pxp = px_p;
2219 		cb_p->pxl = pxl;
2220 		cb_p->sysino = fault_p->px_fh_sysino;
2221 		cb_p->cpuid = intr_dist_cpuid();
2222 	} else {
2223 		/*
2224 		 * Find the last pxl or
2225 		 * stop short at encountering a redundent entry, or
2226 		 * both.
2227 		 */
2228 		pxl = cb_p->pxl;
2229 		for (; !(pxl->pxp == px_p) && pxl->next; pxl = pxl->next) {};
2230 		ASSERT(pxl->pxp != px_p);
2231 
2232 		/* add to linked list */
2233 		pxl_new = kmem_zalloc(sizeof (px_cb_list_t), KM_SLEEP);
2234 		pxl_new->pxp = px_p;
2235 		pxl->next = pxl_new;
2236 	}
2237 	cb_p->attachcnt++;
2238 	mutex_exit(&cb_p->cb_mutex);
2239 
2240 	if (is_proxy) {
2241 		/* add to interrupt redistribution list */
2242 		intr_dist_add(px_cb_intr_redist, cb_p);
2243 
2244 		/* enable cb hw interrupt */
2245 		px_ib_intr_enable(px_p, cb_p->cpuid, fault_p->px_intr_ino);
2246 	}
2247 
2248 	return (DDI_SUCCESS);
2249 }
2250 
2251 /*
2252  * px_cb_rem_intr() - Called from detach(9E) to remove its CB
2253  * interrupt vector, to shift proxy to the next available px,
2254  * or disable CB interrupt when itself is the last.
2255  */
2256 void
2257 px_cb_rem_intr(px_fault_t *fault_p)
2258 {
2259 	px_t		*px_p = DIP_TO_STATE(fault_p->px_fh_dip), *pxp;
2260 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2261 	px_cb_t		*cb_p = PX2CB(px_p);
2262 	px_cb_list_t	*pxl, *prev;
2263 	px_fault_t	*f_p;
2264 
2265 	ASSERT(cb_p->pxl);
2266 
2267 	/* find and remove this px, and update cb list */
2268 	mutex_enter(&cb_p->cb_mutex);
2269 
2270 	pxl = cb_p->pxl;
2271 	if (pxl->pxp == px_p) {
2272 		cb_p->pxl = pxl->next;
2273 	} else {
2274 		prev = pxl;
2275 		pxl = pxl->next;
2276 		for (; pxl && (pxl->pxp != px_p); prev = pxl, pxl = pxl->next) {
2277 		};
2278 		if (!pxl) {
2279 			cmn_err(CE_WARN, "px_cb_rem_intr: can't find px_p 0x%p "
2280 			    "in registered CB list.", (void *)px_p);
2281 			mutex_exit(&cb_p->cb_mutex);
2282 			return;
2283 		}
2284 		prev->next = pxl->next;
2285 	}
2286 	pxu_p->px_cb_p = NULL;
2287 	cb_p->attachcnt--;
2288 	kmem_free(pxl, sizeof (px_cb_list_t));
2289 	mutex_exit(&cb_p->cb_mutex);
2290 
2291 	/* disable cb hw interrupt */
2292 	if (fault_p->px_fh_sysino == cb_p->sysino)
2293 		px_ib_intr_disable(px_p->px_ib_p, fault_p->px_intr_ino,
2294 		    IB_INTR_WAIT);
2295 
2296 	/* if last px, remove from interrupt redistribution list */
2297 	if (cb_p->pxl == NULL)
2298 		intr_dist_rem(px_cb_intr_redist, cb_p);
2299 
2300 	/* de-register interrupt */
2301 	VERIFY(rem_ivintr(fault_p->px_fh_sysino, PX_ERR_PIL) == 0);
2302 
2303 	/* if not last px, assign next px to manage cb */
2304 	mutex_enter(&cb_p->cb_mutex);
2305 	if (cb_p->pxl) {
2306 		if (fault_p->px_fh_sysino == cb_p->sysino) {
2307 			pxp = cb_p->pxl->pxp;
2308 			f_p = &pxp->px_cb_fault;
2309 			cb_p->sysino = f_p->px_fh_sysino;
2310 
2311 			PX_INTR_ENABLE(pxp->px_dip, cb_p->sysino, cb_p->cpuid);
2312 			(void) px_lib_intr_setstate(pxp->px_dip, cb_p->sysino,
2313 			    INTR_IDLE_STATE);
2314 		}
2315 		mutex_exit(&cb_p->cb_mutex);
2316 		return;
2317 	}
2318 
2319 	/* clean up after the last px */
2320 	mutex_exit(&cb_p->cb_mutex);
2321 
2322 	/* px_lib_dev_init allows only FIRE and OBERON */
2323 	px_err_reg_disable(
2324 	    (pxu_p->chip_type == PX_CHIP_FIRE) ? PX_ERR_JBC : PX_ERR_UBC,
2325 	    pxu_p->px_address[PX_REG_XBC]);
2326 
2327 	mutex_destroy(&cb_p->cb_mutex);
2328 	px_set_cb(fault_p->px_fh_dip, 0ull);
2329 	kmem_free(cb_p, sizeof (px_cb_t));
2330 }
2331 
2332 /*
2333  * px_cb_intr() - sun4u only,  CB interrupt dispatcher
2334  */
2335 uint_t
2336 px_cb_intr(caddr_t arg)
2337 {
2338 	px_cb_t		*cb_p = (px_cb_t *)arg;
2339 	px_t		*pxp;
2340 	px_fault_t	*f_p;
2341 	int		ret;
2342 
2343 	mutex_enter(&cb_p->cb_mutex);
2344 
2345 	if (!cb_p->pxl) {
2346 		mutex_exit(&cb_p->cb_mutex);
2347 		return (DDI_INTR_UNCLAIMED);
2348 	}
2349 
2350 	pxp = cb_p->pxl->pxp;
2351 	f_p = &pxp->px_cb_fault;
2352 
2353 	ret = f_p->px_err_func((caddr_t)f_p);
2354 
2355 	mutex_exit(&cb_p->cb_mutex);
2356 	return (ret);
2357 }
2358 
#ifdef	FMA
/*
 * px_fill_rc_status() - placeholder, compiled only when FMA is defined.
 *
 * Intended to fill rc_status from the root complex registers; the register
 * reads have not been written yet (TBD), so the function currently does
 * nothing with either argument.
 */
/*ARGSUSED*/
void
px_fill_rc_status(px_fault_t *px_fault_p, pciex_rc_error_regs_t *rc_status)
{
}
#endif /* FMA */
2366 
2367 /*
2368  * cpr callback
2369  *
2370  * disable fabric error msg interrupt prior to suspending
2371  * all device drivers; re-enable fabric error msg interrupt
2372  * after all devices are resumed.
2373  */
2374 static boolean_t
2375 px_cpr_callb(void *arg, int code)
2376 {
2377 	px_t		*px_p = (px_t *)arg;
2378 	px_ib_t		*ib_p = px_p->px_ib_p;
2379 	px_pec_t	*pec_p = px_p->px_pec_p;
2380 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2381 	caddr_t		csr_base;
2382 	devino_t	ce_ino, nf_ino, f_ino;
2383 	px_ino_t	*ce_ino_p, *nf_ino_p, *f_ino_p;
2384 	uint64_t	imu_log_enable, imu_intr_enable;
2385 	uint64_t	imu_log_mask, imu_intr_mask;
2386 
2387 	ce_ino = px_msiqid_to_devino(px_p, pec_p->pec_corr_msg_msiq_id);
2388 	nf_ino = px_msiqid_to_devino(px_p, pec_p->pec_non_fatal_msg_msiq_id);
2389 	f_ino = px_msiqid_to_devino(px_p, pec_p->pec_fatal_msg_msiq_id);
2390 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2391 
2392 	imu_log_enable = CSR_XR(csr_base, IMU_ERROR_LOG_ENABLE);
2393 	imu_intr_enable = CSR_XR(csr_base, IMU_INTERRUPT_ENABLE);
2394 
2395 	imu_log_mask = BITMASK(IMU_ERROR_LOG_ENABLE_FATAL_MES_NOT_EN_LOG_EN) |
2396 	    BITMASK(IMU_ERROR_LOG_ENABLE_NONFATAL_MES_NOT_EN_LOG_EN) |
2397 	    BITMASK(IMU_ERROR_LOG_ENABLE_COR_MES_NOT_EN_LOG_EN);
2398 
2399 	imu_intr_mask =
2400 	    BITMASK(IMU_INTERRUPT_ENABLE_FATAL_MES_NOT_EN_S_INT_EN) |
2401 	    BITMASK(IMU_INTERRUPT_ENABLE_NONFATAL_MES_NOT_EN_S_INT_EN) |
2402 	    BITMASK(IMU_INTERRUPT_ENABLE_COR_MES_NOT_EN_S_INT_EN) |
2403 	    BITMASK(IMU_INTERRUPT_ENABLE_FATAL_MES_NOT_EN_P_INT_EN) |
2404 	    BITMASK(IMU_INTERRUPT_ENABLE_NONFATAL_MES_NOT_EN_P_INT_EN) |
2405 	    BITMASK(IMU_INTERRUPT_ENABLE_COR_MES_NOT_EN_P_INT_EN);
2406 
2407 	switch (code) {
2408 	case CB_CODE_CPR_CHKPT:
2409 		/* disable imu rbne on corr/nonfatal/fatal errors */
2410 		CSR_XS(csr_base, IMU_ERROR_LOG_ENABLE,
2411 		    imu_log_enable & (~imu_log_mask));
2412 
2413 		CSR_XS(csr_base, IMU_INTERRUPT_ENABLE,
2414 		    imu_intr_enable & (~imu_intr_mask));
2415 
2416 		/* disable CORR intr mapping */
2417 		px_ib_intr_disable(ib_p, ce_ino, IB_INTR_NOWAIT);
2418 
2419 		/* disable NON FATAL intr mapping */
2420 		px_ib_intr_disable(ib_p, nf_ino, IB_INTR_NOWAIT);
2421 
2422 		/* disable FATAL intr mapping */
2423 		px_ib_intr_disable(ib_p, f_ino, IB_INTR_NOWAIT);
2424 
2425 		break;
2426 
2427 	case CB_CODE_CPR_RESUME:
2428 		pxu_p->cpr_flag = PX_NOT_CPR;
2429 		mutex_enter(&ib_p->ib_ino_lst_mutex);
2430 
2431 		ce_ino_p = px_ib_locate_ino(ib_p, ce_ino);
2432 		nf_ino_p = px_ib_locate_ino(ib_p, nf_ino);
2433 		f_ino_p = px_ib_locate_ino(ib_p, f_ino);
2434 
2435 		/* enable CORR intr mapping */
2436 		if (ce_ino_p)
2437 			px_ib_intr_enable(px_p, ce_ino_p->ino_cpuid, ce_ino);
2438 		else
2439 			cmn_err(CE_WARN, "px_cpr_callb: RESUME unable to "
2440 			    "reenable PCIe Correctable msg intr.\n");
2441 
2442 		/* enable NON FATAL intr mapping */
2443 		if (nf_ino_p)
2444 			px_ib_intr_enable(px_p, nf_ino_p->ino_cpuid, nf_ino);
2445 		else
2446 			cmn_err(CE_WARN, "px_cpr_callb: RESUME unable to "
2447 			    "reenable PCIe Non Fatal msg intr.\n");
2448 
2449 		/* enable FATAL intr mapping */
2450 		if (f_ino_p)
2451 			px_ib_intr_enable(px_p, f_ino_p->ino_cpuid, f_ino);
2452 		else
2453 			cmn_err(CE_WARN, "px_cpr_callb: RESUME unable to "
2454 			    "reenable PCIe Fatal msg intr.\n");
2455 
2456 		mutex_exit(&ib_p->ib_ino_lst_mutex);
2457 
2458 		/* enable corr/nonfatal/fatal not enable error */
2459 		CSR_XS(csr_base, IMU_ERROR_LOG_ENABLE, (imu_log_enable |
2460 		    (imu_log_mask & px_imu_log_mask)));
2461 		CSR_XS(csr_base, IMU_INTERRUPT_ENABLE, (imu_intr_enable |
2462 		    (imu_intr_mask & px_imu_intr_mask)));
2463 
2464 		break;
2465 	}
2466 
2467 	return (B_TRUE);
2468 }
2469 
2470 uint64_t
2471 px_get_rng_parent_hi_mask(px_t *px_p)
2472 {
2473 	pxu_t *pxu_p = (pxu_t *)px_p->px_plat_p;
2474 	uint64_t mask;
2475 
2476 	switch (PX_CHIP_TYPE(pxu_p)) {
2477 	case PX_CHIP_OBERON:
2478 		mask = OBERON_RANGE_PROP_MASK;
2479 		break;
2480 	case PX_CHIP_FIRE:
2481 		mask = PX_RANGE_PROP_MASK;
2482 		break;
2483 	default:
2484 		mask = PX_RANGE_PROP_MASK;
2485 	}
2486 
2487 	return (mask);
2488 }
2489 
2490 /*
2491  * fetch chip's range propery's value
2492  */
2493 uint64_t
2494 px_get_range_prop(px_t *px_p, pci_ranges_t *rp, int bank)
2495 {
2496 	uint64_t mask, range_prop;
2497 
2498 	mask = px_get_rng_parent_hi_mask(px_p);
2499 	range_prop = (((uint64_t)(rp[bank].parent_high & mask)) << 32) |
2500 	    rp[bank].parent_low;
2501 
2502 	return (range_prop);
2503 }
2504 
2505 /*
2506  * fetch the config space base addr of the root complex
2507  * note this depends on px structure being initialized
2508  */
2509 uint64_t
2510 px_lib_get_cfgacc_base(dev_info_t *dip)
2511 {
2512 	int		instance = DIP_TO_INST(dip);
2513 	px_t		*px_p = INST_TO_STATE(instance);
2514 	pci_ranges_t	*rp = px_p->px_ranges_p;
2515 	int		bank = PCI_REG_ADDR_G(PCI_ADDR_CONFIG);
2516 
2517 	/* Get Fire's Physical Base Address */
2518 	return (px_get_range_prop(px_p, rp, bank));
2519 }
2520 
2521 /*
2522  * add cpr callback
2523  */
2524 void
2525 px_cpr_add_callb(px_t *px_p)
2526 {
2527 	px_p->px_cprcb_id = callb_add(px_cpr_callb, (void *)px_p,
2528 	    CB_CL_CPR_POST_USER, "px_cpr");
2529 }
2530 
2531 /*
2532  * remove cpr callback
2533  */
2534 void
2535 px_cpr_rem_callb(px_t *px_p)
2536 {
2537 	(void) callb_delete(px_p->px_cprcb_id);
2538 }
2539 
2540 /*ARGSUSED*/
2541 static uint_t
2542 px_hp_intr(caddr_t arg1, caddr_t arg2)
2543 {
2544 	px_t		*px_p = (px_t *)arg1;
2545 	pxu_t		*pxu_p = (pxu_t *)px_p->px_plat_p;
2546 	int		rval;
2547 
2548 	rval = pcie_intr(px_p->px_dip);
2549 
2550 #ifdef  DEBUG
2551 	if (rval == DDI_INTR_UNCLAIMED)
2552 		cmn_err(CE_WARN, "%s%d: UNCLAIMED intr\n",
2553 		    ddi_driver_name(px_p->px_dip),
2554 		    ddi_get_instance(px_p->px_dip));
2555 #endif
2556 
2557 	/* Set the interrupt state to idle */
2558 	if (px_lib_intr_setstate(px_p->px_dip,
2559 	    pxu_p->hp_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
2560 		return (DDI_INTR_UNCLAIMED);
2561 
2562 	return (rval);
2563 }
2564 
2565 int
2566 px_lib_hotplug_init(dev_info_t *dip, void *arg)
2567 {
2568 	px_t	*px_p = DIP_TO_STATE(dip);
2569 	pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2570 	uint64_t ret;
2571 
2572 	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2573 	    "hotplug-capable") == 0)
2574 		return (DDI_FAILURE);
2575 
2576 	if ((ret = hvio_hotplug_init(dip, arg)) == DDI_SUCCESS) {
2577 		if (px_lib_intr_devino_to_sysino(px_p->px_dip,
2578 		    px_p->px_inos[PX_INTR_HOTPLUG], &pxu_p->hp_sysino) !=
2579 		    DDI_SUCCESS) {
2580 #ifdef	DEBUG
2581 			cmn_err(CE_WARN, "%s%d: devino_to_sysino fails\n",
2582 			    ddi_driver_name(px_p->px_dip),
2583 			    ddi_get_instance(px_p->px_dip));
2584 #endif
2585 			return (DDI_FAILURE);
2586 		}
2587 
2588 		VERIFY(add_ivintr(pxu_p->hp_sysino, PCIE_INTR_PRI,
2589 		    (intrfunc)px_hp_intr, (caddr_t)px_p, NULL, NULL) == 0);
2590 
2591 		px_ib_intr_enable(px_p, intr_dist_cpuid(),
2592 		    px_p->px_inos[PX_INTR_HOTPLUG]);
2593 	}
2594 
2595 	return (ret);
2596 }
2597 
2598 void
2599 px_lib_hotplug_uninit(dev_info_t *dip)
2600 {
2601 	if (hvio_hotplug_uninit(dip) == DDI_SUCCESS) {
2602 		px_t	*px_p = DIP_TO_STATE(dip);
2603 		pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2604 
2605 		px_ib_intr_disable(px_p->px_ib_p,
2606 		    px_p->px_inos[PX_INTR_HOTPLUG], IB_INTR_WAIT);
2607 
2608 		VERIFY(rem_ivintr(pxu_p->hp_sysino, PCIE_INTR_PRI) == 0);
2609 	}
2610 }
2611 
2612 /*
2613  * px_hp_intr_redist() - sun4u only, HP interrupt redistribution
2614  */
2615 void
2616 px_hp_intr_redist(px_t *px_p)
2617 {
2618 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(px_p->px_dip);
2619 
2620 	if (px_p && PCIE_IS_PCIE_HOTPLUG_ENABLED(bus_p)) {
2621 		px_ib_intr_dist_en(px_p->px_dip, intr_dist_cpuid(),
2622 		    px_p->px_inos[PX_INTR_HOTPLUG], B_FALSE);
2623 	}
2624 }
2625 
2626 boolean_t
2627 px_lib_is_in_drain_state(px_t *px_p)
2628 {
2629 	pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2630 	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2631 	uint64_t drain_status;
2632 
2633 	if (PX_CHIP_TYPE(pxu_p) == PX_CHIP_OBERON) {
2634 		drain_status = CSR_BR(csr_base, DRAIN_CONTROL_STATUS, DRAIN);
2635 	} else {
2636 		drain_status = CSR_BR(csr_base, TLU_STATUS, DRAIN);
2637 	}
2638 
2639 	return (drain_status);
2640 }
2641 
2642 pcie_req_id_t
2643 px_lib_get_bdf(px_t *px_p)
2644 {
2645 	pxu_t 	*pxu_p = (pxu_t *)px_p->px_plat_p;
2646 	caddr_t csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2647 	pcie_req_id_t bdf;
2648 
2649 	bdf = CSR_BR(csr_base, DMC_PCI_EXPRESS_CONFIGURATION, REQ_ID);
2650 
2651 	return (bdf);
2652 }
2653 
2654 /*ARGSUSED*/
2655 int
2656 px_lib_get_root_complex_mps(px_t *px_p, dev_info_t *dip, int *mps)
2657 {
2658 	pxu_t	*pxu_p;
2659 	caddr_t csr_base;
2660 
2661 	pxu_p = (pxu_t *)px_p->px_plat_p;
2662 
2663 	if (pxu_p == NULL)
2664 		return (DDI_FAILURE);
2665 
2666 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2667 
2668 
2669 	*mps = CSR_XR(csr_base, TLU_DEVICE_CAPABILITIES) &
2670 	    TLU_DEVICE_CAPABILITIES_MPS_MASK;
2671 
2672 	return (DDI_SUCCESS);
2673 }
2674 
2675 /*ARGSUSED*/
2676 int
2677 px_lib_set_root_complex_mps(px_t *px_p,  dev_info_t *dip, int mps)
2678 {
2679 	pxu_t	*pxu_p;
2680 	caddr_t csr_base;
2681 	uint64_t dev_ctrl;
2682 	int link_width, val;
2683 	px_chip_type_t chip_type = px_identity_init(px_p);
2684 
2685 	pxu_p = (pxu_t *)px_p->px_plat_p;
2686 
2687 	if (pxu_p == NULL)
2688 		return (DDI_FAILURE);
2689 
2690 	csr_base = (caddr_t)pxu_p->px_address[PX_REG_CSR];
2691 
2692 	dev_ctrl = CSR_XR(csr_base, TLU_DEVICE_CONTROL);
2693 	dev_ctrl |= (mps << TLU_DEVICE_CONTROL_MPS);
2694 
2695 	CSR_XS(csr_base, TLU_DEVICE_CONTROL, dev_ctrl);
2696 
2697 	link_width = CSR_FR(csr_base, TLU_LINK_STATUS, WIDTH);
2698 
2699 	/*
2700 	 * Convert link_width to match timer array configuration.
2701 	 */
2702 	switch (link_width) {
2703 	case 1:
2704 		link_width = 0;
2705 		break;
2706 	case 4:
2707 		link_width = 1;
2708 		break;
2709 	case 8:
2710 		link_width = 2;
2711 		break;
2712 	case 16:
2713 		link_width = 3;
2714 		break;
2715 	default:
2716 		link_width = 0;
2717 	}
2718 
2719 	val = px_replay_timer_table[mps][link_width];
2720 	CSR_XS(csr_base, LPU_TXLINK_REPLAY_TIMER_THRESHOLD, val);
2721 
2722 	if (chip_type == PX_CHIP_OBERON)
2723 		return (DDI_SUCCESS);
2724 
2725 	val = px_acknak_timer_table[mps][link_width];
2726 	CSR_XS(csr_base, LPU_TXLINK_FREQUENT_NAK_LATENCY_TIMER_THRESHOLD, val);
2727 
2728 	return (DDI_SUCCESS);
2729 }
2730 
2731 /*ARGSUSED*/
2732 int
2733 px_lib_fabric_sync(dev_info_t *dip)
2734 {
2735 	/* an no-op on sun4u platform */
2736 	return (DDI_SUCCESS);
2737 }
2738