1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * sun4v Fire Error Handling 31 */ 32 33 #include <sys/types.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/fm/protocol.h> 37 #include <sys/fm/util.h> 38 #include <sys/membar.h> 39 #include "px_obj.h" 40 #include "px_err.h" 41 42 static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt); 43 static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, 44 px_rc_err_t *epkt, int caller); 45 46 static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 47 px_rc_err_t *epkt, int caller); 48 static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 49 px_rc_err_t *epkt, int caller); 50 static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 51 px_rc_err_t *epkt, int caller); 52 53 /* 54 * px_err_cb_intr: 55 * Interrupt handler for the Host Bus Block. 56 */ 57 uint_t 58 px_err_cb_intr(caddr_t arg) 59 { 60 px_fault_t *fault_p = (px_fault_t *)arg; 61 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 62 63 if (epkt != NULL) { 64 return (px_err_common_intr(fault_p, epkt)); 65 } 66 67 return (DDI_INTR_UNCLAIMED); 68 } 69 70 /* 71 * px_err_dmc_pec_intr: 72 * Interrupt handler for the DMC/PEC block. 73 */ 74 uint_t 75 px_err_dmc_pec_intr(caddr_t arg) 76 { 77 px_fault_t *fault_p = (px_fault_t *)arg; 78 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 79 80 if (epkt != NULL) { 81 return (px_err_common_intr(fault_p, epkt)); 82 } 83 84 return (DDI_INTR_UNCLAIMED); 85 } 86 87 /* 88 * px_err_handle: 89 * Common function called by trap, mondo and fabric intr. 90 * This function is more meaningful in sun4u implementation. Kept 91 * to mirror sun4u call stack. 92 * o check for safe access 93 * 94 * @param px_p leaf in which to check access 95 * @param derr fm err data structure to be updated 96 * @param caller PX_TRAP_CALL | PX_INTR_CALL 97 * @param chkjbc whether to handle hostbus registers (ignored) 98 * @return err PX_OK | PX_NONFATAL | 99 * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL 100 */ 101 /* ARGSUSED */ 102 int 103 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, 104 boolean_t chkxbc) 105 { 106 /* check for safe access */ 107 px_err_safeacc_check(px_p, derr); 108 109 return (DDI_FM_OK); 110 } 111 112 /* 113 * px_err_common_intr: 114 * Interrupt handler for the JBC/DMC/PEC block. 115 * o lock 116 * o create derr 117 * o check safe access 118 * o px_err_check_severiy(epkt) 119 * o dispatch 120 * o Idle intr state 121 * o unlock 122 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 123 */ 124 static uint_t 125 px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) 126 { 127 px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); 128 dev_info_t *rpdip = px_p->px_dip; 129 px_cb_t *cb_p = px_p->px_cb_p; 130 int err, ret; 131 ddi_fm_error_t derr; 132 133 mutex_enter(&cb_p->xbc_fm_mutex); 134 135 /* Create the derr */ 136 bzero(&derr, sizeof (ddi_fm_error_t)); 137 derr.fme_version = DDI_FME_VERSION; 138 derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1); 139 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 140 141 /* Basically check for safe access */ 142 (void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE); 143 144 /* Check the severity of this error */ 145 err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL); 146 147 /* check for error severity */ 148 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 149 150 /* Set the intr state to idle for the leaf that received the mondo */ 151 if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, 152 INTR_IDLE_STATE) != DDI_SUCCESS) { 153 mutex_exit(&cb_p->xbc_fm_mutex); 154 return (DDI_INTR_UNCLAIMED); 155 } 156 157 mutex_exit(&cb_p->xbc_fm_mutex); 158 159 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) 160 PX_FM_PANIC("Fatal System Bus Error has occurred\n"); 161 162 return (DDI_INTR_CLAIMED); 163 } 164 165 /* 166 * px_err_check_severity: 167 * Check the severity of the fire error based the epkt received 168 * 169 * @param px_p leaf in which to take the snap shot. 170 * @param derr fm err in which the ereport is to be based on 171 * @param epkt epkt recevied from HV 172 */ 173 static int 174 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, 175 int caller) 176 { 177 px_pec_t *pec_p = px_p->px_pec_p; 178 dev_info_t *dip = px_p->px_dip; 179 int err = 0; 180 181 /* Cautious access error handling */ 182 if (derr->fme_flag == DDI_FM_ERR_EXPECTED) { 183 if (caller == PX_TRAP_CALL) { 184 /* 185 * for ddi_caut_get treat all events as nonfatal 186 * The trampoline will set err_ena = 0, 187 * err_status = NONFATAL. 188 */ 189 derr->fme_status = DDI_FM_NONFATAL; 190 } else { 191 /* 192 * For ddi_caut_put treat all events as nonfatal. Here 193 * we have the handle and can call ndi_fm_acc_err_set(). 194 */ 195 derr->fme_status = DDI_FM_NONFATAL; 196 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 197 } 198 } 199 200 switch (epkt->rc_descr.block) { 201 case BLOCK_HOSTBUS: 202 err = px_cb_check_errors(dip, derr, epkt, caller); 203 break; 204 case BLOCK_MMU: 205 err = px_mmu_check_errors(dip, derr, epkt, caller); 206 break; 207 case BLOCK_INTR: 208 err = PX_NONFATAL; 209 break; 210 case BLOCK_PCIE: 211 err = px_pcie_check_errors(dip, derr, epkt, caller); 212 break; 213 default: 214 err = PX_ERR_UNKNOWN; 215 } 216 217 return (err); 218 } 219 220 /* ARGSUSED */ 221 static int 222 px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 223 px_rc_err_t *epkt, int caller) 224 { 225 int fme_flag = derr->fme_flag; 226 boolean_t is_safeacc; 227 int ret, err = 0; 228 229 is_safeacc = (fme_flag == DDI_FM_ERR_EXPECTED) || 230 (fme_flag == DDI_FM_ERR_PEEK) || 231 (fme_flag == DDI_FM_ERR_POKE); 232 233 /* block/op/phase/cond/dir/flag... */ 234 switch (epkt->rc_descr.op) { 235 case OP_PIO: 236 err = PX_NONFATAL; 237 /* check handle if affected memory address is captured */ 238 if (epkt->rc_descr.M != 0) { 239 ret = px_handle_lookup(dip, ACC_HANDLE, 240 derr->fme_ena, (void *)epkt->addr); 241 } 242 if (ret == DDI_FM_FATAL) 243 err |= PX_FATAL_GOS; 244 break; 245 246 case OP_DMA: 247 switch (epkt->rc_descr.phase) { 248 case PH_ADDR: 249 err = PX_FATAL_GOS; 250 break; 251 case PH_DATA: 252 if (epkt->rc_descr.cond == CND_UE) { 253 err = PX_FATAL_GOS; 254 break; 255 } 256 257 err = PX_NONFATAL; 258 if (epkt->rc_descr.M == 1) { 259 ret = px_handle_lookup(dip, DMA_HANDLE, 260 derr->fme_ena, (void *)epkt->addr); 261 if (ret == DDI_FM_FATAL) 262 err |= PX_FATAL_GOS; 263 } 264 break; 265 default: 266 DBG(DBG_ERR_INTR, dip, "Unexpected epkt"); 267 err = PX_FATAL_GOS; 268 break; 269 } 270 break; 271 case OP_UNKNOWN: 272 err = PX_NONFATAL; 273 if (epkt->rc_descr.M == 1) { 274 int ret1, ret2; 275 276 ret1 = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, 277 (void *)epkt->addr); 278 ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena, 279 (void *)epkt->addr); 280 281 if (ret1 == DDI_FM_FATAL || ret2 == DDI_FM_FATAL) 282 err |= PX_FATAL_GOS; 283 } 284 break; 285 286 case OP_RESERVED: 287 default: 288 DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error."); 289 err = PX_FATAL_GOS; 290 break; 291 } 292 293 /* 294 * For protected safe access, consider PX_FATAL_GOS as the only 295 * exception for px to take immediate panic, else, treat errors 296 * as nonfatal. 297 */ 298 if (is_safeacc) { 299 if (err & PX_FATAL_GOS) 300 err = PX_FATAL_GOS; 301 else 302 err = PX_NONFATAL; 303 } 304 305 return (err); 306 } 307 308 /* ARGSUSED */ 309 static int 310 px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 311 px_rc_err_t *epkt, int caller) 312 { 313 int ret, err = 0; 314 315 switch (epkt->rc_descr.op) { 316 case OP_BYPASS: /* nonfatal */ 317 case OP_XLAT: /* nonfatal, stuck-fatal, fatal-reset */ 318 case OP_TBW: /* nonfatal, stuck-fatal */ 319 err = PX_NONFATAL; 320 break; 321 default: 322 err = PX_ERR_UNKNOWN; 323 break; 324 } 325 326 if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) { 327 ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, 328 (void *)epkt->addr); 329 if (ret == DDI_FM_FATAL) 330 err |= PX_FATAL_GOS; 331 else 332 err |= PX_NONFATAL; 333 } else 334 err |= PX_NONFATAL; 335 336 return (err); 337 } 338 339 /* ARGSUSED */ 340 static int 341 px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 342 px_rc_err_t *epkt, int caller) 343 { 344 int ret = PX_NONFATAL; 345 px_pec_err_t *pec = (px_pec_err_t *)epkt; 346 347 switch (pec->pec_descr.dir) { 348 case DIR_INGRESS: 349 case DIR_EGRESS: 350 case DIR_LINK: 351 ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, 352 px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos); 353 ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, 354 px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos); 355 break; 356 default: 357 ret = PX_ERR_UNKNOWN; 358 break; 359 } 360 361 return (ret); 362 } 363