1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * sun4v Fire Error Handling 30 */ 31 32 #include <sys/types.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/fm/protocol.h> 36 #include <sys/fm/util.h> 37 #include <sys/membar.h> 38 #include "px_obj.h" 39 #include "px_err.h" 40 41 static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt); 42 static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, 43 px_rc_err_t *epkt, int caller); 44 45 static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 46 px_rc_err_t *epkt, int caller); 47 static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 48 px_rc_err_t *epkt, int caller); 49 static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 50 px_rc_err_t *epkt, int caller); 51 52 /* 53 * px_err_cb_intr: 54 * Interrupt handler for the Host Bus Block. 55 */ 56 uint_t 57 px_err_cb_intr(caddr_t arg) 58 { 59 px_fault_t *fault_p = (px_fault_t *)arg; 60 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 61 62 if (epkt != NULL) { 63 return (px_err_common_intr(fault_p, epkt)); 64 } 65 66 return (DDI_INTR_UNCLAIMED); 67 } 68 69 /* 70 * px_err_dmc_pec_intr: 71 * Interrupt handler for the DMC/PEC block. 72 */ 73 uint_t 74 px_err_dmc_pec_intr(caddr_t arg) 75 { 76 px_fault_t *fault_p = (px_fault_t *)arg; 77 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 78 79 if (epkt != NULL) { 80 return (px_err_common_intr(fault_p, epkt)); 81 } 82 83 return (DDI_INTR_UNCLAIMED); 84 } 85 86 /* 87 * px_err_handle: 88 * Common function called by trap, mondo and fabric intr. 89 * This function is more meaningful in sun4u implementation. Kept 90 * to mirror sun4u call stack. 91 * o check for safe access 92 * 93 * @param px_p leaf in which to check access 94 * @param derr fm err data structure to be updated 95 * @param caller PX_TRAP_CALL | PX_INTR_CALL 96 * @param chkjbc whether to handle hostbus registers (ignored) 97 * @return err PX_OK | PX_NONFATAL | 98 * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL 99 */ 100 /* ARGSUSED */ 101 int 102 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, 103 boolean_t chkxbc) 104 { 105 /* check for safe access */ 106 px_err_safeacc_check(px_p, derr); 107 108 return (DDI_FM_OK); 109 } 110 111 /* 112 * px_err_common_intr: 113 * Interrupt handler for the JBC/DMC/PEC block. 114 * o lock 115 * o create derr 116 * o check safe access 117 * o px_err_check_severiy(epkt) 118 * o dispatch 119 * o Idle intr state 120 * o unlock 121 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 122 */ 123 static uint_t 124 px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) 125 { 126 px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); 127 dev_info_t *rpdip = px_p->px_dip; 128 int err, ret; 129 ddi_fm_error_t derr; 130 131 mutex_enter(&px_p->px_fm_mutex); 132 133 /* Create the derr */ 134 bzero(&derr, sizeof (ddi_fm_error_t)); 135 derr.fme_version = DDI_FME_VERSION; 136 derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1); 137 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 138 139 /* Basically check for safe access */ 140 (void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE); 141 142 /* Check the severity of this error */ 143 err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL); 144 145 /* check for error severity */ 146 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 147 148 /* Set the intr state to idle for the leaf that received the mondo */ 149 if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, 150 INTR_IDLE_STATE) != DDI_SUCCESS) { 151 mutex_exit(&px_p->px_fm_mutex); 152 return (DDI_INTR_UNCLAIMED); 153 } 154 155 mutex_exit(&px_p->px_fm_mutex); 156 157 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) 158 PX_FM_PANIC("Fatal System Bus Error has occurred\n"); 159 160 return (DDI_INTR_CLAIMED); 161 } 162 163 /* 164 * px_err_check_severity: 165 * Check the severity of the fire error based the epkt received 166 * 167 * @param px_p leaf in which to take the snap shot. 168 * @param derr fm err in which the ereport is to be based on 169 * @param epkt epkt recevied from HV 170 */ 171 static int 172 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, 173 int caller) 174 { 175 px_pec_t *pec_p = px_p->px_pec_p; 176 dev_info_t *dip = px_p->px_dip; 177 int err = 0; 178 179 /* Cautious access error handling */ 180 if (derr->fme_flag == DDI_FM_ERR_EXPECTED) { 181 if (caller == PX_TRAP_CALL) { 182 /* 183 * for ddi_caut_get treat all events as nonfatal 184 * The trampoline will set err_ena = 0, 185 * err_status = NONFATAL. 186 */ 187 derr->fme_status = DDI_FM_NONFATAL; 188 } else { 189 /* 190 * For ddi_caut_put treat all events as nonfatal. Here 191 * we have the handle and can call ndi_fm_acc_err_set(). 192 */ 193 derr->fme_status = DDI_FM_NONFATAL; 194 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 195 } 196 } 197 198 switch (epkt->rc_descr.block) { 199 case BLOCK_HOSTBUS: 200 err = px_cb_check_errors(dip, derr, epkt, caller); 201 break; 202 case BLOCK_MMU: 203 err = px_mmu_check_errors(dip, derr, epkt, caller); 204 break; 205 case BLOCK_INTR: 206 err = PX_NONFATAL; 207 break; 208 case BLOCK_PCIE: 209 err = px_pcie_check_errors(dip, derr, epkt, caller); 210 break; 211 default: 212 err = PX_ERR_UNKNOWN; 213 } 214 215 return (err); 216 } 217 218 /* ARGSUSED */ 219 static int 220 px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 221 px_rc_err_t *epkt, int caller) 222 { 223 int fme_flag = derr->fme_flag; 224 boolean_t is_safeacc; 225 int ret, err = 0; 226 227 is_safeacc = (fme_flag == DDI_FM_ERR_EXPECTED) || 228 (fme_flag == DDI_FM_ERR_PEEK) || 229 (fme_flag == DDI_FM_ERR_POKE); 230 231 /* block/op/phase/cond/dir/flag... */ 232 switch (epkt->rc_descr.op) { 233 case OP_PIO: 234 err = PX_NONFATAL; 235 /* check handle if affected memory address is captured */ 236 if (epkt->rc_descr.M != 0) { 237 ret = px_handle_lookup(dip, ACC_HANDLE, 238 derr->fme_ena, (void *)epkt->addr); 239 } 240 if (ret == DDI_FM_FATAL) 241 err |= PX_FATAL_GOS; 242 break; 243 244 case OP_DMA: 245 switch (epkt->rc_descr.phase) { 246 case PH_ADDR: 247 err = PX_FATAL_GOS; 248 break; 249 case PH_DATA: 250 if (epkt->rc_descr.cond == CND_UE) { 251 err = PX_FATAL_GOS; 252 break; 253 } 254 255 err = PX_NONFATAL; 256 if (epkt->rc_descr.M == 1) { 257 ret = px_handle_lookup(dip, DMA_HANDLE, 258 derr->fme_ena, (void *)epkt->addr); 259 if (ret == DDI_FM_FATAL) 260 err |= PX_FATAL_GOS; 261 } 262 break; 263 default: 264 DBG(DBG_ERR_INTR, dip, "Unexpected epkt"); 265 err = PX_FATAL_GOS; 266 break; 267 } 268 break; 269 case OP_UNKNOWN: 270 err = PX_NONFATAL; 271 if ((epkt->rc_descr.cond == CND_UNMAP) || 272 (epkt->rc_descr.cond == CND_UE) || 273 (epkt->rc_descr.cond == CND_INT) || 274 (epkt->rc_descr.cond == CND_ILL)) 275 err |= PX_FATAL_GOS; 276 277 if (epkt->rc_descr.M == 1) { 278 int ret1, ret2; 279 280 ret1 = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, 281 (void *)epkt->addr); 282 ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena, 283 (void *)epkt->addr); 284 285 if (ret1 == DDI_FM_FATAL || ret2 == DDI_FM_FATAL) 286 err |= PX_FATAL_GOS; 287 } 288 break; 289 290 case OP_RESERVED: 291 default: 292 DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error."); 293 err = PX_FATAL_GOS; 294 break; 295 } 296 297 /* 298 * For protected safe access, consider PX_FATAL_GOS as the only 299 * exception for px to take immediate panic, else, treat errors 300 * as nonfatal. 301 */ 302 if (is_safeacc) { 303 if (err & PX_FATAL_GOS) 304 err = PX_FATAL_GOS; 305 else 306 err = PX_NONFATAL; 307 } 308 309 return (err); 310 } 311 312 /* ARGSUSED */ 313 static int 314 px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 315 px_rc_err_t *epkt, int caller) 316 { 317 int ret, err = 0; 318 319 switch (epkt->rc_descr.op) { 320 case OP_BYPASS: /* nonfatal */ 321 case OP_XLAT: /* nonfatal, stuck-fatal, fatal-reset */ 322 case OP_TBW: /* nonfatal, stuck-fatal */ 323 err = PX_NONFATAL; 324 break; 325 default: 326 err = PX_ERR_UNKNOWN; 327 break; 328 } 329 330 if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) { 331 ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, 332 (void *)epkt->addr); 333 if (ret == DDI_FM_FATAL) 334 err |= PX_FATAL_GOS; 335 else 336 err |= PX_NONFATAL; 337 } else 338 err |= PX_NONFATAL; 339 340 return (err); 341 } 342 343 /* ARGSUSED */ 344 static int 345 px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 346 px_rc_err_t *epkt, int caller) 347 { 348 int ret = PX_NONFATAL; 349 px_pec_err_t *pec = (px_pec_err_t *)epkt; 350 351 switch (pec->pec_descr.dir) { 352 case DIR_INGRESS: 353 case DIR_EGRESS: 354 case DIR_LINK: 355 ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, 356 px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos); 357 ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, 358 px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos); 359 break; 360 default: 361 ret = PX_ERR_UNKNOWN; 362 break; 363 } 364 365 return (ret); 366 } 367