1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * sun4v Fire Error Handling 31 */ 32 33 #include <sys/types.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/fm/protocol.h> 37 #include <sys/fm/util.h> 38 #include <sys/membar.h> 39 #include "px_obj.h" 40 #include "px_err.h" 41 42 static uint_t px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt); 43 static int px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, 44 px_rc_err_t *epkt, int caller); 45 46 static int px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 47 px_rc_err_t *epkt, int caller); 48 static int px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 49 px_rc_err_t *epkt, int caller); 50 static int px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 51 px_rc_err_t *epkt, int caller); 52 53 /* 54 * px_err_cb_intr: 55 * Interrupt handler for the Host Bus Block. 56 */ 57 uint_t 58 px_err_cb_intr(caddr_t arg) 59 { 60 px_fault_t *fault_p = (px_fault_t *)arg; 61 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 62 63 if (epkt != NULL) { 64 return (px_err_common_intr(fault_p, epkt)); 65 } 66 67 return (DDI_INTR_UNCLAIMED); 68 } 69 70 /* 71 * px_err_dmc_pec_intr: 72 * Interrupt handler for the DMC/PEC block. 73 */ 74 uint_t 75 px_err_dmc_pec_intr(caddr_t arg) 76 { 77 px_fault_t *fault_p = (px_fault_t *)arg; 78 px_rc_err_t *epkt = (px_rc_err_t *)fault_p->px_intr_payload; 79 80 if (epkt != NULL) { 81 return (px_err_common_intr(fault_p, epkt)); 82 } 83 84 return (DDI_INTR_UNCLAIMED); 85 } 86 87 /* 88 * px_err_handle: 89 * Common function called by trap, mondo and fabric intr. 90 * This function is more meaningful in sun4u implementation. Kept 91 * to mirror sun4u call stack. 92 * o check for safe access 93 * 94 * @param px_p leaf in which to check access 95 * @param derr fm err data structure to be updated 96 * @param caller PX_TRAP_CALL | PX_INTR_CALL 97 * @param chkjbc whether to handle hostbus registers (ignored) 98 * @return err PX_OK | PX_NONFATAL | 99 * PX_FATAL_GOS | PX_FATAL_HW | PX_STUCK_FATAL 100 */ 101 /* ARGSUSED */ 102 int 103 px_err_handle(px_t *px_p, ddi_fm_error_t *derr, int caller, 104 boolean_t chkxbc) 105 { 106 /* check for safe access */ 107 px_err_safeacc_check(px_p, derr); 108 109 return (DDI_FM_OK); 110 } 111 112 /* 113 * px_err_common_intr: 114 * Interrupt handler for the JBC/DMC/PEC block. 115 * o lock 116 * o create derr 117 * o check safe access 118 * o px_err_check_severiy(epkt) 119 * o dispatch 120 * o Idle intr state 121 * o unlock 122 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 123 */ 124 static uint_t 125 px_err_common_intr(px_fault_t *fault_p, px_rc_err_t *epkt) 126 { 127 px_t *px_p = DIP_TO_STATE(fault_p->px_fh_dip); 128 dev_info_t *rpdip = px_p->px_dip; 129 px_cb_t *cb_p = px_p->px_cb_p; 130 int err, ret; 131 ddi_fm_error_t derr; 132 133 mutex_enter(&cb_p->xbc_fm_mutex); 134 135 /* Create the derr */ 136 bzero(&derr, sizeof (ddi_fm_error_t)); 137 derr.fme_version = DDI_FME_VERSION; 138 derr.fme_ena = fm_ena_generate(epkt->stick, FM_ENA_FMT1); 139 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 140 141 /* Basically check for safe access */ 142 (void) px_err_handle(px_p, &derr, PX_INTR_CALL, B_FALSE); 143 144 /* Check the severity of this error */ 145 err = px_err_check_severity(px_p, &derr, epkt, PX_INTR_CALL); 146 147 /* check for error severity */ 148 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 149 150 /* Set the intr state to idle for the leaf that received the mondo */ 151 if (px_lib_intr_setstate(rpdip, fault_p->px_fh_sysino, 152 INTR_IDLE_STATE) != DDI_SUCCESS) { 153 mutex_exit(&cb_p->xbc_fm_mutex); 154 return (DDI_INTR_UNCLAIMED); 155 } 156 157 mutex_exit(&cb_p->xbc_fm_mutex); 158 159 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) 160 fm_panic("Fatal System Bus Error has occurred\n"); 161 162 return (DDI_INTR_CLAIMED); 163 } 164 165 /* 166 * px_err_check_severity: 167 * Check the severity of the fire error based the epkt received 168 * 169 * @param px_p leaf in which to take the snap shot. 170 * @param derr fm err in which the ereport is to be based on 171 * @param epkt epkt recevied from HV 172 */ 173 static int 174 px_err_check_severity(px_t *px_p, ddi_fm_error_t *derr, px_rc_err_t *epkt, 175 int caller) 176 { 177 px_pec_t *pec_p = px_p->px_pec_p; 178 dev_info_t *dip = px_p->px_dip; 179 int err = 0; 180 181 /* Cautious access error handling */ 182 if (derr->fme_flag == DDI_FM_ERR_EXPECTED) { 183 if (caller == PX_TRAP_CALL) { 184 /* 185 * for ddi_caut_get treat all events as nonfatal 186 * The trampoline will set err_ena = 0, 187 * err_status = NONFATAL. 188 */ 189 derr->fme_status = DDI_FM_NONFATAL; 190 } else { 191 /* 192 * For ddi_caut_put treat all events as nonfatal. Here 193 * we have the handle and can call ndi_fm_acc_err_set(). 194 */ 195 derr->fme_status = DDI_FM_NONFATAL; 196 ndi_fm_acc_err_set(pec_p->pec_acc_hdl, derr); 197 } 198 } 199 200 switch (epkt->rc_descr.block) { 201 case BLOCK_HOSTBUS: 202 err = px_cb_check_errors(dip, derr, epkt, caller); 203 break; 204 case BLOCK_MMU: 205 err = px_mmu_check_errors(dip, derr, epkt, caller); 206 break; 207 case BLOCK_INTR: 208 err = PX_NONFATAL; 209 break; 210 case BLOCK_PCIE: 211 err = px_pcie_check_errors(dip, derr, epkt, caller); 212 break; 213 default: 214 err = PX_ERR_UNKNOWN; 215 } 216 217 return (err); 218 } 219 220 /* ARGSUSED */ 221 static int 222 px_cb_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 223 px_rc_err_t *epkt, int caller) 224 { 225 int fme_flag = derr->fme_flag; 226 boolean_t is_safeacc; 227 int ret, err = 0; 228 229 is_safeacc = (fme_flag == DDI_FM_ERR_EXPECTED) || 230 (fme_flag == DDI_FM_ERR_PEEK) || 231 (fme_flag == DDI_FM_ERR_POKE); 232 233 /* block/op/phase/cond/dir/flag... */ 234 switch (epkt->rc_descr.op) { 235 case OP_PIO: 236 err |= PX_NONFATAL; 237 238 /* check handle if affected memory address is captured */ 239 if (epkt->rc_descr.M != 0) { 240 ret = px_handle_lookup(dip, ACC_HANDLE, 241 derr->fme_ena, (void *)epkt->addr); 242 } 243 if (ret == DDI_FM_FATAL) 244 err |= PX_FATAL_SW; 245 break; 246 247 case OP_DMA: 248 switch (epkt->rc_descr.phase) { 249 case PH_ADDR: 250 err |= PX_FATAL_GOS; 251 break; 252 case PH_DATA: 253 if (epkt->rc_descr.cond == CND_UE) { 254 err |= PX_FATAL_GOS; 255 break; 256 } 257 258 err |= PX_NONFATAL; 259 if (epkt->rc_descr.M == 1) { 260 ret = px_handle_lookup(dip, DMA_HANDLE, 261 derr->fme_ena, (void *)epkt->addr); 262 if (ret == DDI_FM_FATAL) 263 err |= PX_FATAL_SW; 264 } 265 break; 266 default: 267 DBG(DBG_ERR_INTR, dip, "Unexpected epkt"); 268 err |= PX_ERR_UNKNOWN; 269 break; 270 } 271 break; 272 case OP_UNKNOWN: 273 err |= PX_NONFATAL; 274 if (epkt->rc_descr.M == 1) { 275 int ret1, ret2; 276 ret1 = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, 277 (void *)epkt->addr); 278 ret2 = px_handle_lookup(dip, ACC_HANDLE, derr->fme_ena, 279 (void *)epkt->addr); 280 if ((ret1 == DDI_FM_FATAL) || (ret2 == DDI_FM_FATAL)) 281 err |= PX_FATAL_SW; 282 } 283 break; 284 285 case OP_RESERVED: 286 default: 287 DBG(DBG_ERR_INTR, NULL, "Unrecognized JBC error."); 288 err |= PX_ERR_UNKNOWN; 289 break; 290 } 291 292 /* 293 * For protected safe access, consider PX_FATAL_GOS as the only 294 * exception for px to take immediate panic, else, treat errors 295 * as nonfatal. 296 */ 297 if (is_safeacc) { 298 if (err & PX_FATAL_GOS) 299 err = PX_FATAL_GOS; 300 else 301 err = PX_NONFATAL; 302 } 303 304 return (err); 305 } 306 307 /* ARGSUSED */ 308 static int 309 px_mmu_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 310 px_rc_err_t *epkt, int caller) 311 { 312 int ret, err = 0; 313 314 switch (epkt->rc_descr.op) { 315 case OP_BYPASS: /* nonfatal */ 316 case OP_XLAT: /* nonfatal, stuck-fatal, fatal-reset */ 317 case OP_TBW: /* nonfatal, stuck-fatal */ 318 err = PX_NONFATAL; 319 break; 320 321 default: 322 err = PX_ERR_UNKNOWN; 323 break; 324 } 325 326 if ((epkt->rc_descr.D != 0) || (epkt->rc_descr.M != 0)) { 327 ret = px_handle_lookup(dip, DMA_HANDLE, derr->fme_ena, 328 (void *)epkt->addr); 329 } 330 331 if (ret == DDI_FM_FATAL) 332 err = PX_FATAL_SW; 333 else if ((ret == DDI_FM_NONFATAL) && (err = PX_ERR_UNKNOWN)) 334 err = PX_NONFATAL; 335 336 return (err); 337 } 338 339 /* ARGSUSED */ 340 static int 341 px_pcie_check_errors(dev_info_t *dip, ddi_fm_error_t *derr, 342 px_rc_err_t *epkt, int caller) 343 { 344 int ret = PX_NONFATAL; 345 px_pec_err_t *pec = (px_pec_err_t *)epkt; 346 347 switch (pec->pec_descr.dir) { 348 case DIR_INGRESS: 349 case DIR_EGRESS: 350 case DIR_LINK: 351 ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, 352 px_fabric_die_rc_ue, px_fabric_die_rc_ue_gos); 353 ret |= PX_FABRIC_ERR_SEV(pec->ue_reg_status, 354 px_fabric_die_rc_ce, px_fabric_die_rc_ce_gos); 355 break; 356 default: 357 ret = PX_ERR_UNKNOWN; 358 break; 359 } 360 361 return (ret); 362 } 363