1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * PX Fault Management Architecture 31 */ 32 #include <sys/types.h> 33 #include <sys/sunndi.h> 34 #include <sys/sunddi.h> 35 #include <sys/fm/protocol.h> 36 #include <sys/fm/util.h> 37 #include <sys/membar.h> 38 #include "px_obj.h" 39 40 /* 41 * Initialize px FMA support 42 */ 43 int 44 px_fm_attach(px_t *px_p) 45 { 46 px_p->px_fm_cap = DDI_FM_EREPORT_CAPABLE | DDI_FM_ERRCB_CAPABLE | 47 DDI_FM_ACCCHK_CAPABLE | DDI_FM_DMACHK_CAPABLE; 48 49 /* 50 * check parents' capability 51 */ 52 ddi_fm_init(px_p->px_dip, &px_p->px_fm_cap, &px_p->px_fm_ibc); 53 54 /* 55 * parents need to be ereport and error handling capable 56 */ 57 ASSERT(px_p->px_fm_cap && 58 (DDI_FM_ERRCB_CAPABLE | DDI_FM_EREPORT_CAPABLE)); 59 60 /* 61 * register error callback in parent 62 */ 63 ddi_fm_handler_register(px_p->px_dip, px_fm_callback, px_p); 64 65 return (DDI_SUCCESS); 66 } 67 68 /* 69 * Deregister FMA 70 */ 71 void 72 px_fm_detach(px_t *px_p) 73 { 74 ddi_fm_handler_unregister(px_p->px_dip); 75 ddi_fm_fini(px_p->px_dip); 76 } 77 78 /* 79 * Function used to setup access functions depending on level of desired 80 * protection. 81 */ 82 void 83 px_fm_acc_setup(ddi_map_req_t *mp, dev_info_t *rdip) 84 { 85 uchar_t fflag; 86 ddi_acc_hdl_t *hp; 87 ddi_acc_impl_t *ap; 88 89 hp = mp->map_handlep; 90 ap = (ddi_acc_impl_t *)hp->ah_platform_private; 91 fflag = ap->ahi_common.ah_acc.devacc_attr_access; 92 93 if (mp->map_op == DDI_MO_MAP_LOCKED) { 94 ndi_fmc_insert(rdip, ACC_HANDLE, (void *)hp, NULL); 95 switch (fflag) { 96 case DDI_FLAGERR_ACC: 97 ap->ahi_get8 = i_ddi_prot_get8; 98 ap->ahi_get16 = i_ddi_prot_get16; 99 ap->ahi_get32 = i_ddi_prot_get32; 100 ap->ahi_get64 = i_ddi_prot_get64; 101 ap->ahi_put8 = i_ddi_prot_put8; 102 ap->ahi_put16 = i_ddi_prot_put16; 103 ap->ahi_put32 = i_ddi_prot_put32; 104 ap->ahi_put64 = i_ddi_prot_put64; 105 ap->ahi_rep_get8 = i_ddi_prot_rep_get8; 106 ap->ahi_rep_get16 = i_ddi_prot_rep_get16; 107 ap->ahi_rep_get32 = i_ddi_prot_rep_get32; 108 ap->ahi_rep_get64 = i_ddi_prot_rep_get64; 109 ap->ahi_rep_put8 = i_ddi_prot_rep_put8; 110 ap->ahi_rep_put16 = i_ddi_prot_rep_put16; 111 ap->ahi_rep_put32 = i_ddi_prot_rep_put32; 112 ap->ahi_rep_put64 = i_ddi_prot_rep_put64; 113 break; 114 case DDI_CAUTIOUS_ACC : 115 ap->ahi_get8 = i_ddi_caut_get8; 116 ap->ahi_get16 = i_ddi_caut_get16; 117 ap->ahi_get32 = i_ddi_caut_get32; 118 ap->ahi_get64 = i_ddi_caut_get64; 119 ap->ahi_put8 = i_ddi_caut_put8; 120 ap->ahi_put16 = i_ddi_caut_put16; 121 ap->ahi_put32 = i_ddi_caut_put32; 122 ap->ahi_put64 = i_ddi_caut_put64; 123 ap->ahi_rep_get8 = i_ddi_caut_rep_get8; 124 ap->ahi_rep_get16 = i_ddi_caut_rep_get16; 125 ap->ahi_rep_get32 = i_ddi_caut_rep_get32; 126 ap->ahi_rep_get64 = i_ddi_caut_rep_get64; 127 ap->ahi_rep_put8 = i_ddi_caut_rep_put8; 128 ap->ahi_rep_put16 = i_ddi_caut_rep_put16; 129 ap->ahi_rep_put32 = i_ddi_caut_rep_put32; 130 ap->ahi_rep_put64 = i_ddi_caut_rep_put64; 131 break; 132 default: 133 break; 134 } 135 } else if (mp->map_op == DDI_MO_UNMAP) { 136 ndi_fmc_remove(rdip, ACC_HANDLE, (void *)hp); 137 } 138 } 139 140 /* 141 * Function called after a dma fault occurred to find out whether the 142 * fault address is associated with a driver that is able to handle faults 143 * and recover from faults. The driver has to set DDI_DMA_FLAGERR and 144 * cache dma handles in order to make this checking effective to help 145 * recovery from dma faults. 146 */ 147 /* ARGSUSED */ 148 static int 149 px_dma_check(dev_info_t *dip, const void *handle, const void *comp_addr, 150 const void *not_used) 151 { 152 ddi_dma_impl_t *mp = (ddi_dma_impl_t *)handle; 153 pfn_t fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 154 pfn_t comp_pfn; 155 int page; 156 157 /* 158 * Assertion failure if DDI_FM_DMACHK_CAPABLE capability has not 159 * been effectively initialized during attach. 160 */ 161 ASSERT(mp); 162 163 for (page = 0; page < mp->dmai_ndvmapages; page++) { 164 comp_pfn = PX_GET_MP_PFN(mp, page); 165 if (fault_pfn == comp_pfn) 166 return (DDI_FM_NONFATAL); 167 } 168 169 return (DDI_FM_UNKNOWN); 170 } 171 172 /* 173 * Function used to check if a given access handle owns the failing address. 174 * Called by ndi_fmc_error, when we detect a PIO error. 175 */ 176 /* ARGSUSED */ 177 static int 178 px_acc_check(dev_info_t *dip, const void *handle, const void *comp_addr, 179 const void *not_used) 180 { 181 pfn_t pfn, fault_pfn; 182 ddi_acc_hdl_t *hp = impl_acc_hdl_get((ddi_acc_handle_t)handle); 183 184 /* 185 * Assertion failure if DDI_FM_ACCCHK_CAPABLE capability has not 186 * been effectively initialized during attach. 187 */ 188 ASSERT(hp); 189 190 pfn = hp->ah_pfn; 191 fault_pfn = mmu_btop(*(uint64_t *)comp_addr); 192 if (fault_pfn >= pfn && fault_pfn < (pfn + hp->ah_pnum)) 193 return (DDI_FM_NONFATAL); 194 195 return (DDI_FM_UNKNOWN); 196 } 197 198 /* 199 * Function used by PCI error handlers to check if captured address is stored 200 * in the DMA or ACC handle caches. 201 */ 202 int 203 px_handle_lookup(dev_info_t *dip, int type, uint64_t fme_ena, void *afar) 204 { 205 uint32_t cap = ((px_t *)DIP_TO_STATE(dip))->px_fm_cap; 206 int (*f)() = type == DMA_HANDLE ? 207 (DDI_FM_DMA_ERR_CAP(cap) ? px_dma_check : NULL) : 208 (DDI_FM_ACC_ERR_CAP(cap) ? px_acc_check : NULL); 209 210 return (f ? ndi_fmc_error(dip, NULL, type, f, fme_ena, afar) : 211 DDI_FM_UNKNOWN); 212 } 213 214 /* 215 * Function used to initialize FMA for our children nodes. Called 216 * through pci busops when child node calls ddi_fm_init. 217 */ 218 /*ARGSUSED*/ 219 int 220 px_fm_init_child(dev_info_t *dip, dev_info_t *cdip, int cap, 221 ddi_iblock_cookie_t *ibc_p) 222 { 223 px_t *px_p = DIP_TO_STATE(dip); 224 225 ASSERT(ibc_p != NULL); 226 *ibc_p = px_p->px_fm_ibc; 227 228 return (px_p->px_fm_cap); 229 } 230 231 /* 232 * lock access for exclusive PCIe access 233 */ 234 void 235 px_bus_enter(dev_info_t *dip, ddi_acc_handle_t handle) 236 { 237 px_pec_t *pec_p = ((px_t *)DIP_TO_STATE(dip))->px_pec_p; 238 239 /* 240 * Exclusive access has been used for cautious put/get, 241 * Both utilize i_ddi_ontrap which, on sparcv9, implements 242 * similar protection as what on_trap() does, and which calls 243 * membar #Sync to flush out all cpu deferred errors 244 * prior to get/put operation, so here we're not calling 245 * membar #Sync - a difference from what's in pci_bus_enter(). 246 */ 247 mutex_enter(&pec_p->pec_pokefault_mutex); 248 pec_p->pec_acc_hdl = handle; 249 } 250 251 /* 252 * unlock access for exclusive PCIe access 253 */ 254 /* ARGSUSED */ 255 void 256 px_bus_exit(dev_info_t *dip, ddi_acc_handle_t handle) 257 { 258 px_t *px_p = DIP_TO_STATE(dip); 259 px_pec_t *pec_p = px_p->px_pec_p; 260 261 pec_p->pec_acc_hdl = NULL; 262 mutex_exit(&pec_p->pec_pokefault_mutex); 263 } 264 265 266 /* 267 * PCI error callback which is registered with our parent to call 268 * for PCIe logging when the CPU traps due to PCIe Uncorrectable Errors 269 * and PCI BERR/TO/UE 270 * 271 * Dispatch on all known leaves of this fire device because we cannot tell 272 * which side the error came from. 273 */ 274 /*ARGSUSED*/ 275 int 276 px_fm_callback(dev_info_t *dip, ddi_fm_error_t *derr, const void *impl_data) 277 { 278 px_t *px_p = (px_t *)impl_data; 279 px_cb_t *cb_p = px_p->px_cb_p; 280 int err = PX_OK; 281 int fatal = 0; 282 int nonfatal = 0; 283 int unknown = 0; 284 int ret = DDI_FM_OK; 285 int i; 286 287 mutex_enter(&cb_p->xbc_fm_mutex); 288 289 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 290 px_p = cb_p->xbc_px_list[i]; 291 if (px_p != NULL) 292 err |= px_err_handle(px_p, derr, PX_TRAP_CALL, 293 (i == 0)); 294 } 295 296 for (i = 0; i < PX_CB_MAX_LEAF; i++) { 297 px_p = cb_p->xbc_px_list[i]; 298 if (px_p != NULL) { 299 ret = ndi_fm_handler_dispatch(px_p->px_dip, NULL, derr); 300 switch (ret) { 301 case DDI_FM_FATAL: 302 fatal++; 303 break; 304 case DDI_FM_NONFATAL: 305 nonfatal++; 306 break; 307 case DDI_FM_UNKNOWN: 308 unknown++; 309 break; 310 default: 311 break; 312 } 313 } 314 } 315 mutex_exit(&cb_p->xbc_fm_mutex); 316 317 ret = (fatal != 0) ? DDI_FM_FATAL : 318 ((nonfatal != 0) ? DDI_FM_NONFATAL : 319 (((unknown != 0) ? DDI_FM_UNKNOWN : DDI_FM_OK))); 320 321 /* fire fatal error overrides device error */ 322 if (err & (PX_FATAL_GOS | PX_FATAL_SW)) 323 ret = DDI_FM_FATAL; 324 /* if fire encounts no error, then take whatever device error */ 325 else if ((err != PX_OK) && (ret != DDI_FM_FATAL)) 326 ret = DDI_FM_NONFATAL; 327 328 return (ret); 329 } 330 331 /* 332 * px_err_dmc_pec_intr: 333 * Interrupt handler for the DMC/PEC block. 334 * o lock 335 * o create derr 336 * o px_err_handle(leaf, with jbc) 337 * o send ereport(fire fmri, derr, payload = BDF) 338 * o dispatch (leaf) 339 * o unlock 340 * o handle error: fatal? fm_panic() : return INTR_CLAIMED) 341 */ 342 /* ARGSUSED */ 343 uint_t 344 px_err_fabric_intr(px_t *px_p, msgcode_t msg_code, 345 pcie_req_id_t rid) 346 { 347 dev_info_t *rpdip = px_p->px_dip; 348 px_cb_t *cb_p = px_p->px_cb_p; 349 int err = PX_OK, ret; 350 ddi_fm_error_t derr; 351 352 mutex_enter(&cb_p->xbc_fm_mutex); 353 354 /* Create the derr */ 355 bzero(&derr, sizeof (ddi_fm_error_t)); 356 derr.fme_version = DDI_FME_VERSION; 357 derr.fme_ena = fm_ena_generate(0, FM_ENA_FMT1); 358 derr.fme_flag = DDI_FM_ERR_UNEXPECTED; 359 360 /* send ereport/handle/clear fire registers */ 361 err |= px_err_handle(px_p, &derr, PX_INTR_CALL, B_TRUE); 362 363 /* Check all child devices for errors */ 364 ret = ndi_fm_handler_dispatch(rpdip, NULL, &derr); 365 366 mutex_exit(&cb_p->xbc_fm_mutex); 367 368 /* 369 * PX_FATAL_HW indicates a condition recovered from Fatal-Reset, 370 * therefore it does not cause panic. 371 */ 372 if ((err & (PX_FATAL_GOS | PX_FATAL_SW)) || (ret == DDI_FM_FATAL)) 373 fm_panic("Fatal PCIe Fabric Error has occurred\n"); 374 375 return (DDI_INTR_CLAIMED); 376 } 377 378 /* 379 * px_err_safeacc_check: 380 * Check to see if a peek/poke and cautious access is currently being 381 * done on a particular leaf. 382 * 383 * Safe access reads induced fire errors will be handled by cpu trap handler 384 * which will call px_fm_callback() which calls this function. In that 385 * case, the derr fields will be set by trap handler with the correct values. 386 * 387 * Safe access writes induced errors will be handled by px interrupt 388 * handlers, this function will fill in the derr fields. 389 * 390 * If a cpu trap does occur, it will quiesce all other interrupts allowing 391 * the cpu trap error handling to finish before Fire receives an interrupt. 392 * 393 * If fire does indeed have an error when a cpu trap occurs as a result of 394 * a safe access, a trap followed by a Mondo/Fabric interrupt will occur. 395 * In which case derr will be initialized as "UNEXPECTED" by the interrupt 396 * handler and this function will need to find if this error occured in the 397 * middle of a safe access operation. 398 * 399 * @param px_p leaf in which to check access 400 * @param derr fm err data structure to be updated 401 */ 402 void 403 px_err_safeacc_check(px_t *px_p, ddi_fm_error_t *derr) 404 { 405 px_pec_t *pec_p = px_p->px_pec_p; 406 px_cb_t *cb_p = px_p->px_cb_p; 407 int acctype = pec_p->pec_safeacc_type; 408 409 ASSERT(MUTEX_HELD(&cb_p->xbc_fm_mutex)); 410 411 if (derr->fme_flag != DDI_FM_ERR_UNEXPECTED) { 412 return; 413 } 414 415 /* safe access checking */ 416 switch (acctype) { 417 case DDI_FM_ERR_EXPECTED: 418 /* 419 * cautious access protection, protected from all err. 420 */ 421 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 422 ddi_fm_acc_err_get(pec_p->pec_acc_hdl, derr, 423 DDI_FME_VERSION); 424 derr->fme_flag = acctype; 425 derr->fme_acc_handle = pec_p->pec_acc_hdl; 426 break; 427 case DDI_FM_ERR_POKE: 428 /* 429 * ddi_poke protection, check nexus and children for 430 * expected errors. 431 */ 432 ASSERT(MUTEX_HELD(&pec_p->pec_pokefault_mutex)); 433 membar_sync(); 434 derr->fme_flag = acctype; 435 break; 436 case DDI_FM_ERR_PEEK: 437 derr->fme_flag = acctype; 438 break; 439 } 440 } 441