1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _SYS_IB_ADAPTERS_HERMON_FM_H 27 #define _SYS_IB_ADAPTERS_HERMON_FM_H 28 29 /* 30 * hermon_fm.h 31 */ 32 #include <sys/ddifm.h> 33 #include <sys/fm/protocol.h> 34 #include <sys/fm/util.h> 35 #include <sys/fm/io/ddi.h> 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 /* 42 * HCA FMA compile note. 43 * 44 * FMA_TEST is used for HCA function tests, and 45 * the macro can be on by changing Makefile. 46 * 47 * in case of DEBUG 48 * FMA_TEST is on 49 * 50 * in case of non-DEBUG (DEBUG is off) 51 * FMA_TEST is off 52 */ 53 54 /* 55 * HCA FM common data structure 56 */ 57 58 /* 59 * HCA FM Structure 60 * This structure is used to catch HCA HW errors. 61 */ 62 struct i_hca_fm { 63 uint32_t ref_cnt; /* the number of instances referring to this */ 64 kmutex_t lock; /* protection for last_err & polling thread */ 65 struct i_hca_acc_handle *hdl; /* HCA FM acc handle structure */ 66 struct kmem_cache *fm_acc_cache; /* HCA acc handle cache */ 67 68 }; 69 70 /* 71 * HCA FM acc handle structure 72 * This structure is holding ddi_acc_handle_t and other members 73 * to deal with HCA PIO FM. 74 */ 75 struct i_hca_acc_handle { 76 struct i_hca_acc_handle *next; /* next structure */ 77 ddi_acc_handle_t save_hdl; /* acc handle */ 78 kmutex_t lock; /* mutex lock for thread count */ 79 uint32_t thread_cnt; /* number of threads issuing PIOs */ 80 }; 81 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", i_hca_acc_handle::save_hdl)) 82 #define fm_acc_hdl(hdl) (((struct i_hca_acc_handle *)(hdl))->save_hdl) 83 #define FM_POLL_INTERVAL (10000000) /* 10ms (nano) */ 84 85 /* 86 * HCA FM function test structure 87 * This structure can be used to test the basic fm function test for HCA. 88 * The test code is included if the FMA_TEST macro is defined. 89 */ 90 struct i_hca_fm_test { 91 int num; /* serial numner */ 92 int type; /* PIO or Hermon specific errors */ 93 #define HCA_TEST_PIO 0x1 94 #define HCA_TEST_IBA 0x2 95 int trigger; /* how to trigger a HW error */ 96 #define HCA_TEST_TRANSIENT 0x0001 97 #define HCA_TEST_PERSISTENT 0x0002 98 #define HCA_TEST_ATTACH 0x0010 99 #define HCA_TEST_START 0x0100 100 #define HCA_TEST_END 0x0200 101 void (*pio_injection)(struct i_hca_fm_test *, ddi_fm_error_t *); 102 int errcnt; /* how many transient error occurs */ 103 int line_num; /* line number in the source code */ 104 char *file_name; /* source filename */ 105 char *hash_key; /* hash table for test items */ 106 void *private; /* private data */ 107 }; 108 109 /* 110 * Hermon FM data structure 111 */ 112 typedef struct i_hca_fm hermon_hca_fm_t; 113 typedef struct i_hca_acc_handle hermon_acc_handle_t; 114 typedef struct i_hca_fm_test hermon_test_t; 115 116 /* 117 * The following defines are to supplement device error reporting. 118 * At each place where the planned FMA error matrix specifies that 119 * an ereport will be generated, for now there is a HERMON_FMANOTE() 120 * call generating an appropriate message string. 121 * 122 * This has been revised since it has been realized that FMA is only 123 * to be used for hardware errors. HERMON_FMANOTE() is used to report 124 * errors that are likely to be hardware, but possibly are not. 125 */ 126 #define HERMON_FMANOTE(state, string) \ 127 cmn_err(CE_WARN, "hermon%d: Device Error: %s", \ 128 (state)->hs_instance, string) 129 130 /* CQE Syndrome errors - see hermon_cq.c */ 131 132 #define HERMON_FMA_LOCLEN "CQE local length error" 133 #define HERMON_FMA_LOCQPOP "CQE local qp operation error" 134 #define HERMON_FMA_LOCPROT "CQE local protection error" 135 #define HERMON_FMA_WQFLUSH "CQE wqe flushed in error" 136 #define HERMON_FMA_MWBIND "CQE memory window bind error" 137 #define HERMON_FMA_RESP "CQE bad response" 138 #define HERMON_FMA_LOCACC "CQE local access error" 139 #define HERMON_FMA_REMREQ "CQE remote invalid request error" 140 #define HERMON_FMA_REMACC "CQE remote access error" 141 #define HERMON_FMA_REMOP "CQE remote operation error" 142 #define HERMON_FMA_XPORTCNT "CQE transport retry counter exceeded" 143 #define HERMON_FMA_RNRCNT "CQE RNR retry counter exceeded" 144 #define HERMON_FMA_REMABRT "CQE remote aborted error" 145 #define HERMON_FMA_UNKN "CQE unknown/reserved error returned" 146 147 /* event errors - see hermon_event.c */ 148 #define HERMON_FMA_OVERRUN "EQE cq overrun or protection error" 149 #define HERMON_FMA_LOCCAT "EQE local work queue catastrophic error" 150 #define HERMON_FMA_QPCAT "EQE local queue pair catastrophic error" 151 #define HERMON_FMA_PATHMIG "EQE path migration failed" 152 #define HERMON_FMA_LOCINV "EQE invalid request - local work queue" 153 #define HERMON_FMA_LOCACEQ "EQE local access violation" 154 #define HERMON_FMA_SRQCAT "EQE shared received queue catastrophic" 155 #define HERMON_FMA_INTERNAL "EQE hca internal error" 156 157 /* HCR device failure returns - see hermon_cmd.c */ 158 #define HERMON_FMA_HCRINT "HCR internal error processing command" 159 #define HERMON_FMA_NVMEM "HCR NVRAM checksum/CRC failure" 160 #define HERMON_FMA_TOTOG "HCR Timeout waiting for command toggle" 161 #define HERMON_FMA_GOBIT "HCR Timeout waiting for command go bit" 162 #define HERMON_FMA_RSRC "HCR Command insufficient resources" 163 #define HERMON_FMA_CMDINV "HCR Command invalid status returned" 164 165 /* HCA initialization errors - see hermon.c */ 166 #define HERMON_FMA_FWVER "HCA firmware not at minimum version" 167 #define HERMON_FMA_PCIID "HCA PCIe devid not supported" 168 #define HERMON_FMA_MAINT "HCA device set to memory controller mode" 169 #define HERMON_FMA_BADNVMEM "HCR bad NVMEM error" 170 171 /* 172 * HCA FM constants 173 */ 174 175 /* HCA FM state */ 176 #define HCA_NO_FM 0x0000 /* HCA FM is not supported */ 177 /* HCA FM state flags */ 178 #define HCA_PIO_FM 0x0001 /* PIO is fma-protected */ 179 #define HCA_DMA_FM 0x0002 /* DMA is fma-protected */ 180 #define HCA_EREPORT_FM 0x0004 /* FMA ereport is available */ 181 #define HCA_ERRCB_FM 0x0010 /* FMA error callback is supported */ 182 183 #define HCA_ATTCH_FM 0x0100 /* HCA FM attach mode */ 184 #define HCA_RUNTM_FM 0x0200 /* HCA FM runtime mode */ 185 186 /* HCA ererport type */ 187 #define HCA_SYS_ERR 0x001 /* HW error reported by Solaris FMA */ 188 #define HCA_IBA_ERR 0x002 /* IB specific HW error */ 189 190 /* HCA ereport detail */ 191 #define HCA_ERR_TRANSIENT 0x010 /* HCA temporary error */ 192 #define HCA_ERR_NON_FATAL 0x020 /* HCA persistent error */ 193 #define HCA_ERR_SRV_LOST 0x040 /* HCA attach failure */ 194 #define HCA_ERR_DEGRADED 0x080 /* HCA maintenance mode */ 195 #define HCA_ERR_FATAL 0x100 /* HCA critical situation */ 196 #define HCA_ERR_IOCTL 0x200 /* EIO */ 197 198 /* Ignore HCA HW error check */ 199 #define HCA_SKIP_HW_CHK (-1) 200 201 /* HCA FM pio retry operation state */ 202 #define HCA_PIO_OK (0) /* No HW errors */ 203 #define HCA_PIO_TRANSIENT (1) /* transient error */ 204 #define HCA_PIO_PERSISTENT (2) /* persistent error */ 205 #define HCA_PIO_RETRY_CNT (3) 206 207 /* HCA firmware faults */ 208 #define HCA_FW_MISC 0x1 /* firmware misc faults */ 209 #define HCA_FW_CORRUPT 0x2 /* firmware corruption */ 210 #define HCA_FW_MISMATCH 0x3 /* firmware version mismatch */ 211 212 /* 213 * Hermon FM macros 214 */ 215 216 #ifdef FMA_TEST 217 #define TEST_DECLARE(tst) hermon_test_t *tst; 218 #define REGISTER_PIO_TEST(st, tst) \ 219 tst = hermon_test_register(st, __FILE__, __LINE__, HCA_TEST_PIO) 220 #define PIO_START(st, hdl, tst) hermon_PIO_start(st, hdl, tst) 221 #define PIO_END(st, hdl, cnt, tst) hermon_PIO_end(st, hdl, &cnt, tst) 222 #else 223 #define TEST_DECLARE(tst) 224 #define REGISTER_PIO_TEST(st, tst) 225 #define PIO_START(st, hdl, tst) hermon_PIO_start(st, hdl, NULL) 226 #define PIO_END(st, hdl, cnt, tst) hermon_PIO_end(st, hdl, &cnt, NULL) 227 #endif /* FMA_TEST */ 228 229 /* 230 * hermon_pio_init() is a macro initializing variables. 231 */ 232 #define hermon_pio_init(cnt, status, tst) \ 233 TEST_DECLARE(tst) \ 234 int status = HCA_PIO_OK; \ 235 int cnt = HCA_PIO_RETRY_CNT 236 237 /* 238 * hermon_pio_start() is one of a pair of macros checking HW errors 239 * at PIO requests, which should be called before the requests are issued. 240 */ 241 #define hermon_pio_start(st, hdl, label, cnt, status, tst) \ 242 if (st->hs_fm_state & HCA_PIO_FM) { \ 243 if (st->hs_fm_async_fatal) { \ 244 hermon_fm_ereport(st, HCA_SYS_ERR, \ 245 HCA_ERR_NON_FATAL); \ 246 goto label; \ 247 } else { \ 248 REGISTER_PIO_TEST(st, tst); \ 249 cnt = HCA_PIO_RETRY_CNT; \ 250 if (PIO_START(st, hdl, tst) == \ 251 HCA_PIO_PERSISTENT) { \ 252 goto label; \ 253 } \ 254 } \ 255 } else { \ 256 status = HCA_SKIP_HW_CHK; \ 257 } \ 258 do { 259 260 /* 261 * hermon_pio_end() is the other of a pair of macros checking HW errors 262 * at PIO requests, which should be called after the requests end. 263 * If a HW error is detected and can be isolated well, these macros 264 * retry the operation to determine if the error is persistent or not. 265 */ 266 #define hermon_pio_end(st, hdl, label, cnt, status, tst) \ 267 if (status != HCA_SKIP_HW_CHK) { \ 268 if (st->hs_fm_async_fatal) { \ 269 hermon_fm_ereport(st, HCA_SYS_ERR, \ 270 HCA_ERR_NON_FATAL); \ 271 goto label; \ 272 } \ 273 if ((status = PIO_END(st, hdl, cnt, tst)) == \ 274 HCA_PIO_PERSISTENT) { \ 275 goto label; \ 276 } else if (status == HCA_PIO_TRANSIENT) { \ 277 hermon_fm_ereport(st, HCA_SYS_ERR, \ 278 HCA_ERR_TRANSIENT); \ 279 } \ 280 } \ 281 } while (status == HCA_PIO_TRANSIENT) 282 283 extern void hermon_fm_init(hermon_state_t *); 284 extern void hermon_fm_fini(hermon_state_t *); 285 extern int hermon_fm_ereport_init(hermon_state_t *); 286 extern void hermon_fm_ereport_fini(hermon_state_t *); 287 extern int hermon_get_state(hermon_state_t *); 288 extern boolean_t hermon_init_failure(hermon_state_t *); 289 extern boolean_t hermon_cmd_retry_ok(hermon_cmd_post_t *, int); 290 extern void hermon_fm_ereport(hermon_state_t *, int, int); 291 extern int hermon_regs_map_setup(hermon_state_t *, uint_t, caddr_t *, offset_t, 292 offset_t, ddi_device_acc_attr_t *, ddi_acc_handle_t *); 293 extern void hermon_regs_map_free(hermon_state_t *, ddi_acc_handle_t *); 294 extern int hermon_pci_config_setup(hermon_state_t *, ddi_acc_handle_t *); 295 extern void hermon_pci_config_teardown(hermon_state_t *, ddi_acc_handle_t *); 296 extern ushort_t hermon_devacc_attr_version(hermon_state_t *); 297 extern uchar_t hermon_devacc_attr_access(hermon_state_t *); 298 extern int hermon_PIO_start(hermon_state_t *, ddi_acc_handle_t, 299 hermon_test_t *); 300 extern int hermon_PIO_end(hermon_state_t *, ddi_acc_handle_t, int *, 301 hermon_test_t *); 302 extern ddi_acc_handle_t hermon_rsrc_alloc_uarhdl(hermon_state_t *); 303 extern ddi_acc_handle_t hermon_get_uarhdl(hermon_state_t *); 304 extern ddi_acc_handle_t hermon_get_cmdhdl(hermon_state_t *); 305 extern ddi_acc_handle_t hermon_get_msix_tblhdl(hermon_state_t *); 306 extern ddi_acc_handle_t hermon_get_msix_pbahdl(hermon_state_t *); 307 extern ddi_acc_handle_t hermon_get_pcihdl(hermon_state_t *); 308 extern void hermon_clr_state_nolock(hermon_state_t *, int); 309 extern void hermon_inter_err_chk(void *); 310 311 #ifdef FMA_TEST 312 extern hermon_test_t *hermon_test_register(hermon_state_t *, char *, int, int); 313 extern void hermon_test_deregister(void); 314 extern int hermon_test_num; 315 #endif /* FMA_TEST */ 316 317 #ifdef __cplusplus 318 } 319 #endif 320 321 #endif /* _SYS_IB_ADAPTERS_HERMON_FM_H */ 322