xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon_fm.c (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
/*
 * hermon_fm.c
 *    Hermon (InfiniBand) HCA Driver Fault Management Routines
 *
 * [Hermon FM Implementation]
 *
 * Hermon FM recovers the system from a HW error situation and/or isolates a
 * HW error by calling the FMA acc handle check function
 * (ddi_fm_acc_err_get()). If a HW error is detected when
 * ddi_fm_acc_err_get() is called, the I/O operation causing the error is
 * retried up to three times to determine whether or not the error is
 * transient.
 *
 * (Basic HW error recovery)
 *
 *        |
 *  .---->*
 *  |     |
 *  |   issue an I/O request via PIO
 *  |     |
 *  |     |
 *  |   check acc handle
 *  |     |
 *  |     |
 *  `--< a HW error detected && retry count < 3 >
 *        |
 *        v
 *
 * When a HW error is detected, to provide the error information for users to
 * isolate the faulted HW, Hermon FM issues Solaris FMA ereports as follows.
 *
 *  * PIO transient error
 *         invalid_state => unaffected
 *
 *  * PIO persistent error
 *         invalid_state => lost
 *
 *  * PIO fatal error
 *         invalid_state => lost => panic
 *
 *  * Hermon HCA firmware error
 *         invalid_state => degraded
 *
 *  * Other Hermon HCA specific errors
 *         uncorrect => unaffected
 *              or
 *         correct => unaffected
 *
 * (Restrictions)
 *
 * The current implementation has the following restrictions.
 *  * No runtime check/protection
 *  * No detach time check/protection
 *  * No DMA check/protection
 *
 * See the Hermon FMA portfolio for details.
 */
82 
83 #include <sys/types.h>
84 #include <sys/conf.h>
85 #include <sys/ddi.h>
86 #include <sys/sunddi.h>
87 #include <sys/sysmacros.h>
88 #include <sys/list.h>
89 #include <sys/modhash.h>
90 
91 #include <sys/ib/adapters/hermon/hermon.h>
92 
93 /*
94  * Hermon driver has to disable its FM functionality
95  * if this "fm_capable" variable is defined or has a value
96  * in /kernel/drv/hermon.conf.
97  */
98 static char *fm_cap = "fm-capable";	/* FM capability */
99 
100 static hermon_hca_fm_t hca_fm;		/* Hermon HCA FM Structure */
101 
102 static void i_hca_fm_ereport(dev_info_t *, int, char *);
103 static void i_hca_fm_init(struct i_hca_fm *);
104 static void i_hca_fm_fini(struct i_hca_fm *);
105 static int i_hca_regs_map_setup(struct i_hca_fm *, dev_info_t *, uint_t,
106     caddr_t *, offset_t, offset_t, ddi_device_acc_attr_t *, ddi_acc_handle_t *);
107 static void i_hca_regs_map_free(struct i_hca_fm *, ddi_acc_handle_t *);
108 static int i_hca_pci_config_setup(struct i_hca_fm *, dev_info_t *,
109     ddi_acc_handle_t *);
110 static void i_hca_pci_config_teardown(struct i_hca_fm *, ddi_acc_handle_t *);
111 static int i_hca_pio_start(dev_info_t *, struct i_hca_acc_handle *,
112     hermon_test_t *);
113 static int i_hca_pio_end(dev_info_t *, struct i_hca_acc_handle *, int *,
114     hermon_test_t *);
115 static struct i_hca_acc_handle *i_hca_get_acc_handle(struct i_hca_fm *,
116     ddi_acc_handle_t);
117 
118 /* forward declaration for hermon_fm_{init, fini}() */
119 #ifdef FMA_TEST
120 static void i_hca_test_init(mod_hash_t **, mod_hash_t **);
121 static void i_hca_test_fini(mod_hash_t **, mod_hash_t **);
122 #endif /* FMA_TEST */
123 
124 /*
125  * Hermon FM Functions
126  *
127  * These functions are based on the HCA FM common interface
128  * defined below, but specific to the Hermon HCA FM capabilities.
129  */
130 
131 /*
132  *  void
133  *  hermon_hca_fm_init(hermon_state_t *state, hermon_hca_fm_t *hca)
134  *
135  *  Overview
136  *      hermon_hca_fm_init() initializes the Hermon FM resources.
137  *
138  *  Argument
139  *      state: pointer to Hermon state structure
140  *      hca: pointer to Hermon FM structure
141  *
142  *  Return value
143  *      Nothing
144  *
145  *  Caller's context
146  *      hermon_hca_fm_init() can be called in user or kernel context only.
147  */
148 static void
hermon_hca_fm_init(hermon_state_t * state,hermon_hca_fm_t * hca_fm)149 hermon_hca_fm_init(hermon_state_t *state, hermon_hca_fm_t *hca_fm)
150 {
151 	state->hs_fm_hca_fm = hca_fm;
152 	i_hca_fm_init((struct i_hca_fm *)hca_fm);
153 }
154 
155 
156 /*
157  *  void
158  *  hermon_hca_fm_fini(hermon_state_t *state)
159  *
160  *  Overview
161  *      hermon_hca_fm_fini() releases the Hermon FM resources.
162  *
163  *  Argument
164  *      state: pointer to Hermon state structure
165  *
166  *  Return value
167  *      Nothing
168  *
169  *  Caller's context
170  *      hermon_hca_fm_fini() can be called in user or kernel context only.
171  */
172 static void
hermon_hca_fm_fini(hermon_state_t * state)173 hermon_hca_fm_fini(hermon_state_t *state)
174 {
175 	i_hca_fm_fini((struct i_hca_fm *)state->hs_fm_hca_fm);
176 	state->hs_fm_hca_fm = NULL;
177 }
178 
179 /*
180  *  void
181  *  hermon_clr_state_nolock(hermon_state_t *state, int fm_state)
182  *
183  *  Overview
184  *      hermon_clr_state() drops the specified state from Hermon FM state
185  *      without the mutex locks.
186  *
187  *  Argument
188  *      state: pointer to Hermon state structure
189  *      fm_state: Hermon FM state, which is composed of:
190  *		HCA_NO_FM	Hermom FM is not supported
191  *		HCA_PIO_FM	PIO is fma-protected
192  *		HCA_DMA_FM	DMA is fma-protected
193  *		HCA_EREPORT_FM	FMA ereport is available
194  *		HCA_ERRCB_FM	FMA error callback is supported
195  *		HCA_ATTCH_FM	HCA FM attach mode
196  *		HCA_RUNTM_FM	HCA FM runtime mode
197  *
198  *  Return value
199  *  	Nothing
200  *
201  *  Caller's context
202  *      hermon_clr_state() can be called in user, kernel, interrupt context
203  *      or high interrupt context.
204  */
205 void
hermon_clr_state_nolock(hermon_state_t * state,int fm_state)206 hermon_clr_state_nolock(hermon_state_t *state, int fm_state)
207 {
208 	extern void membar_sync(void);
209 
210 	state->hs_fm_state &= ~fm_state;
211 	membar_sync();
212 }
213 
214 
215 /*
216  *  void
217  *  hermon_clr_state(hermon_state_t *state, int fm_state)
218  *
219  *  Overview
220  *      hermon_clr_state() drops the specified state from Hermon FM state.
221  *
222  *  Argument
223  *      state: pointer to Hermon state structure
224  *      fm_state: Hermon FM state, which is composed of:
225  *		HCA_NO_FM	Hermom FM is not supported
226  *		HCA_PIO_FM	PIO is fma-protected
227  *		HCA_DMA_FM	DMA is fma-protected
228  *		HCA_EREPORT_FM	FMA ereport is available
229  *		HCA_ERRCB_FM	FMA error callback is supported
230  *		HCA_ATTCH_FM	HCA FM attach mode
231  *		HCA_RUNTM_FM	HCA FM runtime mode
232  *
233  *  Return value
234  *  	Nothing
235  *
236  *  Caller's context
237  *      hermon_clr_state() can be called in user, kernel or interrupt context.
238  */
239 static void
hermon_clr_state(hermon_state_t * state,int fm_state)240 hermon_clr_state(hermon_state_t *state, int fm_state)
241 {
242 	ASSERT(fm_state != HCA_NO_FM);
243 
244 	mutex_enter(&state->hs_fm_lock);
245 	hermon_clr_state_nolock(state, fm_state);
246 	mutex_exit(&state->hs_fm_lock);
247 }
248 
249 
250 /*
251  *  void
252  *  hermon_set_state(hermon_state_t *state, int fm_state)
253  *
254  *  Overview
255  *      hermon_set_state() sets Hermon FM state.
256  *
257  *  Argument
258  *      state: pointer to Hermon state structure
259  *      fm_state: Hermon FM state, which is composed of:
260  *		HCA_NO_FM	Hermom FM is not supported
261  *		HCA_PIO_FM	PIO is fma-protected
262  *		HCA_DMA_FM	DMA is fma-protected
263  *		HCA_EREPORT_FM	FMA ereport is available
264  *		HCA_ERRCB_FM	FMA error callback is supported
265  *		HCA_ATTCH_FM	HCA FM attach mode
266  *		HCA_RUNTM_FM	HCA FM runtime mode
267  *
268  *  Return value
269  *  	Nothing
270  *
271  *  Caller's context
272  *      hermon_set_state() can be called in user, kernel or interrupt context.
273  */
274 static void
hermon_set_state(hermon_state_t * state,int fm_state)275 hermon_set_state(hermon_state_t *state, int fm_state)
276 {
277 	extern void membar_sync(void);
278 
279 	mutex_enter(&state->hs_fm_lock);
280 	if (fm_state == HCA_NO_FM) {
281 		state->hs_fm_state = HCA_NO_FM;
282 	} else {
283 		state->hs_fm_state |= fm_state;
284 	}
285 	membar_sync();
286 	mutex_exit(&state->hs_fm_lock);
287 }
288 
289 
290 /*
291  *  int
292  *  hermon_get_state(hermon_state_t *state)
293  *
294  *  Overview
295  *      hermon_get_state() returns the current Hermon FM state.
296  *
297  *  Argument
298  *      state: pointer to Hermon state structure
299  *
300  *  Return value
301  *      fm_state: Hermon FM state, which is composed of:
302  *		HCA_NO_FM	Hermom FM is not supported
303  *		HCA_PIO_FM	PIO is fma-protected
304  *		HCA_DMA_FM	DMA is fma-protected
305  *		HCA_EREPORT_FM	FMA ereport is available
306  *		HCA_ERRCB_FM	FMA error callback is supported
307  *		HCA_ATTCH_FM	HCA FM attach mode
308  *		HCA_RUNTM_FM	HCA FM runtime mode
309  *
310  *  Caller's context
311  *      hermon_get_state() can be called in user, kernel or interrupt context.
312  */
313 int
hermon_get_state(hermon_state_t * state)314 hermon_get_state(hermon_state_t *state)
315 {
316 	return (state->hs_fm_state);
317 }
318 
319 
320 /*
321  *  void
322  *  hermon_fm_init(hermon_state_t *state)
323  *
324  *  Overview
325  *      hermon_fm_init() is a Hermon FM initialization function which registers
326  *      some FMA functions such as the ereport and the acc check capabilities
327  *      for Hermon. If the "fm_disable" property in /kernel/drv/hermon.conf is
328  *      defined (and/or its value is set), then the Hermon FM capabilities will
329  *      drop, and only the default capabilities (the ereport and error callback
330  *      capabilities) are available (and the action against HW errors is
331  *      issuing an ereport then panicking the system).
332  *
333  *  Argument
334  *      state: pointer to Hermon state structure
335  *
336  *  Return value
337  *      Nothing
338  *
339  *  Caller's context
340  *      hermon_fm_init() can be called in user or kernel context only.
341  */
342 void
hermon_fm_init(hermon_state_t * state)343 hermon_fm_init(hermon_state_t *state)
344 {
345 	ddi_iblock_cookie_t iblk;
346 
347 	/*
348 	 * Check the "fm_disable" property. If it's defined,
349 	 * use the Solaris FMA default action for Hermon.
350 	 */
351 	if (ddi_getprop(DDI_DEV_T_NONE, state->hs_dip, DDI_PROP_DONTPASS,
352 	    "fm_disable", 0) != 0) {
353 		state->hs_fm_disable = 1;
354 	}
355 
356 	/* If hs_fm_diable is set, then skip the rest */
357 	if (state->hs_fm_disable) {
358 		hermon_set_state(state, HCA_NO_FM);
359 		return;
360 	}
361 
362 	/* Set the Hermon FM attach mode */
363 	hermon_set_state(state, HCA_ATTCH_FM);
364 
365 	/* Initialize the Solaris FMA capabilities for the Hermon FM support */
366 	state->hs_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
367 	    state->hs_dip, DDI_PROP_DONTPASS, fm_cap,
368 	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE);
369 
370 	/*
371 	 * The Hermon FM uses the ereport and acc check capabilites only,
372 	 * but both of them should be available. If either is not, turn
373 	 * hs_fm_disable on and behave in the same way as the "fm_diable"
374 	 * property is set.
375 	 */
376 	if (state->hs_fm_capabilities !=
377 	    (DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE)) {
378 		state->hs_fm_disable = 1;
379 		hermon_set_state(state, HCA_NO_FM);
380 		HERMON_ATTACH_MSG(state->hs_attach_buf,
381 		    "Hermon FM capability fails");
382 		return;
383 	}
384 
385 	/* Initialize the HCA FM resources */
386 	hermon_hca_fm_init(state, &hca_fm);
387 
388 	/* Initialize the fm state lock */
389 	mutex_init(&state->hs_fm_lock, NULL, MUTEX_DRIVER, NULL);
390 
391 	/* Register the capabilities with the IO fault services */
392 	ddi_fm_init(state->hs_dip, &state->hs_fm_capabilities, &iblk);
393 
394 	/* Set up the pci ereport capabilities if the ereport is capable */
395 	if (DDI_FM_EREPORT_CAP(state->hs_fm_capabilities)) {
396 		pci_ereport_setup(state->hs_dip);
397 	}
398 
399 	/* Set the Hermon FM state */
400 	hermon_set_state(state, HCA_PIO_FM | HCA_EREPORT_FM);
401 
402 #ifdef FMA_TEST
403 	i_hca_test_init(&state->hs_fm_test_hash, &state->hs_fm_id_hash);
404 #endif /* FMA_TEST */
405 }
406 
407 
408 /*
409  *  void
410  *  hermon_fm_fini(hermon_state_t *state)
411  *
412  *  Overview
413  *      hermon_fm_fini() is a Hermon FM finalization function which de-registers
414  *      Solaris FMA functions set to Hermon.
415  *
416  *  Argument
417  *      state: pointer to Hermon state structure
418  *
419  *  Return value
420  *      Nothing
421  *
422  *  Caller's context
423  *      hermon_fm_fini() can be called in user or kernel context only.
424  */
425 void
hermon_fm_fini(hermon_state_t * state)426 hermon_fm_fini(hermon_state_t *state)
427 {
428 	/*
429 	 * If hermon_fm_diable is set or there is no FM service provided,
430 	 * then skip the rest.
431 	 */
432 	if (state->hs_fm_disable || hermon_get_state(state) == HCA_NO_FM) {
433 		return;
434 	}
435 
436 	ASSERT(!(hermon_get_state(state) & HCA_ERRCB_FM));
437 
438 #ifdef FMA_TEST
439 	i_hca_test_fini(&state->hs_fm_test_hash, &state->hs_fm_id_hash);
440 #endif /* FMA_TEST */
441 
442 	/* Set the Hermon FM state to no support */
443 	hermon_set_state(state, HCA_NO_FM);
444 
445 	/* Release HCA FM resources */
446 	hermon_hca_fm_fini(state);
447 
448 	/*
449 	 * Release any resources allocated by pci_ereport_setup()
450 	 */
451 	if (DDI_FM_EREPORT_CAP(state->hs_fm_capabilities)) {
452 		pci_ereport_teardown(state->hs_dip);
453 	}
454 
455 	/* De-register the Hermon FM from the IO fault services */
456 	ddi_fm_fini(state->hs_dip);
457 }
458 
459 
460 /*
461  *  int
462  *  hermon_fm_ereport_init(hermon_state_t *state)
463  *
464  *  Overview
465  *      hermon_fm_ereport_init() changes the Hermon FM state to the ereport
466  *      only mode during the driver attach.
467  *
468  *  Argument
469  *      state: pointer to Hermon state structure
470  *
471  *  Return value
472  *      DDI_SUCCESS
473  *      DDI_FAILURE
474  *
475  *  Caller's context
476  *      hermon_fm_ereport_init() can be called in user or kernel context only.
477  */
478 int
hermon_fm_ereport_init(hermon_state_t * state)479 hermon_fm_ereport_init(hermon_state_t *state)
480 {
481 	ddi_iblock_cookie_t iblk;
482 	hermon_cfg_profile_t *cfgprof;
483 	hermon_hw_querydevlim_t	*devlim;
484 	hermon_rsrc_hw_entry_info_t entry_info;
485 	hermon_rsrc_pool_info_t	*rsrc_pool;
486 	uint64_t offset, num, max, num_prealloc;
487 	ddi_device_acc_attr_t dev_attr = {
488 		DDI_DEVICE_ATTR_V0,
489 		DDI_STRUCTURE_LE_ACC,
490 		DDI_STRICTORDER_ACC,
491 		DDI_DEFAULT_ACC
492 	};
493 	char *rsrc_name;
494 	extern void membar_sync(void);
495 
496 	/* Stop the poll thread while the FM state is being changed */
497 	state->hs_fm_poll_suspend = B_TRUE;
498 	membar_sync();
499 
500 	/*
501 	 * Disable the Hermon interrupt after the interrupt capability flag
502 	 * is checked.
503 	 */
504 	if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
505 		if (ddi_intr_block_disable
506 		    (&state->hs_intrmsi_hdl[0], 1) != DDI_SUCCESS) {
507 			return (DDI_FAILURE);
508 		}
509 	} else {
510 		if (ddi_intr_disable
511 		    (state->hs_intrmsi_hdl[0]) != DDI_SUCCESS) {
512 			return (DDI_FAILURE);
513 		}
514 	}
515 
516 	/*
517 	 * Release any resources allocated by pci_ereport_setup()
518 	 */
519 	if (DDI_FM_EREPORT_CAP(state->hs_fm_capabilities)) {
520 		pci_ereport_teardown(state->hs_dip);
521 	}
522 
523 	/* De-register the Hermon FM from the IO fault services */
524 	ddi_fm_fini(state->hs_dip);
525 
526 	/* Re-initialize fm ereport with the ereport only */
527 	state->hs_fm_capabilities = ddi_prop_get_int(DDI_DEV_T_ANY,
528 	    state->hs_dip, DDI_PROP_DONTPASS, fm_cap,
529 	    DDI_FM_EREPORT_CAPABLE);
530 
531 	/*
532 	 * Now that the Hermon FM uses the ereport capability only,
533 	 * If it's not set, turn hs_fm_disable on and behave in the
534 	 * same way as the "fm_diable" property is set.
535 	 */
536 	if (state->hs_fm_capabilities != DDI_FM_EREPORT_CAPABLE) {
537 		HERMON_ATTACH_MSG(state->hs_attach_buf,
538 		    "Hermon FM ereport fails (ereport mode)");
539 		goto error;
540 	}
541 
542 	/* Re-register the ereport capability with the IO fault services */
543 	ddi_fm_init(state->hs_dip, &state->hs_fm_capabilities, &iblk);
544 
545 	/* Initialize the pci ereport capabilities if the ereport is capable */
546 	if (DDI_FM_EREPORT_CAP(state->hs_fm_capabilities)) {
547 		pci_ereport_setup(state->hs_dip);
548 	}
549 
550 	/* Setup for PCI config read/write of HCA device */
551 	if (pci_config_setup(state->hs_dip, &state->hs_reg_pcihdl) !=
552 	    DDI_SUCCESS) {
553 		HERMON_ATTACH_MSG(state->hs_attach_buf,
554 		    "PCI config mapping fails (ereport mode)");
555 		goto error;
556 	}
557 
558 	/* Allocate the regular access handle for MSI-X tables */
559 	if (ddi_regs_map_setup(state->hs_dip, state->hs_msix_tbl_rnumber,
560 	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
561 	    state->hs_msix_tbl_size, &dev_attr,
562 	    &state->hs_reg_msix_tblhdl) != DDI_SUCCESS) {
563 		HERMON_ATTACH_MSG(state->hs_attach_buf,
564 		    "MSI-X Table mapping fails (ereport mode)");
565 		goto error;
566 	}
567 
568 	/* Allocate the regular access handle for MSI-X PBA */
569 	if (ddi_regs_map_setup(state->hs_dip, state->hs_msix_pba_rnumber,
570 	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
571 	    state->hs_msix_pba_size, &dev_attr,
572 	    &state->hs_reg_msix_pbahdl) != DDI_SUCCESS) {
573 		HERMON_ATTACH_MSG(state->hs_attach_buf,
574 		    "MSI-X PBA mapping fails (ereport mode)");
575 		goto error;
576 	}
577 
578 	/* Allocate the regular access handle for Hermon CMD I/O space */
579 	if (ddi_regs_map_setup(state->hs_dip, HERMON_CMD_BAR,
580 	    &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_reg_accattr,
581 	    &state->hs_reg_cmdhdl) != DDI_SUCCESS) {
582 		HERMON_ATTACH_MSG(state->hs_attach_buf,
583 		    "CMD_BAR mapping fails (ereport mode)");
584 		goto error;
585 	}
586 
587 	/* Reset the host command register */
588 	state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
589 	    ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
590 
591 	/* Reset the software reset register */
592 	state->hs_cmd_regs.sw_reset = (uint32_t *)
593 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
594 	    HERMON_CMD_SW_RESET_OFFSET);
595 
596 	/* Reset the software reset register semaphore */
597 	state->hs_cmd_regs.sw_semaphore = (uint32_t *)
598 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
599 	    HERMON_CMD_SW_SEMAPHORE_OFFSET);
600 
601 	/* Calculate the clear interrupt register offset */
602 	offset = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
603 
604 	/* Reset the clear interrupt address */
605 	state->hs_cmd_regs.clr_intr = (uint64_t *)
606 	    (uintptr_t)(state->hs_reg_cmd_baseaddr + offset);
607 
608 	/* Reset the internal error buffer address */
609 	state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
610 	    (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
611 
612 	/* Check if the blue flame is enabled, and set the offset value */
613 	if (state->hs_devlim.blu_flm) {
614 		offset = (uint64_t)1 <<
615 		    (state->hs_devlim.log_max_uar_sz + 20);
616 	} else {
617 		offset = 0;
618 	}
619 
620 	/* Allocate the regular access handle for Hermon UAR I/O space */
621 	if (ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
622 	    &state->hs_reg_uar_baseaddr, 0, offset,
623 	    &state->hs_reg_accattr, &state->hs_reg_uarhdl) != DDI_SUCCESS) {
624 		HERMON_ATTACH_MSG(state->hs_attach_buf,
625 		    "UAR BAR mapping fails (ereport mode)");
626 		goto error;
627 	}
628 
629 	hermon_eq_reset_uar_baseaddr(state);
630 
631 	/* Drop the Hermon FM Attach Mode */
632 	hermon_clr_state(state, HCA_ATTCH_FM);
633 
634 	/* Set the Hermon FM Runtime Mode */
635 	hermon_set_state(state, HCA_RUNTM_FM);
636 
637 	/* Free up Hermon UAR page #1 */
638 	hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
639 
640 	/* Free up the UAR pool */
641 	entry_info.hwi_rsrcpool = &state->hs_rsrc_hdl[HERMON_UARPG];
642 	hermon_rsrc_hw_entries_fini(state, &entry_info);
643 
644 	/* Re-allocate the UAR pool */
645 	cfgprof = state->hs_cfg_profile;
646 	devlim	= &state->hs_devlim;
647 	num			  = ((uint64_t)1 << cfgprof->cp_log_num_uar);
648 	max			  = num;
649 	num_prealloc		  = max(devlim->num_rsvd_uar, 128);
650 	rsrc_pool		  = &state->hs_rsrc_hdl[HERMON_UARPG];
651 	rsrc_pool->rsrc_type	  = HERMON_UARPG;
652 	rsrc_pool->rsrc_loc	  = HERMON_IN_UAR;
653 	rsrc_pool->rsrc_pool_size = (num << PAGESHIFT);
654 	rsrc_pool->rsrc_shift	  = PAGESHIFT;
655 	rsrc_pool->rsrc_quantum	  = (uint_t)PAGESIZE;
656 	rsrc_pool->rsrc_align	  = PAGESIZE;
657 	rsrc_pool->rsrc_state	  = state;
658 	rsrc_pool->rsrc_start	  = (void *)state->hs_reg_uar_baseaddr;
659 	rsrc_name = (char *)kmem_zalloc(HERMON_RSRC_NAME_MAXLEN, KM_SLEEP);
660 	HERMON_RSRC_NAME(rsrc_name, HERMON_UAR_PAGE_VMEM_RUNTM);
661 	entry_info.hwi_num	  = num;
662 	entry_info.hwi_max	  = max;
663 	entry_info.hwi_prealloc	  = num_prealloc;
664 	entry_info.hwi_rsrcpool	  = rsrc_pool;
665 	entry_info.hwi_rsrcname	  = rsrc_name;
666 	if (hermon_rsrc_hw_entries_init(state, &entry_info) != DDI_SUCCESS) {
667 		kmem_free(rsrc_name, HERMON_RSRC_NAME_MAXLEN);
668 		goto error;
669 	}
670 	kmem_free(rsrc_name, HERMON_RSRC_NAME_MAXLEN);
671 
672 	/* Re-allocate the kernel UAR page */
673 	if (hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
674 	    &state->hs_uarkpg_rsrc) != DDI_SUCCESS) {
675 		goto error;
676 	}
677 
678 	/* Setup pointer to kernel UAR page */
679 	state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
680 
681 	/* Now drop the the Hermon PIO FM */
682 	hermon_clr_state(state, HCA_PIO_FM);
683 
684 	/* Release the MSI-X Table access handle */
685 	if (state->hs_fm_msix_tblhdl) {
686 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
687 		state->hs_fm_msix_tblhdl = NULL;
688 	}
689 
690 	/* Release the MSI-X PBA access handle */
691 	if (state->hs_fm_msix_pbahdl) {
692 		hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
693 		state->hs_fm_msix_pbahdl = NULL;
694 	}
695 
696 	/* Release the pci config space access handle */
697 	if (state->hs_fm_pcihdl) {
698 		hermon_regs_map_free(state, &state->hs_fm_pcihdl);
699 		state->hs_fm_pcihdl = NULL;
700 	}
701 
702 	/* Release the cmd protected access handle */
703 	if (state->hs_fm_cmdhdl) {
704 		hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
705 		state->hs_fm_cmdhdl = NULL;
706 	}
707 
708 	/* Release the uar fma-protected access handle */
709 	if (state->hs_fm_uarhdl) {
710 		hermon_regs_map_free(state, &state->hs_fm_uarhdl);
711 		state->hs_fm_uarhdl = NULL;
712 	}
713 
714 	/* Enable the Hermon interrupt again */
715 	if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
716 		if (ddi_intr_block_enable
717 		    (&state->hs_intrmsi_hdl[0], 1) != DDI_SUCCESS) {
718 			return (DDI_FAILURE);
719 		}
720 	} else {
721 		if (ddi_intr_enable
722 		    (state->hs_intrmsi_hdl[0]) != DDI_SUCCESS) {
723 			return (DDI_FAILURE);
724 		}
725 	}
726 
727 	/* Restart the poll thread */
728 	state->hs_fm_poll_suspend = B_FALSE;
729 
730 	return (DDI_SUCCESS);
731 
732 error:
733 	/* Enable the Hermon interrupt again */
734 	if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
735 		(void) ddi_intr_block_enable(&state->hs_intrmsi_hdl[0], 1);
736 	} else {
737 		(void) ddi_intr_enable(state->hs_intrmsi_hdl[0]);
738 	}
739 	return (DDI_FAILURE);
740 }
741 
742 
743 /*
744  *  int
745  *  hermon_regs_map_setup(hermon_state_t *state, uint_t rnumber, caddr_t *addrp,
746  *	offset_t offset, offset_t len, ddi_device_acc_attr_t *accattrp,
747  *	ddi_acc_handle_t *handle)
748  *
749  *  Overview
750  *      This is a wrapper function of i_hca_regs_map_setup() for Hermon FM so
751  *      that it calls i_hca_regs_map_setup() inside after it checks the
752  *      "fm_disable" configuration property. If the "fm_disable" is described
753  *      in /kernel/drv/hermon.conf, the function calls ddi_regs_map_setup()
754  *      directly instead.
755  *      See i_hca_regs_map_setup() in detail.
756  *
757  *  Argument
758  *      state: pointer to Hermon state structure
759  *      rnumber: index number to the register address space set
760  *      addrp: platform-dependent value (same as ddi_regs_map_setup())
761  *      offset: offset into the register address space
762  *      len: address space length to be mapped
763  *      accattrp: pointer to device access attribute structure
764  *	handle: pointer to ddi_acc_handle_t used for HCA FM
765  *
766  *  Return value
767  *      ddi function status value which are:
768  *      	DDI_SUCCESS
769  *      	DDI_FAILURE
770  *      	DDI_ME_RNUMBER_RNGE
771  *      	DDI_REGS_ACC_CONFLICT
772  *
773  *  Caller's context
774  *      hermon_regs_map_setup() can be called in user or kernel context only.
775  */
776 int
hermon_regs_map_setup(hermon_state_t * state,uint_t rnumber,caddr_t * addrp,offset_t offset,offset_t len,ddi_device_acc_attr_t * accattrp,ddi_acc_handle_t * handle)777 hermon_regs_map_setup(hermon_state_t *state, uint_t rnumber, caddr_t *addrp,
778 	offset_t offset, offset_t len, ddi_device_acc_attr_t *accattrp,
779 	ddi_acc_handle_t *handle)
780 {
781 	if (state->hs_fm_disable) {
782 		return (ddi_regs_map_setup(state->hs_dip, rnumber, addrp,
783 		    offset, len, accattrp, handle));
784 	} else {
785 		return (i_hca_regs_map_setup(state->hs_fm_hca_fm, state->hs_dip,
786 		    rnumber, addrp, offset, len, accattrp, handle));
787 	}
788 }
789 
790 
791 /*
792  *  void
793  *  hermon_regs_map_free(hermon_state_t *state, ddi_acc_handle_t *handlep)
794  *
795  *  Overview
796  *      This is a wrapper function of i_hca_regs_map_free() for Hermon FM so
797  *      that it calls i_hca_regs_map_free() inside after it checks the
798  *      "fm_disable" configuration property. If the "fm_disable" is described
799  *      in /kernel/drv/hermon.conf, the function calls ddi_regs_map_fre()
800  *      directly instead.  See i_hca_regs_map_free() in detail.
801  *
802  *  Argument
803  *      state: pointer to Hermon state structure
804  *	handle: pointer to ddi_acc_handle_t used for HCA FM
805  *
806  *  Return value
807  *      Nothing
808  *
809  *  Caller's context
810  *      hermon_regs_map_free() can be called in user or kernel context only.
811  *
812  *  Note that the handle passed to hermon_regs_map_free() is NULL-cleared
813  *  after this function is called.
814  */
815 void
hermon_regs_map_free(hermon_state_t * state,ddi_acc_handle_t * handle)816 hermon_regs_map_free(hermon_state_t *state, ddi_acc_handle_t *handle)
817 {
818 	if (state->hs_fm_disable) {
819 		ddi_regs_map_free(handle);
820 		*handle = NULL;
821 	} else {
822 		i_hca_regs_map_free(state->hs_fm_hca_fm, handle);
823 	}
824 }
825 
826 
827 /*
828  *  int
829  *  hermon_pci_config_setup(hermon_state_t *state, ddi_acc_handle_t *handle)
830  *
831  *  Overview
832  *      This is a wrapper function of i_hca_pci_config_setup() for Hermon FM so
833  *      that it calls i_hca_pci_config_setup() inside after it checks the
834  *      "fm-disable" configuration property. If the "fm_disable" is described
835  *      in /kernel/drv/hermon.conf, the function calls pci_config_setup()
836  *      directly instead. See i_hca_pci_config_setup() in detail.
837  *
838  *  Argument
839  *      state: pointer to Hermon state structure
840  *	handle: pointer to ddi_acc_handle_t used for HCA FM
841  *
842  *  Return value
843  *      ddi function status value which are:
844  *      	DDI_SUCCESS
845  *      	DDI_FAILURE
846  *
847  *  Caller's context
848  *      hermon_pci_config_setup() can be called in user or kernel context only.
849  */
850 int
hermon_pci_config_setup(hermon_state_t * state,ddi_acc_handle_t * handle)851 hermon_pci_config_setup(hermon_state_t *state, ddi_acc_handle_t *handle)
852 {
853 	if (state->hs_fm_disable) {
854 		return (pci_config_setup(state->hs_dip, handle));
855 	} else {
856 		/* Check Hermon FM and Solaris FMA capability flags */
857 		ASSERT((hermon_get_state(state) & HCA_PIO_FM &&
858 		    DDI_FM_ACC_ERR_CAP(ddi_fm_capable(state->hs_dip))) ||
859 		    (!(hermon_get_state(state) & HCA_PIO_FM) &&
860 		    !DDI_FM_ACC_ERR_CAP(ddi_fm_capable(state->hs_dip))));
861 		return (i_hca_pci_config_setup(state->hs_fm_hca_fm,
862 		    state->hs_dip, handle));
863 	}
864 }
865 
866 
867 /*
868  *  void
869  *  hermon_pci_config_teardown(hermon_state_t *state, ddi_acc_handle_t *handle)
870  *
871  *  Overview
872  *      This is a wrapper function of i_hca_pci_config_teardown() for Hermon
873  *      FM so that it calls i_hca_pci_config_teardown() inside after it checks
874  *      the "fm-disable" configuration property. If the "fm_disable" is
875  *      described in /kernel/drv/hermon.conf, the function calls
876  *      pci_config_teardown() directly instead.
877  *      See i_hca_pci_config_teardown() in detail.
878  *
879  *  Argument
880  *      state: pointer to Hermon state structure
881  *	handle: pointer to ddi_acc_handle_t used for HCA FM
882  *
883  *  Return value
884  *      Nothing
885  *
886  *  Caller's context
887  *      hermon_pci_config_teardown() can be called in user or kernel context
888  *      only.
889  */
890 void
hermon_pci_config_teardown(hermon_state_t * state,ddi_acc_handle_t * handle)891 hermon_pci_config_teardown(hermon_state_t *state, ddi_acc_handle_t *handle)
892 {
893 	if (state->hs_fm_disable) {
894 		pci_config_teardown(handle);
895 		*handle = NULL;
896 	} else {
897 		i_hca_pci_config_teardown(state->hs_fm_hca_fm, handle);
898 	}
899 }
900 
901 
902 /*
903  *  boolean_t
904  *  hermon_init_failure(hermon_state_t *state)
905  *
906  *  Overview
907  *      hermon_init_failure() tells if HW errors are detected in
908  *      the Hermon driver attach.
909  *
910  *  Argument
911  *      state: pointer to Hermon state structure
912  *
913  *  Return value
914  *  	B_TRUE		HW errors detected during attach
915  *  	B_FALSE		No HW errors during attach
916  *
917  *  Caller's context
918  *      hermon_init_failure() can be called in user, kernel, interrupt
919  *      context or high interrupt context.
920  */
921 boolean_t
hermon_init_failure(hermon_state_t * state)922 hermon_init_failure(hermon_state_t *state)
923 {
924 	ddi_acc_handle_t hdl;
925 	ddi_fm_error_t derr;
926 
927 	if (!(hermon_get_state(state) & HCA_PIO_FM))
928 		return (B_FALSE);
929 
930 	/* check if fatal errors occur during attach */
931 	if (state->hs_fm_async_fatal)
932 		return (B_TRUE);
933 
934 	hdl = hermon_get_uarhdl(state);
935 	/* Get the PIO error against UAR I/O space */
936 	ddi_fm_acc_err_get(hdl, &derr, DDI_FME_VERSION);
937 	if (derr.fme_status != DDI_FM_OK) {
938 		return (B_TRUE);
939 	}
940 
941 	hdl = hermon_get_cmdhdl(state);
942 	/* Get the PIO error againsts CMD I/O space */
943 	ddi_fm_acc_err_get(hdl, &derr, DDI_FME_VERSION);
944 	if (derr.fme_status != DDI_FM_OK) {
945 		return (B_TRUE);
946 	}
947 
948 	return (B_FALSE);
949 }
950 
951 
952 /*
953  *  void
954  *  hermon_fm_ereport(hermon_state_t *state, int type, int detail)
955  *
956  *  Overview
957  *      hermon_fm_ereport() is a Hermon FM ereport function used
958  *      to issue a Solaris FMA ereport. See Hermon FM comments at the
959  *      beginning of this file in detail.
960  *
961  *  Argument
962  *      state: pointer to Hermon state structure
963  *      type: error type
964  *		HCA_SYS_ERR	FMA reporting HW error
965  *		HCA_IBA_ERR	HCA specific HW error
966  *      detail: HW error hint implying which ereport is issued
967  * 		HCA_ERR_TRANSIENT	HW transient error
968  * 		HCA_ERR_NON_FATAL	HW persistent error
969  * 		HCA_ERR_FATAL		HW fatal error
970  * 		HCA_ERR_SRV_LOST	IB service lost due to HW error
971  * 		HCA_ERR_DEGRADED	Hermon driver and/or uDAPL degraded
972  * 					due to HW error
973  * 		HCA_ERR_IOCTL		HW error detected in user context
974  * 					(especially in ioctl())
975  *
976  *  Return value
977  *      Nothing
978  *
979  *  Caller's context
980  *      hermon_fm_ereport() can be called in user, kernel, interrupt context
981  *      or high interrupt context.
982  */
983 void
hermon_fm_ereport(hermon_state_t * state,int type,int detail)984 hermon_fm_ereport(hermon_state_t *state, int type, int detail)
985 {
986 	/*
987 	 * If hermon_fm_diable is set or there is no FM ereport service
988 	 * provided, then skip the rest.
989 	 */
990 	if (state->hs_fm_disable ||
991 	    !(hermon_get_state(state) & HCA_EREPORT_FM)) {
992 		return;
993 	}
994 
995 	switch (type) {
996 
997 	case HCA_SYS_ERR:
998 		switch (detail) {
999 		case HCA_ERR_TRANSIENT:
1000 		case HCA_ERR_IOCTL:
1001 			ddi_fm_service_impact(state->hs_dip,
1002 			    DDI_SERVICE_UNAFFECTED);
1003 			break;
1004 		case HCA_ERR_NON_FATAL:
1005 			/* Nothing */
1006 			break;
1007 		case HCA_ERR_SRV_LOST:
1008 			ddi_fm_service_impact(state->hs_dip,
1009 			    DDI_SERVICE_LOST);
1010 			break;
1011 		case HCA_ERR_DEGRADED:
1012 			switch (state->hs_fm_degraded_reason) {
1013 			case HCA_FW_CORRUPT:
1014 				i_hca_fm_ereport(state->hs_dip, type,
1015 				    DDI_FM_DEVICE_FW_CORRUPT);
1016 				break;
1017 			case HCA_FW_MISMATCH:
1018 				i_hca_fm_ereport(state->hs_dip, type,
1019 				    DDI_FM_DEVICE_FW_MISMATCH);
1020 				break;
1021 			case HCA_FW_MISC:
1022 			default:
1023 				i_hca_fm_ereport(state->hs_dip, type,
1024 				    DDI_FM_DEVICE_INTERN_UNCORR);
1025 				break;
1026 			}
1027 			ddi_fm_service_impact(state->hs_dip,
1028 			    DDI_SERVICE_DEGRADED);
1029 			break;
1030 		case HCA_ERR_FATAL:
1031 			ddi_fm_service_impact(state->hs_dip,
1032 			    DDI_SERVICE_LOST);
1033 			state->hs_fm_async_fatal = B_TRUE;
1034 			break;
1035 		default:
1036 			cmn_err(CE_WARN, "hermon_fm_ereport: Unknown error. "
1037 			    "type = %d, detail = %d\n.", type, detail);
1038 		}
1039 		break;
1040 
1041 	case HCA_IBA_ERR:
1042 		switch (detail) {
1043 		case HCA_ERR_TRANSIENT:
1044 			i_hca_fm_ereport(state->hs_dip, type,
1045 			    DDI_FM_DEVICE_INTERN_UNCORR);
1046 			ddi_fm_service_impact(state->hs_dip,
1047 			    DDI_SERVICE_UNAFFECTED);
1048 			break;
1049 		case HCA_ERR_SRV_LOST:
1050 			cmn_err(CE_WARN, "hermon_fm_ereport: not supported "
1051 			    "error. type = %d, detail = %d\n.", type, detail);
1052 			break;
1053 		case HCA_ERR_DEGRADED:
1054 			switch (state->hs_fm_degraded_reason) {
1055 			case HCA_FW_CORRUPT:
1056 				i_hca_fm_ereport(state->hs_dip, type,
1057 				    DDI_FM_DEVICE_FW_CORRUPT);
1058 				break;
1059 			case HCA_FW_MISMATCH:
1060 				i_hca_fm_ereport(state->hs_dip, type,
1061 				    DDI_FM_DEVICE_FW_MISMATCH);
1062 				break;
1063 			case HCA_FW_MISC:
1064 			default:
1065 				i_hca_fm_ereport(state->hs_dip, type,
1066 				    DDI_FM_DEVICE_INTERN_UNCORR);
1067 				break;
1068 			}
1069 			ddi_fm_service_impact(state->hs_dip,
1070 			    DDI_SERVICE_DEGRADED);
1071 			break;
1072 		case HCA_ERR_IOCTL:
1073 		case HCA_ERR_NON_FATAL:
1074 			i_hca_fm_ereport(state->hs_dip, type,
1075 			    DDI_FM_DEVICE_INTERN_UNCORR);
1076 			ddi_fm_service_impact(state->hs_dip,
1077 			    DDI_SERVICE_UNAFFECTED);
1078 			break;
1079 		case HCA_ERR_FATAL:
1080 			if (hermon_get_state(state) & HCA_PIO_FM) {
1081 				if (servicing_interrupt()) {
1082 					atomic_inc_32(&state->
1083 					    hs_fm_async_errcnt);
1084 				} else {
1085 					i_hca_fm_ereport(state->hs_dip, type,
1086 					    DDI_FM_DEVICE_INTERN_UNCORR);
1087 					ddi_fm_service_impact(state->hs_dip,
1088 					    DDI_SERVICE_LOST);
1089 				}
1090 				state->hs_fm_async_fatal = B_TRUE;
1091 			} else {
1092 				i_hca_fm_ereport(state->hs_dip, type,
1093 				    DDI_FM_DEVICE_INTERN_UNCORR);
1094 				ddi_fm_service_impact(state->hs_dip,
1095 				    DDI_SERVICE_LOST);
1096 				cmn_err(CE_PANIC,
1097 				    "Hermon Fatal Internal Error. "
1098 				    "Hermon state=0x%p", (void *)state);
1099 			}
1100 			break;
1101 		default:
1102 			cmn_err(CE_WARN, "hermon_fm_ereport: Unknown error. "
1103 			    "type = %d, detail = %d\n.", type, detail);
1104 		}
1105 		break;
1106 
1107 	default:
1108 		cmn_err(CE_WARN, "hermon_fm_ereport: Unknown type "
1109 		    "type = %d, detail = %d\n.", type, detail);
1110 		break;
1111 	}
1112 }
1113 
1114 
1115 /*
1116  *  ushort_t
1117  *  hermon_devacc_attr_version(hermon_state_t *)
1118  *
1119  *  Overview
1120  *      hermon_devacc_attr_version() returns the ddi device attribute
1121  *      version.
1122  *
1123  *  Argument
1124  *      state: pointer to Hermon state structure
1125  *
1126  *  Return value
1127  *      dev_acc_attr_version value
1128  *      	DDI_DEVICE_ATTR_V0	Hermon FM disabled
1129  *      	DDI_DEVICE_ATTR_V1	Hermon FM enabled
1130  *
1131  *  Caller's context
1132  *      hermon_devacc_attr_version() can be called in user, kernel, interrupt
1133  *      context or high interrupt context.
1134  */
1135 ushort_t
hermon_devacc_attr_version(hermon_state_t * state)1136 hermon_devacc_attr_version(hermon_state_t *state)
1137 {
1138 	if (state->hs_fm_disable) {
1139 		return (DDI_DEVICE_ATTR_V0);
1140 	} else {
1141 		return (DDI_DEVICE_ATTR_V1);
1142 	}
1143 }
1144 
1145 
1146 /*
1147  *  uchar_t
1148  *  hermon_devacc_attr_access(hermon_state_t *)
1149  *
1150  *  Overview
1151  *      hermon_devacc_attr_access() returns devacc_attr_access error
1152  *      protection types.
1153  *
1154  *  Argument
1155  *      state: pointer to Hermon state structure
1156  *
1157  *  Return value
1158  *      dev_acc_attr_access error protection type
1159  *      	DDI_DEFAULT_ACC		Hermon FM disabled for PIO
1160  *      	DDI_FLAGERR_ACC		Hermon FM enabled for PIO
1161  *
1162  *  Caller's context
1163  *      hermon_devacc_attr_access() can be called in user, kernel, interrupt
1164  *      context or high interrupt context.
1165  */
1166 uchar_t
hermon_devacc_attr_access(hermon_state_t * state)1167 hermon_devacc_attr_access(hermon_state_t *state)
1168 {
1169 	if (state->hs_fm_disable) {
1170 		return (DDI_DEFAULT_ACC);
1171 	} else {
1172 		return (DDI_FLAGERR_ACC);
1173 	}
1174 }
1175 
1176 
1177 /*
1178  *  int
1179  *  hermon_PIO_start(hermon_state_t *state, ddi_acc_handle_t handle,
1180  *      hermon_test_t *tst)
1181  *
1182  *  Overview
1183  *      hermon_PIO_start() should be called before Hermon driver issues PIOs
1184  *      against I/O space. If Hermon FM is disabled, this function returns
1185  *      HCA_PIO_OK always. See i_hca_pio_start() in detail.
1186  *
1187  *  Argument
1188  *      state: pointer to Hermon state structure
1189  *	handle: pointer to ddi_acc_handle_t used for HCA FM
1190  *      tst: pointer to HCA FM function test structure. If the structure
1191  *           is not used, the NULL value must be passed instead.
1192  *
1193  *  Return value
1194  *  	error status showing whether or not this error can retry
1195  *	HCA_PIO_OK		No HW errors
1196  *	HCA_PIO_TRANSIENT	This error could be transient
1197  *	HCA_PIO_PERSISTENT	This error is persistent
1198  *
1199  *  Caller's context
1200  *      hermon_PIO_start() can be called in user, kernel or interrupt context.
1201  */
1202 int
hermon_PIO_start(hermon_state_t * state,ddi_acc_handle_t handle,hermon_test_t * tst)1203 hermon_PIO_start(hermon_state_t *state, ddi_acc_handle_t handle,
1204     hermon_test_t *tst)
1205 {
1206 	if (state->hs_fm_disable) {
1207 		return (HCA_PIO_OK);
1208 	} else {
1209 		struct i_hca_acc_handle *handlep =
1210 		    i_hca_get_acc_handle(state->hs_fm_hca_fm, handle);
1211 		ASSERT(handlep != NULL);
1212 		return (i_hca_pio_start(state->hs_dip, handlep, tst));
1213 	}
1214 }
1215 
1216 
1217 /*
1218  *  int
1219  *  hermon_PIO_end(hermon_state_t *state, ddi_acc_handle_t handle, int *cnt,
1220  *      hermon_test_t *tst)
1221  *
1222  *  Overview
1223  *      hermon_PIO_end() should be called after Hermon driver issues PIOs
1224  *      against I/O space. If Hermon FM is disabled, this function returns
1225  *      HCA_PIO_OK always. See i_hca_pio_end() in detail.
1226  *
1227  *  Argument
1228  *      state: pointer to Hermon state structure
1229  *	handle: pointer to ddi_acc_handle_t used for HCA FM
1230  *	cnt: pointer to the counter variable which holds the number of retry
1231  *	     (HCA_PIO_RETRY_CNT) when a HW error is detected.
1232  *      tst: pointer to HCA FM function test structure. If the structure
1233  *           is not used, the NULL value must be passed instead.
1234  *
1235  *  Return value
1236  *  	error status showing whether or not this error can retry
1237  *	HCA_PIO_OK		No HW errors
1238  *	HCA_PIO_TRANSIENT	This error could be transient
1239  *	HCA_PIO_PERSISTENT	This error is persistent
1240  *
1241  *  Caller's context
1242  *      hermon_PIO_end() can be called in user, kernel or interrupt context.
1243  */
1244 int
hermon_PIO_end(hermon_state_t * state,ddi_acc_handle_t handle,int * cnt,hermon_test_t * tst)1245 hermon_PIO_end(hermon_state_t *state, ddi_acc_handle_t handle, int *cnt,
1246     hermon_test_t *tst)
1247 {
1248 	if (state->hs_fm_disable) {
1249 		return (HCA_PIO_OK);
1250 	} else {
1251 		struct i_hca_acc_handle *handlep =
1252 		    i_hca_get_acc_handle(state->hs_fm_hca_fm, handle);
1253 		ASSERT(handlep != NULL);
1254 		return (i_hca_pio_end(state->hs_dip, handlep, cnt, tst));
1255 	}
1256 }
1257 
1258 
1259 /*
1260  *  ddi_acc_handle_t
1261  *  hermon_get_cmdhdl(hermon_state_t *state)
1262  *
1263  *  Overview
1264  *      hermon_get_cmdhdl() returns either the fma-protected access handle or
1265  *      the regular ddi-access handle depending on the Hermon FM state for
1266  *      Hermon command I/O space.
1267  *
1268  *  Argument
1269  *      state: pointer to Hermon state structure
1270  *
1271  *  Return value
1272  *  	the access handle for pio requests
1273  *
1274  *  Caller's context
1275  *      hermon_get_cmdhdl() can be called in user, kernel, interrupt context
1276  *      or high interrupt context.
1277  */
1278 ddi_acc_handle_t
hermon_get_cmdhdl(hermon_state_t * state)1279 hermon_get_cmdhdl(hermon_state_t *state)
1280 {
1281 	return (state->hs_fm_disable || hermon_get_state(state) & HCA_PIO_FM ?
1282 	    state->hs_fm_cmdhdl : state->hs_reg_cmdhdl);
1283 }
1284 
1285 
1286 /*
1287  *  ddi_acc_handle_t
1288  *  hermon_get_uarhdl(hermon_state_t *state)
1289  *
1290  *  Overview
1291  *      hermon_get_uarhdl() returns either the fma-protected access handle or
1292  *      the regular ddi-access handle depending on the Hermon FM state for
1293  *      Hermon UAR I/O space.
1294  *
1295  *  Argument
1296  *      state: pointer to Hermon state structure
1297  *
1298  *  Return value
1299  *  	the access handle for pio requests
1300  *
1301  *  Caller's context
1302  *      hermon_get_uarhdl() can be called in user, kernel, interrupt context
1303  *      or high interrupt context.
1304  */
1305 ddi_acc_handle_t
hermon_get_uarhdl(hermon_state_t * state)1306 hermon_get_uarhdl(hermon_state_t *state)
1307 {
1308 	return (state->hs_fm_disable || hermon_get_state(state) & HCA_PIO_FM ?
1309 	    state->hs_fm_uarhdl : state->hs_reg_uarhdl);
1310 }
1311 
1312 
1313 /*
1314  *  ddi_acc_handle_t
1315  *  hermon_rsrc_alloc_uarhdl(hermon_state_t *state)
1316  *
1317  *  Overview
1318  *      hermon_rsrc_alloc_uarhdl() returns either the fma-protected access
1319  *      handle or the regular ddi-access handle depending on the Hermon FM
1320  *      state for Hermon UAR I/O space as well as hermon_get_uarhdl(), but
1321  *      this function is dedicated to the UAR resource allocator.
1322  *
1323  *  Argument
1324  *      state: pointer to Hermon state structure
1325  *
1326  *  Return value
1327  *  	the access handle for pio requests
1328  *
1329  *  Caller's context
1330  *      hermon_rsrc_alloc_uarhdl() can be called in user, kernel, interrupt
1331  *      or high interrupt context.
1332  */
1333 ddi_acc_handle_t
hermon_rsrc_alloc_uarhdl(hermon_state_t * state)1334 hermon_rsrc_alloc_uarhdl(hermon_state_t *state)
1335 {
1336 	return (state->hs_fm_disable || hermon_get_state(state) & HCA_ATTCH_FM ?
1337 	    state->hs_fm_uarhdl : state->hs_reg_uarhdl);
1338 }
1339 
1340 /*
1341  *  ddi_acc_handle_t
1342  *  hermon_get_pcihdl(hermon_state_t *state)
1343  *
1344  *  Overview
1345  *      hermon_get_pcihdl() returns either the fma-protected access
1346  *      handle or the regular ddi-access handle to access the PCI config
1347  *      space. Whether or not which handle is returned at the moment depends
1348  *      on the Hermon FM state.
1349  *
1350  *  Argument
1351  *      state: pointer to Hermon state structure
1352  *
1353  *  Return value
1354  *  	the access handle to PCI config space
1355  *
1356  *  Caller's context
1357  *      hermon_get_pcihdl() can be called in user, kernel, interrupt
1358  *      or high interrupt context.
1359  */
1360 ddi_acc_handle_t
hermon_get_pcihdl(hermon_state_t * state)1361 hermon_get_pcihdl(hermon_state_t *state)
1362 {
1363 	return (state->hs_fm_disable || hermon_get_state(state) & HCA_ATTCH_FM ?
1364 	    state->hs_fm_pcihdl : state->hs_reg_pcihdl);
1365 }
1366 
1367 
1368 /*
1369  *  ddi_acc_handle_t
1370  *  hermon_get_msix_tblhdl(hermon_state_t *state)
1371  *
1372  *  Overview
1373  *      hermon_get_msix_tblhdl() returns either the fma-protected access
1374  *      handle or the regular ddi-access handle to access the MSI-X tables.
1375  *      Whether or not which handle is returned at the moment depends on
1376  *      the Hermon FM state.
1377  *
1378  *  Argument
1379  *      state: pointer to Hermon state structure
1380  *
1381  *  Return value
1382  *  	the access handle to MSI-X tables
1383  *
1384  *  Caller's context
1385  *      hermon_get_msix_tblhdl() can be called in user, kernel, interrupt
1386  *      context or high interrupt context.
1387  */
1388 ddi_acc_handle_t
hermon_get_msix_tblhdl(hermon_state_t * state)1389 hermon_get_msix_tblhdl(hermon_state_t *state)
1390 {
1391 	return (state->hs_fm_disable || hermon_get_state(state) & HCA_ATTCH_FM ?
1392 	    state->hs_fm_msix_tblhdl : state->hs_reg_msix_tblhdl);
1393 }
1394 
1395 
1396 /*
1397  *  ddi_acc_handle_t
1398  *  hermon_get_msix_pbahdl(hermon_state_t *state)
1399  *
1400  *  Overview
1401  *      hermon_get_msix_pbahdl() returns either the fma-protected access
1402  *      handle or the regular ddi-access handle to access the MSI-X PBA.
1403  *      Whether or not which handle is returned at the moment depends on
1404  *      the Hermon FM state.
1405  *
1406  *  Argument
1407  *      state: pointer to Hermon state structure
1408  *
1409  *  Return value
1410  *  	the access handle to MSI-X PBA
1411  *
1412  *  Caller's context
1413  *      hermon_get_msix_pbahdl() can be called in user, kernel, interrupt
1414  *      context or high interrupt context.
1415  */
1416 ddi_acc_handle_t
hermon_get_msix_pbahdl(hermon_state_t * state)1417 hermon_get_msix_pbahdl(hermon_state_t *state)
1418 {
1419 	return (state->hs_fm_disable || hermon_get_state(state) & HCA_ATTCH_FM ?
1420 	    state->hs_fm_msix_pbahdl : state->hs_reg_msix_pbahdl);
1421 }
1422 
1423 
1424 /*
1425  *  void
1426  *  hermon_inter_err_chk(void *arg)
1427  *
1428  *  Overview
1429  *      hermon_inter_err_chk() periodically checks the internal error buffer
1430  *      to pick up a Hermon asynchronous internal error.
1431  *
1432  *      Note that this internal error can be notified if the interrupt is
1433  *      registered, but even so there are some cases that an interrupt against
1434  *      it cannot be raised so that Hermon RPM recommeds to poll this internal
1435  *      error buffer periodically instead. This function is invoked at
1436  *      10ms interval in kernel context though the function itself can be
1437  *      called in interrupt context.
1438  *
1439  *  Argument
1440  *      arg: pointer to Hermon state structure
1441  *
1442  *  Return value
1443  *  	Nothing
1444  *
1445  *  Caller's context
1446  *      hermon_inter_err_chk() can be called in user, kernel, interrupt
1447  *      context or high interrupt context.
1448  *
1449  */
1450 void
hermon_inter_err_chk(void * arg)1451 hermon_inter_err_chk(void *arg)
1452 {
1453 	uint32_t	word;
1454 	ddi_acc_handle_t cmdhdl;
1455 	hermon_state_t *state = (hermon_state_t *)arg;
1456 
1457 	/* initialize the FMA retry loop */
1458 	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
1459 
1460 #ifdef FMA_TEST
1461 	if (hermon_test_num != 0) {
1462 		return;
1463 	}
1464 #endif
1465 	if (state->hs_fm_poll_suspend) {
1466 		return;
1467 	}
1468 
1469 	/* Get the access handle for Hermon CMD I/O space */
1470 	cmdhdl = hermon_get_cmdhdl(state);
1471 
1472 	/* the FMA retry loop starts. */
1473 	hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
1474 	    fm_test);
1475 
1476 	word = ddi_get32(cmdhdl, state->hs_cmd_regs.fw_err_buf);
1477 
1478 	/* the FMA retry loop ends. */
1479 	hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
1480 	    fm_test);
1481 
1482 	if (word != 0) {
1483 		HERMON_FMANOTE(state, HERMON_FMA_INTERNAL);
1484 		/* if fm_disable is on, Hermon FM functions don't work */
1485 		if (state->hs_fm_disable) {
1486 			cmn_err(CE_PANIC,
1487 			    "Hermon Fatal Internal Error. "
1488 			    "Hermon state=0x%p", (void *)state);
1489 		} else {
1490 			hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_FATAL);
1491 		}
1492 	}
1493 
1494 	/* issue the ereport pended in the interrupt context */
1495 	if (state->hs_fm_async_errcnt > 0) {
1496 		hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_FATAL);
1497 		atomic_dec_32(&state->hs_fm_async_errcnt);
1498 	}
1499 
1500 	return;
1501 
1502 pio_error:
1503 	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_FATAL);
1504 }
1505 
1506 
1507 /*
1508  *  boolean_t
1509  *  hermon_cmd_retry_ok(hermon_cmd_post_t *cmd, int status)
1510  *
1511  *  Overview
1512  *  	In the case that a HW error is detected, if it can be isolated
1513  *  	enough, Hermon FM retries the operation which caused the error.
1514  *  	However, this retry can induce another error; since the retry is
1515  *  	achieved as a block basis, not a statement basis, once the state
1516  *  	was set inside the Hermon HW already in the previous operation, the
1517  *  	retry can cause for example, a CMD_BAD_SYS_STATE error, as a result.
1518  *  	In this case, CMD_BAD_SYS_STATE should be taken as a side effect
1519  *  	but a harmless result. hermon_cmd_retry_ok() checks this kind of
1520  *  	situation then returns if the state Hermon CMD returns is OK or not.
1521  *
1522  *  Argument
1523  *      cmd: pointer to hermon_cmd_post_t structure
1524  *      status: Hermon CMD status
1525  *
1526  *  Return value
1527  *  	B_TRUE		this state is no problem
1528  *  	B_FALSE		this state should be taken as an error
1529  *
1530  *  Caller's context
1531  *      hermon_cmd_retry_ok() can be called in user, kernel, interrupt
1532  *      context or high interrupt context.
1533  *
1534  *  Note that status except for HERMON_CMD_SUCCESS shouldn't be accepted
1535  *  in the debug module to catch a hidden software bug, so that ASSERT()
1536  *  is enabled in the case.
1537  */
1538 boolean_t
hermon_cmd_retry_ok(hermon_cmd_post_t * cmd,int status)1539 hermon_cmd_retry_ok(hermon_cmd_post_t *cmd, int status)
1540 {
1541 	if (status == HERMON_CMD_SUCCESS)
1542 		return (B_TRUE);
1543 
1544 	/*
1545 	 * The wrong status such as HERMON_CMD_BAD_SYS_STATE or
1546 	 * HERMON_CMD_BAD_RES_STATE can return as a side effect
1547 	 * because of the Hermon FM operation retry when a PIO
1548 	 * error is detected during the I/O transaction. In the
1549 	 * case, the driver may set the same value in Hermon
1550 	 * though it was set already, then Hermon returns HERMON_
1551 	 * CMD_BAD_{RES,SYS}_STATE as a result, which should be
1552 	 * taken as OK.
1553 	 */
1554 	switch (cmd->cp_opcode) {
1555 	case INIT_HCA:
1556 		/*
1557 		 * HERMON_CMD_BAD_SYS_STATE can be gotten in case of
1558 		 * ICM not mapped or HCA already initialized.
1559 		 */
1560 		if (status == HERMON_CMD_BAD_SYS_STATE)
1561 			return (B_TRUE);
1562 		return (B_FALSE);
1563 
1564 	case CLOSE_HCA:
1565 		/*
1566 		 * HERMON_CMD_BAD_SYS_STATE can be gotten in case of Firmware
1567 		 * area is not mapped or HCA already closed.
1568 		 */
1569 		if (status == HERMON_CMD_BAD_SYS_STATE)
1570 			return (B_TRUE);
1571 		return (B_FALSE);
1572 
1573 	case CLOSE_PORT:
1574 		/*
1575 		 * HERMON_CMD_BAD_SYS_STATE can be gotten in case of HCA not
1576 		 * initialized or in case that IB ports are already down.
1577 		 */
1578 		if (status == HERMON_CMD_BAD_SYS_STATE)
1579 			return (B_TRUE);
1580 		return (B_FALSE);
1581 
1582 	case SW2HW_MPT:
1583 		/*
1584 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of MPT
1585 		 * entry already in hardware ownership.
1586 		 */
1587 		if (status == HERMON_CMD_BAD_RES_STATE)
1588 			return (B_TRUE);
1589 		return (B_FALSE);
1590 
1591 	case HW2SW_MPT:
1592 		/*
1593 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of MPT
1594 		 * entry already in software ownership.
1595 		 */
1596 		if (status == HERMON_CMD_BAD_RES_STATE)
1597 			return (B_TRUE);
1598 		return (B_FALSE);
1599 
1600 	case SW2HW_EQ:
1601 		/*
1602 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of EQ
1603 		 * entry already in hardware ownership.
1604 		 */
1605 		if (status == HERMON_CMD_BAD_RES_STATE)
1606 			return (B_TRUE);
1607 		return (B_FALSE);
1608 
1609 	case HW2SW_EQ:
1610 		/*
1611 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of EQ
1612 		 * entry already in software ownership.
1613 		 */
1614 		if (status == HERMON_CMD_BAD_RES_STATE)
1615 			return (B_TRUE);
1616 		return (B_FALSE);
1617 
1618 	case SW2HW_CQ:
1619 		/*
1620 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of CQ
1621 		 * entry already in hardware ownership.
1622 		 */
1623 		if (status == HERMON_CMD_BAD_RES_STATE)
1624 			return (B_TRUE);
1625 		return (B_FALSE);
1626 
1627 	case HW2SW_CQ:
1628 		/*
1629 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of CQ
1630 		 * entry already in software ownership.
1631 		 */
1632 		if (status == HERMON_CMD_BAD_RES_STATE)
1633 			return (B_TRUE);
1634 		return (B_FALSE);
1635 
1636 	case SW2HW_SRQ:
1637 		/*
1638 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of SRQ
1639 		 * entry already in hardware ownership.
1640 		 */
1641 		if (status == HERMON_CMD_BAD_RES_STATE)
1642 			return (B_TRUE);
1643 		return (B_FALSE);
1644 
1645 	case HW2SW_SRQ:
1646 		/*
1647 		 * HERMON_CMD_BAD_RES_STATE can be gotten in case of SRQ
1648 		 * entry already in software ownership.
1649 		 */
1650 		if (status == HERMON_CMD_BAD_RES_STATE)
1651 			return (B_TRUE);
1652 		return (B_FALSE);
1653 	default:
1654 		break;
1655 	}
1656 
1657 	/* other cases */
1658 	return (B_FALSE);
1659 }
1660 
1661 
1662 #ifdef FMA_TEST
1663 
1664 /*
1665  * Hermon FMA test variables
1666  */
1667 #define	FMA_TEST_HASHSZ	64
1668 int hermon_test_num;			/* predefined testset */
1669 
1670 static struct i_hca_fm_test *i_hca_test_register(char *, int, int,
1671     void (*)(struct i_hca_fm_test *, ddi_fm_error_t *),
1672     void *, mod_hash_t *, mod_hash_t *, int);
1673 static void i_hca_test_free_item(mod_hash_val_t);
1674 static void i_hca_test_set_item(int, struct i_hca_fm_test *);
1675 static void hermon_trigger_pio_error(hermon_test_t *, ddi_fm_error_t *);
1676 
1677 /*
1678  * Hermon FMA Function Test Interface
1679  */
1680 
1681 /* Attach Errors */
1682 
1683 #define	ATTACH_TS	(HCA_TEST_TRANSIENT | HCA_TEST_ATTACH | HCA_TEST_START)
1684 #define	ATTACH_TE	(HCA_TEST_TRANSIENT | HCA_TEST_ATTACH | HCA_TEST_END)
1685 
1686 #define	ATTACH_PS	(HCA_TEST_PERSISTENT | HCA_TEST_ATTACH | HCA_TEST_START)
1687 #define	ATTACH_PE	(HCA_TEST_PERSISTENT | HCA_TEST_ATTACH | HCA_TEST_END)
1688 
1689 static hermon_test_t testset[] = {
1690 /* Initial Value */
1691 {0, 0, 0, NULL, 0, 0, NULL, NULL, NULL},	/* 0 */
1692 
1693 /* PIO Transient Errors */
1694 {0, HCA_TEST_PIO, ATTACH_TS, NULL, /* attach/transient/start/propagate */
1695     HCA_PIO_RETRY_CNT, 0, NULL, NULL, NULL},	/* 1 */
1696 {0, HCA_TEST_PIO, ATTACH_TE, NULL, /* attach/transient/end/propagate */
1697     HCA_PIO_RETRY_CNT, 0, NULL, NULL, NULL},	/* 2 */
1698 
1699 /* PIO Persistent Errors */
1700 {0, HCA_TEST_PIO, ATTACH_PS, NULL, /* attach/persistent/start/propagate */
1701     0, 0, NULL, NULL, NULL},			/* 3 */
1702 {0, HCA_TEST_PIO, ATTACH_PE, NULL, /* attach/persistent/end/propagate */
1703     0, 0, NULL, NULL, NULL},			/* 4 */
1704 
1705 };
1706 
1707 
1708 /*
1709  *  void
1710  *  hermon_trigger_pio_error(hermon_test_t *tst, ddi_fm_error_t *derr)
1711  *
1712  *  Overview
1713  *      hermon_trigger_pio_error() is a PIO error injection function
1714  *      to cause a pseudo PIO error.
1715  *
1716  *  Argument
1717  *      tst: pointer to HCA FM function test structure. If the structure
1718  *           is not used, the NULL value must be passed instead.
1719  *      derr: pointer to ddi_fm_error_t structure
1720  *
1721  *  Return value
1722  *      Nothing
1723  *
1724  *  Caller's context
1725  *      hermon_trigger_pio_error() can be called in user, kernel, interrupt
1726  *      context or high interrupt context.
1727  */
1728 static void
hermon_trigger_pio_error(hermon_test_t * tst,ddi_fm_error_t * derr)1729 hermon_trigger_pio_error(hermon_test_t *tst, ddi_fm_error_t *derr)
1730 {
1731 	hermon_state_t *state = (hermon_state_t *)tst->private;
1732 	derr->fme_status = DDI_FM_OK;
1733 
1734 	if (tst->type != HCA_TEST_PIO) {
1735 		return;
1736 	}
1737 
1738 	if ((tst->trigger & HCA_TEST_ATTACH &&
1739 	    i_ddi_node_state(state->hs_dip) < DS_ATTACHED &&
1740 	    hermon_get_state(state) & HCA_PIO_FM)) {
1741 		if (tst->trigger & HCA_TEST_PERSISTENT) {
1742 			i_hca_fm_ereport(state->hs_dip, HCA_IBA_ERR,
1743 			    DDI_FM_DEVICE_INVAL_STATE);
1744 			derr->fme_status = DDI_FM_NONFATAL;
1745 			return;
1746 		} else if (tst->trigger & HCA_TEST_TRANSIENT &&
1747 		    tst->errcnt) {
1748 			i_hca_fm_ereport(state->hs_dip, HCA_IBA_ERR,
1749 			    DDI_FM_DEVICE_INVAL_STATE);
1750 			derr->fme_status = DDI_FM_NONFATAL;
1751 			tst->errcnt--;
1752 			return;
1753 		}
1754 	}
1755 }
1756 
1757 
1758 /*
1759  *  struct hermon_fm_test *
1760  *  hermon_test_register(hermon_state_t *state, char *filename, int linenum,
1761  *      int type)
1762  *
1763  *  Overview
1764  *      hermon_test_register() registers a Hermon FM test item for the
1765  *      function test.
1766  *
1767  *  Argument
1768  *      state: pointer to Hermon state structure
1769  *  	filename: source file name where the function call is implemented
1770  *		  This value is usually a __FILE__  pre-defined macro.
1771  *  	linenum: line number where the function call is described in the
1772  *		 file specified above.
1773  *		 This value is usually a __LINE__ pre-defined macro.
1774  *	type: HW error type
1775  *			HCA_TEST_PIO	pio error
1776  *			HCA_TEST_IBA	ib specific error
1777  *
1778  *  Return value
1779  *      pointer to Hermon FM function test structure registered.
1780  *
1781  *  Caller's context
1782  *      hermon_test_register() can be called in user, kernel or interrupt
1783  *      context.
1784  *
1785  *  Note that no test item is registered if Hermon FM is disabled.
1786  */
1787 hermon_test_t *
hermon_test_register(hermon_state_t * state,char * filename,int linenum,int type)1788 hermon_test_register(hermon_state_t *state, char *filename, int linenum,
1789     int type)
1790 {
1791 	void (*pio_injection)(struct i_hca_fm_test *, ddi_fm_error_t *) =
1792 	    (void (*)(struct i_hca_fm_test *, ddi_fm_error_t *))
1793 	    hermon_trigger_pio_error;
1794 
1795 	if (state->hs_fm_disable)
1796 		return (NULL);
1797 
1798 	return ((hermon_test_t *)i_hca_test_register(filename, linenum, type,
1799 	    pio_injection, (void *)state, state->hs_fm_test_hash,
1800 	    state->hs_fm_id_hash, hermon_test_num));
1801 }
1802 #endif /* FMA_TEST */
1803 
1804 
1805 /*
1806  * HCA FM Common Interface
1807  *
1808  * These functions should be used for any HCA drivers, but probably
1809  * customized for their own HW design and/or FM implementation.
1810  * Customized functions should have the driver name prefix such as
1811  * hermon_xxxx() and be defined separately but whose functions should
1812  * call the common interface inside.
1813  */
1814 
1815 /*
1816  *  void
1817  *  i_hca_fm_init(struct i_hca_fm *hca_fm)
1818  *
1819  *  Overview
1820  *      i_hca_fm_init() is an initialization function which sets up the acc
1821  *      handle kmem_cache if this function is called the first time.
1822  *
1823  *  Argument
1824  *      hca_fm: pointer to HCA FM structure
1825  *
1826  *  Return value
1827  *      Nothing
1828  *
1829  *  Caller's context
1830  *      i_hca_fm_init() can be called in user or kernel context, but cannot
1831  *      be called in interrupt context.
1832  */
1833 static void
i_hca_fm_init(struct i_hca_fm * hca_fm)1834 i_hca_fm_init(struct i_hca_fm *hca_fm)
1835 {
1836 
1837 	mutex_enter(&hca_fm->lock);
1838 
1839 	++hca_fm->ref_cnt;
1840 	if (hca_fm->fm_acc_cache == NULL) {
1841 		hca_fm->fm_acc_cache = kmem_cache_create("hca_fm_acc_handle",
1842 		    sizeof (struct i_hca_acc_handle), 0, NULL,
1843 		    NULL, NULL, NULL, NULL, 0);
1844 	}
1845 
1846 	mutex_exit(&hca_fm->lock);
1847 }
1848 
1849 
1850 /*
1851  *  void
1852  *  i_hca_fm_fini(struct i_hca_fm *hca_fm)
1853  *
1854  *  Overview
1855  *      i_hca_fm_fini() is a finalization function which frees up the acc
1856  *      handle kmem_cache if this function is called the last time.
1857  *
1858  *  Argument
1859  *      hca_fm: pointer to HCA FM structure
1860  *
1861  *  Return value
1862  *      Nothing
1863  *
1864  *  Caller's context
1865  *      i_hca_fm_fini() can be called in user or kernel context, but cannot
1866  *      be called in interrupt context.
1867  */
1868 static void
i_hca_fm_fini(struct i_hca_fm * hca_fm)1869 i_hca_fm_fini(struct i_hca_fm *hca_fm)
1870 {
1871 	mutex_enter(&hca_fm->lock);
1872 
1873 	if (--hca_fm->ref_cnt == 0) {
1874 
1875 		if (hca_fm->fm_acc_cache) {
1876 			kmem_cache_destroy(hca_fm->fm_acc_cache);
1877 			hca_fm->fm_acc_cache = NULL;
1878 		}
1879 	}
1880 
1881 	mutex_exit(&hca_fm->lock);
1882 }
1883 
1884 
1885 /*
1886  *  void
1887  *  i_hca_fm_ereport(dev_info_t *dip, int type, char *detail)
1888  *
1889  *  Overview
1890  *      i_hca_fm_ereport() is a wrapper function of ddi_fm_ereport_post() but
1891  *      generates an ena before it calls ddi_fm_ereport_post() for HCA
1892  *      specific HW errors.
1893  *
1894  *  Argument
1895  *      dip: pointer to this device dev_info structure
1896  *      type: error type
1897  *		HCA_SYS_ERR	FMA reporting HW error
1898  *		HCA_IBA_ERR	HCA specific HW error
1899  *      detail: definition of leaf driver detected ereports which is one of:
1900  *      	DDI_FM_DEVICE_INVAL_STATE
1901  *		DDI_FM_DEVICE_NO_RESPONSE
1902  *		DDI_FM_DEVICE_STALL
1903  *		DDI_FM_DEVICE_BADINT_LIMIT
1904  *		DDI_FM_DEVICE_INTERN_CORR
1905  *		DDI_FM_DEVICE_INTERN_UNCORR
1906  *
1907  *  Return value
1908  *      Nothing
1909  *
1910  *  Caller's context
1911  *      i_hca_fm_ereport() can be called in user, kernel or interrupt context.
1912  */
1913 static void
i_hca_fm_ereport(dev_info_t * dip,int type,char * detail)1914 i_hca_fm_ereport(dev_info_t *dip, int type, char *detail)
1915 {
1916 	uint64_t ena;
1917 	char buf[FM_MAX_CLASS];
1918 
1919 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
1920 
1921 	ena = fm_ena_generate(0, FM_ENA_FMT1);
1922 	if (type == HCA_IBA_ERR) {
1923 		/* this is an error of its own */
1924 		ena = fm_ena_increment(ena);
1925 	}
1926 
1927 	ddi_fm_ereport_post(dip, buf, ena, DDI_NOSLEEP,
1928 	    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
1929 }
1930 
1931 
1932 /*
1933  * struct i_hca_acc_handle *
1934  * i_hca_get_acc_handle(struct i_hca_fm *hca_fm, ddi_acc_handle_t handle)
1935  *
1936  *  Overview
1937  *      i_hca_get_acc_handle() returns ddi_acc_handle_t used for HCA FM.
1938  *
1939  *  Argument
1940  *      hca_fm: pointer to HCA FM structure
1941  *      handle: ddi_acc_handle_t
1942  *
1943  *  Return value
1944  *	handle: pointer to ddi_acc_handle_t used for HCA FM
1945  *
1946  *  Caller's context
1947  *      i_hca_get_acc_handle() can be called in user, kernel or interrupt
1948  *      context.
1949  */
1950 static struct i_hca_acc_handle *
i_hca_get_acc_handle(struct i_hca_fm * hca_fm,ddi_acc_handle_t handle)1951 i_hca_get_acc_handle(struct i_hca_fm *hca_fm, ddi_acc_handle_t handle)
1952 {
1953 	struct i_hca_acc_handle *hdlp;
1954 
1955 	/* Retrieve the HCA FM access handle */
1956 	mutex_enter(&hca_fm->lock);
1957 
1958 	for (hdlp = hca_fm->hdl; hdlp != NULL; hdlp = hdlp->next) {
1959 		if (hdlp->save_hdl == handle) {
1960 			mutex_exit(&hca_fm->lock);
1961 			return (hdlp);
1962 		}
1963 	}
1964 
1965 	mutex_exit(&hca_fm->lock);
1966 	return (hdlp);
1967 }
1968 
1969 
1970 /*
1971  *  int
1972  *  i_hca_regs_map_setup(struct i_hca_fm *hca_fm, dev_info_t *dip,
1973  *      uint_t rnumber, caddr_t *addrp, offset_t offset, offset_t len,
1974  *      ddi_device_acc_attr_t *accattrp, ddi_acc_handle_t *handle)
1975  *
1976  *  Overview
1977  *      i_hca_regs_map_setup() is a wrapper function of ddi_regs_map_setup(),
1978  *      but allocates the HCA FM acc handle structure and initializes it.
1979  *
1980  *  Argument
1981  *      hca_fm: pointer to HCA FM structure
1982  *      dip: pointer to this device dev_info structure
1983  *      rnumber: index number to the register address space set
1984  *      addrp: platform-dependent value (same as ddi_regs_map_setup())
1985  *      offset: offset into the register address space
1986  *      len: address space length to be mapped
1987  *      accattrp: pointer to device access attribute structure
1988  *	handle: pointer to ddi_acc_handle_t used for HCA FM
1989  *
1990  *  Return value
1991  *      ddi function status value which are:
1992  *      	DDI_SUCCESS
1993  *      	DDI_FAILURE
1994  *      	DDI_ME_RNUMBER_RNGE
1995  *      	DDI_REGS_ACC_CONFLICT
1996  *
1997  *  Caller's context
1998  *      i_hca_regs_map_setup() can be called in user or kernel context only.
1999  */
2000 static int
i_hca_regs_map_setup(struct i_hca_fm * hca_fm,dev_info_t * dip,uint_t rnumber,caddr_t * addrp,offset_t offset,offset_t len,ddi_device_acc_attr_t * accattrp,ddi_acc_handle_t * handle)2001 i_hca_regs_map_setup(struct i_hca_fm *hca_fm, dev_info_t *dip, uint_t rnumber,
2002     caddr_t *addrp, offset_t offset, offset_t len,
2003     ddi_device_acc_attr_t *accattrp, ddi_acc_handle_t *handle)
2004 {
2005 	int status;
2006 	struct i_hca_acc_handle *handlep, *hdlp, *last;
2007 
2008 	/* Allocate an access handle */
2009 	if ((status = ddi_regs_map_setup(dip, rnumber, addrp, offset,
2010 	    len, accattrp, handle)) != DDI_SUCCESS) {
2011 		return (status);
2012 	}
2013 
2014 	/* Allocate HCA FM acc handle structure */
2015 	handlep = kmem_cache_alloc(hca_fm->fm_acc_cache, KM_SLEEP);
2016 
2017 	/* Initialize fields */
2018 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*handlep))
2019 	handlep->next = NULL;
2020 	handlep->save_hdl = (*handle);
2021 	handlep->thread_cnt = 0;
2022 	mutex_init(&handlep->lock, NULL, MUTEX_DRIVER, NULL);
2023 
2024 	/* Register this handle */
2025 	mutex_enter(&hca_fm->lock);
2026 	for (last = hdlp = hca_fm->hdl; hdlp != NULL; hdlp = hdlp->next) {
2027 		last = hdlp;
2028 	}
2029 	if (last == NULL) {
2030 		hca_fm->hdl = handlep;
2031 	} else {
2032 		last->next = handlep;
2033 	}
2034 	mutex_exit(&hca_fm->lock);
2035 
2036 	return (status);
2037 }
2038 
2039 
2040 /*
2041  *  void
2042  *  i_hca_regs_map_free(struct i_hca_fm *hca_fm, ddi_acc_handle_t *handlep)
2043  *
2044  *  Overview
2045  *      i_hca_regs_map_setup() is a wrapper function of ddi_regs_map_free(),
2046  *      and frees the HCA FM acc handle structure allocated by
2047  *      i_hca_regs_map_setup().
2048  *
2049  *  Argument
2050  *      hca_fm: pointer to HCA FM structure
2051  *	handle: pointer to ddi_acc_handle_t used for HCA FM
2052  *
2053  *  Return value
2054  *      Nothing
2055  *
2056  *  Caller's context
2057  *      i_hca_regs_map_free() can be called in user or kernel context only.
2058  *
2059  *  Note that the handle passed to i_hca_regs_map_free() is NULL-cleared
2060  *  after this function is called.
2061  */
2062 static void
i_hca_regs_map_free(struct i_hca_fm * hca_fm,ddi_acc_handle_t * handle)2063 i_hca_regs_map_free(struct i_hca_fm *hca_fm, ddi_acc_handle_t *handle)
2064 {
2065 	struct i_hca_acc_handle *handlep, *hdlp, *prev;
2066 
2067 	/* De-register this handle */
2068 	mutex_enter(&hca_fm->lock);
2069 	for (prev = hdlp = hca_fm->hdl; hdlp != NULL; hdlp = hdlp->next) {
2070 		if (hdlp->save_hdl == *handle)
2071 			break;
2072 		prev = hdlp;
2073 	}
2074 	ASSERT(prev != NULL && hdlp != NULL);
2075 	if (hdlp != prev) {
2076 		prev->next = hdlp->next;
2077 	} else {
2078 		hca_fm->hdl = hdlp->next;
2079 	}
2080 	handlep = hdlp;
2081 	mutex_exit(&hca_fm->lock);
2082 
2083 	mutex_destroy(&handlep->lock);
2084 	handlep->save_hdl = NULL;
2085 	kmem_cache_free(hca_fm->fm_acc_cache, handlep);
2086 
2087 	/* Release this handle */
2088 	ddi_regs_map_free(handle);
2089 	*handle = NULL;
2090 }
2091 
2092 
2093 /*
2094  *  int
2095  *  i_hca_pci_config_setup(struct i_hca_fm *hca_fm, dev_info_t *dip,
2096  *      ddi_acc_handle_t *handle, boolean_t fm_protect)
2097  *
2098  *  Overview
2099  *      i_hca_pci_config_setup() is a wrapper function of pci_config_setup(),
2100  *      but allocates the HCA FM acc handle structure and initializes it.
2101  *
2102  *  Argument
2103  *      hca_fm: pointer to HCA FM structure
2104  *      dip: pointer to this device dev_info structure
2105  *	handle: pointer to ddi_acc_handle_t used for HCA PCI config space
2106  *		with FMA
2107  *	fm_protect: flag to tell if an fma-protected access handle should
2108  *		be used
2109  *
2110  *  Return value
2111  *      ddi function status value which are:
2112  *      	DDI_SUCCESS
2113  *      	DDI_FAILURE
2114  *
2115  *  Caller's context
2116  *      i_hca_pci_config_setup() can be called in user or kernel context only.
2117  */
2118 static int
i_hca_pci_config_setup(struct i_hca_fm * hca_fm,dev_info_t * dip,ddi_acc_handle_t * handle)2119 i_hca_pci_config_setup(struct i_hca_fm *hca_fm, dev_info_t *dip,
2120     ddi_acc_handle_t *handle)
2121 {
2122 	int status;
2123 	struct i_hca_acc_handle *handlep, *hdlp, *last;
2124 
2125 	/* Allocate an access handle */
2126 	if ((status = pci_config_setup(dip, handle)) != DDI_SUCCESS) {
2127 		return (status);
2128 	}
2129 
2130 	/* Allocate HCA FM acc handle structure */
2131 	handlep = kmem_cache_alloc(hca_fm->fm_acc_cache, KM_SLEEP);
2132 
2133 	/* Initialize fields */
2134 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*handlep))
2135 	handlep->next = NULL;
2136 	handlep->save_hdl = (*handle);
2137 	handlep->thread_cnt = 0;
2138 	mutex_init(&handlep->lock, NULL, MUTEX_DRIVER, NULL);
2139 
2140 	/* Register this handle */
2141 	mutex_enter(&hca_fm->lock);
2142 	for (last = hdlp = hca_fm->hdl; hdlp != NULL; hdlp = hdlp->next) {
2143 		last = hdlp;
2144 	}
2145 	if (last == NULL) {
2146 		hca_fm->hdl = handlep;
2147 	} else {
2148 		last->next = handlep;
2149 	}
2150 	mutex_exit(&hca_fm->lock);
2151 
2152 	return (status);
2153 }
2154 
2155 
2156 /*
2157  *  void
2158  *  i_hca_pci_config_teardown(struct i_hca_fm *hca_fm,
2159  *      ddi_acc_handle_t *handlep)
2160  *
2161  *  Overview
2162  *      i_hca_pci_config_teardown() is a wrapper function of
2163  *      pci_config_teardown(), and frees the HCA FM acc handle structure
2164  *      allocated by i_hca_pci_config_setup().
2165  *
2166  *  Argument
2167  *      hca_fm: pointer to HCA FM structure
2168  *	handle: pointer to ddi_acc_handle_t used for HCA FM
2169  *
2170  *  Return value
2171  *      Nothing
2172  *
2173  *  Caller's context
2174  *      i_hca_pci_config_teardown() can be called in user or kernel context
2175  *      only.
2176  *
2177  *  Note that the handle passed to i_hca_pci_config_teardown() is NULL-cleared
2178  *  after this function is called.
2179  */
2180 static void
i_hca_pci_config_teardown(struct i_hca_fm * hca_fm,ddi_acc_handle_t * handle)2181 i_hca_pci_config_teardown(struct i_hca_fm *hca_fm, ddi_acc_handle_t *handle)
2182 {
2183 	struct i_hca_acc_handle *handlep, *hdlp, *prev;
2184 
2185 	/* De-register this handle */
2186 	mutex_enter(&hca_fm->lock);
2187 	for (prev = hdlp = hca_fm->hdl; hdlp != NULL; hdlp = hdlp->next) {
2188 		if (hdlp->save_hdl == *handle)
2189 			break;
2190 		prev = hdlp;
2191 	}
2192 	ASSERT(prev != NULL && hdlp != NULL);
2193 	if (hdlp != prev) {
2194 		prev->next = hdlp->next;
2195 	} else {
2196 		hca_fm->hdl = hdlp->next;
2197 	}
2198 	handlep = hdlp;
2199 	mutex_exit(&hca_fm->lock);
2200 
2201 	mutex_destroy(&handlep->lock);
2202 	handlep->save_hdl = NULL;
2203 	kmem_cache_free(hca_fm->fm_acc_cache, handlep);
2204 
2205 	/* Release this handle */
2206 	pci_config_teardown(handle);
2207 	*handle = NULL;
2208 }
2209 
2210 
2211 /*
2212  *  int
2213  *  i_hca_pio_start(dev_info_t *dip, struct i_acc_handle *handle,
2214  *      struct i_hca_fm_test *tst)
2215  *
2216  *  Overview
2217  *      i_hca_pio_start() is one of a pair of HCA FM fuctions for PIO, which
2218  *      should be called before HCA drivers issue PIOs against I/O space.
2219  *      See HCA FM comments at the beginning of this file in detail.
2220  *
2221  *  Argument
2222  *      dip: pointer to this device dev_info structure
2223  *	handle: pointer to ddi_acc_handle_t used for HCA FM
2224  *      tst: pointer to HCA FM function test structure. If the structure
2225  *           is not used, the NULL value must be passed instead.
2226  *
2227  *  Return value
2228  *  	error status showing whether or not this error can retry
2229  *	HCA_PIO_OK		No HW errors
2230  *	HCA_PIO_TRANSIENT	This error could be transient
2231  *	HCA_PIO_PERSISTENT	This error is persistent
2232  *
2233  *  Caller's context
2234  *      i_hca_pio_start() can be called in user, kernel or interrupt context.
2235  */
2236 /* ARGSUSED */
2237 static int
i_hca_pio_start(dev_info_t * dip,struct i_hca_acc_handle * hdlp,struct i_hca_fm_test * tst)2238 i_hca_pio_start(dev_info_t *dip, struct i_hca_acc_handle *hdlp,
2239     struct i_hca_fm_test *tst)
2240 {
2241 	ddi_fm_error_t derr;
2242 
2243 	/* Count up the number of threads issuing this PIO */
2244 	mutex_enter(&hdlp->lock);
2245 	hdlp->thread_cnt++;
2246 	mutex_exit(&hdlp->lock);
2247 
2248 	/* Get the PIO error via FMA */
2249 	ddi_fm_acc_err_get(fm_acc_hdl(hdlp), &derr, DDI_FME_VERSION);
2250 
2251 #ifdef FMA_TEST
2252 	/* Trigger PIO errors */
2253 	if (tst != NULL && tst->trigger & HCA_TEST_START) {
2254 		(*tst->pio_injection)(tst, &derr);
2255 	}
2256 #endif /* FMA_TEST */
2257 
2258 	switch (derr.fme_status) {
2259 	case DDI_FM_OK:
2260 		/* Not have to clear the fma error log */
2261 		return (HCA_PIO_OK);
2262 
2263 	case DDI_FM_NONFATAL:
2264 		/* Now clear this error */
2265 		ddi_fm_acc_err_clear(fm_acc_hdl(hdlp), DDI_FME_VERSION);
2266 
2267 		/* Log this error and notify it as a persistent error */
2268 		ddi_fm_service_impact(dip, DDI_SERVICE_LOST);
2269 		return (HCA_PIO_PERSISTENT);
2270 
2271 	/* In theory, this shouldn't happen */
2272 	case DDI_FM_FATAL:
2273 	case DDI_FM_UNKNOWN:
2274 	default:
2275 		cmn_err(CE_WARN, "Unknown HCA HW error status (%d)",
2276 		    derr.fme_status);
2277 		/* Return this as a persistent error */
2278 		return (HCA_PIO_PERSISTENT);
2279 	}
2280 }
2281 
2282 
2283 /*
2284  *  int
2285  *  i_hca_pio_end(dev_info_t *dip, ddi_acc_handle_t handle, int *cnt,
2286  *      struct i_hca_fm_test *tst)
2287  *
2288  *  Overview
2289  *      i_hca_pio_end() is the other of a pair of HCA FM fuctions for PIO,
2290  *      which should be called after HCA drivers issue PIOs against I/O space.
2291  *      See HCA FM comments at the beginning of this file in detail.
2292  *
2293  *  Argument
2294  *      dip: pointer to this device dev_info structure
2295  *	handle: pointer to ddi_acc_handle_t used for HCA FM
2296  *	cnt: pointer to the counter variable which holds the nubmer of retry
2297  *	     when a HW error is detected.
2298  *      tst: pointer to HCA FM function test structure. If the structure
2299  *           is not used, the NULL value must be passed instead.
2300  *
2301  *  Return value
2302  *  	error status showing whether or not this error can retry
2303  *	HCA_PIO_OK		No HW errors
2304  *	HCA_PIO_TRANSIENT	This error could be transient
2305  *	HCA_PIO_PERSISTENT	This error is persistent
2306  *
2307  *  Caller's context
2308  *      i_hca_pio_end() can be called in user, kernel or interrupt context.
2309  */
2310 /* ARGSUSED */
2311 static int
i_hca_pio_end(dev_info_t * dip,struct i_hca_acc_handle * hdlp,int * cnt,struct i_hca_fm_test * tst)2312 i_hca_pio_end(dev_info_t *dip, struct i_hca_acc_handle *hdlp, int *cnt,
2313     struct i_hca_fm_test *tst)
2314 {
2315 	ddi_fm_error_t derr;
2316 
2317 	/* Get the PIO error via FMA */
2318 	ddi_fm_acc_err_get(fm_acc_hdl(hdlp), &derr, DDI_FME_VERSION);
2319 
2320 #ifdef FMA_TEST
2321 	/* Trigger PIO errors */
2322 	if (tst != NULL && tst->trigger & HCA_TEST_END) {
2323 		(*tst->pio_injection)(tst, &derr);
2324 	}
2325 #endif /* FMA_TEST */
2326 
2327 	/* Evaluate the PIO error */
2328 	switch (derr.fme_status) {
2329 	case DDI_FM_OK:
2330 		/* Count down the number of threads issuing this PIO */
2331 		mutex_enter(&hdlp->lock);
2332 		hdlp->thread_cnt--;
2333 		mutex_exit(&hdlp->lock);
2334 
2335 		/* Not have to clear the fma error log */
2336 		return (HCA_PIO_OK);
2337 
2338 	case DDI_FM_NONFATAL:
2339 		/* Now clear this error */
2340 		ddi_fm_acc_err_clear(fm_acc_hdl(hdlp), DDI_FME_VERSION);
2341 
2342 		/*
2343 		 * Check if this error comes from another thread running
2344 		 * with the same handle almost at the same time.
2345 		 */
2346 		mutex_enter(&hdlp->lock);
2347 		if (hdlp->thread_cnt > 1) {
2348 			/* Count down the number of threads */
2349 			hdlp->thread_cnt--;
2350 			mutex_exit(&hdlp->lock);
2351 
2352 			/* Return this as a persistent error */
2353 			return (HCA_PIO_PERSISTENT);
2354 		}
2355 		mutex_exit(&hdlp->lock);
2356 
2357 		/* Now determine if this error is persistent or not */
2358 		if (--(*cnt) >= 0)  {
2359 			return (HCA_PIO_TRANSIENT);
2360 		} else {
2361 			/* Count down the number of threads */
2362 			mutex_enter(&hdlp->lock);
2363 			hdlp->thread_cnt--;
2364 			mutex_exit(&hdlp->lock);
2365 			return (HCA_PIO_PERSISTENT);
2366 		}
2367 
2368 	/* In theory, this shouldn't happen */
2369 	case DDI_FM_FATAL:
2370 	case DDI_FM_UNKNOWN:
2371 	default:
2372 		cmn_err(CE_WARN, "Unknown HCA HW error status (%d)",
2373 		    derr.fme_status);
2374 		/* Return this as a persistent error */
2375 		return (HCA_PIO_PERSISTENT);
2376 	}
2377 }
2378 
2379 
/*
 * HCA FM Test Interface
 *
 * These functions can be used by any HCA driver, but will probably need
 * to be customized for the driver's own HW design and/or FM implementation.
 * Customized functions should carry the driver name prefix, such as
 * hermon_xxxx(), and be defined separately, but they should call the
 * common interface internally.
 */
2389 
2390 #ifdef FMA_TEST
static int test_num;		/* next serial number for a test item */
static kmutex_t i_hca_test_lock; 	/* lock protecting test_num */
2393 
2394 /*
2395  *  void
2396  *  i_hca_test_init(mod_hash_t **strHashp, mod_hash_t **idHashp)
2397  *
2398  *  Overview
2399  *      i_hca_test_init() creates two hash tables, one of which is for string,
2400  *      and the other of which is for ID, then saves pointers to arguments
2401  *      passed. This function uses the mod_hash utilities to manage the
2402  *      hash tables. About the mod_hash, see common/os/modhash.c.
2403  *
2404  *  Argument
2405  *      strHashp: pointer to String hash table pointer
2406  *      idHashp: pointer to ID hash table pointer
2407  *
2408  *  Return value
2409  *      Nothing
2410  *
2411  *  Caller's context
2412  *      i_hca_test_init() can be called in user or kernel context only.
2413  */
2414 static void
i_hca_test_init(mod_hash_t ** strHashp,mod_hash_t ** idHashp)2415 i_hca_test_init(mod_hash_t **strHashp, mod_hash_t **idHashp)
2416 {
2417 	*idHashp = mod_hash_create_idhash("HCA_FMA_id_hash",
2418 	    FMA_TEST_HASHSZ, mod_hash_null_valdtor);
2419 
2420 	*strHashp = mod_hash_create_strhash("HCA_FMA_test_hash",
2421 	    FMA_TEST_HASHSZ, i_hca_test_free_item);
2422 }
2423 
2424 
2425 /*
2426  *  void
2427  *  i_hca_test_fini(mod_hash_t **strHashp, mod_hash_t **idHashp)
2428  *
2429  *  Overview
2430  *      i_hca_test_fini() releases two hash tables used for HCA FM test.
2431  *
2432  *  Argument
2433  *      strHashp: pointer to String hash table pointer
2434  *      idHashp: pointer to ID hash table pointer
2435  *
2436  *  Return value
2437  *      Nothing
2438  *
2439  *  Caller's context
2440  *      i_hca_test_fini() can be called in user, kernel or interrupt context.
2441  *
2442  */
2443 static void
i_hca_test_fini(mod_hash_t ** strHashp,mod_hash_t ** idHashp)2444 i_hca_test_fini(mod_hash_t **strHashp, mod_hash_t **idHashp)
2445 {
2446 	mod_hash_destroy_hash(*strHashp);
2447 	*strHashp = NULL;
2448 
2449 	mod_hash_destroy_hash(*idHashp);
2450 	*idHashp = NULL;
2451 }
2452 
2453 
2454 /*
2455  *  struct i_hca_fm_test *
2456  *  i_hca_test_register(char *filename, int linenum, int type,
2457  *      void (*pio_injection)(struct i_hca_fm_test *, ddi_fm_error_t *),
2458  *      void *private, mod_hash_t *strHash, mod_hash_t *idHash, int preTestNum)
2459  *
2460  *  Overview
2461  *      i_hca_test_register() registers an HCA FM test item against HCA FM
2462  *      function callings specified with the file name and the line number
2463  *      (passed as the arguments).
2464  *
2465  *  Argument
2466  *  	filename: source file name where the function call is implemented
2467  *		  This value is usually a __FILE__  pre-defined macro.
2468  *  	linenum: line number where the function call is described in the
2469  *		 file specified above.
2470  *		 This value is usually a __LINE__ pre-defined macro.
2471  *	type: HW error type
2472  *			HCA_TEST_PIO	pio error
2473  *			HCA_TEST_IBA	ib specific error
2474  *	pio_injection: pio error injection callback function invoked when the
2475  *		       function specified above (with the file name and the
2476  *		       line number) is executed. If the function is not a PIO,
2477  *		       request, this parameter should be NULL.
2478  *	private: the argument passed to either of injection functions when
2479  *		 they're invoked.
2480  *      strHashp: pointer to String hash table
2481  *      idHashp: pointer to ID hash table
2482  *      preTestNum: the index of the pre-defined testset for this test item.
2483  *
2484  *  Return value
2485  *      pointer to HCA FM function test structure registered.
2486  *
2487  *  Caller's context
2488  *      i_hca_test_register() can be called in user, kernel or interrupt
2489  *      context.
2490  *
2491  */
2492 static struct i_hca_fm_test *
i_hca_test_register(char * filename,int linenum,int type,void (* pio_injection)(struct i_hca_fm_test *,ddi_fm_error_t *),void * private,mod_hash_t * strHash,mod_hash_t * idHash,int preTestNum)2493 i_hca_test_register(char *filename, int linenum, int type,
2494     void (*pio_injection)(struct i_hca_fm_test *, ddi_fm_error_t *),
2495     void *private, mod_hash_t *strHash, mod_hash_t *idHash, int preTestNum)
2496 {
2497 	struct i_hca_fm_test *t_item;
2498 	char key_buf[255], *hash_key;
2499 	int status;
2500 
2501 	(void) sprintf(key_buf, "%s:%d", filename, linenum);
2502 	hash_key = kmem_zalloc(strlen(key_buf) + 1, KM_NOSLEEP);
2503 
2504 	if (hash_key == NULL)
2505 		cmn_err(CE_PANIC, "No memory for HCA FMA Test.");
2506 
2507 	bcopy(key_buf, hash_key, strlen(key_buf));
2508 
2509 	status = mod_hash_find(strHash, (mod_hash_key_t)hash_key,
2510 	    (mod_hash_val_t *)&t_item);
2511 
2512 	switch (status) {
2513 	case MH_ERR_NOTFOUND:
2514 		t_item = (struct i_hca_fm_test *)
2515 		    kmem_alloc(sizeof (struct i_hca_fm_test), KM_NOSLEEP);
2516 		if (t_item == NULL)
2517 			cmn_err(CE_PANIC, "No memory for HCA FMA Test.");
2518 
2519 		/* Set the error number */
2520 		mutex_enter(&i_hca_test_lock);
2521 		t_item->num = test_num++;
2522 		mutex_exit(&i_hca_test_lock);
2523 
2524 		/* Set type and other static information */
2525 		t_item->type = type;
2526 		t_item->line_num = linenum;
2527 		t_item->file_name = filename;
2528 		t_item->hash_key = hash_key;
2529 		t_item->private = private;
2530 		t_item->pio_injection = pio_injection;
2531 
2532 		/* Set the pre-defined hermon test item */
2533 		i_hca_test_set_item(preTestNum, (struct i_hca_fm_test *)t_item);
2534 
2535 		status = mod_hash_insert(strHash, (mod_hash_key_t)
2536 		    hash_key, (mod_hash_val_t)t_item);
2537 		ASSERT(status == 0);
2538 
2539 		status = mod_hash_insert(idHash, (mod_hash_key_t)
2540 		    (uintptr_t)t_item->num, (mod_hash_val_t)t_item);
2541 		ASSERT(status == 0);
2542 		break;
2543 
2544 	case MH_ERR_NOMEM:
2545 		cmn_err(CE_PANIC, "No memory for HCA FMA Test.");
2546 		break;
2547 
2548 	case MH_ERR_DUPLICATE:
2549 		cmn_err(CE_PANIC, "HCA FMA Test Internal Error.");
2550 		break;
2551 	default:
2552 		/* OK, this is already registered. */
2553 		kmem_free(hash_key, strlen(key_buf) + 1);
2554 		break;
2555 	}
2556 	return (t_item);
2557 }
2558 
2559 
2560 /*
2561  *  void
2562  *  i_hca_test_set_item(int num, struct i_hca_fm_test *t_item)
2563  *
2564  *  Overview
2565  *      i_hca_test_set_item() is a private function used in
2566  *      i_hca_test_register() above. This function sets the testset specified
2567  *      (with the index number) to HCA FM function test structure.
2568  *
2569  *  Argument
2570  *      num: index to test set (testset structure array)
2571  *      t_item: pointer to HCA fM function test structure
2572  *
2573  *  Return value
2574  *      Nothing
2575  *
2576  *  Caller's context
2577  *      i_hca_test_set_item() can be called in user, kernel, interrupt
2578  *      context or hight interrupt context.
2579  *
2580  */
2581 static void
i_hca_test_set_item(int num,struct i_hca_fm_test * t_item)2582 i_hca_test_set_item(int num, struct i_hca_fm_test *t_item)
2583 {
2584 	if (num < 0 || num >= sizeof (testset) / sizeof (hermon_test_t) ||
2585 	    testset[num].type != t_item->type) {
2586 		t_item->trigger = testset[0].trigger;
2587 		t_item->errcnt = testset[0].errcnt;
2588 		return;
2589 	}
2590 
2591 	/* Set the testsuite */
2592 	t_item->trigger = testset[num].trigger;
2593 	t_item->errcnt = testset[num].errcnt;
2594 }
2595 
2596 
2597 /*
2598  *  void
2599  *  i_hca_test_free_item(mod_hash_val_t val)
2600  *
2601  *  Overview
2602  *      i_hca_test_free_item() is a private function used to free HCA FM
2603  *      function test structure when i_hca_test_fini() is called. This function
2604  *      is registered as a destructor when the hash table is created in
2605  *      i_hca_test_init().
2606  *
2607  *  Argument
2608  *      val: pointer to the value stored in hash table (pointer to HCA FM
2609  *           function test structure)
2610  *
2611  *  Return value
2612  *      Nothing
2613  *
2614  *  Caller's context
2615  *      i_hca_test_free_item() can be called in user, kernel or interrupt
2616  *      context.
2617  *
2618  */
2619 static void
i_hca_test_free_item(mod_hash_val_t val)2620 i_hca_test_free_item(mod_hash_val_t val)
2621 {
2622 	struct i_hca_fm_test *t_item = (struct i_hca_fm_test *)val;
2623 	kmem_free(t_item, sizeof (struct i_hca_fm_test));
2624 }
2625 #endif /* FMA_TEST */
2626