xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon.c (revision b1d7ec75953cd517f5b7c3d9cb427ff8ec5d7d07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon.c
29  *    Hermon (InfiniBand) HCA Driver attach/detach Routines
30  *
31  *    Implements all the routines necessary for the attach, setup,
32  *    initialization (and subsequent possible teardown and detach) of the
33  *    Hermon InfiniBand HCA driver.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/file.h>
38 #include <sys/open.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/stat.h>
44 #include <sys/pci.h>
45 #include <sys/pci_cap.h>
46 #include <sys/bitmap.h>
47 #include <sys/policy.h>
48 
49 #include <sys/ib/adapters/hermon/hermon.h>
50 
/* The following works around a problem in pre-2_7_000 firmware. */
#define	HERMON_FW_WORKAROUND

/*
 * When non-zero, enables additional debug tracing (for example, the
 * IBTF_DPRINTF_L2() call at the top of hermon_icm_alloc()).
 */
int hermon_verbose = 0;

/*
 * Hermon HCA State Pointer
 *    Anchor for the per-instance soft state.  It is set up by
 *    ddi_soft_state_init() in _init() and torn down by
 *    ddi_soft_state_fini() in _fini().
 */
void *hermon_statep;

/* NOTE(review): presumably gates VPD debug output - confirm at its users */
int debug_vpd = 0;

/* Disable the internal error-check polling thread */
int hermon_no_inter_err_chk = 0;

/*
 * The Hermon "userland resource database" is common to instances of the
 * Hermon HCA driver.  This structure "hermon_userland_rsrc_db" contains all
 * the necessary information to maintain it.  Its "hdl_umapdb_lock" mutex is
 * taken explicitly by hermon_open() and hermon_close() around their
 * multi-step lookups.
 */
hermon_umap_db_t hermon_userland_rsrc_db;
70 
/*
 * Entry points referenced by the hermon_cb_ops and hermon_ops tables below.
 */
static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
static int hermon_open(dev_t *, int, int, cred_t *);
static int hermon_close(dev_t, int, int, cred_t *);
static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

/* Driver-level init/teardown helpers used by hermon_attach()/detach() */
static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
    int instance);
static void hermon_drv_fini(hermon_state_t *state);
static void hermon_drv_fini2(hermon_state_t *state);
static int hermon_isr_init(hermon_state_t *state);
static void hermon_isr_fini(hermon_state_t *state);

static int hermon_hw_init(hermon_state_t *state);

/* Hardware, soft state, ICM, port, QP, multicast and PCI helper routines */
static void hermon_hw_fini(hermon_state_t *state,
    hermon_drv_cleanup_level_t cleanup);
static int hermon_soft_state_init(hermon_state_t *state);
static void hermon_soft_state_fini(hermon_state_t *state);
static int hermon_icm_config_setup(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static void hermon_icm_tables_init(hermon_state_t *state);
static void hermon_icm_tables_fini(hermon_state_t *state);
static int hermon_icm_dma_init(hermon_state_t *state);
static void hermon_icm_dma_fini(hermon_state_t *state);
static void hermon_inithca_set(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static int hermon_hca_port_init(hermon_state_t *state);
static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
static int hermon_internal_uarpg_init(hermon_state_t *state);
static void hermon_internal_uarpg_fini(hermon_state_t *state);
static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
static int hermon_sw_reset(hermon_state_t *state);
static int hermon_mcg_init(hermon_state_t *state);
static void hermon_mcg_fini(hermon_state_t *state);
static int hermon_fw_version_check(hermon_state_t *state);
static void hermon_device_info_report(hermon_state_t *state);
static int hermon_pci_capability_list(hermon_state_t *state,
    ddi_acc_handle_t hdl);
static void hermon_pci_capability_vpd(hermon_state_t *state,
    ddi_acc_handle_t hdl, uint_t offset);
static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
    uint32_t addr, uint32_t *data);
static int hermon_intr_or_msi_init(hermon_state_t *state);
static int hermon_add_intrs(hermon_state_t *state, int intr_type);
static int hermon_intr_or_msi_fini(hermon_state_t *state);
/* Note: unlike its neighbours, this routine is declared without "static" */
void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
    uint_t offset);

static uint64_t hermon_size_icm(hermon_state_t *state);

/*
 * X86 fastreboot support
 *    hermon_quiesce() is installed as the devo_quiesce entry in hermon_ops.
 */
static ushort_t get_msix_ctrl(dev_info_t *);
static size_t get_msix_tbl_size(dev_info_t *);
static size_t get_msix_pba_size(dev_info_t *);
static void hermon_set_msix_info(hermon_state_t *);
static int hermon_intr_disable(hermon_state_t *);
static int hermon_quiesce(dev_info_t *);
130 
131 
/*
 * Character/Block Operations
 *    Only open, close, ioctl and devmap are implemented by this driver; the
 *    remaining slots are filled with the nodev/nochpoll/NULL placeholders.
 *    The initializers are positional, so their order must not be changed.
 */
static struct cb_ops hermon_cb_ops = {
	hermon_open,		/* open */
	hermon_close,		/* close */
	nodev,			/* strategy (block) */
	nodev,			/* print (block) */
	nodev,			/* dump (block) */
	nodev,			/* read */
	nodev,			/* write */
	hermon_ioctl,		/* ioctl */
	hermon_devmap,		/* devmap */
	NULL,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streams */
	D_NEW | D_MP |
	D_64BIT | /* D_HOTPLUG | */
	D_DEVMAP,		/* flags (D_HOTPLUG deliberately left out) */
	CB_REV			/* rev */
};
153 
/*
 * Driver Operations
 *    Ties the getinfo/attach/detach/quiesce entry points and the cb_ops
 *    table above into the dev_ops structure handed to the system through
 *    hermon_modldrv.  The initializers are positional.
 */
static struct dev_ops hermon_ops = {
	DEVO_REV,		/* struct rev */
	0,			/* refcnt */
	hermon_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	hermon_attach,		/* attach */
	hermon_detach,		/* detach */
	nodev,			/* reset */
	&hermon_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	nodev,			/* power */
	hermon_quiesce,		/* devo_quiesce */
};
169 
/*
 * Module Driver Info
 *    Describes this module as a device driver and points at hermon_ops.
 */
static struct modldrv hermon_modldrv = {
	&mod_driverops,		/* module type: device driver */
	"ConnectX IB Driver",	/* module description string */
	&hermon_ops		/* driver ops */
};
176 
/*
 * Module Linkage
 *    Passed to ibc_init()/mod_install() in _init(), mod_info() in _info()
 *    and mod_remove()/ibc_fini() in _fini().
 */
static struct modlinkage hermon_modlinkage = {
	MODREV_1,
	&hermon_modldrv,
	NULL			/* terminates the linkage list */
};
183 
/*
 * This extern refers to the ibc_operations_t function vector that is defined
 * in the hermon_ci.c file.  It is only declared here; this part of the file
 * does not define it.
 */
extern ibc_operations_t	hermon_ibc_ops;
189 
190 /*
191  * _init()
192  */
193 int
194 _init()
195 {
196 	int	status;
197 
198 	status = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
199 	    (size_t)HERMON_INITIAL_STATES);
200 	if (status != 0) {
201 		return (status);
202 	}
203 
204 	status = ibc_init(&hermon_modlinkage);
205 	if (status != 0) {
206 		ddi_soft_state_fini(&hermon_statep);
207 		return (status);
208 	}
209 
210 	status = mod_install(&hermon_modlinkage);
211 	if (status != 0) {
212 		ibc_fini(&hermon_modlinkage);
213 		ddi_soft_state_fini(&hermon_statep);
214 		return (status);
215 	}
216 
217 	/* Initialize the Hermon "userland resources database" */
218 	hermon_umap_db_init();
219 
220 	return (status);
221 }
222 
223 
224 /*
225  * _info()
226  */
227 int
228 _info(struct modinfo *modinfop)
229 {
230 	int	status;
231 
232 	status = mod_info(&hermon_modlinkage, modinfop);
233 	return (status);
234 }
235 
236 
237 /*
238  * _fini()
239  */
240 int
241 _fini()
242 {
243 	int	status;
244 
245 	status = mod_remove(&hermon_modlinkage);
246 	if (status != 0) {
247 		return (status);
248 	}
249 
250 	/* Destroy the Hermon "userland resources database" */
251 	hermon_umap_db_fini();
252 
253 	ibc_fini(&hermon_modlinkage);
254 	ddi_soft_state_fini(&hermon_statep);
255 
256 	return (status);
257 }
258 
259 
260 /*
261  * hermon_getinfo()
262  */
263 /* ARGSUSED */
264 static int
265 hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
266 {
267 	dev_t		dev;
268 	hermon_state_t 	*state;
269 	minor_t		instance;
270 
271 	switch (cmd) {
272 	case DDI_INFO_DEVT2DEVINFO:
273 		dev = (dev_t)arg;
274 		instance = HERMON_DEV_INSTANCE(dev);
275 		state = ddi_get_soft_state(hermon_statep, instance);
276 		if (state == NULL) {
277 			return (DDI_FAILURE);
278 		}
279 		*result = (void *)state->hs_dip;
280 		return (DDI_SUCCESS);
281 
282 	case DDI_INFO_DEVT2INSTANCE:
283 		dev = (dev_t)arg;
284 		instance = HERMON_DEV_INSTANCE(dev);
285 		*result = (void *)(uintptr_t)instance;
286 		return (DDI_SUCCESS);
287 
288 	default:
289 		break;
290 	}
291 
292 	return (DDI_FAILURE);
293 }
294 
295 
296 /*
297  * hermon_open()
298  */
299 /* ARGSUSED */
300 static int
301 hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
302 {
303 	hermon_state_t		*state;
304 	hermon_rsrc_t 		*rsrcp;
305 	hermon_umap_db_entry_t	*umapdb, *umapdb2;
306 	minor_t			instance;
307 	uint64_t		key, value;
308 	uint_t			hr_indx;
309 	dev_t			dev;
310 	int			status;
311 
312 	instance = HERMON_DEV_INSTANCE(*devp);
313 	state = ddi_get_soft_state(hermon_statep, instance);
314 	if (state == NULL) {
315 		return (ENXIO);
316 	}
317 
318 	/*
319 	 * Only allow driver to be opened for character access, and verify
320 	 * whether exclusive access is allowed.
321 	 */
322 	if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
323 	    secpolicy_excl_open(credp) != 0)) {
324 		return (EINVAL);
325 	}
326 
327 	/*
328 	 * Search for the current process PID in the "userland resources
329 	 * database".  If it is not found, then attempt to allocate a UAR
330 	 * page and add the ("key", "value") pair to the database.
331 	 * Note:  As a last step we always return a devp appropriate for
332 	 * the open.  Either we return a new minor number (based on the
333 	 * instance and the UAR page index) or we return the current minor
334 	 * number for the given client process.
335 	 *
336 	 * We also add an entry to the database to allow for lookup from
337 	 * "dev_t" to the current process PID.  This is necessary because,
338 	 * under certain circumstance, the process PID that calls the Hermon
339 	 * close() entry point may not be the same as the one who called
340 	 * open().  Specifically, this can happen if a child process calls
341 	 * the Hermon's open() entry point, gets a UAR page, maps it out (using
342 	 * mmap()), and then exits without calling munmap().  Because mmap()
343 	 * adds a reference to the file descriptor, at the exit of the child
344 	 * process the file descriptor is "inherited" by the parent (and will
345 	 * be close()'d by the parent's PID only when it exits).
346 	 *
347 	 * Note: We use the hermon_umap_db_find_nolock() and
348 	 * hermon_umap_db_add_nolock() database access routines below (with
349 	 * an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
350 	 * to ensure that the multiple accesses (in this case searching for,
351 	 * and then adding _two_ database entries) can be done atomically.
352 	 */
353 	key = ddi_get_pid();
354 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
355 	status = hermon_umap_db_find_nolock(instance, key,
356 	    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
357 	if (status != DDI_SUCCESS) {
358 		/*
359 		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
360 		 * But we still need some rsrcp value, and a mostly unique
361 		 * hr_indx value.  So we set rsrcp to NULL for maintenance
362 		 * mode, and use a rolling count for hr_indx.  The field
363 		 * 'hs_open_hr_indx' is used only in this maintenance mode
364 		 * condition.
365 		 *
366 		 * Otherwise, if we are in operational mode then we allocate
367 		 * the UAR page as normal, and use the rsrcp value and tr_indx
368 		 * value from that allocation.
369 		 */
370 		if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
371 			rsrcp = NULL;
372 			hr_indx = state->hs_open_ar_indx++;
373 		} else {
374 			/* Allocate a new UAR page for this process */
375 			status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
376 			    HERMON_NOSLEEP, &rsrcp);
377 			if (status != DDI_SUCCESS) {
378 				mutex_exit(
379 				    &hermon_userland_rsrc_db.hdl_umapdb_lock);
380 				return (EAGAIN);
381 			}
382 
383 			hr_indx = rsrcp->hr_indx;
384 		}
385 
386 		/*
387 		 * Allocate an entry to track the UAR page resource in the
388 		 * "userland resources database".
389 		 */
390 		umapdb = hermon_umap_db_alloc(instance, key,
391 		    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
392 		if (umapdb == NULL) {
393 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
394 			/* If in "maintenance mode", don't free the rsrc */
395 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
396 				hermon_rsrc_free(state, &rsrcp);
397 			}
398 			return (EAGAIN);
399 		}
400 
401 		/*
402 		 * Create a new device number.  Minor number is a function of
403 		 * the UAR page index (15 bits) and the device instance number
404 		 * (3 bits).
405 		 */
406 		dev = makedevice(getmajor(*devp), (hr_indx <<
407 		    HERMON_MINORNUM_SHIFT) | instance);
408 
409 		/*
410 		 * Allocate another entry in the "userland resources database"
411 		 * to track the association of the device number (above) to
412 		 * the current process ID (in "key").
413 		 */
414 		umapdb2 = hermon_umap_db_alloc(instance, dev,
415 		    MLNX_UMAP_PID_RSRC, (uint64_t)key);
416 		if (umapdb2 == NULL) {
417 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
418 			hermon_umap_db_free(umapdb);
419 			/* If in "maintenance mode", don't free the rsrc */
420 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
421 				hermon_rsrc_free(state, &rsrcp);
422 			}
423 			return (EAGAIN);
424 		}
425 
426 		/* Add the entries to the database */
427 		hermon_umap_db_add_nolock(umapdb);
428 		hermon_umap_db_add_nolock(umapdb2);
429 
430 	} else {
431 		/*
432 		 * Return the same device number as on the original open()
433 		 * call.  This was calculated as a function of the UAR page
434 		 * index (top 16 bits) and the device instance number
435 		 */
436 		rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
437 		dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
438 		    HERMON_MINORNUM_SHIFT) | instance);
439 	}
440 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
441 
442 	*devp = dev;
443 
444 	return (0);
445 }
446 
447 
448 /*
449  * hermon_close()
450  */
451 /* ARGSUSED */
452 static int
453 hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
454 {
455 	hermon_state_t		*state;
456 	hermon_rsrc_t		*rsrcp;
457 	hermon_umap_db_entry_t	*umapdb;
458 	hermon_umap_db_priv_t	*priv;
459 	minor_t			instance;
460 	uint64_t		key, value;
461 	int			status, reset_status = 0;
462 
463 	instance = HERMON_DEV_INSTANCE(dev);
464 	state = ddi_get_soft_state(hermon_statep, instance);
465 	if (state == NULL) {
466 		return (ENXIO);
467 	}
468 
469 	/*
470 	 * Search for "dev_t" in the "userland resources database".  As
471 	 * explained above in hermon_open(), we can't depend on using the
472 	 * current process ID here to do the lookup because the process
473 	 * that ultimately closes may not be the same one who opened
474 	 * (because of inheritance).
475 	 * So we lookup the "dev_t" (which points to the PID of the process
476 	 * that opened), and we remove the entry from the database (and free
477 	 * it up).  Then we do another query based on the PID value.  And when
478 	 * we find that database entry, we free it up too and then free the
479 	 * Hermon UAR page resource.
480 	 *
481 	 * Note: We use the hermon_umap_db_find_nolock() database access
482 	 * routine below (with an explicit mutex_enter of the database lock)
483 	 * to ensure that the multiple accesses (which attempt to remove the
484 	 * two database entries) can be done atomically.
485 	 *
486 	 * This works the same in both maintenance mode and HCA mode, except
487 	 * for the call to hermon_rsrc_free().  In the case of maintenance mode,
488 	 * this call is not needed, as it was not allocated in hermon_open()
489 	 * above.
490 	 */
491 	key = dev;
492 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
493 	status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
494 	    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
495 	if (status == DDI_SUCCESS) {
496 		/*
497 		 * If the "hdb_priv" field is non-NULL, it indicates that
498 		 * some "on close" handling is still necessary.  Call
499 		 * hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
500 		 * to invoke all the registered callbacks).  Then free up
501 		 * the resources associated with "hdb_priv" and continue
502 		 * closing.
503 		 */
504 		priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
505 		if (priv != NULL) {
506 			reset_status = hermon_umap_db_handle_onclose_cb(priv);
507 			kmem_free(priv, sizeof (hermon_umap_db_priv_t));
508 			umapdb->hdbe_common.hdb_priv = (void *)NULL;
509 		}
510 
511 		hermon_umap_db_free(umapdb);
512 
513 		/*
514 		 * Now do another lookup using PID as the key (copy it from
515 		 * "value").  When this lookup is complete, the "value" field
516 		 * will contain the hermon_rsrc_t pointer for the UAR page
517 		 * resource.
518 		 */
519 		key = value;
520 		status = hermon_umap_db_find_nolock(instance, key,
521 		    MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
522 		    &umapdb);
523 		if (status == DDI_SUCCESS) {
524 			hermon_umap_db_free(umapdb);
525 			/* If in "maintenance mode", don't free the rsrc */
526 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
527 				rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
528 				hermon_rsrc_free(state, &rsrcp);
529 			}
530 		}
531 	}
532 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
533 	return (reset_status);
534 }
535 
536 
537 /*
538  * hermon_attach()
539  *    Context: Only called from attach() path context
540  */
541 static int
542 hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
543 {
544 	hermon_state_t	*state;
545 	ibc_clnt_hdl_t	tmp_ibtfpriv;
546 	ibc_status_t	ibc_status;
547 	int		instance;
548 	int		status;
549 
550 #ifdef __lock_lint
551 	(void) hermon_quiesce(dip);
552 #endif
553 
554 	switch (cmd) {
555 	case DDI_ATTACH:
556 		instance = ddi_get_instance(dip);
557 		status = ddi_soft_state_zalloc(hermon_statep, instance);
558 		if (status != DDI_SUCCESS) {
559 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
560 			    "attach_ssz_fail", instance);
561 			goto fail_attach_nomsg;
562 
563 		}
564 		state = ddi_get_soft_state(hermon_statep, instance);
565 		if (state == NULL) {
566 			ddi_soft_state_free(hermon_statep, instance);
567 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
568 			    "attach_gss_fail", instance);
569 			goto fail_attach_nomsg;
570 		}
571 
572 		/* clear the attach error buffer */
573 		HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);
574 
575 		/* Save away devinfo and instance before hermon_fm_init() */
576 		state->hs_dip = dip;
577 		state->hs_instance = instance;
578 
579 		hermon_fm_init(state);
580 
581 		/*
582 		 * Initialize Hermon driver and hardware.
583 		 *
584 		 * Note: If this initialization fails we may still wish to
585 		 * create a device node and remain operational so that Hermon
586 		 * firmware can be updated/flashed (i.e. "maintenance mode").
587 		 * If this is the case, then "hs_operational_mode" will be
588 		 * equal to HERMON_MAINTENANCE_MODE.  We will not attempt to
589 		 * attach to the IBTF or register with the IBMF (i.e. no
590 		 * InfiniBand interfaces will be enabled).
591 		 */
592 		status = hermon_drv_init(state, dip, instance);
593 		if ((status != DDI_SUCCESS) &&
594 		    (HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
595 			goto fail_attach;
596 		}
597 
598 		/*
599 		 * Change the Hermon FM mode
600 		 */
601 		if ((hermon_get_state(state) & HCA_PIO_FM) &&
602 		    HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
603 			/*
604 			 * Now we wait for 50ms to give an opportunity
605 			 * to Solaris FMA so that HW errors can be notified.
606 			 * Then check if there are HW errors or not. If
607 			 * a HW error is detected, the Hermon attachment
608 			 * must be failed.
609 			 */
610 			delay(drv_usectohz(50000));
611 			if (hermon_init_failure(state)) {
612 				hermon_drv_fini(state);
613 				HERMON_WARNING(state, "unable to "
614 				    "attach Hermon due to a HW error");
615 				HERMON_ATTACH_MSG(state->hs_attach_buf,
616 				    "hermon_attach_failure");
617 				goto fail_attach;
618 			}
619 
620 			/*
621 			 * There seems no HW errors during the attachment,
622 			 * so let's change the Hermon FM state to the
623 			 * ereport only mode.
624 			 */
625 			if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
626 				/* unwind the resources */
627 				hermon_drv_fini(state);
628 				HERMON_ATTACH_MSG(state->hs_attach_buf,
629 				    "hermon_attach_failure");
630 				goto fail_attach;
631 			}
632 		}
633 
634 		/* Create the minor node for device */
635 		status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
636 		    DDI_PSEUDO, 0);
637 		if (status != DDI_SUCCESS) {
638 			hermon_drv_fini(state);
639 			HERMON_ATTACH_MSG(state->hs_attach_buf,
640 			    "attach_create_mn_fail");
641 			goto fail_attach;
642 		}
643 
644 		/*
645 		 * If we are in "maintenance mode", then we don't want to
646 		 * register with the IBTF.  All InfiniBand interfaces are
647 		 * uninitialized, and the device is only capable of handling
648 		 * requests to update/flash firmware (or test/debug requests).
649 		 */
650 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
651 			cmn_err(CE_NOTE, "!Hermon is operational\n");
652 
653 			/* Attach to InfiniBand Transport Framework (IBTF) */
654 			ibc_status = ibc_attach(&tmp_ibtfpriv,
655 			    &state->hs_ibtfinfo);
656 			if (ibc_status != IBC_SUCCESS) {
657 				cmn_err(CE_CONT, "hermon_attach: ibc_attach "
658 				    "failed\n");
659 				ddi_remove_minor_node(dip, "devctl");
660 				hermon_drv_fini(state);
661 				HERMON_ATTACH_MSG(state->hs_attach_buf,
662 				    "attach_ibcattach_fail");
663 				goto fail_attach;
664 			}
665 
666 			/*
667 			 * Now that we've successfully attached to the IBTF,
668 			 * we enable all appropriate asynch and CQ events to
669 			 * be forwarded to the IBTF.
670 			 */
671 			HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
672 
673 			ibc_post_attach(state->hs_ibtfpriv);
674 
675 			/* Register agents with IB Mgmt Framework (IBMF) */
676 			status = hermon_agent_handlers_init(state);
677 			if (status != DDI_SUCCESS) {
678 				(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
679 				HERMON_QUIESCE_IBTF_CALLB(state);
680 				if (state->hs_in_evcallb != 0) {
681 					HERMON_WARNING(state, "unable to "
682 					    "quiesce Hermon IBTF callbacks");
683 				}
684 				ibc_detach(tmp_ibtfpriv);
685 				ddi_remove_minor_node(dip, "devctl");
686 				hermon_drv_fini(state);
687 				HERMON_ATTACH_MSG(state->hs_attach_buf,
688 				    "attach_agentinit_fail");
689 				goto fail_attach;
690 			}
691 		}
692 
693 		/* Report attach in maintenance mode, if appropriate */
694 		if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
695 			cmn_err(CE_NOTE, "hermon%d: driver attached "
696 			    "(for maintenance mode only)", state->hs_instance);
697 			hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
698 		}
699 
700 		/* Report that driver was loaded */
701 		ddi_report_dev(dip);
702 
703 		/* Send device information to log file */
704 		hermon_device_info_report(state);
705 
706 		/* DEBUG PRINT */
707 		cmn_err(CE_CONT, "!Hermon attach complete\n");
708 		return (DDI_SUCCESS);
709 
710 	case DDI_RESUME:
711 		/* Add code here for DDI_RESUME XXX */
712 		return (DDI_FAILURE);
713 
714 	default:
715 		cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
716 		break;
717 	}
718 
719 fail_attach:
720 	cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
721 	    state->hs_attach_buf);
722 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
723 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
724 	}
725 	hermon_drv_fini2(state);
726 	hermon_fm_fini(state);
727 	ddi_soft_state_free(hermon_statep, instance);
728 
729 fail_attach_nomsg:
730 	return (DDI_FAILURE);
731 }
732 
733 
734 /*
735  * hermon_detach()
736  *    Context: Only called from detach() path context
737  */
738 static int
739 hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
740 {
741 	hermon_state_t	*state;
742 	ibc_clnt_hdl_t	tmp_ibtfpriv;
743 	ibc_status_t	ibc_status;
744 	int		instance, status;
745 
746 	instance = ddi_get_instance(dip);
747 	state = ddi_get_soft_state(hermon_statep, instance);
748 	if (state == NULL) {
749 		return (DDI_FAILURE);
750 	}
751 
752 	switch (cmd) {
753 	case DDI_DETACH:
754 		/*
755 		 * If we are in "maintenance mode", then we do not want to
756 		 * do teardown for any of the InfiniBand interfaces.
757 		 * Specifically, this means not detaching from IBTF (we never
758 		 * attached to begin with) and not deregistering from IBMF.
759 		 */
760 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
761 			/* Unregister agents from IB Mgmt Framework (IBMF) */
762 			status = hermon_agent_handlers_fini(state);
763 			if (status != DDI_SUCCESS) {
764 				return (DDI_FAILURE);
765 			}
766 
767 			/*
768 			 * Attempt the "pre-detach" from InfiniBand Transport
769 			 * Framework (IBTF).  At this point the IBTF is still
770 			 * capable of handling incoming asynch and completion
771 			 * events.  This "pre-detach" is primarily a mechanism
772 			 * to notify the appropriate IBTF clients that the
773 			 * HCA is being removed/offlined.
774 			 */
775 			ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
776 			if (ibc_status != IBC_SUCCESS) {
777 				status = hermon_agent_handlers_init(state);
778 				if (status != DDI_SUCCESS) {
779 					HERMON_WARNING(state, "failed to "
780 					    "restart Hermon agents");
781 				}
782 				return (DDI_FAILURE);
783 			}
784 
785 			/*
786 			 * Before we can fully detach from the IBTF we need to
787 			 * ensure that we have handled all outstanding event
788 			 * callbacks.  This is accomplished by quiescing the
789 			 * event callback mechanism.  Note: if we are unable
790 			 * to successfully quiesce the callbacks, then this is
791 			 * an indication that something has probably gone
792 			 * seriously wrong.  We print out a warning, but
793 			 * continue.
794 			 */
795 			tmp_ibtfpriv = state->hs_ibtfpriv;
796 			HERMON_QUIESCE_IBTF_CALLB(state);
797 			if (state->hs_in_evcallb != 0) {
798 				HERMON_WARNING(state, "unable to quiesce "
799 				    "Hermon IBTF callbacks");
800 			}
801 
802 			/* Complete the detach from the IBTF */
803 			ibc_detach(tmp_ibtfpriv);
804 		}
805 
806 		/* Remove the minor node for device */
807 		ddi_remove_minor_node(dip, "devctl");
808 
809 		/*
810 		 * Only call hermon_drv_fini() if we are in Hermon HCA mode.
811 		 * (Because if we are in "maintenance mode", then we never
812 		 * successfully finished init.)  Only report successful
813 		 * detach for normal HCA mode.
814 		 */
815 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
816 			/* Cleanup driver resources and shutdown hardware */
817 			hermon_drv_fini(state);
818 			cmn_err(CE_CONT, "!Hermon driver successfully "
819 			    "detached\n");
820 		}
821 
822 		hermon_drv_fini2(state);
823 		hermon_fm_fini(state);
824 		ddi_soft_state_free(hermon_statep, instance);
825 
826 		return (DDI_SUCCESS);
827 
828 	case DDI_SUSPEND:
829 		/* Add code here for DDI_SUSPEND XXX */
830 		return (DDI_FAILURE);
831 
832 	default:
833 		cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
834 		break;
835 	}
836 
837 	return (DDI_FAILURE);
838 }
839 
840 /*
841  * hermon_dma_attr_init()
842  *    Context: Can be called from interrupt or base context.
843  */
844 
845 /* ARGSUSED */
846 void
847 hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
848 {
849 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
850 
851 	dma_attr->dma_attr_version	= DMA_ATTR_V0;
852 	dma_attr->dma_attr_addr_lo	= 0;
853 	dma_attr->dma_attr_addr_hi	= 0xFFFFFFFFFFFFFFFFull;
854 	dma_attr->dma_attr_count_max	= 0xFFFFFFFFFFFFFFFFull;
855 	dma_attr->dma_attr_align	= HERMON_PAGESIZE;  /* default 4K */
856 	dma_attr->dma_attr_burstsizes	= 0x3FF;
857 	dma_attr->dma_attr_minxfer	= 1;
858 	dma_attr->dma_attr_maxxfer	= 0xFFFFFFFFFFFFFFFFull;
859 	dma_attr->dma_attr_seg		= 0xFFFFFFFFFFFFFFFFull;
860 	dma_attr->dma_attr_sgllen	= 0x7FFFFFFF;
861 	dma_attr->dma_attr_granular	= 1;
862 	dma_attr->dma_attr_flags	= 0;
863 }
864 
865 /*
866  * hermon_dma_alloc()
867  *    Context: Can be called from base context.
868  */
869 int
870 hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
871     uint16_t opcode)
872 {
873 	ddi_dma_handle_t	dma_hdl;
874 	ddi_dma_attr_t		dma_attr;
875 	ddi_acc_handle_t	acc_hdl;
876 	ddi_dma_cookie_t	cookie;
877 	uint64_t		kaddr;
878 	uint64_t		real_len;
879 	uint_t			ccount;
880 	int			status;
881 
882 	hermon_dma_attr_init(state, &dma_attr);
883 #ifdef	__sparc
884 	if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
885 		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
886 #endif
887 
888 	/* Allocate a DMA handle */
889 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
890 	    NULL, &dma_hdl);
891 	if (status != DDI_SUCCESS) {
892 		IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
893 		cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
894 		return (DDI_FAILURE);
895 	}
896 
897 	/* Allocate DMA memory */
898 	status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
899 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
900 	    (caddr_t *)&kaddr, (size_t *)&real_len, &acc_hdl);
901 	if (status != DDI_SUCCESS) {
902 		ddi_dma_free_handle(&dma_hdl);
903 		IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
904 		cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
905 		return (DDI_FAILURE);
906 	}
907 	bzero((caddr_t)(uintptr_t)kaddr, real_len);
908 
909 	/* Bind the memory to the handle */
910 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
911 	    (caddr_t)(uintptr_t)kaddr, (size_t)real_len, DDI_DMA_RDWR |
912 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
913 	if (status != DDI_SUCCESS) {
914 		ddi_dma_mem_free(&acc_hdl);
915 		ddi_dma_free_handle(&dma_hdl);
916 		IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
917 		cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
918 		return (DDI_FAILURE);
919 	}
920 
921 	/* Package the hermon_dma_info contents and return */
922 	dma_info->vaddr   = kaddr;
923 	dma_info->dma_hdl = dma_hdl;
924 	dma_info->acc_hdl = acc_hdl;
925 
926 	/* Pass the mapping information to the firmware */
927 	status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
928 	if (status != DDI_SUCCESS) {
929 		char *s;
930 		hermon_dma_free(dma_info);
931 		switch (opcode) {
932 		case MAP_ICM:
933 			s = "MAP_ICM";
934 			break;
935 		case MAP_FA:
936 			s = "MAP_FA";
937 			break;
938 		case MAP_ICM_AUX:
939 			s = "MAP_ICM_AUX";
940 			break;
941 		default:
942 			s = "UNKNOWN";
943 		}
944 		cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
945 		    s, status);
946 		return (DDI_FAILURE);
947 	}
948 
949 	return (DDI_SUCCESS);
950 }
951 
952 /*
953  * hermon_dma_free()
954  *    Context: Can be called from base context.
955  */
956 void
957 hermon_dma_free(hermon_dma_info_t *info)
958 {
959 	/* Unbind the handles and free the memory */
960 	(void) ddi_dma_unbind_handle(info->dma_hdl);
961 	ddi_dma_mem_free(&info->acc_hdl);
962 	ddi_dma_free_handle(&info->dma_hdl);
963 }
964 
/*
 * Shorthand for (de)allocating the ICM span of another resource type at the
 * same (index1, index2) position.  These macros pick up "state", "index1"
 * and "index2" from the enclosing scope, so they are valid for use only in
 * hermon_icm_alloc/hermon_icm_free.
 */
#define	HERMON_ICM_ALLOC(rsrc)						\
	hermon_icm_alloc(state, (rsrc), index1, index2)
#define	HERMON_ICM_FREE(rsrc)						\
	hermon_icm_free(state, (rsrc), index1, index2)
970 
971 /*
972  * hermon_icm_alloc()
973  *    Context: Can be called from base context.
974  *
975  * Only one thread can be here for a given hermon_rsrc_type_t "type".
976  */
977 int
978 hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
979     uint32_t index1, uint32_t index2)
980 {
981 	hermon_icm_table_t	*icm;
982 	hermon_dma_info_t	*dma_info;
983 	uint8_t			*bitmap;
984 	int			status;
985 
986 	if (hermon_verbose) {
987 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
988 		    "index1/2 (0x%x/0x%x)", type, index1, index2);
989 	}
990 
991 	icm = &state->hs_icm[type];
992 
993 	switch (type) {
994 	case HERMON_QPC:
995 		status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
996 		if (status != DDI_SUCCESS) {
997 			return (status);
998 		}
999 		status = HERMON_ICM_ALLOC(HERMON_RDB);
1000 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1001 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1002 			return (status);
1003 		}
1004 		status = HERMON_ICM_ALLOC(HERMON_ALTC);
1005 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1006 			HERMON_ICM_FREE(HERMON_RDB);
1007 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1008 			return (status);
1009 		}
1010 		status = HERMON_ICM_ALLOC(HERMON_AUXC);
1011 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1012 			HERMON_ICM_FREE(HERMON_ALTC);
1013 			HERMON_ICM_FREE(HERMON_RDB);
1014 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1015 			return (status);
1016 		}
1017 		break;
1018 	case HERMON_SRQC:
1019 		status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
1020 		if (status != DDI_SUCCESS) {
1021 			return (status);
1022 		}
1023 		break;
1024 	case HERMON_CQC:
1025 		status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
1026 		if (status != DDI_SUCCESS) {
1027 			return (status);
1028 		}
1029 		break;
1030 	case HERMON_EQC:
1031 		status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
1032 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1033 			return (status);
1034 		}
1035 		break;
1036 	}
1037 
1038 	/* ensure existence of bitmap and dmainfo, sets "dma_info" */
1039 	hermon_bitmap(bitmap, dma_info, icm, index1);
1040 
1041 	/* Set up the DMA handle for allocation and mapping */
1042 	dma_info = icm->icm_dma[index1] + index2;
1043 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
1044 	dma_info->length  = icm->span << icm->log_object_size;
1045 	dma_info->icmaddr = icm->icm_baseaddr +
1046 	    (((index1 << icm->split_shift) +
1047 	    (index2 << icm->span_shift)) << icm->log_object_size);
1048 
1049 	if (hermon_verbose) {
1050 		IBTF_DPRINTF_L2("hermon", "alloc DMA: "
1051 		    "rsrc (0x%x) index (%x, %x) "
1052 		    "icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
1053 		    (longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
1054 	}
1055 
1056 	/* Allocate and map memory for this span */
1057 	status = hermon_dma_alloc(state, dma_info, MAP_ICM);
1058 	if (status != DDI_SUCCESS) {
1059 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
1060 		    "allocation failed, status 0x%x", status);
1061 		switch (type) {
1062 		case HERMON_QPC:
1063 			HERMON_ICM_FREE(HERMON_AUXC);
1064 			HERMON_ICM_FREE(HERMON_ALTC);
1065 			HERMON_ICM_FREE(HERMON_RDB);
1066 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1067 			break;
1068 		case HERMON_SRQC:
1069 			HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1070 			break;
1071 		case HERMON_CQC:
1072 			HERMON_ICM_FREE(HERMON_CMPT_CQC);
1073 			break;
1074 		case HERMON_EQC:
1075 			HERMON_ICM_FREE(HERMON_CMPT_EQC);
1076 			break;
1077 		}
1078 
1079 		return (DDI_FAILURE);
1080 	}
1081 	if (hermon_verbose) {
1082 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
1083 		    "rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
1084 		    "icm_addr (0x%lx)", type, index1, index2, dma_info->length,
1085 		    dma_info->icmaddr);
1086 	}
1087 
1088 	/* Set the bit for this slot in the table bitmap */
1089 	HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);
1090 
1091 	return (DDI_SUCCESS);
1092 }
1093 
1094 /*
1095  * hermon_icm_free()
1096  *    Context: Can be called from base context.
1097  *
1098  * ICM resources have been successfully returned from hermon_icm_alloc().
1099  * Associated dma_info is no longer in use.  Free the ICM backing memory.
1100  */
1101 void
1102 hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
1103     uint32_t index1, uint32_t index2)
1104 {
1105 	hermon_icm_table_t	*icm;
1106 	hermon_dma_info_t	*dma_info;
1107 	int			status;
1108 
1109 	icm = &state->hs_icm[type];
1110 	ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
1111 
1112 	if (hermon_verbose) {
1113 		IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
1114 		    "index (0x%x, 0x%x)", type, index1, index2);
1115 	}
1116 
1117 	dma_info = icm->icm_dma[index1] + index2;
1118 
1119 	/* The following only happens if attach() is failing. */
1120 	if (dma_info == NULL)
1121 		return;
1122 
1123 	/* Unmap the ICM allocation, then free the backing DMA memory */
1124 	status = hermon_unmap_icm_cmd_post(state, dma_info);
1125 	if (status != DDI_SUCCESS) {
1126 		HERMON_WARNING(state, "UNMAP_ICM failure");
1127 	}
1128 	hermon_dma_free(dma_info);
1129 
1130 	/* Clear the bit in the ICM table bitmap */
1131 	HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);
1132 
1133 	switch (type) {
1134 	case HERMON_QPC:
1135 		HERMON_ICM_FREE(HERMON_AUXC);
1136 		HERMON_ICM_FREE(HERMON_ALTC);
1137 		HERMON_ICM_FREE(HERMON_RDB);
1138 		HERMON_ICM_FREE(HERMON_CMPT_QPC);
1139 		break;
1140 	case HERMON_SRQC:
1141 		HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1142 		break;
1143 	case HERMON_CQC:
1144 		HERMON_ICM_FREE(HERMON_CMPT_CQC);
1145 		break;
1146 	case HERMON_EQC:
1147 		HERMON_ICM_FREE(HERMON_CMPT_EQC);
1148 		break;
1149 
1150 	}
1151 }
1152 
1153 /*
1154  * hermon_drv_init()
1155  *    Context: Only called from attach() path context
1156  */
1157 /* ARGSUSED */
1158 static int
1159 hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
1160 {
1161 	int	status;
1162 
1163 	/*
1164 	 * Check and set the operational mode of the device. If the driver is
1165 	 * bound to the Hermon device in "maintenance mode", then this generally
1166 	 * means that either the device has been specifically jumpered to
1167 	 * start in this mode or the firmware boot process has failed to
1168 	 * successfully load either the primary or the secondary firmware
1169 	 * image.
1170 	 */
1171 	if (HERMON_IS_HCA_MODE(state->hs_dip)) {
1172 		state->hs_operational_mode = HERMON_HCA_MODE;
1173 		state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
1174 
1175 	} else if (HERMON_IS_MAINTENANCE_MODE(state->hs_dip)) {
1176 		HERMON_FMANOTE(state, HERMON_FMA_MAINT);
1177 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1178 		state->hs_fm_degraded_reason = HCA_FW_MISC; /* not fw reason */
1179 		return (DDI_FAILURE);
1180 
1181 	} else {
1182 		state->hs_operational_mode = 0;	/* invalid operational mode */
1183 		HERMON_FMANOTE(state, HERMON_FMA_PCIID);
1184 		HERMON_WARNING(state, "unexpected device type detected");
1185 		return (DDI_FAILURE);
1186 	}
1187 
1188 	/*
1189 	 * Initialize the Hermon hardware.
1190 	 *
1191 	 * Note:  If this routine returns an error, it is often a reasonably
1192 	 * good indication that something Hermon firmware-related has caused
1193 	 * the failure or some HW related errors have caused the failure.
1194 	 * (also there are few possibilities that SW (e.g. SW resource
1195 	 * shortage) can cause the failure, but the majority case is due to
1196 	 * either a firmware related error or a HW related one) In order to
1197 	 * give the user an opportunity (if desired) to update or reflash
1198 	 * the Hermon firmware image, we set "hs_operational_mode" flag
1199 	 * (described above) to indicate that we wish to enter maintenance
1200 	 * mode in case of the firmware-related issue.
1201 	 */
1202 	status = hermon_hw_init(state);
1203 	if (status != DDI_SUCCESS) {
1204 		cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
1205 		    state->hs_attach_buf);
1206 		return (DDI_FAILURE);
1207 	}
1208 
1209 	/*
1210 	 * Now that the ISR has been setup, arm all the EQs for event
1211 	 * generation.
1212 	 */
1213 
1214 	status = hermon_eq_arm_all(state);
1215 	if (status != DDI_SUCCESS) {
1216 		cmn_err(CE_NOTE, "EQ Arm All failed\n");
1217 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1218 		return (DDI_FAILURE);
1219 	}
1220 
1221 	/* test interrupts and event queues */
1222 	status = hermon_nop_post(state, 0x0, 0x0);
1223 	if (status != DDI_SUCCESS) {
1224 		cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
1225 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1226 		return (DDI_FAILURE);
1227 	}
1228 
1229 	/* Initialize Hermon softstate */
1230 	status = hermon_soft_state_init(state);
1231 	if (status != DDI_SUCCESS) {
1232 		cmn_err(CE_NOTE, "Failed to init soft state\n");
1233 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1234 		return (DDI_FAILURE);
1235 	}
1236 
1237 	return (DDI_SUCCESS);
1238 }
1239 
1240 
1241 /*
1242  * hermon_drv_fini()
1243  *    Context: Only called from attach() and/or detach() path contexts
1244  */
1245 static void
1246 hermon_drv_fini(hermon_state_t *state)
1247 {
1248 	/* Cleanup Hermon softstate */
1249 	hermon_soft_state_fini(state);
1250 
1251 	/* Cleanup Hermon resources and shutdown hardware */
1252 	hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1253 }
1254 
1255 
1256 /*
1257  * hermon_drv_fini2()
1258  *    Context: Only called from attach() and/or detach() path contexts
1259  */
1260 static void
1261 hermon_drv_fini2(hermon_state_t *state)
1262 {
1263 	if (state->hs_fm_poll_thread) {
1264 		ddi_periodic_delete(state->hs_fm_poll_thread);
1265 		state->hs_fm_poll_thread = NULL;
1266 	}
1267 
1268 	/* HERMON_DRV_CLEANUP_LEVEL1 */
1269 	if (state->hs_fm_cmdhdl) {
1270 		hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
1271 		state->hs_fm_cmdhdl = NULL;
1272 	}
1273 
1274 	if (state->hs_reg_cmdhdl) {
1275 		ddi_regs_map_free(&state->hs_reg_cmdhdl);
1276 		state->hs_reg_cmdhdl = NULL;
1277 	}
1278 
1279 	/* HERMON_DRV_CLEANUP_LEVEL0 */
1280 	if (state->hs_msix_tbl_entries) {
1281 		kmem_free(state->hs_msix_tbl_entries,
1282 		    state->hs_msix_tbl_size);
1283 		state->hs_msix_tbl_entries = NULL;
1284 	}
1285 
1286 	if (state->hs_msix_pba_entries) {
1287 		kmem_free(state->hs_msix_pba_entries,
1288 		    state->hs_msix_pba_size);
1289 		state->hs_msix_pba_entries = NULL;
1290 	}
1291 
1292 	if (state->hs_fm_msix_tblhdl) {
1293 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
1294 		state->hs_fm_msix_tblhdl = NULL;
1295 	}
1296 
1297 	if (state->hs_reg_msix_tblhdl) {
1298 		ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
1299 		state->hs_reg_msix_tblhdl = NULL;
1300 	}
1301 
1302 	if (state->hs_fm_msix_pbahdl) {
1303 		hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
1304 		state->hs_fm_msix_pbahdl = NULL;
1305 	}
1306 
1307 	if (state->hs_reg_msix_pbahdl) {
1308 		ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
1309 		state->hs_reg_msix_pbahdl = NULL;
1310 	}
1311 
1312 	if (state->hs_fm_pcihdl) {
1313 		hermon_pci_config_teardown(state, &state->hs_fm_pcihdl);
1314 		state->hs_fm_pcihdl = NULL;
1315 	}
1316 
1317 	if (state->hs_reg_pcihdl) {
1318 		pci_config_teardown(&state->hs_reg_pcihdl);
1319 		state->hs_reg_pcihdl = NULL;
1320 	}
1321 }
1322 
1323 
1324 /*
1325  * hermon_isr_init()
1326  *    Context: Only called from attach() path context
1327  */
1328 static int
1329 hermon_isr_init(hermon_state_t *state)
1330 {
1331 	int	status;
1332 	int	intr;
1333 
1334 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1335 
1336 		/*
1337 		 * Add a handler for the interrupt or MSI
1338 		 */
1339 		status = ddi_intr_add_handler(state->hs_intrmsi_hdl[intr],
1340 		    hermon_isr, (caddr_t)state, (void *)(uintptr_t)intr);
1341 		if (status  != DDI_SUCCESS) {
1342 			return (DDI_FAILURE);
1343 		}
1344 
1345 		/*
1346 		 * Enable the software interrupt.  Note: depending on the value
1347 		 * returned in the capability flag, we have to call either
1348 		 * ddi_intr_block_enable() or ddi_intr_enable().
1349 		 */
1350 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1351 			status = ddi_intr_block_enable(
1352 			    &state->hs_intrmsi_hdl[intr], 1);
1353 			if (status != DDI_SUCCESS) {
1354 				return (DDI_FAILURE);
1355 			}
1356 		} else {
1357 			status = ddi_intr_enable(state->hs_intrmsi_hdl[intr]);
1358 			if (status != DDI_SUCCESS) {
1359 				return (DDI_FAILURE);
1360 			}
1361 		}
1362 	}
1363 
1364 	/*
1365 	 * Now that the ISR has been enabled, defer arm_all  EQs for event
1366 	 * generation until later, in case MSIX is enabled
1367 	 */
1368 	return (DDI_SUCCESS);
1369 }
1370 
1371 
1372 /*
1373  * hermon_isr_fini()
1374  *    Context: Only called from attach() and/or detach() path contexts
1375  */
1376 static void
1377 hermon_isr_fini(hermon_state_t *state)
1378 {
1379 	int	intr;
1380 
1381 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1382 		/* Disable the software interrupt */
1383 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1384 			(void) ddi_intr_block_disable(
1385 			    &state->hs_intrmsi_hdl[intr], 1);
1386 		} else {
1387 			(void) ddi_intr_disable(state->hs_intrmsi_hdl[intr]);
1388 		}
1389 
1390 		/*
1391 		 * Remove the software handler for the interrupt or MSI
1392 		 */
1393 		(void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[intr]);
1394 	}
1395 }
1396 
1397 
1398 /*
1399  * Sum of ICM configured values:
1400  *     cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
1401  *
1402  */
1403 static uint64_t
1404 hermon_size_icm(hermon_state_t *state)
1405 {
1406 	hermon_hw_querydevlim_t	*devlim;
1407 	hermon_cfg_profile_t	*cfg;
1408 	uint64_t		num_cmpts, num_dmpts, num_mtts;
1409 	uint64_t		num_qpcs, num_srqc, num_rdbs;
1410 #ifndef HERMON_FW_WORKAROUND
1411 	uint64_t		num_auxc;
1412 #endif
1413 	uint64_t		num_cqcs, num_altc;
1414 	uint64_t		num_eqcs, num_mcgs;
1415 	uint64_t		size;
1416 
1417 	devlim = &state->hs_devlim;
1418 	cfg = state->hs_cfg_profile;
1419 	/* number of respective entries */
1420 	num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
1421 	num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
1422 	num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
1423 	num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
1424 	num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
1425 	num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
1426 	num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
1427 	num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1428 #ifndef HERMON_FW_WORKAROUND
1429 	num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1430 #endif
1431 	num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
1432 	num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;
1433 
1434 	size =
1435 	    num_cmpts 	* devlim->cmpt_entry_sz +
1436 	    num_dmpts	* devlim->dmpt_entry_sz +
1437 	    num_mtts	* devlim->mtt_entry_sz +
1438 	    num_qpcs	* devlim->qpc_entry_sz +
1439 	    num_srqc	* devlim->srq_entry_sz +
1440 	    num_rdbs	* devlim->rdmardc_entry_sz +
1441 	    num_cqcs	* devlim->cqc_entry_sz +
1442 	    num_altc	* devlim->altc_entry_sz +
1443 #ifdef HERMON_FW_WORKAROUND
1444 	    0x80000000ull +
1445 #else
1446 	    num_auxc	* devlim->aux_entry_sz	+
1447 #endif
1448 	    num_eqcs	* devlim->eqc_entry_sz +
1449 	    num_mcgs	* HERMON_MCGMEM_SZ(state);
1450 	return (size);
1451 }
1452 
1453 
1454 /*
1455  * hermon_hw_init()
1456  *    Context: Only called from attach() path context
1457  */
1458 static int
1459 hermon_hw_init(hermon_state_t *state)
1460 {
1461 	hermon_drv_cleanup_level_t	cleanup;
1462 	sm_nodeinfo_t			nodeinfo;
1463 	uint64_t			clr_intr_offset;
1464 	int				status;
1465 	uint32_t			fw_size;	/* in page */
1466 	uint64_t			offset;
1467 
1468 	/* This is where driver initialization begins */
1469 	cleanup = HERMON_DRV_CLEANUP_LEVEL0;
1470 
1471 	/* Setup device access attributes */
1472 	state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V1;
1473 	state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1474 	state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1475 	state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;
1476 
1477 	/* Setup fma-protected access attributes */
1478 	state->hs_fm_accattr.devacc_attr_version =
1479 	    hermon_devacc_attr_version(state);
1480 	state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1481 	state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1482 	/* set acc err protection type */
1483 	state->hs_fm_accattr.devacc_attr_access =
1484 	    hermon_devacc_attr_access(state);
1485 
1486 	/* Setup for PCI config read/write of HCA device */
1487 	status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
1488 	if (status != DDI_SUCCESS) {
1489 		hermon_hw_fini(state, cleanup);
1490 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1491 		    "hw_init_PCI_config_space_regmap_fail");
1492 		/* This case is not the degraded one */
1493 		return (DDI_FAILURE);
1494 	}
1495 
1496 	/* Map PCI config space and MSI-X tables/pba */
1497 	hermon_set_msix_info(state);
1498 
1499 	/* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
1500 	status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
1501 	    &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
1502 	    &state->hs_fm_cmdhdl);
1503 	if (status != DDI_SUCCESS) {
1504 		hermon_hw_fini(state, cleanup);
1505 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1506 		    "hw_init_CMD_BAR_regmap_fail");
1507 		/* This case is not the degraded one */
1508 		return (DDI_FAILURE);
1509 	}
1510 
1511 	cleanup = HERMON_DRV_CLEANUP_LEVEL1;
1512 	/*
1513 	 * We defer UAR-BAR mapping until later.  Need to know if
1514 	 * blueflame mapping is to be done, and don't know that until after
1515 	 * we get the dev_caps, so do it right after that
1516 	 */
1517 
1518 	/*
1519 	 * There is a third BAR defined for Hermon - it is for MSIX
1520 	 *
1521 	 * Will need to explore it's possible need/use w/ Mellanox
1522 	 * [es] Temporary mapping maybe
1523 	 */
1524 
1525 #ifdef HERMON_SUPPORTS_MSIX_BAR
1526 	status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
1527 	    &state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
1528 	    &state->hs_reg_msihdl);
1529 	if (status != DDI_SUCCESS) {
1530 		hermon_hw_fini(state, cleanup);
1531 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1532 		    "hw_init_MSIX_BAR_regmap_fail");
1533 		/* This case is not the degraded one */
1534 		return (DDI_FAILURE);
1535 	}
1536 #endif
1537 
1538 	cleanup = HERMON_DRV_CLEANUP_LEVEL2;
1539 
1540 	/*
1541 	 * Save interesting registers away. The offsets of the first two
1542 	 * here (HCR and sw_reset) are detailed in the PRM, the others are
1543 	 * derived from values in the QUERY_FW output, so we'll save them
1544 	 * off later.
1545 	 */
1546 	/* Host Command Register (HCR) */
1547 	state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
1548 	    ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
1549 	state->hs_cmd_toggle = 0;	/* initialize it for use */
1550 
1551 	/* Software Reset register (sw_reset) and semaphore */
1552 	state->hs_cmd_regs.sw_reset = (uint32_t *)
1553 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1554 	    HERMON_CMD_SW_RESET_OFFSET);
1555 	state->hs_cmd_regs.sw_semaphore = (uint32_t *)
1556 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1557 	    HERMON_CMD_SW_SEMAPHORE_OFFSET);
1558 
1559 	/* Retrieve PCI device, vendor and rev IDs */
1560 	state->hs_vendor_id	 = HERMON_GET_VENDOR_ID(state->hs_dip);
1561 	state->hs_device_id	 = HERMON_GET_DEVICE_ID(state->hs_dip);
1562 	state->hs_revision_id	 = HERMON_GET_REVISION_ID(state->hs_dip);
1563 
1564 	/* make sure init'd before we start filling things in */
1565 	bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));
1566 
1567 	/* Initialize the Phase1 configuration profile */
1568 	status = hermon_cfg_profile_init_phase1(state);
1569 	if (status != DDI_SUCCESS) {
1570 		hermon_hw_fini(state, cleanup);
1571 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1572 		    "hw_init_cfginit1_fail");
1573 		/* This case is not the degraded one */
1574 		return (DDI_FAILURE);
1575 	}
1576 	cleanup = HERMON_DRV_CLEANUP_LEVEL3;
1577 
1578 	/* Do a software reset of the adapter to ensure proper state */
1579 	status = hermon_sw_reset(state);
1580 	if (status != HERMON_CMD_SUCCESS) {
1581 		hermon_hw_fini(state, cleanup);
1582 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1583 		    "hw_init_sw_reset_fail");
1584 		/* This case is not the degraded one */
1585 		return (DDI_FAILURE);
1586 	}
1587 
1588 	/* Initialize mailboxes */
1589 	status = hermon_rsrc_init_phase1(state);
1590 	if (status != DDI_SUCCESS) {
1591 		hermon_hw_fini(state, cleanup);
1592 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1593 		    "hw_init_rsrcinit1_fail");
1594 		/* This case is not the degraded one */
1595 		return (DDI_FAILURE);
1596 	}
1597 	cleanup = HERMON_DRV_CLEANUP_LEVEL4;
1598 
1599 	/* Post QUERY_FW */
1600 	status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
1601 	    sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
1602 	if (status != HERMON_CMD_SUCCESS) {
1603 		cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
1604 		hermon_hw_fini(state, cleanup);
1605 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1606 		    "hw_init_query_fw_cmd_fail");
1607 		/* This case is not the degraded one */
1608 		return (DDI_FAILURE);
1609 	}
1610 
1611 	/* Validate what/that HERMON FW version is appropriate */
1612 
1613 	status = hermon_fw_version_check(state);
1614 	if (status != DDI_SUCCESS) {
1615 		HERMON_FMANOTE(state, HERMON_FMA_FWVER);
1616 		if (state->hs_operational_mode == HERMON_HCA_MODE) {
1617 			cmn_err(CE_CONT, "Unsupported Hermon FW version: "
1618 			    "expected: %04d.%04d.%04d, "
1619 			    "actual: %04d.%04d.%04d\n",
1620 			    HERMON_FW_VER_MAJOR,
1621 			    HERMON_FW_VER_MINOR,
1622 			    HERMON_FW_VER_SUBMINOR,
1623 			    state->hs_fw.fw_rev_major,
1624 			    state->hs_fw.fw_rev_minor,
1625 			    state->hs_fw.fw_rev_subminor);
1626 		} else {
1627 			cmn_err(CE_CONT, "Unsupported FW version: "
1628 			    "%04d.%04d.%04d\n",
1629 			    state->hs_fw.fw_rev_major,
1630 			    state->hs_fw.fw_rev_minor,
1631 			    state->hs_fw.fw_rev_subminor);
1632 		}
1633 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1634 		state->hs_fm_degraded_reason = HCA_FW_MISMATCH;
1635 		hermon_hw_fini(state, cleanup);
1636 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1637 		    "hw_init_checkfwver_fail");
1638 		/* This case is the degraded one */
1639 		return (HERMON_CMD_BAD_NVMEM);
1640 	}
1641 
1642 	/*
1643 	 * Save off the rest of the interesting registers that we'll be using.
1644 	 * Setup the offsets for the other registers.
1645 	 */
1646 
1647 	/*
1648 	 * Hermon does the intr_offset from the BAR - technically should get the
1649 	 * BAR info from the response, but PRM says it's from BAR0-1, which is
1650 	 * for us the CMD BAR
1651 	 */
1652 
1653 	clr_intr_offset	 = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
1654 
1655 	/* Save Clear Interrupt address */
1656 	state->hs_cmd_regs.clr_intr = (uint64_t *)
1657 	    (uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);
1658 
1659 	/*
1660 	 * Set the error buffer also into the structure - used in hermon_event.c
1661 	 * to check for internal error on the HCA, not reported in eqe or
1662 	 * (necessarily) by interrupt
1663 	 */
1664 	state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
1665 	    (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
1666 
1667 	/*
1668 	 * Invoke a polling thread to check the error buffer periodically.
1669 	 */
1670 	if (!hermon_no_inter_err_chk) {
1671 		state->hs_fm_poll_thread = ddi_periodic_add(
1672 		    hermon_inter_err_chk, (void *)state, FM_POLL_INTERVAL,
1673 		    DDI_IPL_0);
1674 	}
1675 
1676 	cleanup = HERMON_DRV_CLEANUP_LEVEL5;
1677 
1678 	/*
1679 	 * Allocate, map, and run the HCA Firmware.
1680 	 */
1681 
1682 	/* Allocate memory for the firmware to load into and map it */
1683 
1684 	/* get next higher power of 2 */
1685 	fw_size = 1 << highbit(state->hs_fw.fw_pages);
1686 	state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
1687 	status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
1688 	if (status != DDI_SUCCESS) {
1689 		cmn_err(CE_NOTE, "FW alloc failed\n");
1690 		hermon_hw_fini(state, cleanup);
1691 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1692 		    "hw_init_dma_alloc_fw_fail");
1693 		/* This case is not the degraded one */
1694 		return (DDI_FAILURE);
1695 	}
1696 
1697 	cleanup = HERMON_DRV_CLEANUP_LEVEL6;
1698 
1699 	/* Invoke the RUN_FW cmd to run the firmware */
1700 	status = hermon_run_fw_cmd_post(state);
1701 	if (status != DDI_SUCCESS) {
1702 		cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
1703 		if (status == HERMON_CMD_BAD_NVMEM) {
1704 			state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1705 			state->hs_fm_degraded_reason = HCA_FW_CORRUPT;
1706 		}
1707 		hermon_hw_fini(state, cleanup);
1708 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
1709 		/*
1710 		 * If the status is HERMON_CMD_BAD_NVMEM, it's likely the
1711 		 * firmware is corrupted, so the mode falls into the
1712 		 * maintenance mode.
1713 		 */
1714 		return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
1715 		    DDI_FAILURE);
1716 	}
1717 
1718 
1719 	/*
1720 	 * QUERY DEVICE LIMITS/CAPABILITIES
1721 	 * NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
1722 	 * but for familiarity we have kept the structure name the
1723 	 * same as Tavor/Arbel
1724 	 */
1725 
1726 	status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
1727 	    &state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
1728 	    HERMON_CMD_NOSLEEP_SPIN);
1729 	if (status != HERMON_CMD_SUCCESS) {
1730 		cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
1731 		    status);
1732 		hermon_hw_fini(state, cleanup);
1733 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
1734 		/* This case is not the degraded one */
1735 		return (DDI_FAILURE);
1736 	}
1737 
1738 	state->hs_devlim.num_rsvd_eq = max(state->hs_devlim.num_rsvd_eq,
1739 	    (4 * state->hs_devlim.num_rsvd_uar));	/* lesser of resvd's */
1740 
1741 	/* now we have enough info to map in the UAR BAR */
1742 	/*
1743 	 * First, we figure out how to map the BAR for UAR - use only half if
1744 	 * BlueFlame is enabled - in that case the mapped length is 1/2 the
1745 	 * log_max_uar_sz (max__uar - 1) * 1MB ( +20).
1746 	 */
1747 
1748 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1749 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1750 	} else {
1751 		offset = 0;	/* a zero length means map the whole thing */
1752 	}
1753 	status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
1754 	    &state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
1755 	    &state->hs_fm_uarhdl);
1756 	if (status != DDI_SUCCESS) {
1757 		HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
1758 		/* This case is not the degraded one */
1759 		return (DDI_FAILURE);
1760 	}
1761 
1762 	/* and if BlueFlame is enabled, map the other half there */
1763 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1764 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1765 		status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
1766 		    &state->hs_reg_bf_baseaddr, offset, offset,
1767 		    &state->hs_reg_accattr, &state->hs_reg_bfhdl);
1768 		if (status != DDI_SUCCESS) {
1769 			HERMON_ATTACH_MSG(state->hs_attach_buf,
1770 			    "BlueFlame BAR mapping");
1771 			/* This case is not the degraded one */
1772 			return (DDI_FAILURE);
1773 		}
1774 		/* This will be used in hw_fini if we fail to init. */
1775 		state->hs_bf_offset = offset;
1776 	}
1777 	cleanup = HERMON_DRV_CLEANUP_LEVEL7;
1778 
1779 	/* Hermon has a couple of things needed for phase 2 in query port */
1780 
1781 	status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
1782 	    &state->hs_queryport, sizeof (hermon_hw_query_port_t),
1783 	    HERMON_CMD_NOSLEEP_SPIN);
1784 	if (status != HERMON_CMD_SUCCESS) {
1785 		cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
1786 		    status);
1787 		hermon_hw_fini(state, cleanup);
1788 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1789 		    "hw_init_queryport_fail");
1790 		/* This case is not the degraded one */
1791 		return (DDI_FAILURE);
1792 	}
1793 
1794 	/* Initialize the Phase2 Hermon configuration profile */
1795 	status = hermon_cfg_profile_init_phase2(state);
1796 	if (status != DDI_SUCCESS) {
1797 		cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
1798 		hermon_hw_fini(state, cleanup);
1799 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1800 		    "hw_init_cfginit2_fail");
1801 		/* This case is not the degraded one */
1802 		return (DDI_FAILURE);
1803 	}
1804 
1805 	/* Determine and set the ICM size */
1806 	state->hs_icm_sz = hermon_size_icm(state);
1807 	status		 = hermon_set_icm_size_cmd_post(state);
1808 	if (status != DDI_SUCCESS) {
1809 		cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
1810 		    status);
1811 		hermon_hw_fini(state, cleanup);
1812 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1813 		    "hw_init_seticmsz_fail");
1814 		/* This case is not the degraded one */
1815 		return (DDI_FAILURE);
1816 	}
1817 	/* alloc icm aux physical memory and map it */
1818 
1819 	state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);
1820 
1821 	status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
1822 	if (status != DDI_SUCCESS) {
1823 		cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
1824 		    (longlong_t)state->hs_icma_dma.length);
1825 		hermon_hw_fini(state, cleanup);
1826 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1827 		    "hw_init_dma_alloc_icm_aux_fail");
1828 		/* This case is not the degraded one */
1829 		return (DDI_FAILURE);
1830 	}
1831 	cleanup = HERMON_DRV_CLEANUP_LEVEL8;
1832 
1833 	cleanup = HERMON_DRV_CLEANUP_LEVEL9;
1834 
1835 	/* Allocate an array of structures to house the ICM tables */
1836 	state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
1837 	    sizeof (hermon_icm_table_t), KM_SLEEP);
1838 
1839 	/* Set up the ICM address space and the INIT_HCA command input */
1840 	status = hermon_icm_config_setup(state, &state->hs_hcaparams);
1841 	if (status != HERMON_CMD_SUCCESS) {
1842 		cmn_err(CE_NOTE, "ICM configuration failed\n");
1843 		hermon_hw_fini(state, cleanup);
1844 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1845 		    "hw_init_icm_config_setup_fail");
1846 		/* This case is not the degraded one */
1847 		return (DDI_FAILURE);
1848 	}
1849 	cleanup = HERMON_DRV_CLEANUP_LEVEL10;
1850 
1851 	/* Initialize the adapter with the INIT_HCA cmd */
1852 	status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
1853 	    HERMON_CMD_NOSLEEP_SPIN);
1854 	if (status != HERMON_CMD_SUCCESS) {
1855 		cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
1856 		hermon_hw_fini(state, cleanup);
1857 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
1858 		/* This case is not the degraded one */
1859 		return (DDI_FAILURE);
1860 	}
1861 	cleanup = HERMON_DRV_CLEANUP_LEVEL11;
1862 
1863 	/* Enter the second phase of init for Hermon configuration/resources */
1864 	status = hermon_rsrc_init_phase2(state);
1865 	if (status != DDI_SUCCESS) {
1866 		hermon_hw_fini(state, cleanup);
1867 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1868 		    "hw_init_rsrcinit2_fail");
1869 		/* This case is not the degraded one */
1870 		return (DDI_FAILURE);
1871 	}
1872 	cleanup = HERMON_DRV_CLEANUP_LEVEL12;
1873 
1874 	/* Query the adapter via QUERY_ADAPTER */
1875 	status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
1876 	    &state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
1877 	    HERMON_CMD_NOSLEEP_SPIN);
1878 	if (status != HERMON_CMD_SUCCESS) {
1879 		cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
1880 		    status);
1881 		hermon_hw_fini(state, cleanup);
1882 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1883 		    "hw_init_query_adapter_fail");
1884 		/* This case is not the degraded one */
1885 		return (DDI_FAILURE);
1886 	}
1887 
1888 	/* Allocate protection domain (PD) for Hermon internal use */
1889 	status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
1890 	    HERMON_SLEEP);
1891 	if (status != DDI_SUCCESS) {
1892 		cmn_err(CE_NOTE, "failed to alloc internal PD\n");
1893 		hermon_hw_fini(state, cleanup);
1894 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1895 		    "hw_init_internal_pd_alloc_fail");
1896 		/* This case is not the degraded one */
1897 		return (DDI_FAILURE);
1898 	}
1899 	cleanup = HERMON_DRV_CLEANUP_LEVEL13;
1900 
1901 	/* Setup UAR page for kernel use */
1902 	status = hermon_internal_uarpg_init(state);
1903 	if (status != DDI_SUCCESS) {
1904 		cmn_err(CE_NOTE, "failed to setup internal UAR\n");
1905 		hermon_hw_fini(state, cleanup);
1906 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1907 		    "hw_init_internal_uarpg_alloc_fail");
1908 		/* This case is not the degraded one */
1909 		return (DDI_FAILURE);
1910 	}
1911 	cleanup = HERMON_DRV_CLEANUP_LEVEL14;
1912 
1913 	/* Query and initialize the Hermon interrupt/MSI information */
1914 	status = hermon_intr_or_msi_init(state);
1915 	if (status != DDI_SUCCESS) {
1916 		cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
1917 		hermon_hw_fini(state, cleanup);
1918 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1919 		    "hw_init_intr_or_msi_init_fail");
1920 		/* This case is not the degraded one */
1921 		return (DDI_FAILURE);
1922 	}
1923 	cleanup = HERMON_DRV_CLEANUP_LEVEL15;
1924 
1925 	status = hermon_isr_init(state);	/* set up the isr */
1926 	if (status != DDI_SUCCESS) {
1927 		cmn_err(CE_NOTE, "failed to init isr\n");
1928 		hermon_hw_fini(state, cleanup);
1929 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1930 		    "hw_init_isrinit_fail");
1931 		/* This case is not the degraded one */
1932 		return (DDI_FAILURE);
1933 	}
1934 	cleanup = HERMON_DRV_CLEANUP_LEVEL16;
1935 
1936 	/* Setup the event queues */
1937 	status = hermon_eq_init_all(state);
1938 	if (status != DDI_SUCCESS) {
1939 		cmn_err(CE_NOTE, "failed to init EQs\n");
1940 		hermon_hw_fini(state, cleanup);
1941 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1942 		    "hw_init_eqinitall_fail");
1943 		/* This case is not the degraded one */
1944 		return (DDI_FAILURE);
1945 	}
1946 	cleanup = HERMON_DRV_CLEANUP_LEVEL17;
1947 
1948 
1949 
1950 	/* Reserve contexts for QP0 and QP1 */
1951 	status = hermon_special_qp_contexts_reserve(state);
1952 	if (status != DDI_SUCCESS) {
1953 		cmn_err(CE_NOTE, "failed to init special QPs\n");
1954 		hermon_hw_fini(state, cleanup);
1955 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1956 		    "hw_init_rsrv_sqp_fail");
1957 		/* This case is not the degraded one */
1958 		return (DDI_FAILURE);
1959 	}
1960 	cleanup = HERMON_DRV_CLEANUP_LEVEL18;
1961 
1962 	/* Initialize for multicast group handling */
1963 	status = hermon_mcg_init(state);
1964 	if (status != DDI_SUCCESS) {
1965 		cmn_err(CE_NOTE, "failed to init multicast\n");
1966 		hermon_hw_fini(state, cleanup);
1967 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1968 		    "hw_init_mcg_init_fail");
1969 		/* This case is not the degraded one */
1970 		return (DDI_FAILURE);
1971 	}
1972 	cleanup = HERMON_DRV_CLEANUP_LEVEL19;
1973 
1974 	/* Initialize the Hermon IB port(s) */
1975 	status = hermon_hca_port_init(state);
1976 	if (status != DDI_SUCCESS) {
1977 		cmn_err(CE_NOTE, "failed to init HCA Port\n");
1978 		hermon_hw_fini(state, cleanup);
1979 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1980 		    "hw_init_hca_port_init_fail");
1981 		/* This case is not the degraded one */
1982 		return (DDI_FAILURE);
1983 	}
1984 
1985 	cleanup = HERMON_DRV_CLEANUP_ALL;
1986 
1987 	/* Determine NodeGUID and SystemImageGUID */
1988 	status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
1989 	    &nodeinfo);
1990 	if (status != HERMON_CMD_SUCCESS) {
1991 		cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
1992 		hermon_hw_fini(state, cleanup);
1993 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1994 		    "hw_init_getnodeinfo_cmd_fail");
1995 		/* This case is not the degraded one */
1996 		return (DDI_FAILURE);
1997 	}
1998 
1999 	/*
2000 	 * If the NodeGUID value was set in OBP properties, then we use that
2001 	 * value.  But we still print a message if the value we queried from
2002 	 * firmware does not match this value.
2003 	 *
2004 	 * Otherwise if OBP value is not set then we use the value from
2005 	 * firmware unconditionally.
2006 	 */
2007 	if (state->hs_cfg_profile->cp_nodeguid) {
2008 		state->hs_nodeguid   = state->hs_cfg_profile->cp_nodeguid;
2009 	} else {
2010 		state->hs_nodeguid = nodeinfo.NodeGUID;
2011 	}
2012 
2013 	if (state->hs_nodeguid != nodeinfo.NodeGUID) {
2014 		cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
2015 		    "does not match value set by device property");
2016 	}
2017 
2018 	/*
2019 	 * If the SystemImageGUID value was set in OBP properties, then we use
2020 	 * that value.  But we still print a message if the value we queried
2021 	 * from firmware does not match this value.
2022 	 *
2023 	 * Otherwise if OBP value is not set then we use the value from
2024 	 * firmware unconditionally.
2025 	 */
2026 	if (state->hs_cfg_profile->cp_sysimgguid) {
2027 		state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
2028 	} else {
2029 		state->hs_sysimgguid = nodeinfo.SystemImageGUID;
2030 	}
2031 
2032 	if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
2033 		cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
2034 		    "does not match value set by device property");
2035 	}
2036 
2037 	/* Get NodeDescription */
2038 	status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
2039 	    (sm_nodedesc_t *)&state->hs_nodedesc);
2040 	if (status != HERMON_CMD_SUCCESS) {
2041 		cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
2042 		hermon_hw_fini(state, cleanup);
2043 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2044 		    "hw_init_getnodedesc_cmd_fail");
2045 		/* This case is not the degraded one */
2046 		return (DDI_FAILURE);
2047 	}
2048 
2049 	return (DDI_SUCCESS);
2050 }
2051 
2052 
2053 /*
2054  * hermon_hw_fini()
2055  *    Context: Only called from attach() and/or detach() path contexts
2056  */
2057 static void
2058 hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
2059 {
2060 	uint_t		num_ports;
2061 	int		i, status;
2062 
2063 
2064 	/*
2065 	 * JBDB - We might not want to run these returns in all cases of
2066 	 * Bad News. We should still attempt to free all of the DMA memory
2067 	 * resources...  This needs to be worked last, after all allocations
2068 	 * are implemented. For now, and possibly for later, this works.
2069 	 */
2070 
2071 	switch (cleanup) {
2072 	/*
2073 	 * If we add more driver initialization steps that should be cleaned
2074 	 * up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
2075 	 * first entry (i.e. corresponds to the last init step).
2076 	 */
2077 	case HERMON_DRV_CLEANUP_ALL:
2078 		/* Shutdown the Hermon IB port(s) */
2079 		num_ports = state->hs_cfg_profile->cp_num_ports;
2080 		(void) hermon_hca_ports_shutdown(state, num_ports);
2081 		/* FALLTHROUGH */
2082 
2083 	case HERMON_DRV_CLEANUP_LEVEL19:
2084 		/* Teardown resources used for multicast group handling */
2085 		hermon_mcg_fini(state);
2086 		/* FALLTHROUGH */
2087 
2088 	case HERMON_DRV_CLEANUP_LEVEL18:
2089 		/* Unreserve the special QP contexts */
2090 		hermon_special_qp_contexts_unreserve(state);
2091 		/* FALLTHROUGH */
2092 
2093 	case HERMON_DRV_CLEANUP_LEVEL17:
2094 		/*
2095 		 * Attempt to teardown all event queues (EQ).  If we fail
2096 		 * here then print a warning message and return.  Something
2097 		 * (either in HW or SW) has gone seriously wrong.
2098 		 */
2099 		status = hermon_eq_fini_all(state);
2100 		if (status != DDI_SUCCESS) {
2101 			HERMON_WARNING(state, "failed to teardown EQs");
2102 			return;
2103 		}
2104 		/* FALLTHROUGH */
2105 	case HERMON_DRV_CLEANUP_LEVEL16:
2106 		/* Teardown Hermon interrupts */
2107 		hermon_isr_fini(state);
2108 		/* FALLTHROUGH */
2109 
2110 	case HERMON_DRV_CLEANUP_LEVEL15:
2111 		status = hermon_intr_or_msi_fini(state);
2112 		if (status != DDI_SUCCESS) {
2113 			HERMON_WARNING(state, "failed to free intr/MSI");
2114 			return;
2115 		}
2116 		/* FALLTHROUGH */
2117 
2118 	case HERMON_DRV_CLEANUP_LEVEL14:
2119 		/* Free the resources for the Hermon internal UAR pages */
2120 		hermon_internal_uarpg_fini(state);
2121 		/* FALLTHROUGH */
2122 
2123 	case HERMON_DRV_CLEANUP_LEVEL13:
2124 		/*
2125 		 * Free the PD that was used internally by Hermon software.  If
2126 		 * we fail here then print a warning and return.  Something
2127 		 * (probably software-related, but perhaps HW) has gone wrong.
2128 		 */
2129 		status = hermon_pd_free(state, &state->hs_pdhdl_internal);
2130 		if (status != DDI_SUCCESS) {
2131 			HERMON_WARNING(state, "failed to free internal PD");
2132 			return;
2133 		}
2134 		/* FALLTHROUGH */
2135 
2136 	case HERMON_DRV_CLEANUP_LEVEL12:
2137 		/* Cleanup all the phase2 resources first */
2138 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
2139 		/* FALLTHROUGH */
2140 
2141 	case HERMON_DRV_CLEANUP_LEVEL11:
2142 		/* LEVEL11 is after INIT_HCA */
2143 		/* FALLTHROUGH */
2144 
2145 
2146 	case HERMON_DRV_CLEANUP_LEVEL10:
2147 		/*
2148 		 * Unmap the ICM memory area with UNMAP_ICM command.
2149 		 */
2150 		status = hermon_unmap_icm_cmd_post(state, NULL);
2151 		if (status != DDI_SUCCESS) {
2152 			cmn_err(CE_WARN,
2153 			    "hermon_hw_fini: failed to unmap ICM\n");
2154 		}
2155 
2156 		/* Free the initial ICM DMA handles */
2157 		hermon_icm_dma_fini(state);
2158 
2159 		/* Free the ICM table structures */
2160 		hermon_icm_tables_fini(state);
2161 
2162 		/* Free the ICM table handles */
2163 		kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
2164 		    sizeof (hermon_icm_table_t));
2165 
2166 		/* FALLTHROUGH */
2167 
2168 	case HERMON_DRV_CLEANUP_LEVEL9:
2169 		/*
2170 		 * Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
2171 		 */
2172 		status = hermon_unmap_icm_aux_cmd_post(state);
2173 		if (status != HERMON_CMD_SUCCESS) {
2174 			cmn_err(CE_NOTE,
2175 			    "hermon_hw_fini: failed to unmap ICMA\n");
2176 		}
2177 		/* FALLTHROUGH */
2178 
2179 	case HERMON_DRV_CLEANUP_LEVEL8:
2180 		/*
2181 		 * Deallocate ICM Aux DMA memory.
2182 		 */
2183 		hermon_dma_free(&state->hs_icma_dma);
2184 		/* FALLTHROUGH */
2185 
2186 	case HERMON_DRV_CLEANUP_LEVEL7:
2187 		if (state->hs_fm_uarhdl) {
2188 			hermon_regs_map_free(state, &state->hs_fm_uarhdl);
2189 			state->hs_fm_uarhdl = NULL;
2190 		}
2191 
2192 		if (state->hs_reg_uarhdl) {
2193 			ddi_regs_map_free(&state->hs_reg_uarhdl);
2194 			state->hs_reg_uarhdl = NULL;
2195 		}
2196 
2197 		if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
2198 			ddi_regs_map_free(&state->hs_reg_bfhdl);
2199 			state->hs_reg_bfhdl = NULL;
2200 		}
2201 
2202 		for (i = 0; i < HERMON_MAX_PORTS; i++) {
2203 			if (state->hs_pkey[i]) {
2204 				kmem_free(state->hs_pkey[i], (1 <<
2205 				    state->hs_cfg_profile->cp_log_max_pkeytbl) *
2206 				    sizeof (ib_pkey_t));
2207 				state->hs_pkey[i] = NULL;
2208 			}
2209 			if (state->hs_guid[i]) {
2210 				kmem_free(state->hs_guid[i], (1 <<
2211 				    state->hs_cfg_profile->cp_log_max_gidtbl) *
2212 				    sizeof (ib_guid_t));
2213 				state->hs_guid[i] = NULL;
2214 			}
2215 		}
2216 		/* FALLTHROUGH */
2217 
2218 	case HERMON_DRV_CLEANUP_LEVEL6:
2219 		/*
2220 		 * Unmap the firmware memory area with UNMAP_FA command.
2221 		 */
2222 		status = hermon_unmap_fa_cmd_post(state);
2223 
2224 		if (status != HERMON_CMD_SUCCESS) {
2225 			cmn_err(CE_NOTE,
2226 			    "hermon_hw_fini: failed to unmap FW\n");
2227 		}
2228 
2229 		/*
2230 		 * Deallocate firmware DMA memory.
2231 		 */
2232 		hermon_dma_free(&state->hs_fw_dma);
2233 		/* FALLTHROUGH */
2234 
2235 	case HERMON_DRV_CLEANUP_LEVEL5:
2236 		/* stop the poll thread */
2237 		if (state->hs_fm_poll_thread) {
2238 			ddi_periodic_delete(state->hs_fm_poll_thread);
2239 			state->hs_fm_poll_thread = NULL;
2240 		}
2241 		/* FALLTHROUGH */
2242 
2243 	case HERMON_DRV_CLEANUP_LEVEL4:
2244 		/* Then cleanup the phase1 resources */
2245 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
2246 		/* FALLTHROUGH */
2247 
2248 	case HERMON_DRV_CLEANUP_LEVEL3:
2249 		/* Teardown any resources allocated for the config profile */
2250 		hermon_cfg_profile_fini(state);
2251 		/* FALLTHROUGH */
2252 
2253 	case HERMON_DRV_CLEANUP_LEVEL2:
2254 #ifdef HERMON_SUPPORTS_MSIX_BAR
2255 		/*
2256 		 * unmap 3rd BAR, MSIX BAR
2257 		 */
2258 		if (state->hs_reg_msihdl) {
2259 			ddi_regs_map_free(&state->hs_reg_msihdl);
2260 			state->hs_reg_msihdl = NULL;
2261 		}
2262 		/* FALLTHROUGH */
2263 #endif
2264 	case HERMON_DRV_CLEANUP_LEVEL1:
2265 	case HERMON_DRV_CLEANUP_LEVEL0:
2266 		/*
2267 		 * LEVEL1 and LEVEL0 resources are freed in
2268 		 * hermon_drv_fini2().
2269 		 */
2270 		break;
2271 
2272 	default:
2273 		HERMON_WARNING(state, "unexpected driver cleanup level");
2274 		return;
2275 	}
2276 }
2277 
2278 
2279 /*
2280  * hermon_soft_state_init()
2281  *    Context: Only called from attach() path context
2282  */
2283 static int
2284 hermon_soft_state_init(hermon_state_t *state)
2285 {
2286 	ibt_hca_attr_t		*hca_attr;
2287 	uint64_t		maxval, val;
2288 	ibt_hca_flags_t		caps = IBT_HCA_NO_FLAGS;
2289 	ibt_hca_flags2_t	caps2 = IBT_HCA2_NO_FLAGS;
2290 	int			status;
2291 	int			max_send_wqe_bytes;
2292 	int			max_recv_wqe_bytes;
2293 
2294 	/*
2295 	 * The ibc_hca_info_t struct is passed to the IBTF.  This is the
2296 	 * routine where we initialize it.  Many of the init values come from
2297 	 * either configuration variables or successful queries of the Hermon
2298 	 * hardware abilities
2299 	 */
2300 	state->hs_ibtfinfo.hca_ci_vers	= IBCI_V3;
2301 	state->hs_ibtfinfo.hca_dip	= state->hs_dip;
2302 	state->hs_ibtfinfo.hca_handle	= (ibc_hca_hdl_t)state;
2303 	state->hs_ibtfinfo.hca_ops	= &hermon_ibc_ops;
2304 
2305 	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
2306 	state->hs_ibtfinfo.hca_attr = hca_attr;
2307 
2308 	hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
2309 	hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
2310 	hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;
2311 
2312 	/* CQ interrupt moderation maximums - each limited to 16 bits */
2313 	hca_attr->hca_max_cq_mod_count = 0xFFFF;
2314 	hca_attr->hca_max_cq_mod_usec = 0xFFFF;
2315 
2316 	/* CQ relocation to other EQs - change when multiple MSI-Xs are used */
2317 	hca_attr->hca_max_cq_handlers = 1;
2318 
2319 	/*
2320 	 * Determine HCA capabilities:
2321 	 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
2322 	 *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
2323 	 *    or IBT_HCA_SHUTDOWN_PORT
2324 	 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
2325 	 *    IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
2326 	 *    IBT_HCA_SRQ, IBT_HCA_RESIZE_SRQ and IBT_HCA_FMR are always
2327 	 *    supported
2328 	 * All other features are conditionally supported, depending on the
2329 	 *    status return by the Hermon HCA in QUERY_DEV_LIM.
2330 	 */
2331 	if (state->hs_devlim.ud_multi) {
2332 		caps |= IBT_HCA_UD_MULTICAST;
2333 	}
2334 	if (state->hs_devlim.atomic) {
2335 		caps |= IBT_HCA_ATOMICS_HCA;
2336 	}
2337 	if (state->hs_devlim.apm) {
2338 		caps |= IBT_HCA_AUTO_PATH_MIG;
2339 	}
2340 	if (state->hs_devlim.pkey_v) {
2341 		caps |= IBT_HCA_PKEY_CNTR;
2342 	}
2343 	if (state->hs_devlim.qkey_v) {
2344 		caps |= IBT_HCA_QKEY_CNTR;
2345 	}
2346 	if (state->hs_devlim.ipoib_cksm) {
2347 		caps |= IBT_HCA_CKSUM_FULL;
2348 		caps2 |= IBT_HCA2_IP_CLASS;
2349 	}
2350 	if (state->hs_devlim.mod_wr_srq) {
2351 		caps |= IBT_HCA_RESIZE_SRQ;
2352 	}
2353 	if (state->hs_devlim.lif) {
2354 		caps |= IBT_HCA_LOCAL_INVAL_FENCE;
2355 	}
2356 	if (state->hs_devlim.reserved_lkey) {
2357 		caps2 |= IBT_HCA2_RES_LKEY;
2358 		hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
2359 	}
2360 	if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
2361 	    state->hs_devlim.fast_reg_wr) {	/* fw needs to be >= 2.6.636 */
2362 		if (state->hs_fw.fw_rev_major > 2)
2363 			caps2 |= IBT_HCA2_MEM_MGT_EXT;
2364 		else if (state->hs_fw.fw_rev_major == 2)
2365 			if (state->hs_fw.fw_rev_minor > 6)
2366 				caps2 |= IBT_HCA2_MEM_MGT_EXT;
2367 			else if (state->hs_fw.fw_rev_minor == 6)
2368 				if (state->hs_fw.fw_rev_subminor >= 636)
2369 					caps2 |= IBT_HCA2_MEM_MGT_EXT;
2370 	}
2371 	if (state->hs_devlim.mps) {
2372 		caps |= IBT_HCA_ZERO_BASED_VA;
2373 	}
2374 	if (state->hs_devlim.zb) {
2375 		caps |= IBT_HCA_MULT_PAGE_SZ_MR;
2376 	}
2377 	caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
2378 	    IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
2379 	    IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
2380 
2381 	if (state->hs_devlim.log_max_gso_sz) {
2382 		hca_attr->hca_max_lso_size =
2383 		    (1 << state->hs_devlim.log_max_gso_sz);
2384 		/* 64 = ctrl & datagram seg, 4 = LSO seg, 16 = 1 SGL */
2385 		hca_attr->hca_max_lso_hdr_size =
2386 		    state->hs_devlim.max_desc_sz_sq - (64 + 4 + 16);
2387 	}
2388 
2389 	caps |= IBT_HCA_WQE_SIZE_INFO;
2390 	max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
2391 	max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
2392 	hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
2393 	hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
2394 	hca_attr->hca_conn_rdma_sgl_overhead = 1;
2395 	hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;
2396 
2397 	/* We choose not to support "inline" unless it improves performance */
2398 	hca_attr->hca_max_inline_size = 0;
2399 	hca_attr->hca_ud_send_inline_sz = 0;
2400 	hca_attr->hca_conn_send_inline_sz = 0;
2401 	hca_attr->hca_conn_rdmaw_inline_overhead = 4;
2402 
2403 	hca_attr->hca_flags = caps;
2404 	hca_attr->hca_flags2 = caps2;
2405 
2406 	/*
2407 	 * Set hca_attr's IDs
2408 	 */
2409 	hca_attr->hca_vendor_id	 = state->hs_vendor_id;
2410 	hca_attr->hca_device_id	 = state->hs_device_id;
2411 	hca_attr->hca_version_id = state->hs_revision_id;
2412 
2413 	/*
2414 	 * Determine number of available QPs and max QP size.  Number of
2415 	 * available QPs is determined by subtracting the number of
2416 	 * "reserved QPs" (i.e. reserved for firmware use) from the
2417 	 * total number configured.
2418 	 */
2419 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2420 	hca_attr->hca_max_qp = val - ((uint64_t)1 <<
2421 	    state->hs_devlim.log_rsvd_qp);
2422 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
2423 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
2424 	if (val > maxval) {
2425 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2426 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2427 		    "soft_state_init_maxqpsz_toobig_fail");
2428 		return (DDI_FAILURE);
2429 	}
2430 	/* we need to reduce this by the max space needed for headroom */
2431 	hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
2432 	    HERMON_QP_WQE_LOG_MINIMUM) - 1;
2433 
2434 	/*
2435 	 * Determine max scatter-gather size in WQEs. The HCA has split
2436 	 * the max sgl into rec'v Q and send Q values. Use the least.
2437 	 *
2438 	 * This is mainly useful for legacy clients.  Smart clients
2439 	 * such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
2440 	 */
2441 	if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
2442 		maxval = state->hs_devlim.max_sg_rq;
2443 	} else {
2444 		maxval = state->hs_devlim.max_sg_sq;
2445 	}
2446 	val	= state->hs_cfg_profile->cp_wqe_max_sgl;
2447 	if (val > maxval) {
2448 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2449 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2450 		    "soft_state_init_toomanysgl_fail");
2451 		return (DDI_FAILURE);
2452 	}
2453 	/* If the rounded value for max SGL is too large, cap it */
2454 	if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
2455 		state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
2456 		val = maxval;
2457 	} else {
2458 		val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
2459 	}
2460 
2461 	hca_attr->hca_max_sgl	 = (uint_t)val;
2462 	hca_attr->hca_max_rd_sgl = 0;	/* zero because RD is unsupported */
2463 
2464 	/*
2465 	 * Determine number of available CQs and max CQ size. Number of
2466 	 * available CQs is determined by subtracting the number of
2467 	 * "reserved CQs" (i.e. reserved for firmware use) from the
2468 	 * total number configured.
2469 	 */
2470 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
2471 	hca_attr->hca_max_cq = val - ((uint64_t)1 <<
2472 	    state->hs_devlim.log_rsvd_cq);
2473 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
2474 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
2475 	if (val > maxval) {
2476 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2477 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2478 		    "soft_state_init_maxcqsz_toobig_fail");
2479 		return (DDI_FAILURE);
2480 	}
2481 	hca_attr->hca_max_cq_sz = (uint_t)val;
2482 
2483 	/*
2484 	 * Determine number of available SRQs and max SRQ size. Number of
2485 	 * available SRQs is determined by subtracting the number of
2486 	 * "reserved SRQs" (i.e. reserved for firmware use) from the
2487 	 * total number configured.
2488 	 */
2489 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
2490 	hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
2491 	    state->hs_devlim.log_rsvd_srq);
2492 	maxval  = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
2493 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);
2494 
2495 	if (val > maxval) {
2496 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2497 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2498 		    "soft_state_init_maxsrqsz_toobig_fail");
2499 		return (DDI_FAILURE);
2500 	}
2501 	hca_attr->hca_max_srqs_sz = (uint_t)val;
2502 
2503 	val	= hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
2504 	maxval	= state->hs_devlim.max_sg_rq - 1;
2505 	if (val > maxval) {
2506 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2507 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2508 		    "soft_state_init_toomanysrqsgl_fail");
2509 		return (DDI_FAILURE);
2510 	}
2511 	hca_attr->hca_max_srq_sgl = (uint_t)val;
2512 
2513 	/*
2514 	 * Determine supported HCA page sizes
2515 	 * XXX
2516 	 * For now we simply return the system pagesize as the only supported
2517 	 * pagesize
2518 	 */
2519 	hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
2520 	    IBT_PAGE_4K);
2521 
2522 	/*
2523 	 * Determine number of available MemReg, MemWin, and their max size.
2524 	 * Number of available MRs and MWs is determined by subtracting
2525 	 * the number of "reserved MPTs" (i.e. reserved for firmware use)
2526 	 * from the total number configured for each.
2527 	 */
2528 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
2529 	hca_attr->hca_max_memr	  = val - ((uint64_t)1 <<
2530 	    state->hs_devlim.log_rsvd_dmpt);
2531 	hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
2532 	    ((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
2533 	maxval	= state->hs_devlim.log_max_mrw_sz;
2534 	val	= state->hs_cfg_profile->cp_log_max_mrw_sz;
2535 	if (val > maxval) {
2536 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2537 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2538 		    "soft_state_init_maxmrwsz_toobig_fail");
2539 		return (DDI_FAILURE);
2540 	}
2541 	hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
2542 
2543 	/* Determine RDMA/Atomic properties */
2544 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
2545 	hca_attr->hca_max_rsc = (uint_t)val;
2546 	val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
2547 	hca_attr->hca_max_rdma_in_qp  = (uint8_t)val;
2548 	val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
2549 	hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
2550 	hca_attr->hca_max_rdma_in_ee  = 0;
2551 	hca_attr->hca_max_rdma_out_ee = 0;
2552 
2553 	/*
2554 	 * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
2555 	 * because neither type of raw QP is supported
2556 	 */
2557 	hca_attr->hca_max_ipv6_qp  = 0;
2558 	hca_attr->hca_max_ether_qp = 0;
2559 
2560 	/* Determine max number of MCGs and max QP-per-MCG */
2561 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2562 	hca_attr->hca_max_mcg_qps   = (uint_t)val;
2563 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
2564 	hca_attr->hca_max_mcg	    = (uint_t)val;
2565 	val = state->hs_cfg_profile->cp_num_qp_per_mcg;
2566 	hca_attr->hca_max_qp_per_mcg = (uint_t)val;
2567 
2568 	/* Determine max number partitions (i.e. PKeys) */
2569 	maxval	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2570 	    state->hs_queryport.log_max_pkey);
2571 	val	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2572 	    state->hs_cfg_profile->cp_log_max_pkeytbl);
2573 
2574 	if (val > maxval) {
2575 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2576 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2577 		    "soft_state_init_toomanypkey_fail");
2578 		return (DDI_FAILURE);
2579 	}
2580 	hca_attr->hca_max_partitions = (uint16_t)val;
2581 
2582 	/* Determine number of ports */
2583 	maxval = state->hs_devlim.num_ports;
2584 	val = state->hs_cfg_profile->cp_num_ports;
2585 	if ((val > maxval) || (val == 0)) {
2586 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2587 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2588 		    "soft_state_init_toomanyports_fail");
2589 		return (DDI_FAILURE);
2590 	}
2591 	hca_attr->hca_nports = (uint8_t)val;
2592 
2593 	/* Copy NodeGUID and SystemImageGUID from softstate */
2594 	hca_attr->hca_node_guid = state->hs_nodeguid;
2595 	hca_attr->hca_si_guid	= state->hs_sysimgguid;
2596 
2597 	/*
2598 	 * Determine local ACK delay.  Use the value suggested by the Hermon
2599 	 * hardware (from the QUERY_DEV_CAP command)
2600 	 */
2601 	hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;
2602 
2603 	/* Determine max SGID table and PKey table sizes */
2604 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
2605 	hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
2606 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
2607 	hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;
2608 
2609 	/* Determine max number of PDs */
2610 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_pd);
2611 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
2612 	if (val > maxval) {
2613 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2614 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2615 		    "soft_state_init_toomanypd_fail");
2616 		return (DDI_FAILURE);
2617 	}
2618 	hca_attr->hca_max_pd = (uint_t)val;
2619 
2620 	/* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
2621 	hca_attr->hca_max_ah = 0;
2622 
2623 	/* No RDDs or EECs (since Reliable Datagram is not supported) */
2624 	hca_attr->hca_max_rdd = 0;
2625 	hca_attr->hca_max_eec = 0;
2626 
2627 	/* Initialize lock for reserved UAR page access */
2628 	mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
2629 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2630 
2631 	/* Initialize the flash fields */
2632 	state->hs_fw_flashstarted = 0;
2633 	mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
2634 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2635 
2636 	/* Initialize the lock for the info ioctl */
2637 	mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
2638 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2639 
2640 	/* Initialize the AVL tree for QP number support */
2641 	hermon_qpn_avl_init(state);
2642 
2643 	/* Initialize the kstat info structure */
2644 	status = hermon_kstat_init(state);
2645 	if (status != DDI_SUCCESS) {
2646 		hermon_qpn_avl_fini(state);
2647 		mutex_destroy(&state->hs_info_lock);
2648 		mutex_destroy(&state->hs_fw_flashlock);
2649 		mutex_destroy(&state->hs_uar_lock);
2650 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2651 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2652 		    "soft_state_init_kstatinit_fail");
2653 		return (DDI_FAILURE);
2654 	}
2655 
2656 	return (DDI_SUCCESS);
2657 }
2658 
2659 
2660 /*
2661  * hermon_soft_state_fini()
2662  *    Context: Called only from detach() path context
2663  */
2664 static void
2665 hermon_soft_state_fini(hermon_state_t *state)
2666 {
2667 
2668 	/* Teardown the kstat info */
2669 	hermon_kstat_fini(state);
2670 
2671 	/* Teardown the AVL tree for QP number support */
2672 	hermon_qpn_avl_fini(state);
2673 
2674 	/* Free up info ioctl mutex */
2675 	mutex_destroy(&state->hs_info_lock);
2676 
2677 	/* Free up flash mutex */
2678 	mutex_destroy(&state->hs_fw_flashlock);
2679 
2680 	/* Free up the UAR page access mutex */
2681 	mutex_destroy(&state->hs_uar_lock);
2682 
2683 	/* Free up the hca_attr struct */
2684 	kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
2685 
2686 }
2687 
2688 /*
2689  * hermon_icm_config_setup()
2690  *    Context: Only called from attach() path context
2691  */
2692 static int
2693 hermon_icm_config_setup(hermon_state_t *state,
2694     hermon_hw_initqueryhca_t *inithca)
2695 {
2696 	hermon_hw_querydevlim_t	*devlim;
2697 	hermon_cfg_profile_t	*cfg;
2698 	hermon_icm_table_t	*icm_p[HERMON_NUM_ICM_RESOURCES];
2699 	hermon_icm_table_t	*icm;
2700 	hermon_icm_table_t	*tmp;
2701 	uint64_t		icm_addr;
2702 	uint64_t		icm_size;
2703 	int			status, i, j;
2704 
2705 
2706 	/* Bring in local devlims, cfg_profile and hs_icm table list */
2707 	devlim = &state->hs_devlim;
2708 	cfg = state->hs_cfg_profile;
2709 	icm = state->hs_icm;
2710 
2711 	/*
2712 	 * Assign each ICM table's entry size from data in the devlims,
2713 	 * except for RDB and MCG sizes, which are not returned in devlims
2714 	 * but do have a fixed size, and the UAR context entry size, which
2715 	 * we determine. For this, we use the "cp_num_pgs_per_uce" value
2716 	 * from our hs_cfg_profile.
2717 	 */
2718 	icm[HERMON_CMPT].object_size	= devlim->cmpt_entry_sz;
2719 	icm[HERMON_CMPT_QPC].object_size	= devlim->cmpt_entry_sz;
2720 	icm[HERMON_CMPT_SRQC].object_size	= devlim->cmpt_entry_sz;
2721 	icm[HERMON_CMPT_CQC].object_size	= devlim->cmpt_entry_sz;
2722 	icm[HERMON_CMPT_EQC].object_size	= devlim->cmpt_entry_sz;
2723 	icm[HERMON_MTT].object_size	= devlim->mtt_entry_sz;
2724 	icm[HERMON_DMPT].object_size	= devlim->dmpt_entry_sz;
2725 	icm[HERMON_QPC].object_size	= devlim->qpc_entry_sz;
2726 	icm[HERMON_CQC].object_size	= devlim->cqc_entry_sz;
2727 	icm[HERMON_SRQC].object_size	= devlim->srq_entry_sz;
2728 	icm[HERMON_EQC].object_size	= devlim->eqc_entry_sz;
2729 	icm[HERMON_RDB].object_size	= devlim->rdmardc_entry_sz *
2730 	    cfg->cp_hca_max_rdma_in_qp;
2731 	icm[HERMON_MCG].object_size	= HERMON_MCGMEM_SZ(state);
2732 	icm[HERMON_ALTC].object_size	= devlim->altc_entry_sz;
2733 	icm[HERMON_AUXC].object_size	= devlim->aux_entry_sz;
2734 
2735 	/* Assign each ICM table's log2 number of entries */
2736 	icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
2737 	icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
2738 	icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
2739 	icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
2740 	icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
2741 	icm[HERMON_MTT].log_num_entries	= cfg->cp_log_num_mtt;
2742 	icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
2743 	icm[HERMON_QPC].log_num_entries	= cfg->cp_log_num_qp;
2744 	icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
2745 	icm[HERMON_CQC].log_num_entries	= cfg->cp_log_num_cq;
2746 	icm[HERMON_EQC].log_num_entries	= HERMON_NUM_EQ_SHIFT;
2747 	icm[HERMON_RDB].log_num_entries	= cfg->cp_log_num_qp;
2748 	icm[HERMON_MCG].log_num_entries	= cfg->cp_log_num_mcg;
2749 	icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
2750 	icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;
2751 
2752 	/* Initialize the ICM tables */
2753 	hermon_icm_tables_init(state);
2754 
2755 	/*
2756 	 * ICM tables must be aligned on their size in the ICM address
2757 	 * space. So, here we order the tables from largest total table
2758 	 * size to the smallest. All tables are a power of 2 in size, so
2759 	 * this will ensure that all tables are aligned on their own size
2760 	 * without wasting space in the ICM.
2761 	 *
2762 	 * In order to easily set the ICM addresses without needing to
2763 	 * worry about the ordering of our table indices as relates to
2764 	 * the hermon_rsrc_type_t enum, we will use a list of pointers
2765 	 * representing the tables for the sort, then assign ICM addresses
2766 	 * below using it.
2767 	 */
2768 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2769 		icm_p[i] = &icm[i];
2770 	}
2771 	for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
2772 		switch (i) {
2773 		case HERMON_CMPT_QPC:
2774 		case HERMON_CMPT_SRQC:
2775 		case HERMON_CMPT_CQC:
2776 		case HERMON_CMPT_EQC:
2777 			continue;
2778 		}
2779 		for (j = 1; j < i; j++) {
2780 			if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
2781 				tmp		= icm_p[j];
2782 				icm_p[j]	= icm_p[j - 1];
2783 				icm_p[j - 1]	= tmp;
2784 			}
2785 		}
2786 	}
2787 
2788 	/* Initialize the ICM address and ICM size */
2789 	icm_addr = icm_size = 0;
2790 
2791 	/*
2792 	 * Set the ICM base address of each table, using our sorted
2793 	 * list of pointers from above.
2794 	 */
2795 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2796 		j = icm_p[i]->icm_type;
2797 		switch (j) {
2798 		case HERMON_CMPT_QPC:
2799 		case HERMON_CMPT_SRQC:
2800 		case HERMON_CMPT_CQC:
2801 		case HERMON_CMPT_EQC:
2802 			continue;
2803 		}
2804 		if (icm[j].table_size) {
2805 			/*
2806 			 * Set the ICM base address in the table, save the
2807 			 * ICM offset in the rsrc pool and increment the
2808 			 * total ICM allocation.
2809 			 */
2810 			icm[j].icm_baseaddr = icm_addr;
2811 			if (hermon_verbose) {
2812 				IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
2813 				    " size %llx", j, icm[j].icm_baseaddr,
2814 				    icm[j].table_size);
2815 			}
2816 			icm_size += icm[j].table_size;
2817 		}
2818 
2819 		/* Verify that we don't exceed maximum ICM size */
2820 		if (icm_size > devlim->max_icm_size) {
2821 			/* free the ICM table memory resources */
2822 			hermon_icm_tables_fini(state);
2823 			cmn_err(CE_WARN, "ICM configuration exceeds maximum "
2824 			    "configuration: max (0x%lx) requested (0x%lx)\n",
2825 			    (ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
2826 			HERMON_ATTACH_MSG(state->hs_attach_buf,
2827 			    "icm_config_toobig_fail");
2828 			return (DDI_FAILURE);
2829 		}
2830 
2831 		/* assign address to the 4 pieces of the CMPT */
2832 		if (j == HERMON_CMPT) {
2833 			uint64_t cmpt_size = icm[j].table_size >> 2;
2834 #define	init_cmpt_icm_baseaddr(rsrc, indx)				\
2835 	icm[rsrc].icm_baseaddr	= icm_addr + (indx * cmpt_size);
2836 			init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
2837 			init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
2838 			init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
2839 			init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
2840 		}
2841 
2842 		/* Increment the ICM address for the next table */
2843 		icm_addr += icm[j].table_size;
2844 	}
2845 
2846 	/* Populate the structure for the INIT_HCA command */
2847 	hermon_inithca_set(state, inithca);
2848 
2849 	/*
2850 	 * Prior to invoking INIT_HCA, we must have ICM memory in place
2851 	 * for the reserved objects in each table. We will allocate and map
2852 	 * this initial ICM memory here. Note that given the assignment
2853 	 * of span_size above, tables that are smaller or equal in total
2854 	 * size to the default span_size will be mapped in full.
2855 	 */
2856 	status = hermon_icm_dma_init(state);
2857 	if (status != DDI_SUCCESS) {
2858 		/* free the ICM table memory resources */
2859 		hermon_icm_tables_fini(state);
2860 		HERMON_WARNING(state, "Failed to allocate initial ICM");
2861 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2862 		    "icm_config_dma_init_fail");
2863 		return (DDI_FAILURE);
2864 	}
2865 
2866 	return (DDI_SUCCESS);
2867 }
2868 
2869 /*
2870  * hermon_inithca_set()
2871  *    Context: Only called from attach() path context
2872  */
2873 static void
2874 hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
2875 {
2876 	hermon_cfg_profile_t	*cfg;
2877 	hermon_icm_table_t	*icm;
2878 	int			i;
2879 
2880 
2881 	/* Populate the INIT_HCA structure */
2882 	icm = state->hs_icm;
2883 	cfg = state->hs_cfg_profile;
2884 
2885 	/* set version */
2886 	inithca->version = 0x02;	/* PRM 0.36 */
2887 	/* set cacheline - log2 in 16-byte chunks */
2888 	inithca->log2_cacheline = 0x2;	/* optimized for 64 byte cache */
2889 
2890 	/* we need to update the inithca info with thie UAR info too */
2891 	inithca->uar.log_max_uars = highbit(cfg->cp_log_num_uar);
2892 	inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;
2893 
2894 	/* Set endianess */
2895 #ifdef	_LITTLE_ENDIAN
2896 	inithca->big_endian	= 0;
2897 #else
2898 	inithca->big_endian	= 1;
2899 #endif
2900 
2901 	/* Port Checking is on by default */
2902 	inithca->udav_port_chk	= HERMON_UDAV_PORTCHK_ENABLED;
2903 
2904 	/* Enable IPoIB checksum */
2905 	if (state->hs_devlim.ipoib_cksm)
2906 		inithca->chsum_en = 1;
2907 
2908 	/* Set each ICM table's attributes */
2909 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2910 		switch (icm[i].icm_type) {
2911 		case HERMON_CMPT:
2912 			inithca->tpt.cmpt_baseaddr = icm[i].icm_baseaddr;
2913 			break;
2914 
2915 		case HERMON_MTT:
2916 			inithca->tpt.mtt_baseaddr = icm[i].icm_baseaddr;
2917 			break;
2918 
2919 		case HERMON_DMPT:
2920 			inithca->tpt.dmpt_baseaddr = icm[i].icm_baseaddr;
2921 			inithca->tpt.log_dmpt_sz   = icm[i].log_num_entries;
2922 			inithca->tpt.pgfault_rnr_to = 0; /* just in case */
2923 			break;
2924 
2925 		case HERMON_QPC:
2926 			inithca->context.log_num_qp = icm[i].log_num_entries;
2927 			inithca->context.qpc_baseaddr_h =
2928 			    icm[i].icm_baseaddr >> 32;
2929 			inithca->context.qpc_baseaddr_l =
2930 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2931 			break;
2932 
2933 		case HERMON_CQC:
2934 			inithca->context.log_num_cq = icm[i].log_num_entries;
2935 			inithca->context.cqc_baseaddr_h =
2936 			    icm[i].icm_baseaddr >> 32;
2937 			inithca->context.cqc_baseaddr_l =
2938 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2939 			break;
2940 
2941 		case HERMON_SRQC:
2942 			inithca->context.log_num_srq = icm[i].log_num_entries;
2943 			inithca->context.srqc_baseaddr_h =
2944 			    icm[i].icm_baseaddr >> 32;
2945 			inithca->context.srqc_baseaddr_l =
2946 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2947 			break;
2948 
2949 		case HERMON_EQC:
2950 			inithca->context.log_num_eq = icm[i].log_num_entries;
2951 			inithca->context.eqc_baseaddr_h =
2952 			    icm[i].icm_baseaddr >> 32;
2953 			inithca->context.eqc_baseaddr_l =
2954 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2955 			break;
2956 
2957 		case HERMON_RDB:
2958 			inithca->context.rdmardc_baseaddr_h =
2959 			    icm[i].icm_baseaddr >> 32;
2960 			inithca->context.rdmardc_baseaddr_l =
2961 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2962 			inithca->context.log_num_rdmardc =
2963 			    cfg->cp_log_num_rdb - cfg->cp_log_num_qp;
2964 			break;
2965 
2966 		case HERMON_MCG:
2967 			inithca->multi.mc_baseaddr    = icm[i].icm_baseaddr;
2968 			inithca->multi.log_mc_tbl_sz  = icm[i].log_num_entries;
2969 			inithca->multi.log_mc_tbl_ent =
2970 			    highbit(HERMON_MCGMEM_SZ(state)) - 1;
2971 			inithca->multi.log_mc_tbl_hash_sz =
2972 			    cfg->cp_log_num_mcg_hash;
2973 			inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
2974 			break;
2975 
2976 		case HERMON_ALTC:
2977 			inithca->context.altc_baseaddr = icm[i].icm_baseaddr;
2978 			break;
2979 
2980 		case HERMON_AUXC:
2981 			inithca->context.auxc_baseaddr = icm[i].icm_baseaddr;
2982 			break;
2983 
2984 		default:
2985 			break;
2986 
2987 		}
2988 	}
2989 
2990 }
2991 
2992 /*
2993  * hermon_icm_tables_init()
2994  *    Context: Only called from attach() path context
2995  *
2996  * Dynamic ICM breaks the various ICM tables into "span_size" chunks
2997  * to enable allocation of backing memory on demand.  Arbel used a
2998  * fixed size ARBEL_ICM_SPAN_SIZE (initially was 512KB) as the
2999  * span_size for all ICM chunks.  Hermon has other considerations,
3000  * so the span_size used differs from Arbel.
3001  *
3002  * The basic considerations for why Hermon differs are:
3003  *
3004  *	1) ICM memory is in units of HERMON pages.
3005  *
3006  *	2) The AUXC table is approximately 1 byte per QP.
3007  *
3008  *	3) ICM memory for AUXC, ALTC, and RDB is allocated when
3009  *	the ICM memory for the corresponding QPC is allocated.
3010  *
3011  *	4) ICM memory for the CMPT corresponding to the various primary
3012  *	resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
3013  *	memory for the primary resource is allocated.
3014  *
3015  * One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
3016  * So, the minimum chunk for the various QPC related ICM memory should
3017  * all be allocated to support the 4K QPs.  Currently, this means the
3018  * amount of memory for the various QP chunks is:
3019  *
3020  *	QPC	256*4K bytes
3021  *	RDB	128*4K bytes
3022  *	CMPT	 64*4K bytes
3023  *	ALTC	 64*4K bytes
3024  *	AUXC	  1*4K bytes
3025  *
3026  * The span_size chosen for the QP resource is 4KB of AUXC entries,
3027  * or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
3028  *
3029  * Other ICM resources can have their span_size be more arbitrary.
3030  * This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
3031  */
3032 
/*
 * init_dependent(rsrc, dep)
 *
 * Helper macro to keep hermon_icm_tables_init() readable.  Copies the span
 * geometry (span, num_spans, split_shift, span_mask, span_shift, rsrc_mask)
 * of table icm[rsrc] into the dependent table icm[dep] and, when
 * hermon_verbose is set, logs the resulting values.
 *
 * Expects a local "icm" table array to be in scope at the point of use.
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe inside unbraced if/else constructs.
 */
#define	init_dependent(rsrc, dep)					\
	do {								\
		icm[dep].span		= icm[rsrc].span;		\
		icm[dep].num_spans	= icm[rsrc].num_spans;		\
		icm[dep].split_shift	= icm[rsrc].split_shift;	\
		icm[dep].span_mask	= icm[rsrc].span_mask;		\
		icm[dep].span_shift	= icm[rsrc].span_shift;		\
		icm[dep].rsrc_mask	= icm[rsrc].rsrc_mask;		\
		if (hermon_verbose) {					\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "rsrc (0x%x) size (0x%lx) span (0x%x) "	\
			    "num_spans (0x%x)", dep,			\
			    icm[dep].table_size,			\
			    icm[dep].span, icm[dep].num_spans);		\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "span_shift (0x%x) split_shift (0x%x)",	\
			    icm[dep].span_shift,			\
			    icm[dep].split_shift);			\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "span_mask (0x%x)  rsrc_mask   (0x%x)",	\
			    icm[dep].span_mask, icm[dep].rsrc_mask);	\
		}							\
	} while (0)
3053 
3054 static void
3055 hermon_icm_tables_init(hermon_state_t *state)
3056 {
3057 	hermon_icm_table_t	*icm;
3058 	int			i, k;
3059 	uint32_t		per_split;
3060 
3061 
3062 	icm = state->hs_icm;
3063 
3064 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3065 		icm[i].icm_type		= i;
3066 		icm[i].num_entries	= 1 << icm[i].log_num_entries;
3067 		icm[i].log_object_size	= highbit(icm[i].object_size) - 1;
3068 		icm[i].table_size	= icm[i].num_entries <<
3069 		    icm[i].log_object_size;
3070 
3071 		/* deal with "dependent" resource types */
3072 		switch (i) {
3073 		case HERMON_AUXC:
3074 #ifdef HERMON_FW_WORKAROUND
3075 			icm[i].table_size = 0x80000000ull;
3076 			/* FALLTHROUGH */
3077 #endif
3078 		case HERMON_CMPT_QPC:
3079 		case HERMON_RDB:
3080 		case HERMON_ALTC:
3081 			init_dependent(HERMON_QPC, i);
3082 			continue;
3083 		case HERMON_CMPT_SRQC:
3084 			init_dependent(HERMON_SRQC, i);
3085 			continue;
3086 		case HERMON_CMPT_CQC:
3087 			init_dependent(HERMON_CQC, i);
3088 			continue;
3089 		case HERMON_CMPT_EQC:
3090 			init_dependent(HERMON_EQC, i);
3091 			continue;
3092 		}
3093 
3094 		icm[i].span = HERMON_ICM_SPAN;	/* default #rsrc's in 1 span */
3095 		if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
3096 			icm[i].span = HERMON_ICM_SPAN * 16;
3097 		icm[i].num_spans = icm[i].num_entries / icm[i].span;
3098 		if (icm[i].num_spans == 0) {
3099 			icm[i].span = icm[i].num_entries;
3100 			per_split = 1;
3101 			icm[i].num_spans = icm[i].num_entries / icm[i].span;
3102 		} else {
3103 			per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
3104 			if (per_split == 0) {
3105 				per_split = 1;
3106 			}
3107 		}
3108 		if (hermon_verbose)
3109 			IBTF_DPRINTF_L2("ICM", "rsrc %x  span %x  num_spans %x",
3110 			    i, icm[i].span, icm[i].num_spans);
3111 
3112 		/*
3113 		 * Ensure a minimum table size of an ICM page, and a
3114 		 * maximum span size of the ICM table size.  This ensures
3115 		 * that we don't have less than an ICM page to map, which is
3116 		 * impossible, and that we will map an entire table at
3117 		 * once if it's total size is less than the span size.
3118 		 */
3119 		icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);
3120 
3121 		icm[i].span_shift = 0;
3122 		for (k = icm[i].span; k != 1; k >>= 1)
3123 			icm[i].span_shift++;
3124 		icm[i].split_shift = icm[i].span_shift;
3125 		for (k = per_split; k != 1; k >>= 1)
3126 			icm[i].split_shift++;
3127 		icm[i].span_mask = (1 << icm[i].split_shift) -
3128 		    (1 << icm[i].span_shift);
3129 		icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;
3130 
3131 
3132 		/* Initialize the table lock */
3133 		mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
3134 		    DDI_INTR_PRI(state->hs_intrmsi_pri));
3135 		cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);
3136 
3137 		if (hermon_verbose) {
3138 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3139 			    "rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
3140 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3141 			    "span (0x%x) num_spans (0x%x)",
3142 			    icm[i].span, icm[i].num_spans);
3143 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3144 			    "span_shift (0x%x) split_shift (0x%x)",
3145 			    icm[i].span_shift, icm[i].split_shift);
3146 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3147 			    "span_mask (0x%x)  rsrc_mask   (0x%x)",
3148 			    icm[i].span_mask, icm[i].rsrc_mask);
3149 		}
3150 	}
3151 
3152 }
3153 
3154 /*
3155  * hermon_icm_tables_fini()
3156  *    Context: Only called from attach() path context
3157  *
3158  * Clean up all icm_tables.  Free the bitmap and dma_info arrays.
3159  */
3160 static void
3161 hermon_icm_tables_fini(hermon_state_t *state)
3162 {
3163 	hermon_icm_table_t	*icm;
3164 	int			nspans;
3165 	int			i, j;
3166 
3167 
3168 	icm = state->hs_icm;
3169 
3170 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3171 
3172 		mutex_enter(&icm[i].icm_table_lock);
3173 		nspans = icm[i].num_spans;
3174 
3175 		for (j = 0; j < HERMON_ICM_SPLIT; j++) {
3176 			if (icm[i].icm_dma[j])
3177 				/* Free the ICM DMA slots */
3178 				kmem_free(icm[i].icm_dma[j],
3179 				    nspans * sizeof (hermon_dma_info_t));
3180 
3181 			if (icm[i].icm_bitmap[j])
3182 				/* Free the table bitmap */
3183 				kmem_free(icm[i].icm_bitmap[j],
3184 				    (nspans + 7) / 8);
3185 		}
3186 		/* Destroy the table lock */
3187 		cv_destroy(&icm[i].icm_table_cv);
3188 		mutex_exit(&icm[i].icm_table_lock);
3189 		mutex_destroy(&icm[i].icm_table_lock);
3190 	}
3191 
3192 }
3193 
3194 /*
3195  * hermon_icm_dma_init()
3196  *    Context: Only called from attach() path context
3197  */
3198 static int
3199 hermon_icm_dma_init(hermon_state_t *state)
3200 {
3201 	hermon_icm_table_t	*icm;
3202 	hermon_rsrc_type_t	type;
3203 	int			status;
3204 
3205 
3206 	/*
3207 	 * This routine will allocate initial ICM DMA resources for ICM
3208 	 * tables that have reserved ICM objects. This is the only routine
3209 	 * where we should have to allocate ICM outside of hermon_rsrc_alloc().
3210 	 * We need to allocate ICM here explicitly, rather than in
3211 	 * hermon_rsrc_alloc(), because we've not yet completed the resource
3212 	 * pool initialization. When the resource pools are initialized
3213 	 * (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
3214 	 * information), resource preallocations will be invoked to match
3215 	 * the ICM allocations seen here. We will then be able to use the
3216 	 * normal allocation path.  Note we don't need to set a refcnt on
3217 	 * these initial allocations because that will be done in the calls
3218 	 * to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
3219 	 * "prealloc" objects (see hermon_rsrc.c for more information).
3220 	 */
3221 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3222 
3223 		/* ICM for these is allocated within hermon_icm_alloc() */
3224 		switch (type) {
3225 		case HERMON_CMPT:
3226 		case HERMON_CMPT_QPC:
3227 		case HERMON_CMPT_SRQC:
3228 		case HERMON_CMPT_CQC:
3229 		case HERMON_CMPT_EQC:
3230 		case HERMON_AUXC:
3231 		case HERMON_ALTC:
3232 		case HERMON_RDB:
3233 			continue;
3234 		}
3235 
3236 		icm = &state->hs_icm[type];
3237 
3238 		mutex_enter(&icm->icm_table_lock);
3239 		status = hermon_icm_alloc(state, type, 0, 0);
3240 		mutex_exit(&icm->icm_table_lock);
3241 		if (status != DDI_SUCCESS) {
3242 			while (type--) {
3243 				icm = &state->hs_icm[type];
3244 				mutex_enter(&icm->icm_table_lock);
3245 				hermon_icm_free(state, type, 0, 0);
3246 				mutex_exit(&icm->icm_table_lock);
3247 			}
3248 			return (DDI_FAILURE);
3249 		}
3250 
3251 		if (hermon_verbose) {
3252 			IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
3253 			    "table (0x%x) index (0x%x) allocated", type, 0);
3254 		}
3255 	}
3256 
3257 	return (DDI_SUCCESS);
3258 }
3259 
3260 /*
3261  * hermon_icm_dma_fini()
3262  *    Context: Only called from attach() path context
3263  *
3264  * ICM has been completely unmapped.  We just free the memory here.
3265  */
3266 static void
3267 hermon_icm_dma_fini(hermon_state_t *state)
3268 {
3269 	hermon_icm_table_t	*icm;
3270 	hermon_dma_info_t	*dma_info;
3271 	hermon_rsrc_type_t	type;
3272 	int			index1, index2;
3273 
3274 
3275 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3276 		icm = &state->hs_icm[type];
3277 		for (index1 = 0; index1 < HERMON_ICM_SPLIT; index1++) {
3278 			dma_info = icm->icm_dma[index1];
3279 			if (dma_info == NULL)
3280 				continue;
3281 			for (index2 = 0; index2 < icm->num_spans; index2++) {
3282 				if (dma_info[index2].dma_hdl)
3283 					hermon_dma_free(&dma_info[index2]);
3284 				dma_info[index2].dma_hdl = NULL;
3285 			}
3286 		}
3287 	}
3288 
3289 }
3290 
3291 /*
3292  * hermon_hca_port_init()
3293  *    Context: Only called from attach() path context
3294  */
3295 static int
3296 hermon_hca_port_init(hermon_state_t *state)
3297 {
3298 	hermon_hw_set_port_t	*portinits, *initport;
3299 	hermon_cfg_profile_t	*cfgprof;
3300 	uint_t			num_ports;
3301 	int			i = 0, status;
3302 	uint64_t		maxval, val;
3303 	uint64_t		sysimgguid, nodeguid, portguid;
3304 
3305 
3306 	cfgprof = state->hs_cfg_profile;
3307 
3308 	/* Get number of HCA ports */
3309 	num_ports = cfgprof->cp_num_ports;
3310 
3311 	/* Allocate space for Hermon set port  struct(s) */
3312 	portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
3313 	    sizeof (hermon_hw_set_port_t), KM_SLEEP);
3314 
3315 
3316 
3317 	/* Post commands to initialize each Hermon HCA port */
3318 	/*
3319 	 * In Hermon, the process is different than in previous HCAs.
3320 	 * Here, you have to:
3321 	 *	QUERY_PORT - to get basic information from the HCA
3322 	 *	set the fields accordingly
3323 	 *	SET_PORT - to change/set everything as desired
3324 	 *	INIT_PORT - to bring the port up
3325 	 *
3326 	 * Needs to be done for each port in turn
3327 	 */
3328 
3329 	for (i = 0; i < num_ports; i++) {
3330 		bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
3331 		status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
3332 		    (i + 1), &state->hs_queryport,
3333 		    sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
3334 		if (status != HERMON_CMD_SUCCESS) {
3335 			cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
3336 			    "command failed: %08x\n", i + 1, status);
3337 			goto init_ports_fail;
3338 		}
3339 		initport = &portinits[i];
3340 		state->hs_initport = &portinits[i];
3341 
3342 		bzero(initport, sizeof (hermon_hw_query_port_t));
3343 
3344 		/*
3345 		 * Determine whether we need to override the firmware's
3346 		 * default SystemImageGUID setting.
3347 		 */
3348 		sysimgguid = cfgprof->cp_sysimgguid;
3349 		if (sysimgguid != 0) {
3350 			initport->sig		= 1;
3351 			initport->sys_img_guid	= sysimgguid;
3352 		}
3353 
3354 		/*
3355 		 * Determine whether we need to override the firmware's
3356 		 * default NodeGUID setting.
3357 		 */
3358 		nodeguid = cfgprof->cp_nodeguid;
3359 		if (nodeguid != 0) {
3360 			initport->ng		= 1;
3361 			initport->node_guid	= nodeguid;
3362 		}
3363 
3364 		/*
3365 		 * Determine whether we need to override the firmware's
3366 		 * default PortGUID setting.
3367 		 */
3368 		portguid = cfgprof->cp_portguid[i];
3369 		if (portguid != 0) {
3370 			initport->g0		= 1;
3371 			initport->guid0		= portguid;
3372 		}
3373 
3374 		/* Validate max MTU size */
3375 		maxval  = state->hs_queryport.ib_mtu;
3376 		val	= cfgprof->cp_max_mtu;
3377 		if (val > maxval) {
3378 			goto init_ports_fail;
3379 		}
3380 
3381 		/* Set mtu_cap to 4096 bytes */
3382 		initport->mmc = 1;	/* set the change bit */
3383 		initport->mtu_cap = 5;	/* for 4096 bytes */
3384 
3385 		/* Validate the max port width */
3386 		maxval  = state->hs_queryport.ib_port_wid;
3387 		val	= cfgprof->cp_max_port_width;
3388 		if (val > maxval) {
3389 			goto init_ports_fail;
3390 		}
3391 
3392 		/* Validate max VL cap size */
3393 		maxval  = state->hs_queryport.max_vl;
3394 		val	= cfgprof->cp_max_vlcap;
3395 		if (val > maxval) {
3396 			goto init_ports_fail;
3397 		}
3398 
3399 		/* Since we're doing mtu_cap, cut vl_cap down */
3400 		initport->mvc = 1;	/* set this change bit */
3401 		initport->vl_cap = 3;	/* 3 means vl0-vl3, 4 total */
3402 
3403 		/* Validate max GID table size */
3404 		maxval  = ((uint64_t)1 << state->hs_queryport.log_max_gid);
3405 		val	= ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
3406 		if (val > maxval) {
3407 			goto init_ports_fail;
3408 		}
3409 		initport->max_guid = (uint16_t)val;
3410 		initport->mg = 1;
3411 
3412 		/* Validate max PKey table size */
3413 		maxval	= ((uint64_t)1 << state->hs_queryport.log_max_pkey);
3414 		val	= ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
3415 		if (val > maxval) {
3416 			goto init_ports_fail;
3417 		}
3418 		initport->max_pkey = (uint16_t)val;
3419 		initport->mp = 1;
3420 		/*
3421 		 * Post the SET_PORT cmd to Hermon firmware. This sets
3422 		 * the parameters of the port.
3423 		 */
3424 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3425 		    HERMON_CMD_NOSLEEP_SPIN);
3426 		if (status != HERMON_CMD_SUCCESS) {
3427 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3428 			    "failed: %08x\n", i + 1, status);
3429 			goto init_ports_fail;
3430 		}
3431 		/* issue another SET_PORT cmd - performance fix/workaround */
3432 		/* XXX - need to discuss with Mellanox */
3433 		bzero(initport, sizeof (hermon_hw_query_port_t));
3434 		initport->cap_mask = 0x02500868;
3435 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3436 		    HERMON_CMD_NOSLEEP_SPIN);
3437 		if (status != HERMON_CMD_SUCCESS) {
3438 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3439 			    "failed: %08x\n", i + 1, status);
3440 			goto init_ports_fail;
3441 		}
3442 	}
3443 
3444 	/*
3445 	 * Finally, do the INIT_PORT for each port in turn
3446 	 * When this command completes, the corresponding Hermon port
3447 	 * will be physically "Up" and initialized.
3448 	 */
3449 	for (i = 0; i < num_ports; i++) {
3450 		status = hermon_init_port_cmd_post(state, i + 1,
3451 		    HERMON_CMD_NOSLEEP_SPIN);
3452 		if (status != HERMON_CMD_SUCCESS) {
3453 			cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
3454 			    "comman failed: %08x\n", i + 1, status);
3455 			goto init_ports_fail;
3456 		}
3457 	}
3458 
3459 	/* Free up the memory for Hermon port init struct(s), return success */
3460 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3461 	return (DDI_SUCCESS);
3462 
3463 init_ports_fail:
3464 	/*
3465 	 * Free up the memory for Hermon port init struct(s), shutdown any
3466 	 * successfully initialized ports, and return failure
3467 	 */
3468 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3469 	(void) hermon_hca_ports_shutdown(state, i);
3470 
3471 	return (DDI_FAILURE);
3472 }
3473 
3474 
3475 /*
3476  * hermon_hca_ports_shutdown()
3477  *    Context: Only called from attach() and/or detach() path contexts
3478  */
3479 static int
3480 hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
3481 {
3482 	int	i, status;
3483 
3484 	/*
3485 	 * Post commands to shutdown all init'd Hermon HCA ports.  Note: if
3486 	 * any of these commands fail for any reason, it would be entirely
3487 	 * unexpected and probably indicative a serious problem (HW or SW).
3488 	 * Although we do return void from this function, this type of failure
3489 	 * should not go unreported.  That is why we have the warning message.
3490 	 */
3491 	for (i = 0; i < num_init; i++) {
3492 		status = hermon_close_port_cmd_post(state, i + 1,
3493 		    HERMON_CMD_NOSLEEP_SPIN);
3494 		if (status != HERMON_CMD_SUCCESS) {
3495 			HERMON_WARNING(state, "failed to shutdown HCA port");
3496 			return (status);
3497 		}
3498 	}
3499 	return (HERMON_CMD_SUCCESS);
3500 }
3501 
3502 
3503 /*
3504  * hermon_internal_uarpg_init
3505  *    Context: Only called from attach() path context
3506  */
3507 static int
3508 hermon_internal_uarpg_init(hermon_state_t *state)
3509 {
3510 	int	status;
3511 	hermon_dbr_info_t 	*info;
3512 
3513 	/*
3514 	 * Allocate the UAR page for kernel use. This UAR page is
3515 	 * the privileged UAR page through which all kernel generated
3516 	 * doorbells will be rung. There are a number of UAR pages
3517 	 * reserved by hardware at the front of the UAR BAR, indicated
3518 	 * by DEVCAP.num_rsvd_uar, which we have already allocated. So,
3519 	 * the kernel page, or UAR page index num_rsvd_uar, will be
3520 	 * allocated here for kernel use.
3521 	 */
3522 
3523 	status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
3524 	    &state->hs_uarkpg_rsrc);
3525 	if (status != DDI_SUCCESS) {
3526 		return (DDI_FAILURE);
3527 	}
3528 
3529 	/* Setup pointer to kernel UAR page */
3530 	state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
3531 
3532 	/* need to set up DBr tracking as well */
3533 	status = hermon_dbr_page_alloc(state, &info);
3534 	if (status != DDI_SUCCESS) {
3535 		return (DDI_FAILURE);
3536 	}
3537 	state->hs_kern_dbr = info;
3538 	return (DDI_SUCCESS);
3539 }
3540 
3541 
3542 /*
3543  * hermon_internal_uarpg_fini
3544  *    Context: Only called from attach() and/or detach() path contexts
3545  */
3546 static void
3547 hermon_internal_uarpg_fini(hermon_state_t *state)
3548 {
3549 	/* Free up Hermon UAR page #1 (kernel driver doorbells) */
3550 	hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
3551 }
3552 
3553 
3554 /*
3555  * hermon_special_qp_contexts_reserve()
3556  *    Context: Only called from attach() path context
3557  */
3558 static int
3559 hermon_special_qp_contexts_reserve(hermon_state_t *state)
3560 {
3561 	hermon_rsrc_t	*qp0_rsrc, *qp1_rsrc, *qp_resvd;
3562 	int		status;
3563 
3564 	/* Initialize the lock used for special QP rsrc management */
3565 	mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
3566 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3567 
3568 	/*
3569 	 * Reserve contexts for QP0.  These QP contexts will be setup to
3570 	 * act as aliases for the real QP0.  Note: We are required to grab
3571 	 * two QPs (one per port) even if we are operating in single-port
3572 	 * mode.
3573 	 */
3574 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3575 	    HERMON_SLEEP, &qp0_rsrc);
3576 	if (status != DDI_SUCCESS) {
3577 		mutex_destroy(&state->hs_spec_qplock);
3578 		return (DDI_FAILURE);
3579 	}
3580 	state->hs_spec_qp0 = qp0_rsrc;
3581 
3582 	/*
3583 	 * Reserve contexts for QP1.  These QP contexts will be setup to
3584 	 * act as aliases for the real QP1.  Note: We are required to grab
3585 	 * two QPs (one per port) even if we are operating in single-port
3586 	 * mode.
3587 	 */
3588 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3589 	    HERMON_SLEEP, &qp1_rsrc);
3590 	if (status != DDI_SUCCESS) {
3591 		hermon_rsrc_free(state, &qp0_rsrc);
3592 		mutex_destroy(&state->hs_spec_qplock);
3593 		return (DDI_FAILURE);
3594 	}
3595 	state->hs_spec_qp1 = qp1_rsrc;
3596 
3597 	status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
3598 	    HERMON_SLEEP, &qp_resvd);
3599 	if (status != DDI_SUCCESS) {
3600 		hermon_rsrc_free(state, &qp1_rsrc);
3601 		hermon_rsrc_free(state, &qp0_rsrc);
3602 		mutex_destroy(&state->hs_spec_qplock);
3603 		return (DDI_FAILURE);
3604 	}
3605 	state->hs_spec_qp_unused = qp_resvd;
3606 
3607 	return (DDI_SUCCESS);
3608 }
3609 
3610 
3611 /*
3612  * hermon_special_qp_contexts_unreserve()
3613  *    Context: Only called from attach() and/or detach() path contexts
3614  */
3615 static void
3616 hermon_special_qp_contexts_unreserve(hermon_state_t *state)
3617 {
3618 
3619 	/* Unreserve contexts for spec_qp_unused */
3620 	hermon_rsrc_free(state, &state->hs_spec_qp_unused);
3621 
3622 	/* Unreserve contexts for QP1 */
3623 	hermon_rsrc_free(state, &state->hs_spec_qp1);
3624 
3625 	/* Unreserve contexts for QP0 */
3626 	hermon_rsrc_free(state, &state->hs_spec_qp0);
3627 
3628 	/* Destroy the lock used for special QP rsrc management */
3629 	mutex_destroy(&state->hs_spec_qplock);
3630 
3631 }
3632 
3633 
3634 /*
3635  * hermon_sw_reset()
3636  *    Context: Currently called only from attach() path context
3637  */
3638 static int
3639 hermon_sw_reset(hermon_state_t *state)
3640 {
3641 	ddi_acc_handle_t	hdl = hermon_get_pcihdl(state);
3642 	ddi_acc_handle_t	cmdhdl = hermon_get_cmdhdl(state);
3643 	uint32_t		reset_delay;
3644 	int			status, i;
3645 	uint32_t		sem;
3646 	uint_t			offset;
3647 	uint32_t		data32;		/* for devctl & linkctl */
3648 	int			loopcnt;
3649 
3650 	/* initialize the FMA retry loop */
3651 	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
3652 	hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);
3653 
3654 	/*
3655 	 * If the configured software reset delay is set to zero, then we
3656 	 * will not attempt a software reset of the Hermon device.
3657 	 */
3658 	reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
3659 	if (reset_delay == 0) {
3660 		return (DDI_SUCCESS);
3661 	}
3662 
3663 	/* the FMA retry loop starts. */
3664 	hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3665 	    fm_test);
3666 	hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3667 	    fm_test2);
3668 
3669 	/* Query the PCI capabilities of the HCA device */
3670 	/* but don't process the VPD until after reset */
3671 	status = hermon_pci_capability_list(state, hdl);
3672 	if (status != DDI_SUCCESS) {
3673 		cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
3674 		    status);
3675 		return (DDI_FAILURE);
3676 	}
3677 
3678 	/*
3679 	 * Read all PCI config info (reg0...reg63).  Note: According to the
3680 	 * Hermon software reset application note, we should not read or
3681 	 * restore the values in reg22 and reg23.
3682 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
3683 	 * register LAST, and technically, you need to restore the
3684 	 * PCIE Capability "device control" and "link control" (word-sized,
3685 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
3686 	 * We hold off restoring the command register - offset 0x4 - till last
3687 	 */
3688 
3689 	/* 1st, wait for the semaphore assure accessibility - per PRM */
3690 	status = -1;
3691 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
3692 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
3693 		if (sem == 0) {
3694 			status = 0;
3695 			break;
3696 		}
3697 		drv_usecwait(1);
3698 	}
3699 
3700 	/* Check if timeout happens */
3701 	if (status == -1) {
3702 		/*
3703 		 * Remove this acc handle from Hermon, then log
3704 		 * the error.
3705 		 */
3706 		hermon_pci_config_teardown(state, &hdl);
3707 
3708 		cmn_err(CE_WARN, "hermon_sw_reset timeout: "
3709 		    "failed to get the semaphore(0x%p)\n",
3710 		    (void *)state->hs_cmd_regs.sw_semaphore);
3711 
3712 		hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
3713 		return (DDI_FAILURE);
3714 	}
3715 
3716 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3717 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3718 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3719 			state->hs_cfg_data[i]  = pci_config_get32(hdl, i << 2);
3720 		}
3721 	}
3722 
3723 	/*
3724 	 * Perform the software reset (by writing 1 at offset 0xF0010)
3725 	 */
3726 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
3727 
3728 	/*
3729 	 * This delay is required so as not to cause a panic here. If the
3730 	 * device is accessed too soon after reset it will not respond to
3731 	 * config cycles, causing a Master Abort and panic.
3732 	 */
3733 	drv_usecwait(reset_delay);
3734 
3735 	/*
3736 	 * Poll waiting for the device to finish resetting.
3737 	 */
3738 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
3739 	while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
3740 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
3741 		if (--loopcnt == 0)
3742 			break;	/* just in case, break and go on */
3743 	}
3744 	if (loopcnt == 0)
3745 		cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
3746 		    pci_config_get32(hdl, 0));
3747 
3748 	/*
3749 	 * Restore the config info
3750 	 */
3751 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3752 		if (i == 1) continue;	/* skip the status/ctrl reg */
3753 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3754 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3755 			pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
3756 		}
3757 	}
3758 
3759 	/*
3760 	 * PCI Express Capability - we saved during capability list, and
3761 	 * we'll restore them here.
3762 	 */
3763 	offset = state->hs_pci_cap_offset;
3764 	data32 = state->hs_pci_cap_devctl;
3765 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
3766 	data32 = state->hs_pci_cap_lnkctl;
3767 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
3768 
3769 	pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
3770 
3771 	/* the FMA retry loop ends. */
3772 	hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3773 	    fm_test2);
3774 	hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3775 	    fm_test);
3776 
3777 	return (DDI_SUCCESS);
3778 
3779 pio_error2:
3780 	/* fall through */
3781 pio_error:
3782 	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
3783 	return (DDI_FAILURE);
3784 }
3785 
3786 
3787 /*
3788  * hermon_mcg_init()
3789  *    Context: Only called from attach() path context
3790  */
3791 static int
3792 hermon_mcg_init(hermon_state_t *state)
3793 {
3794 	uint_t		mcg_tmp_sz;
3795 
3796 
3797 	/*
3798 	 * Allocate space for the MCG temporary copy buffer.  This is
3799 	 * used by the Attach/Detach Multicast Group code
3800 	 */
3801 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3802 	state->hs_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
3803 
3804 	/*
3805 	 * Initialize the multicast group mutex.  This ensures atomic
3806 	 * access to add, modify, and remove entries in the multicast
3807 	 * group hash lists.
3808 	 */
3809 	mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
3810 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3811 
3812 	return (DDI_SUCCESS);
3813 }
3814 
3815 
3816 /*
3817  * hermon_mcg_fini()
3818  *    Context: Only called from attach() and/or detach() path contexts
3819  */
3820 static void
3821 hermon_mcg_fini(hermon_state_t *state)
3822 {
3823 	uint_t		mcg_tmp_sz;
3824 
3825 
3826 	/* Free up the space used for the MCG temporary copy buffer */
3827 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3828 	kmem_free(state->hs_mcgtmp, mcg_tmp_sz);
3829 
3830 	/* Destroy the multicast group mutex */
3831 	mutex_destroy(&state->hs_mcglock);
3832 
3833 }
3834 
3835 
3836 /*
3837  * hermon_fw_version_check()
3838  *    Context: Only called from attach() path context
3839  */
3840 static int
3841 hermon_fw_version_check(hermon_state_t *state)
3842 {
3843 
3844 	uint_t	hermon_fw_ver_major;
3845 	uint_t	hermon_fw_ver_minor;
3846 	uint_t	hermon_fw_ver_subminor;
3847 
3848 #ifdef FMA_TEST
3849 	if (hermon_test_num == -1) {
3850 		return (DDI_FAILURE);
3851 	}
3852 #endif
3853 
3854 	/*
3855 	 * Depending on which version of driver we have attached, and which
3856 	 * HCA we've attached, the firmware version checks will be different.
3857 	 * We set up the comparison values for both Arbel and Sinai HCAs.
3858 	 */
3859 	switch (state->hs_operational_mode) {
3860 	case HERMON_HCA_MODE:
3861 		hermon_fw_ver_major = HERMON_FW_VER_MAJOR;
3862 		hermon_fw_ver_minor = HERMON_FW_VER_MINOR;
3863 		hermon_fw_ver_subminor = HERMON_FW_VER_SUBMINOR;
3864 		break;
3865 
3866 	default:
3867 		return (DDI_FAILURE);
3868 	}
3869 
3870 	/*
3871 	 * If FW revision major number is less than acceptable,
3872 	 * return failure, else if greater return success.  If
3873 	 * the major numbers are equal than check the minor number
3874 	 */
3875 	if (state->hs_fw.fw_rev_major < hermon_fw_ver_major) {
3876 		return (DDI_FAILURE);
3877 	} else if (state->hs_fw.fw_rev_major > hermon_fw_ver_major) {
3878 		return (DDI_SUCCESS);
3879 	}
3880 
3881 	/*
3882 	 * Do the same check as above, except for minor revision numbers
3883 	 * If the minor numbers are equal than check the subminor number
3884 	 */
3885 	if (state->hs_fw.fw_rev_minor < hermon_fw_ver_minor) {
3886 		return (DDI_FAILURE);
3887 	} else if (state->hs_fw.fw_rev_minor > hermon_fw_ver_minor) {
3888 		return (DDI_SUCCESS);
3889 	}
3890 
3891 	/*
3892 	 * Once again we do the same check as above, except for the subminor
3893 	 * revision number.  If the subminor numbers are equal here, then
3894 	 * these are the same firmware version, return success
3895 	 */
3896 	if (state->hs_fw.fw_rev_subminor < hermon_fw_ver_subminor) {
3897 		return (DDI_FAILURE);
3898 	} else if (state->hs_fw.fw_rev_subminor > hermon_fw_ver_subminor) {
3899 		return (DDI_SUCCESS);
3900 	}
3901 
3902 	return (DDI_SUCCESS);
3903 }
3904 
3905 
3906 /*
3907  * hermon_device_info_report()
3908  *    Context: Only called from attach() path context
3909  */
3910 static void
3911 hermon_device_info_report(hermon_state_t *state)
3912 {
3913 
3914 	cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
3915 	    "HW rev: %02d\n", state->hs_instance, state->hs_fw.fw_rev_major,
3916 	    state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
3917 	    state->hs_revision_id);
3918 	cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
3919 	    state->hs_instance, state->hs_nodedesc, state->hs_nodeguid);
3920 
3921 }
3922 
3923 
3924 /*
3925  * hermon_pci_capability_list()
3926  *    Context: Only called from attach() path context
3927  */
3928 static int
3929 hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
3930 {
3931 	uint_t		offset, data;
3932 	uint32_t	data32;
3933 
3934 	state->hs_pci_cap_offset = 0;		/* make sure it's cleared */
3935 
3936 	/*
3937 	 * Check for the "PCI Capabilities" bit in the "Status Register".
3938 	 * Bit 4 in this register indicates the presence of a "PCI
3939 	 * Capabilities" list.
3940 	 *
3941 	 * PCI-Express requires this bit to be set to 1.
3942 	 */
3943 	data = pci_config_get16(hdl, 0x06);
3944 	if ((data & 0x10) == 0) {
3945 		return (DDI_FAILURE);
3946 	}
3947 
3948 	/*
3949 	 * Starting from offset 0x34 in PCI config space, find the
3950 	 * head of "PCI capabilities" list, and walk the list.  If
3951 	 * capabilities of a known type are encountered (e.g.
3952 	 * "PCI-X Capability"), then call the appropriate handler
3953 	 * function.
3954 	 */
3955 	offset = pci_config_get8(hdl, 0x34);
3956 	while (offset != 0x0) {
3957 		data = pci_config_get8(hdl, offset);
3958 		/*
3959 		 * Check for known capability types.  Hermon has the
3960 		 * following:
3961 		 *    o Power Mgmt	 (0x02)
3962 		 *    o VPD Capability   (0x03)
3963 		 *    o PCI-E Capability (0x10)
3964 		 *    o MSIX Capability  (0x11)
3965 		 */
3966 		switch (data) {
3967 		case 0x01:
3968 			/* power mgmt handling */
3969 			break;
3970 		case 0x03:
3971 
3972 /*
3973  * Reading the PCIe VPD is inconsistent - that is, sometimes causes
3974  * problems on (mostly) X64, though we've also seen problems w/ Sparc
3975  * and Tavor --- so, for now until it's root caused, don't try and
3976  * read it
3977  */
3978 #ifdef HERMON_VPD_WORKS
3979 			hermon_pci_capability_vpd(state, hdl, offset);
3980 #else
3981 			delay(100);
3982 			hermon_pci_capability_vpd(state, hdl, offset);
3983 #endif
3984 			break;
3985 		case 0x10:
3986 			/*
3987 			 * PCI Express Capability - save offset & contents
3988 			 * for later in reset
3989 			 */
3990 			state->hs_pci_cap_offset = offset;
3991 			data32 = pci_config_get32(hdl,
3992 			    offset + HERMON_PCI_CAP_DEV_OFFS);
3993 			state->hs_pci_cap_devctl = data32;
3994 			data32 = pci_config_get32(hdl,
3995 			    offset + HERMON_PCI_CAP_LNK_OFFS);
3996 			state->hs_pci_cap_lnkctl = data32;
3997 			break;
3998 		case 0x11:
3999 			/*
4000 			 * MSIX support - nothing to do, taken care of in the
4001 			 * MSI/MSIX interrupt frameworkd
4002 			 */
4003 			break;
4004 		default:
4005 			/* just go on to the next */
4006 			break;
4007 		}
4008 
4009 		/* Get offset of next entry in list */
4010 		offset = pci_config_get8(hdl, offset + 1);
4011 	}
4012 
4013 	return (DDI_SUCCESS);
4014 }
4015 
4016 /*
4017  * hermon_pci_read_vpd()
4018  *    Context: Only called from attach() path context
4019  *    utility routine for hermon_pci_capability_vpd()
4020  */
4021 static int
4022 hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
4023     uint32_t *data)
4024 {
4025 	int		retry = 40;  /* retry counter for EEPROM poll */
4026 	uint32_t	val;
4027 	int		vpd_addr = offset + 2;
4028 	int		vpd_data = offset + 4;
4029 
4030 	/*
4031 	 * In order to read a 32-bit value from VPD, we are to write down
4032 	 * the address (offset in the VPD itself) to the address register.
4033 	 * To signal the read, we also clear bit 31.  We then poll on bit 31
4034 	 * and when it is set, we can then read our 4 bytes from the data
4035 	 * register.
4036 	 */
4037 	(void) pci_config_put32(hdl, offset, addr << 16);
4038 	do {
4039 		drv_usecwait(1000);
4040 		val = pci_config_get16(hdl, vpd_addr);
4041 		if (val & 0x8000) {		/* flag bit set */
4042 			*data = pci_config_get32(hdl, vpd_data);
4043 			return (DDI_SUCCESS);
4044 		}
4045 	} while (--retry);
4046 	/* read of flag failed write one message but count the failures */
4047 	if (debug_vpd == 0)
4048 		cmn_err(CE_NOTE,
4049 		    "!Failed to see flag bit after VPD addr write\n");
4050 	debug_vpd++;
4051 
4052 
4053 vpd_read_fail:
4054 	return (DDI_FAILURE);
4055 }
4056 
4057 
4058 
4059 /*
4060  *   hermon_pci_capability_vpd()
4061  *    Context: Only called from attach() path context
4062  */
4063 static void
4064 hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
4065     uint_t offset)
4066 {
4067 	uint8_t			name_length;
4068 	uint8_t			pn_length;
4069 	int			i, err = 0;
4070 	int			vpd_str_id = 0;
4071 	int			vpd_ro_desc;
4072 	int			vpd_ro_pn_desc;
4073 #ifdef _BIG_ENDIAN
4074 	uint32_t		data32;
4075 #endif /* _BIG_ENDIAN */
4076 	union {
4077 		uint32_t	vpd_int[HERMON_VPD_HDR_DWSIZE];
4078 		uchar_t		vpd_char[HERMON_VPD_HDR_BSIZE];
4079 	} vpd;
4080 
4081 
4082 	/*
4083 	 * Read in the Vital Product Data (VPD) to the extend needed
4084 	 * by the fwflash utility
4085 	 */
4086 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4087 		err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
4088 		if (err != DDI_SUCCESS) {
4089 			cmn_err(CE_NOTE, "!VPD read failed\n");
4090 			goto out;
4091 		}
4092 	}
4093 
4094 #ifdef _BIG_ENDIAN
4095 	/* Need to swap bytes for big endian. */
4096 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4097 		data32 = vpd.vpd_int[i];
4098 		vpd.vpd_char[(i << 2) + 3] =
4099 		    (uchar_t)((data32 & 0xFF000000) >> 24);
4100 		vpd.vpd_char[(i << 2) + 2] =
4101 		    (uchar_t)((data32 & 0x00FF0000) >> 16);
4102 		vpd.vpd_char[(i << 2) + 1] =
4103 		    (uchar_t)((data32 & 0x0000FF00) >> 8);
4104 		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
4105 	}
4106 #endif	/* _BIG_ENDIAN */
4107 
4108 	/* Check for VPD String ID Tag */
4109 	if (vpd.vpd_char[vpd_str_id] == 0x82) {
4110 		/* get the product name */
4111 		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
4112 		if (name_length > sizeof (state->hs_hca_name)) {
4113 			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
4114 			    name_length);
4115 			goto out;
4116 		}
4117 		(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
4118 		    name_length);
4119 		state->hs_hca_name[name_length] = 0;
4120 
4121 		/* get the part number */
4122 		vpd_ro_desc = name_length + 3; /* read-only tag location */
4123 		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
4124 
4125 		/* Verify read-only tag and Part Number keyword. */
4126 		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
4127 		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
4128 		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
4129 			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
4130 			goto out;
4131 		}
4132 
4133 		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
4134 		if (pn_length > sizeof (state->hs_hca_pn)) {
4135 			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
4136 			    name_length);
4137 			goto out;
4138 		}
4139 		(void) memcpy(state->hs_hca_pn,
4140 		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
4141 		    pn_length);
4142 		state->hs_hca_pn[pn_length] = 0;
4143 		state->hs_hca_pn_len = pn_length;
4144 		cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
4145 	} else {
4146 		/* Wrong VPD String ID Tag */
4147 		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
4148 		    vpd.vpd_char[0]);
4149 		goto out;
4150 	}
4151 	return;
4152 out:
4153 	state->hs_hca_pn_len = 0;
4154 }
4155 
4156 
4157 
4158 /*
4159  * hermon_intr_or_msi_init()
4160  *    Context: Only called from attach() path context
4161  */
4162 static int
4163 hermon_intr_or_msi_init(hermon_state_t *state)
4164 {
4165 	int	status;
4166 
4167 
4168 	/* Query for the list of supported interrupt event types */
4169 	status = ddi_intr_get_supported_types(state->hs_dip,
4170 	    &state->hs_intr_types_avail);
4171 	if (status != DDI_SUCCESS) {
4172 		return (DDI_FAILURE);
4173 	}
4174 
4175 	/*
4176 	 * If Hermon supports MSI-X in this system (and, if it
4177 	 * hasn't been overridden by a configuration variable), then
4178 	 * the default behavior is to use a single MSI-X.  Otherwise,
4179 	 * fallback to using legacy interrupts.  Also, if MSI-X is chosen,
4180 	 * but fails for whatever reasons, then next try MSI
4181 	 */
4182 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4183 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4184 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSIX);
4185 		if (status == DDI_SUCCESS) {
4186 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSIX;
4187 			return (DDI_SUCCESS);
4188 		}
4189 	}
4190 
4191 	/*
4192 	 * If Hermon supports MSI in this system (and, if it
4193 	 * hasn't been overridden by a configuration variable), then
4194 	 * the default behavior is to use a single MSIX.  Otherwise,
4195 	 * fallback to using legacy interrupts.  Also, if MSI is chosen,
4196 	 * but fails for whatever reasons, then fallback to using legacy
4197 	 * interrupts.
4198 	 */
4199 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4200 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSI)) {
4201 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSI);
4202 		if (status == DDI_SUCCESS) {
4203 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSI;
4204 			return (DDI_SUCCESS);
4205 		}
4206 	}
4207 
4208 	/*
4209 	 * MSI interrupt allocation failed, or was not available.  Fallback to
4210 	 * legacy interrupt support.
4211 	 */
4212 	if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
4213 		status = hermon_add_intrs(state, DDI_INTR_TYPE_FIXED);
4214 		if (status == DDI_SUCCESS) {
4215 			state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
4216 			return (DDI_SUCCESS);
4217 		}
4218 	}
4219 
4220 	/*
4221 	 * None of MSI, MSI-X, nor legacy interrupts were successful.
4222 	 * Return failure.
4223 	 */
4224 	return (DDI_FAILURE);
4225 }
4226 
4227 /*
4228  * hermon_add_intrs()
4229  *    Context: Only called from attach() patch context
4230  */
4231 static int
4232 hermon_add_intrs(hermon_state_t *state, int intr_type)
4233 {
4234 	int	status;
4235 
4236 
4237 	/* Get number of interrupts/MSI supported */
4238 	status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
4239 	    &state->hs_intrmsi_count);
4240 	if (status != DDI_SUCCESS) {
4241 		return (DDI_FAILURE);
4242 	}
4243 
4244 	/* Get number of available interrupts/MSI */
4245 	status = ddi_intr_get_navail(state->hs_dip, intr_type,
4246 	    &state->hs_intrmsi_avail);
4247 	if (status != DDI_SUCCESS) {
4248 		return (DDI_FAILURE);
4249 	}
4250 
4251 	/* Ensure that we have at least one (1) usable MSI or interrupt */
4252 	if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) {
4253 		return (DDI_FAILURE);
4254 	}
4255 
4256 	/* Attempt to allocate the maximum #interrupt/MSI handles */
4257 	status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
4258 	    intr_type, 0, min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
4259 	    &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
4260 	if (status != DDI_SUCCESS) {
4261 		return (DDI_FAILURE);
4262 	}
4263 
4264 	/* Ensure that we have allocated at least one (1) MSI or interrupt */
4265 	if (state->hs_intrmsi_allocd < 1) {
4266 		return (DDI_FAILURE);
4267 	}
4268 	state->hs_eq_dist = state->hs_intrmsi_allocd - 1; /* start at 0 */
4269 
4270 	/*
4271 	 * Extract the priority for the allocated interrupt/MSI.  This
4272 	 * will be used later when initializing certain mutexes.
4273 	 */
4274 	status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
4275 	    &state->hs_intrmsi_pri);
4276 	if (status != DDI_SUCCESS) {
4277 		/* Free the allocated interrupt/MSI handle */
4278 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4279 
4280 		return (DDI_FAILURE);
4281 	}
4282 
4283 	/* Make sure the interrupt/MSI priority is below 'high level' */
4284 	if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
4285 		/* Free the allocated interrupt/MSI handle */
4286 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4287 
4288 		return (DDI_FAILURE);
4289 	}
4290 
4291 	/* Get add'l capability information regarding interrupt/MSI */
4292 	status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
4293 	    &state->hs_intrmsi_cap);
4294 	if (status != DDI_SUCCESS) {
4295 		/* Free the allocated interrupt/MSI handle */
4296 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4297 
4298 		return (DDI_FAILURE);
4299 	}
4300 
4301 	return (DDI_SUCCESS);
4302 }
4303 
4304 
4305 /*
4306  * hermon_intr_or_msi_fini()
4307  *    Context: Only called from attach() and/or detach() path contexts
4308  */
4309 static int
4310 hermon_intr_or_msi_fini(hermon_state_t *state)
4311 {
4312 	int	status;
4313 	int	intr;
4314 
4315 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
4316 
4317 		/* Free the allocated interrupt/MSI handle */
4318 		status = ddi_intr_free(state->hs_intrmsi_hdl[intr]);
4319 		if (status != DDI_SUCCESS) {
4320 			return (DDI_FAILURE);
4321 		}
4322 	}
4323 	return (DDI_SUCCESS);
4324 }
4325 
4326 
4327 /*ARGSUSED*/
4328 void
4329 hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
4330     uint_t offset)
4331 {
4332 	uint32_t	msix_data;
4333 	uint16_t	msg_cntr;
4334 	uint32_t	t_offset;	/* table offset */
4335 	uint32_t	t_bir;
4336 	uint32_t	p_offset;	/* pba */
4337 	uint32_t	p_bir;
4338 	int		t_size;		/* size in entries - each is 4 dwords */
4339 
4340 	/* come in with offset pointing at the capability structure */
4341 
4342 	msix_data = pci_config_get32(hdl, offset);
4343 	cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
4344 	msg_cntr =  pci_config_get16(hdl, offset+2);
4345 	cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
4346 	offset += 4;
4347 	msix_data = pci_config_get32(hdl, offset);	/* table info */
4348 	t_offset = (msix_data & 0xFFF8) >> 3;
4349 	t_bir = msix_data & 0x07;
4350 	offset += 4;
4351 	cmn_err(CE_CONT, "  table %X --offset = %X, bir(bar) = %X\n",
4352 	    msix_data, t_offset, t_bir);
4353 	msix_data = pci_config_get32(hdl, offset);	/* PBA info */
4354 	p_offset = (msix_data & 0xFFF8) >> 3;
4355 	p_bir = msix_data & 0x07;
4356 
4357 	cmn_err(CE_CONT, "  PBA   %X --offset = %X, bir(bar) = %X\n",
4358 	    msix_data, p_offset, p_bir);
4359 	t_size = msg_cntr & 0x7FF;		/* low eleven bits */
4360 	cmn_err(CE_CONT, "    table size = %X entries\n", t_size);
4361 
4362 	offset = t_offset;		/* reuse this for offset from BAR */
4363 #ifdef HERMON_SUPPORTS_MSIX_BAR
4364 	cmn_err(CE_CONT, "First 2 table entries behind BAR2 \n");
4365 	for (i = 0; i < 2; i++) {
4366 		for (j = 0; j < 4; j++, offset += 4) {
4367 			msix_data = ddi_get32(state->hs_reg_msihdl,
4368 			    (uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
4369 			    + offset));
4370 			cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
4371 			    i, j, msix_data);
4372 		}
4373 	}
4374 #endif
4375 
4376 }
4377 
4378 /*
4379  * X86 fastreboot support functions.
4380  * These functions are used to save/restore MSI-X table/PBA and also
4381  * to disable MSI-X interrupts in hermon_quiesce().
4382  */
4383 
4384 /* Return the message control for MSI-X */
4385 static ushort_t
4386 get_msix_ctrl(dev_info_t *dip)
4387 {
4388 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4389 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4390 	    DEVI(dip)->devi_instance);
4391 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4392 	ASSERT(pci_cfg_hdl != NULL);
4393 
4394 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4395 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4396 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4397 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4398 			return (0);
4399 	}
4400 	ASSERT(msix_ctrl != 0);
4401 
4402 	return (msix_ctrl);
4403 }
4404 
4405 /* Return the MSI-X table size */
4406 static size_t
4407 get_msix_tbl_size(dev_info_t *dip)
4408 {
4409 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4410 	ASSERT(msix_ctrl != 0);
4411 
4412 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4413 	    PCI_MSIX_VECTOR_SIZE);
4414 }
4415 
4416 /* Return the MSI-X PBA size */
4417 static size_t
4418 get_msix_pba_size(dev_info_t *dip)
4419 {
4420 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4421 	ASSERT(msix_ctrl != 0);
4422 
4423 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8);
4424 }
4425 
4426 /* Set up the MSI-X table/PBA save area */
4427 static void
4428 hermon_set_msix_info(hermon_state_t *state)
4429 {
4430 	uint_t			rnumber, breg, nregs;
4431 	ushort_t		caps_ctrl, msix_ctrl;
4432 	pci_regspec_t		*rp;
4433 	int			reg_size, addr_space, offset, *regs_list, i;
4434 
4435 	/*
4436 	 * MSI-X BIR Index Table:
4437 	 * BAR indicator register (BIR) to Base Address register.
4438 	 */
4439 	uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
4440 	    0x20, 0x24, 0xff, 0xff};
4441 
4442 	/* Fastreboot data access  attribute */
4443 	ddi_device_acc_attr_t	dev_attr = {
4444 		0,				/* version */
4445 		DDI_STRUCTURE_LE_ACC,
4446 		DDI_STRICTORDER_ACC,		/* attr access */
4447 		0
4448 	};
4449 
4450 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4451 	ASSERT(pci_cfg_hdl != NULL);
4452 
4453 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4454 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4455 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4456 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4457 			return;
4458 	}
4459 	ASSERT(msix_ctrl != 0);
4460 
4461 	state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4462 	    PCI_MSIX_TBL_OFFSET);
4463 
4464 	/* Get the BIR for MSI-X table */
4465 	breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
4466 	    PCI_MSIX_TBL_BIR_MASK];
4467 	ASSERT(breg != 0xFF);
4468 
4469 	/* Set the MSI-X table offset */
4470 	state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
4471 	    ~PCI_MSIX_TBL_BIR_MASK;
4472 
4473 	/* Set the MSI-X table size */
4474 	state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4475 	    PCI_MSIX_VECTOR_SIZE;
4476 
4477 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
4478 	    DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
4479 	    DDI_PROP_SUCCESS) {
4480 		return;
4481 	}
4482 	reg_size = sizeof (pci_regspec_t) / sizeof (int);
4483 
4484 	/* Check the register number for MSI-X table */
4485 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4486 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4487 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4488 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4489 
4490 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4491 		    (addr_space == PCI_ADDR_MEM64))) {
4492 			rnumber = i;
4493 			break;
4494 		}
4495 	}
4496 	ASSERT(rnumber != 0);
4497 	state->hs_msix_tbl_rnumber = rnumber;
4498 
4499 	/* Set device attribute version and access according to Hermon FM */
4500 	dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
4501 	dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);
4502 
4503 	/* Map the entire MSI-X vector table */
4504 	if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
4505 	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
4506 	    state->hs_msix_tbl_size, &dev_attr,
4507 	    &state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
4508 		return;
4509 	}
4510 
4511 	state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4512 	    PCI_MSIX_PBA_OFFSET);
4513 
4514 	/* Get the BIR for MSI-X PBA */
4515 	breg = pci_msix_bir_index[state->hs_msix_pba_offset &
4516 	    PCI_MSIX_PBA_BIR_MASK];
4517 	ASSERT(breg != 0xFF);
4518 
4519 	/* Set the MSI-X PBA offset */
4520 	state->hs_msix_pba_offset = state->hs_msix_pba_offset &
4521 	    ~PCI_MSIX_PBA_BIR_MASK;
4522 
4523 	/* Set the MSI-X PBA size */
4524 	state->hs_msix_pba_size =
4525 	    ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;
4526 
4527 	/* Check the register number for MSI-X PBA */
4528 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4529 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4530 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4531 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4532 
4533 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4534 		    (addr_space == PCI_ADDR_MEM64))) {
4535 			rnumber = i;
4536 			break;
4537 		}
4538 	}
4539 	ASSERT(rnumber != 0);
4540 	state->hs_msix_pba_rnumber = rnumber;
4541 
4542 	/* Map in the MSI-X Pending Bit Array */
4543 	if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
4544 	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
4545 	    state->hs_msix_pba_size, &dev_attr,
4546 	    &state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
4547 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
4548 		state->hs_fm_msix_tblhdl = NULL;
4549 		return;
4550 	}
4551 
4552 	/* Set the MSI-X table save area */
4553 	state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
4554 	    KM_SLEEP);
4555 
4556 	/* Set the MSI-X PBA save area */
4557 	state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
4558 	    KM_SLEEP);
4559 }
4560 
4561 /* Disable Hermon interrupts */
4562 static int
4563 hermon_intr_disable(hermon_state_t *state)
4564 {
4565 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4566 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4567 	ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
4568 	int i, j;
4569 	ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
4570 	ASSERT(state->hs_intr_types_avail &
4571 	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));
4572 
4573 	/*
4574 	 * Check if MSI-X interrupts are used. If so, disable MSI-X interupts.
4575 	 * If not, since Hermon doesn't support MSI interrupts, assuming the
4576 	 * legacy interrupt is used instead, disable the legacy interrupt.
4577 	 */
4578 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4579 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4580 
4581 		if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4582 		    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4583 			if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL,
4584 			    caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4585 				return (DDI_FAILURE);
4586 		}
4587 		ASSERT(msix_ctrl != 0);
4588 
4589 		if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
4590 			return (DDI_SUCCESS);
4591 
4592 		/* Clear all inums in MSI-X table */
4593 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4594 		    i += PCI_MSIX_VECTOR_SIZE) {
4595 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4596 				char *addr = state->hs_msix_tbl_addr + i + j;
4597 				ddi_put32(msix_tblhdl,
4598 				    (uint32_t *)(uintptr_t)addr, 0x0);
4599 			}
4600 		}
4601 
4602 		/* Disable MSI-X interrupts */
4603 		msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
4604 		PCI_CAP_PUT16(pci_cfg_hdl, NULL, caps_ctrl, PCI_MSIX_CTRL,
4605 		    msix_ctrl);
4606 
4607 	} else {
4608 		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
4609 		ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);
4610 
4611 		/* Disable the legacy interrupts */
4612 		cmdreg |= PCI_COMM_INTX_DISABLE;
4613 		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
4614 	}
4615 
4616 	return (DDI_SUCCESS);
4617 }
4618 
4619 /* Hermon quiesce(9F) entry */
4620 static int
4621 hermon_quiesce(dev_info_t *dip)
4622 {
4623 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4624 	    DEVI(dip)->devi_instance);
4625 	ddi_acc_handle_t pcihdl = hermon_get_pcihdl(state);
4626 	ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
4627 	ddi_acc_handle_t msix_tbl_hdl = hermon_get_msix_tblhdl(state);
4628 	ddi_acc_handle_t msix_pba_hdl = hermon_get_msix_pbahdl(state);
4629 	uint32_t sem, reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
4630 	uint64_t data64;
4631 	uint32_t data32;
4632 	int status, i, j, loopcnt;
4633 	uint_t offset;
4634 
4635 	ASSERT(state != NULL);
4636 
4637 	/* start fastreboot */
4638 	state->hs_quiescing = B_TRUE;
4639 
4640 	/* If it's in maintenance mode, do nothing but return with SUCCESS */
4641 	if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
4642 		return (DDI_SUCCESS);
4643 	}
4644 
4645 	/* suppress Hermon FM ereports */
4646 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
4647 		hermon_clr_state_nolock(state, HCA_EREPORT_FM);
4648 	}
4649 
4650 	/* Shutdown HCA ports */
4651 	if (hermon_hca_ports_shutdown(state,
4652 	    state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
4653 		state->hs_quiescing = B_FALSE;
4654 		return (DDI_FAILURE);
4655 	}
4656 
4657 	/* Close HCA */
4658 	if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
4659 	    HERMON_CMD_SUCCESS) {
4660 		state->hs_quiescing = B_FALSE;
4661 		return (DDI_FAILURE);
4662 	}
4663 
4664 	/* Disable interrupts */
4665 	if (hermon_intr_disable(state) != DDI_SUCCESS) {
4666 		state->hs_quiescing = B_FALSE;
4667 		return (DDI_FAILURE);
4668 	}
4669 
4670 	/*
4671 	 * Query the PCI capabilities of the HCA device, but don't process
4672 	 * the VPD until after reset.
4673 	 */
4674 	if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
4675 		state->hs_quiescing = B_FALSE;
4676 		return (DDI_FAILURE);
4677 	}
4678 
4679 	/*
4680 	 * Read all PCI config info (reg0...reg63).  Note: According to the
4681 	 * Hermon software reset application note, we should not read or
4682 	 * restore the values in reg22 and reg23.
4683 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
4684 	 * register LAST, and technically, you need to restore the
4685 	 * PCIE Capability "device control" and "link control" (word-sized,
4686 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
4687 	 * We hold off restoring the command register - offset 0x4 - till last
4688 	 */
4689 
4690 	/* 1st, wait for the semaphore assure accessibility - per PRM */
4691 	status = -1;
4692 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
4693 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
4694 		if (sem == 0) {
4695 			status = 0;
4696 			break;
4697 		}
4698 		drv_usecwait(1);
4699 	}
4700 
4701 	/* Check if timeout happens */
4702 	if (status == -1) {
4703 		state->hs_quiescing = B_FALSE;
4704 		return (DDI_FAILURE);
4705 	}
4706 
4707 	/* MSI-X interrupts are used, save the MSI-X table */
4708 	if (msix_tbl_hdl && msix_pba_hdl) {
4709 		/* save MSI-X table */
4710 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4711 		    i += PCI_MSIX_VECTOR_SIZE) {
4712 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4713 				char *addr = state->hs_msix_tbl_addr + i + j;
4714 				data32 = ddi_get32(msix_tbl_hdl,
4715 				    (uint32_t *)(uintptr_t)addr);
4716 				*(uint32_t *)(uintptr_t)(state->
4717 				    hs_msix_tbl_entries + i + j) = data32;
4718 			}
4719 		}
4720 		/* save MSI-X PBA */
4721 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4722 			char *addr = state->hs_msix_pba_addr + i;
4723 			data64 = ddi_get64(msix_pba_hdl,
4724 			    (uint64_t *)(uintptr_t)addr);
4725 			*(uint64_t *)(uintptr_t)(state->
4726 			    hs_msix_pba_entries + i) = data64;
4727 		}
4728 	}
4729 
4730 	/* save PCI config space */
4731 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4732 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4733 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4734 			state->hs_cfg_data[i]  =
4735 			    pci_config_get32(pcihdl, i << 2);
4736 		}
4737 	}
4738 
4739 	/* SW-reset HCA */
4740 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
4741 
4742 	/*
4743 	 * This delay is required so as not to cause a panic here. If the
4744 	 * device is accessed too soon after reset it will not respond to
4745 	 * config cycles, causing a Master Abort and panic.
4746 	 */
4747 	drv_usecwait(reset_delay);
4748 
4749 	/* Poll waiting for the device to finish resetting */
4750 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
4751 	while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
4752 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
4753 		if (--loopcnt == 0)
4754 			break;	/* just in case, break and go on */
4755 	}
4756 	if (loopcnt == 0) {
4757 		state->hs_quiescing = B_FALSE;
4758 		return (DDI_FAILURE);
4759 	}
4760 
4761 	/* Restore the config info */
4762 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4763 		if (i == 1) continue;	/* skip the status/ctrl reg */
4764 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4765 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4766 			pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
4767 		}
4768 	}
4769 
4770 	/* If MSI-X interrupts are used, restore the MSI-X table */
4771 	if (msix_tbl_hdl && msix_pba_hdl) {
4772 		/* restore MSI-X PBA */
4773 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4774 			char *addr = state->hs_msix_pba_addr + i;
4775 			data64 = *(uint64_t *)(uintptr_t)
4776 			    (state->hs_msix_pba_entries + i);
4777 			ddi_put64(msix_pba_hdl,
4778 			    (uint64_t *)(uintptr_t)addr, data64);
4779 		}
4780 		/* restore MSI-X table */
4781 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4782 		    i += PCI_MSIX_VECTOR_SIZE) {
4783 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4784 				char *addr = state->hs_msix_tbl_addr + i + j;
4785 				data32 = *(uint32_t *)(uintptr_t)
4786 				    (state->hs_msix_tbl_entries + i + j);
4787 				ddi_put32(msix_tbl_hdl,
4788 				    (uint32_t *)(uintptr_t)addr, data32);
4789 			}
4790 		}
4791 	}
4792 
4793 	/*
4794 	 * PCI Express Capability - we saved during capability list, and
4795 	 * we'll restore them here.
4796 	 */
4797 	offset = state->hs_pci_cap_offset;
4798 	data32 = state->hs_pci_cap_devctl;
4799 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
4800 	data32 = state->hs_pci_cap_lnkctl;
4801 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
4802 
4803 	/* restore the command register */
4804 	pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
4805 
4806 	return (DDI_SUCCESS);
4807 }
4808