xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon.c (revision 17f1e64a433a4ca00ffed7539e10c297580a7002)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon.c
29  *    Hermon (InfiniBand) HCA Driver attach/detach Routines
30  *
31  *    Implements all the routines necessary for the attach, setup,
32  *    initialization (and subsequent possible teardown and detach) of the
33  *    Hermon InfiniBand HCA driver.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/file.h>
38 #include <sys/open.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/stat.h>
44 #include <sys/pci.h>
45 #include <sys/pci_cap.h>
46 #include <sys/bitmap.h>
47 #include <sys/policy.h>
48 
49 #include <sys/ib/adapters/hermon/hermon.h>
50 
51 /* The following works around a problem in pre-2_7_000 firmware. */
52 #define	HERMON_FW_WORKAROUND
53 
54 int hermon_verbose = 0;
55 
56 /* Hermon HCA State Pointer */
57 void *hermon_statep;
58 
59 int	debug_vpd = 0;
60 
61 
62 /*
63  * The Hermon "userland resource database" is common to instances of the
64  * Hermon HCA driver.  This structure "hermon_userland_rsrc_db" contains all
65  * the necessary information to maintain it.
66  */
67 hermon_umap_db_t hermon_userland_rsrc_db;
68 
69 static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
70 static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
71 static int hermon_open(dev_t *, int, int, cred_t *);
72 static int hermon_close(dev_t, int, int, cred_t *);
73 static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
74 
75 static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
76     int instance);
77 static void hermon_drv_fini(hermon_state_t *state);
78 static void hermon_drv_fini2(hermon_state_t *state);
79 static int hermon_isr_init(hermon_state_t *state);
80 static void hermon_isr_fini(hermon_state_t *state);
81 
82 static int hermon_hw_init(hermon_state_t *state);
83 
84 static void hermon_hw_fini(hermon_state_t *state,
85     hermon_drv_cleanup_level_t cleanup);
86 static int hermon_soft_state_init(hermon_state_t *state);
87 static void hermon_soft_state_fini(hermon_state_t *state);
88 static int hermon_icm_config_setup(hermon_state_t *state,
89     hermon_hw_initqueryhca_t *inithca);
90 static void hermon_icm_tables_init(hermon_state_t *state);
91 static void hermon_icm_tables_fini(hermon_state_t *state);
92 static int hermon_icm_dma_init(hermon_state_t *state);
93 static void hermon_icm_dma_fini(hermon_state_t *state);
94 static void hermon_inithca_set(hermon_state_t *state,
95     hermon_hw_initqueryhca_t *inithca);
96 static int hermon_hca_port_init(hermon_state_t *state);
97 static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
98 static int hermon_internal_uarpg_init(hermon_state_t *state);
99 static void hermon_internal_uarpg_fini(hermon_state_t *state);
100 static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
101 static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
102 static int hermon_sw_reset(hermon_state_t *state);
103 static int hermon_mcg_init(hermon_state_t *state);
104 static void hermon_mcg_fini(hermon_state_t *state);
105 static int hermon_fw_version_check(hermon_state_t *state);
106 static void hermon_device_info_report(hermon_state_t *state);
107 static int hermon_pci_capability_list(hermon_state_t *state,
108     ddi_acc_handle_t hdl);
109 static void hermon_pci_capability_vpd(hermon_state_t *state,
110     ddi_acc_handle_t hdl, uint_t offset);
111 static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
112     uint32_t addr, uint32_t *data);
113 static int hermon_intr_or_msi_init(hermon_state_t *state);
114 static int hermon_add_intrs(hermon_state_t *state, int intr_type);
115 static int hermon_intr_or_msi_fini(hermon_state_t *state);
116 void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
117     uint_t offset);
118 
119 static uint64_t hermon_size_icm(hermon_state_t *state);
120 
121 /* X86 fastreboot support */
122 static ushort_t get_msix_ctrl(dev_info_t *);
123 static size_t get_msix_tbl_size(dev_info_t *);
124 static size_t get_msix_pba_size(dev_info_t *);
125 static void hermon_set_msix_info(hermon_state_t *);
126 static int hermon_intr_disable(hermon_state_t *);
127 static int hermon_quiesce(dev_info_t *);
128 
129 
130 /* Character/Block Operations */
131 static struct cb_ops hermon_cb_ops = {
132 	hermon_open,		/* open */
133 	hermon_close,		/* close */
134 	nodev,			/* strategy (block) */
135 	nodev,			/* print (block) */
136 	nodev,			/* dump (block) */
137 	nodev,			/* read */
138 	nodev,			/* write */
139 	hermon_ioctl,		/* ioctl */
140 	hermon_devmap,		/* devmap */
141 	NULL,			/* mmap */
142 	nodev,			/* segmap */
143 	nochpoll,		/* chpoll */
144 	ddi_prop_op,		/* prop_op */
145 	NULL,			/* streams */
146 	D_NEW | D_MP |
147 	D_64BIT | /* D_HOTPLUG | */
148 	D_DEVMAP,		/* flags */
149 	CB_REV			/* rev */
150 };
151 
152 /* Driver Operations */
153 static struct dev_ops hermon_ops = {
154 	DEVO_REV,		/* struct rev */
155 	0,			/* refcnt */
156 	hermon_getinfo,		/* getinfo */
157 	nulldev,		/* identify */
158 	nulldev,		/* probe */
159 	hermon_attach,		/* attach */
160 	hermon_detach,		/* detach */
161 	nodev,			/* reset */
162 	&hermon_cb_ops,		/* cb_ops */
163 	NULL,			/* bus_ops */
164 	nodev,			/* power */
165 	hermon_quiesce,		/* devo_quiesce */
166 };
167 
168 /* Module Driver Info */
169 static struct modldrv hermon_modldrv = {
170 	&mod_driverops,
171 	"ConnectX IB Driver",
172 	&hermon_ops
173 };
174 
175 /* Module Linkage */
176 static struct modlinkage hermon_modlinkage = {
177 	MODREV_1,
178 	&hermon_modldrv,
179 	NULL
180 };
181 
182 /*
183  * This extern refers to the ibc_operations_t function vector that is defined
184  * in the hermon_ci.c file.
185  */
186 extern ibc_operations_t	hermon_ibc_ops;
187 
188 /*
189  * _init()
190  */
191 int
192 _init()
193 {
194 	int	status;
195 
196 	status = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
197 	    (size_t)HERMON_INITIAL_STATES);
198 	if (status != 0) {
199 		return (status);
200 	}
201 
202 	status = ibc_init(&hermon_modlinkage);
203 	if (status != 0) {
204 		ddi_soft_state_fini(&hermon_statep);
205 		return (status);
206 	}
207 
208 	status = mod_install(&hermon_modlinkage);
209 	if (status != 0) {
210 		ibc_fini(&hermon_modlinkage);
211 		ddi_soft_state_fini(&hermon_statep);
212 		return (status);
213 	}
214 
215 	/* Initialize the Hermon "userland resources database" */
216 	hermon_umap_db_init();
217 
218 	return (status);
219 }
220 
221 
222 /*
223  * _info()
224  */
225 int
226 _info(struct modinfo *modinfop)
227 {
228 	int	status;
229 
230 	status = mod_info(&hermon_modlinkage, modinfop);
231 	return (status);
232 }
233 
234 
235 /*
236  * _fini()
237  */
238 int
239 _fini()
240 {
241 	int	status;
242 
243 	status = mod_remove(&hermon_modlinkage);
244 	if (status != 0) {
245 		return (status);
246 	}
247 
248 	/* Destroy the Hermon "userland resources database" */
249 	hermon_umap_db_fini();
250 
251 	ibc_fini(&hermon_modlinkage);
252 	ddi_soft_state_fini(&hermon_statep);
253 
254 	return (status);
255 }
256 
257 
258 /*
259  * hermon_getinfo()
260  */
261 /* ARGSUSED */
262 static int
263 hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
264 {
265 	dev_t		dev;
266 	hermon_state_t 	*state;
267 	minor_t		instance;
268 
269 	switch (cmd) {
270 	case DDI_INFO_DEVT2DEVINFO:
271 		dev = (dev_t)arg;
272 		instance = HERMON_DEV_INSTANCE(dev);
273 		state = ddi_get_soft_state(hermon_statep, instance);
274 		if (state == NULL) {
275 			return (DDI_FAILURE);
276 		}
277 		*result = (void *)state->hs_dip;
278 		return (DDI_SUCCESS);
279 
280 	case DDI_INFO_DEVT2INSTANCE:
281 		dev = (dev_t)arg;
282 		instance = HERMON_DEV_INSTANCE(dev);
283 		*result = (void *)(uintptr_t)instance;
284 		return (DDI_SUCCESS);
285 
286 	default:
287 		break;
288 	}
289 
290 	return (DDI_FAILURE);
291 }
292 
293 
294 /*
295  * hermon_open()
296  */
297 /* ARGSUSED */
298 static int
299 hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
300 {
301 	hermon_state_t		*state;
302 	hermon_rsrc_t 		*rsrcp;
303 	hermon_umap_db_entry_t	*umapdb, *umapdb2;
304 	minor_t			instance;
305 	uint64_t		key, value;
306 	uint_t			hr_indx;
307 	dev_t			dev;
308 	int			status;
309 
310 	instance = HERMON_DEV_INSTANCE(*devp);
311 	state = ddi_get_soft_state(hermon_statep, instance);
312 	if (state == NULL) {
313 		return (ENXIO);
314 	}
315 
316 	/*
317 	 * Only allow driver to be opened for character access, and verify
318 	 * whether exclusive access is allowed.
319 	 */
320 	if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
321 	    secpolicy_excl_open(credp) != 0)) {
322 		return (EINVAL);
323 	}
324 
325 	/*
326 	 * Search for the current process PID in the "userland resources
327 	 * database".  If it is not found, then attempt to allocate a UAR
328 	 * page and add the ("key", "value") pair to the database.
329 	 * Note:  As a last step we always return a devp appropriate for
330 	 * the open.  Either we return a new minor number (based on the
331 	 * instance and the UAR page index) or we return the current minor
332 	 * number for the given client process.
333 	 *
334 	 * We also add an entry to the database to allow for lookup from
335 	 * "dev_t" to the current process PID.  This is necessary because,
336 	 * under certain circumstance, the process PID that calls the Hermon
337 	 * close() entry point may not be the same as the one who called
338 	 * open().  Specifically, this can happen if a child process calls
339 	 * the Hermon's open() entry point, gets a UAR page, maps it out (using
340 	 * mmap()), and then exits without calling munmap().  Because mmap()
341 	 * adds a reference to the file descriptor, at the exit of the child
342 	 * process the file descriptor is "inherited" by the parent (and will
343 	 * be close()'d by the parent's PID only when it exits).
344 	 *
345 	 * Note: We use the hermon_umap_db_find_nolock() and
346 	 * hermon_umap_db_add_nolock() database access routines below (with
347 	 * an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
348 	 * to ensure that the multiple accesses (in this case searching for,
349 	 * and then adding _two_ database entries) can be done atomically.
350 	 */
351 	key = ddi_get_pid();
352 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
353 	status = hermon_umap_db_find_nolock(instance, key,
354 	    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
355 	if (status != DDI_SUCCESS) {
356 		/*
357 		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
358 		 * But we still need some rsrcp value, and a mostly unique
359 		 * hr_indx value.  So we set rsrcp to NULL for maintenance
360 		 * mode, and use a rolling count for hr_indx.  The field
361 		 * 'hs_open_hr_indx' is used only in this maintenance mode
362 		 * condition.
363 		 *
364 		 * Otherwise, if we are in operational mode then we allocate
365 		 * the UAR page as normal, and use the rsrcp value and tr_indx
366 		 * value from that allocation.
367 		 */
368 		if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
369 			rsrcp = NULL;
370 			hr_indx = state->hs_open_ar_indx++;
371 		} else {
372 			/* Allocate a new UAR page for this process */
373 			status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
374 			    HERMON_NOSLEEP, &rsrcp);
375 			if (status != DDI_SUCCESS) {
376 				mutex_exit(
377 				    &hermon_userland_rsrc_db.hdl_umapdb_lock);
378 				return (EAGAIN);
379 			}
380 
381 			hr_indx = rsrcp->hr_indx;
382 		}
383 
384 		/*
385 		 * Allocate an entry to track the UAR page resource in the
386 		 * "userland resources database".
387 		 */
388 		umapdb = hermon_umap_db_alloc(instance, key,
389 		    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
390 		if (umapdb == NULL) {
391 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
392 			/* If in "maintenance mode", don't free the rsrc */
393 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
394 				hermon_rsrc_free(state, &rsrcp);
395 			}
396 			return (EAGAIN);
397 		}
398 
399 		/*
400 		 * Create a new device number.  Minor number is a function of
401 		 * the UAR page index (15 bits) and the device instance number
402 		 * (3 bits).
403 		 */
404 		dev = makedevice(getmajor(*devp), (hr_indx <<
405 		    HERMON_MINORNUM_SHIFT) | instance);
406 
407 		/*
408 		 * Allocate another entry in the "userland resources database"
409 		 * to track the association of the device number (above) to
410 		 * the current process ID (in "key").
411 		 */
412 		umapdb2 = hermon_umap_db_alloc(instance, dev,
413 		    MLNX_UMAP_PID_RSRC, (uint64_t)key);
414 		if (umapdb2 == NULL) {
415 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
416 			hermon_umap_db_free(umapdb);
417 			/* If in "maintenance mode", don't free the rsrc */
418 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
419 				hermon_rsrc_free(state, &rsrcp);
420 			}
421 			return (EAGAIN);
422 		}
423 
424 		/* Add the entries to the database */
425 		hermon_umap_db_add_nolock(umapdb);
426 		hermon_umap_db_add_nolock(umapdb2);
427 
428 	} else {
429 		/*
430 		 * Return the same device number as on the original open()
431 		 * call.  This was calculated as a function of the UAR page
432 		 * index (top 16 bits) and the device instance number
433 		 */
434 		rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
435 		dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
436 		    HERMON_MINORNUM_SHIFT) | instance);
437 	}
438 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
439 
440 	*devp = dev;
441 
442 	return (0);
443 }
444 
445 
446 /*
447  * hermon_close()
448  */
449 /* ARGSUSED */
450 static int
451 hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
452 {
453 	hermon_state_t		*state;
454 	hermon_rsrc_t		*rsrcp;
455 	hermon_umap_db_entry_t	*umapdb;
456 	hermon_umap_db_priv_t	*priv;
457 	minor_t			instance;
458 	uint64_t		key, value;
459 	int			status, reset_status = 0;
460 
461 	instance = HERMON_DEV_INSTANCE(dev);
462 	state = ddi_get_soft_state(hermon_statep, instance);
463 	if (state == NULL) {
464 		return (ENXIO);
465 	}
466 
467 	/*
468 	 * Search for "dev_t" in the "userland resources database".  As
469 	 * explained above in hermon_open(), we can't depend on using the
470 	 * current process ID here to do the lookup because the process
471 	 * that ultimately closes may not be the same one who opened
472 	 * (because of inheritance).
473 	 * So we lookup the "dev_t" (which points to the PID of the process
474 	 * that opened), and we remove the entry from the database (and free
475 	 * it up).  Then we do another query based on the PID value.  And when
476 	 * we find that database entry, we free it up too and then free the
477 	 * Hermon UAR page resource.
478 	 *
479 	 * Note: We use the hermon_umap_db_find_nolock() database access
480 	 * routine below (with an explicit mutex_enter of the database lock)
481 	 * to ensure that the multiple accesses (which attempt to remove the
482 	 * two database entries) can be done atomically.
483 	 *
484 	 * This works the same in both maintenance mode and HCA mode, except
485 	 * for the call to hermon_rsrc_free().  In the case of maintenance mode,
486 	 * this call is not needed, as it was not allocated in hermon_open()
487 	 * above.
488 	 */
489 	key = dev;
490 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
491 	status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
492 	    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
493 	if (status == DDI_SUCCESS) {
494 		/*
495 		 * If the "hdb_priv" field is non-NULL, it indicates that
496 		 * some "on close" handling is still necessary.  Call
497 		 * hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
498 		 * to invoke all the registered callbacks).  Then free up
499 		 * the resources associated with "hdb_priv" and continue
500 		 * closing.
501 		 */
502 		priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
503 		if (priv != NULL) {
504 			reset_status = hermon_umap_db_handle_onclose_cb(priv);
505 			kmem_free(priv, sizeof (hermon_umap_db_priv_t));
506 			umapdb->hdbe_common.hdb_priv = (void *)NULL;
507 		}
508 
509 		hermon_umap_db_free(umapdb);
510 
511 		/*
512 		 * Now do another lookup using PID as the key (copy it from
513 		 * "value").  When this lookup is complete, the "value" field
514 		 * will contain the hermon_rsrc_t pointer for the UAR page
515 		 * resource.
516 		 */
517 		key = value;
518 		status = hermon_umap_db_find_nolock(instance, key,
519 		    MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
520 		    &umapdb);
521 		if (status == DDI_SUCCESS) {
522 			hermon_umap_db_free(umapdb);
523 			/* If in "maintenance mode", don't free the rsrc */
524 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
525 				rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
526 				hermon_rsrc_free(state, &rsrcp);
527 			}
528 		}
529 	}
530 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
531 	return (reset_status);
532 }
533 
534 
535 /*
536  * hermon_attach()
537  *    Context: Only called from attach() path context
538  */
539 static int
540 hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
541 {
542 	hermon_state_t	*state;
543 	ibc_clnt_hdl_t	tmp_ibtfpriv;
544 	ibc_status_t	ibc_status;
545 	int		instance;
546 	int		status;
547 
548 #ifdef __lock_lint
549 	(void) hermon_quiesce(dip);
550 #endif
551 
552 	switch (cmd) {
553 	case DDI_ATTACH:
554 		instance = ddi_get_instance(dip);
555 		status = ddi_soft_state_zalloc(hermon_statep, instance);
556 		if (status != DDI_SUCCESS) {
557 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
558 			    "attach_ssz_fail", instance);
559 			goto fail_attach_nomsg;
560 
561 		}
562 		state = ddi_get_soft_state(hermon_statep, instance);
563 		if (state == NULL) {
564 			ddi_soft_state_free(hermon_statep, instance);
565 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
566 			    "attach_gss_fail", instance);
567 			goto fail_attach_nomsg;
568 		}
569 
570 		/* clear the attach error buffer */
571 		HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);
572 
573 		/* Save away devinfo and instance before hermon_fm_init() */
574 		state->hs_dip = dip;
575 		state->hs_instance = instance;
576 
577 		hermon_fm_init(state);
578 
579 		/*
580 		 * Initialize Hermon driver and hardware.
581 		 *
582 		 * Note: If this initialization fails we may still wish to
583 		 * create a device node and remain operational so that Hermon
584 		 * firmware can be updated/flashed (i.e. "maintenance mode").
585 		 * If this is the case, then "hs_operational_mode" will be
586 		 * equal to HERMON_MAINTENANCE_MODE.  We will not attempt to
587 		 * attach to the IBTF or register with the IBMF (i.e. no
588 		 * InfiniBand interfaces will be enabled).
589 		 */
590 		status = hermon_drv_init(state, dip, instance);
591 		if ((status != DDI_SUCCESS) &&
592 		    (HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
593 			goto fail_attach;
594 		}
595 
596 		/*
597 		 * Change the Hermon FM mode
598 		 */
599 		if ((hermon_get_state(state) & HCA_PIO_FM) &&
600 		    HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
601 			/*
602 			 * Now we wait for 50ms to give an opportunity
603 			 * to Solaris FMA so that HW errors can be notified.
604 			 * Then check if there are HW errors or not. If
605 			 * a HW error is detected, the Hermon attachment
606 			 * must be failed.
607 			 */
608 			delay(drv_usectohz(50000));
609 			if (hermon_init_failure(state)) {
610 				hermon_drv_fini(state);
611 				HERMON_WARNING(state, "unable to "
612 				    "attach Hermon due to a HW error");
613 				HERMON_ATTACH_MSG(state->hs_attach_buf,
614 				    "hermon_attach_failure");
615 				goto fail_attach;
616 			}
617 
618 			/*
619 			 * There seems no HW errors during the attachment,
620 			 * so let's change the Hermon FM state to the
621 			 * ereport only mode.
622 			 */
623 			if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
624 				/* unwind the resources */
625 				hermon_drv_fini(state);
626 				HERMON_ATTACH_MSG(state->hs_attach_buf,
627 				    "hermon_attach_failure");
628 				goto fail_attach;
629 			}
630 		}
631 
632 		/* Create the minor node for device */
633 		status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
634 		    DDI_PSEUDO, 0);
635 		if (status != DDI_SUCCESS) {
636 			hermon_drv_fini(state);
637 			HERMON_ATTACH_MSG(state->hs_attach_buf,
638 			    "attach_create_mn_fail");
639 			goto fail_attach;
640 		}
641 
642 		/*
643 		 * If we are in "maintenance mode", then we don't want to
644 		 * register with the IBTF.  All InfiniBand interfaces are
645 		 * uninitialized, and the device is only capable of handling
646 		 * requests to update/flash firmware (or test/debug requests).
647 		 */
648 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
649 			cmn_err(CE_NOTE, "!Hermon is operational\n");
650 
651 			/* Attach to InfiniBand Transport Framework (IBTF) */
652 			ibc_status = ibc_attach(&tmp_ibtfpriv,
653 			    &state->hs_ibtfinfo);
654 			if (ibc_status != IBC_SUCCESS) {
655 				cmn_err(CE_CONT, "hermon_attach: ibc_attach "
656 				    "failed\n");
657 				ddi_remove_minor_node(dip, "devctl");
658 				hermon_drv_fini(state);
659 				HERMON_ATTACH_MSG(state->hs_attach_buf,
660 				    "attach_ibcattach_fail");
661 				goto fail_attach;
662 			}
663 
664 			/*
665 			 * Now that we've successfully attached to the IBTF,
666 			 * we enable all appropriate asynch and CQ events to
667 			 * be forwarded to the IBTF.
668 			 */
669 			HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
670 
671 			ibc_post_attach(state->hs_ibtfpriv);
672 
673 			/* Register agents with IB Mgmt Framework (IBMF) */
674 			status = hermon_agent_handlers_init(state);
675 			if (status != DDI_SUCCESS) {
676 				(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
677 				HERMON_QUIESCE_IBTF_CALLB(state);
678 				if (state->hs_in_evcallb != 0) {
679 					HERMON_WARNING(state, "unable to "
680 					    "quiesce Hermon IBTF callbacks");
681 				}
682 				ibc_detach(tmp_ibtfpriv);
683 				ddi_remove_minor_node(dip, "devctl");
684 				hermon_drv_fini(state);
685 				HERMON_ATTACH_MSG(state->hs_attach_buf,
686 				    "attach_agentinit_fail");
687 				goto fail_attach;
688 			}
689 		}
690 
691 		/* Report attach in maintenance mode, if appropriate */
692 		if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
693 			cmn_err(CE_NOTE, "hermon%d: driver attached "
694 			    "(for maintenance mode only)", state->hs_instance);
695 			hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
696 		}
697 
698 		/* Report that driver was loaded */
699 		ddi_report_dev(dip);
700 
701 		/* Send device information to log file */
702 		hermon_device_info_report(state);
703 
704 		/* DEBUG PRINT */
705 		cmn_err(CE_CONT, "!Hermon attach complete\n");
706 		return (DDI_SUCCESS);
707 
708 	case DDI_RESUME:
709 		/* Add code here for DDI_RESUME XXX */
710 		return (DDI_FAILURE);
711 
712 	default:
713 		cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
714 		break;
715 	}
716 
717 fail_attach:
718 	cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
719 	    state->hs_attach_buf);
720 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
721 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
722 	}
723 	hermon_drv_fini2(state);
724 	hermon_fm_fini(state);
725 	ddi_soft_state_free(hermon_statep, instance);
726 
727 fail_attach_nomsg:
728 	return (DDI_FAILURE);
729 }
730 
731 
732 /*
733  * hermon_detach()
734  *    Context: Only called from detach() path context
735  */
736 static int
737 hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
738 {
739 	hermon_state_t	*state;
740 	ibc_clnt_hdl_t	tmp_ibtfpriv;
741 	ibc_status_t	ibc_status;
742 	int		instance, status;
743 
744 	instance = ddi_get_instance(dip);
745 	state = ddi_get_soft_state(hermon_statep, instance);
746 	if (state == NULL) {
747 		return (DDI_FAILURE);
748 	}
749 
750 	switch (cmd) {
751 	case DDI_DETACH:
752 		/*
753 		 * If we are in "maintenance mode", then we do not want to
754 		 * do teardown for any of the InfiniBand interfaces.
755 		 * Specifically, this means not detaching from IBTF (we never
756 		 * attached to begin with) and not deregistering from IBMF.
757 		 */
758 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
759 			/* Unregister agents from IB Mgmt Framework (IBMF) */
760 			status = hermon_agent_handlers_fini(state);
761 			if (status != DDI_SUCCESS) {
762 				return (DDI_FAILURE);
763 			}
764 
765 			/*
766 			 * Attempt the "pre-detach" from InfiniBand Transport
767 			 * Framework (IBTF).  At this point the IBTF is still
768 			 * capable of handling incoming asynch and completion
769 			 * events.  This "pre-detach" is primarily a mechanism
770 			 * to notify the appropriate IBTF clients that the
771 			 * HCA is being removed/offlined.
772 			 */
773 			ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
774 			if (ibc_status != IBC_SUCCESS) {
775 				status = hermon_agent_handlers_init(state);
776 				if (status != DDI_SUCCESS) {
777 					HERMON_WARNING(state, "failed to "
778 					    "restart Hermon agents");
779 				}
780 				return (DDI_FAILURE);
781 			}
782 
783 			/*
784 			 * Before we can fully detach from the IBTF we need to
785 			 * ensure that we have handled all outstanding event
786 			 * callbacks.  This is accomplished by quiescing the
787 			 * event callback mechanism.  Note: if we are unable
788 			 * to successfully quiesce the callbacks, then this is
789 			 * an indication that something has probably gone
790 			 * seriously wrong.  We print out a warning, but
791 			 * continue.
792 			 */
793 			tmp_ibtfpriv = state->hs_ibtfpriv;
794 			HERMON_QUIESCE_IBTF_CALLB(state);
795 			if (state->hs_in_evcallb != 0) {
796 				HERMON_WARNING(state, "unable to quiesce "
797 				    "Hermon IBTF callbacks");
798 			}
799 
800 			/* Complete the detach from the IBTF */
801 			ibc_detach(tmp_ibtfpriv);
802 		}
803 
804 		/* Remove the minor node for device */
805 		ddi_remove_minor_node(dip, "devctl");
806 
807 		/*
808 		 * Only call hermon_drv_fini() if we are in Hermon HCA mode.
809 		 * (Because if we are in "maintenance mode", then we never
810 		 * successfully finished init.)  Only report successful
811 		 * detach for normal HCA mode.
812 		 */
813 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
814 			/* Cleanup driver resources and shutdown hardware */
815 			hermon_drv_fini(state);
816 			cmn_err(CE_CONT, "!Hermon driver successfully "
817 			    "detached\n");
818 		}
819 
820 		hermon_drv_fini2(state);
821 		hermon_fm_fini(state);
822 		ddi_soft_state_free(hermon_statep, instance);
823 
824 		return (DDI_SUCCESS);
825 
826 	case DDI_SUSPEND:
827 		/* Add code here for DDI_SUSPEND XXX */
828 		return (DDI_FAILURE);
829 
830 	default:
831 		cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
832 		break;
833 	}
834 
835 	return (DDI_FAILURE);
836 }
837 
838 /*
839  * hermon_dma_attr_init()
840  *    Context: Can be called from interrupt or base context.
841  */
842 
843 /* ARGSUSED */
844 void
845 hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
846 {
847 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
848 
849 	dma_attr->dma_attr_version	= DMA_ATTR_V0;
850 	dma_attr->dma_attr_addr_lo	= 0;
851 	dma_attr->dma_attr_addr_hi	= 0xFFFFFFFFFFFFFFFFull;
852 	dma_attr->dma_attr_count_max	= 0xFFFFFFFFFFFFFFFFull;
853 	dma_attr->dma_attr_align	= HERMON_PAGESIZE;  /* default 4K */
854 	dma_attr->dma_attr_burstsizes	= 0x3FF;
855 	dma_attr->dma_attr_minxfer	= 1;
856 	dma_attr->dma_attr_maxxfer	= 0xFFFFFFFFFFFFFFFFull;
857 	dma_attr->dma_attr_seg		= 0xFFFFFFFFFFFFFFFFull;
858 	dma_attr->dma_attr_sgllen	= 0x7FFFFFFF;
859 	dma_attr->dma_attr_granular	= 1;
860 	dma_attr->dma_attr_flags	= 0;
861 }
862 
863 /*
864  * hermon_dma_alloc()
865  *    Context: Can be called from base context.
866  */
867 int
868 hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
869     uint16_t opcode)
870 {
871 	ddi_dma_handle_t	dma_hdl;
872 	ddi_dma_attr_t		dma_attr;
873 	ddi_acc_handle_t	acc_hdl;
874 	ddi_dma_cookie_t	cookie;
875 	uint64_t		kaddr;
876 	uint64_t		real_len;
877 	uint_t			ccount;
878 	int			status;
879 
880 	hermon_dma_attr_init(state, &dma_attr);
881 
882 	/* Allocate a DMA handle */
883 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
884 	    NULL, &dma_hdl);
885 	if (status != DDI_SUCCESS) {
886 		IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
887 		cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
888 		return (DDI_FAILURE);
889 	}
890 
891 	/* Allocate DMA memory */
892 	status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
893 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
894 	    (caddr_t *)&kaddr, (size_t *)&real_len, &acc_hdl);
895 	if (status != DDI_SUCCESS) {
896 		ddi_dma_free_handle(&dma_hdl);
897 		IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
898 		cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
899 		return (DDI_FAILURE);
900 	}
901 	bzero((caddr_t)(uintptr_t)kaddr, real_len);
902 
903 	/* Bind the memory to the handle */
904 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
905 	    (caddr_t)(uintptr_t)kaddr, (size_t)real_len, DDI_DMA_RDWR |
906 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
907 	if (status != DDI_SUCCESS) {
908 		ddi_dma_mem_free(&acc_hdl);
909 		ddi_dma_free_handle(&dma_hdl);
910 		IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
911 		cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
912 		return (DDI_FAILURE);
913 	}
914 
915 	/* Package the hermon_dma_info contents and return */
916 	dma_info->vaddr   = kaddr;
917 	dma_info->dma_hdl = dma_hdl;
918 	dma_info->acc_hdl = acc_hdl;
919 
920 	/* Pass the mapping information to the firmware */
921 	status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
922 	if (status != DDI_SUCCESS) {
923 		char *s;
924 		hermon_dma_free(dma_info);
925 		switch (opcode) {
926 		case MAP_ICM:
927 			s = "MAP_ICM";
928 			break;
929 		case MAP_FA:
930 			s = "MAP_FA";
931 			break;
932 		case MAP_ICM_AUX:
933 			s = "MAP_ICM_AUX";
934 			break;
935 		default:
936 			s = "UNKNOWN";
937 		}
938 		cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
939 		    s, status);
940 		return (DDI_FAILURE);
941 	}
942 
943 	return (DDI_SUCCESS);
944 }
945 
946 /*
947  * hermon_dma_free()
948  *    Context: Can be called from base context.
949  */
950 void
951 hermon_dma_free(hermon_dma_info_t *info)
952 {
953 	/* Unbind the handles and free the memory */
954 	(void) ddi_dma_unbind_handle(info->dma_hdl);
955 	ddi_dma_mem_free(&info->acc_hdl);
956 	ddi_dma_free_handle(&info->dma_hdl);
957 }
958 
/*
 * Shorthand for use inside hermon_icm_alloc()/hermon_icm_free() only: both
 * macros pick up "state", "index1" and "index2" from the enclosing function
 * and vary just the resource type.
 */
#define	HERMON_ICM_ALLOC(rsrc)	hermon_icm_alloc(state, (rsrc), index1, index2)
#define	HERMON_ICM_FREE(rsrc)	hermon_icm_free(state, (rsrc), index1, index2)
964 
965 /*
966  * hermon_icm_alloc()
967  *    Context: Can be called from base context.
968  *
969  * Only one thread can be here for a given hermon_rsrc_type_t "type".
970  */
971 int
972 hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
973     uint32_t index1, uint32_t index2)
974 {
975 	hermon_icm_table_t	*icm;
976 	hermon_dma_info_t	*dma_info;
977 	uint8_t			*bitmap;
978 	int			status;
979 
980 	if (hermon_verbose) {
981 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
982 		    "index1/2 (0x%x/0x%x)", type, index1, index2);
983 	}
984 
985 	icm = &state->hs_icm[type];
986 
987 	switch (type) {
988 	case HERMON_QPC:
989 		status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
990 		if (status != DDI_SUCCESS) {
991 			return (status);
992 		}
993 		status = HERMON_ICM_ALLOC(HERMON_RDB);
994 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
995 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
996 			return (status);
997 		}
998 		status = HERMON_ICM_ALLOC(HERMON_ALTC);
999 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1000 			HERMON_ICM_FREE(HERMON_RDB);
1001 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1002 			return (status);
1003 		}
1004 		status = HERMON_ICM_ALLOC(HERMON_AUXC);
1005 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1006 			HERMON_ICM_FREE(HERMON_ALTC);
1007 			HERMON_ICM_FREE(HERMON_RDB);
1008 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1009 			return (status);
1010 		}
1011 		break;
1012 	case HERMON_SRQC:
1013 		status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
1014 		if (status != DDI_SUCCESS) {
1015 			return (status);
1016 		}
1017 		break;
1018 	case HERMON_CQC:
1019 		status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
1020 		if (status != DDI_SUCCESS) {
1021 			return (status);
1022 		}
1023 		break;
1024 	case HERMON_EQC:
1025 		status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
1026 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1027 			return (status);
1028 		}
1029 		break;
1030 	}
1031 
1032 	/* ensure existence of bitmap and dmainfo, sets "dma_info" */
1033 	hermon_bitmap(bitmap, dma_info, icm, index1);
1034 
1035 	/* Set up the DMA handle for allocation and mapping */
1036 	dma_info = icm->icm_dma[index1] + index2;
1037 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
1038 	dma_info->length  = icm->span << icm->log_object_size;
1039 	dma_info->icmaddr = icm->icm_baseaddr +
1040 	    (((index1 << icm->split_shift) +
1041 	    (index2 << icm->span_shift)) << icm->log_object_size);
1042 
1043 	if (hermon_verbose) {
1044 		IBTF_DPRINTF_L2("hermon", "alloc DMA: "
1045 		    "rsrc (0x%x) index (%x, %x) "
1046 		    "icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
1047 		    (longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
1048 	}
1049 
1050 	/* Allocate and map memory for this span */
1051 	status = hermon_dma_alloc(state, dma_info, MAP_ICM);
1052 	if (status != DDI_SUCCESS) {
1053 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
1054 		    "allocation failed, status 0x%x", status);
1055 		switch (type) {
1056 		case HERMON_QPC:
1057 			HERMON_ICM_FREE(HERMON_AUXC);
1058 			HERMON_ICM_FREE(HERMON_ALTC);
1059 			HERMON_ICM_FREE(HERMON_RDB);
1060 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1061 			break;
1062 		case HERMON_SRQC:
1063 			HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1064 			break;
1065 		case HERMON_CQC:
1066 			HERMON_ICM_FREE(HERMON_CMPT_CQC);
1067 			break;
1068 		case HERMON_EQC:
1069 			HERMON_ICM_FREE(HERMON_CMPT_EQC);
1070 			break;
1071 		}
1072 
1073 		return (DDI_FAILURE);
1074 	}
1075 	if (hermon_verbose) {
1076 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
1077 		    "rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
1078 		    "icm_addr (0x%lx)", type, index1, index2, dma_info->length,
1079 		    dma_info->icmaddr);
1080 	}
1081 
1082 	/* Set the bit for this slot in the table bitmap */
1083 	HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);
1084 
1085 	return (DDI_SUCCESS);
1086 }
1087 
1088 /*
1089  * hermon_icm_free()
1090  *    Context: Can be called from base context.
1091  *
1092  * ICM resources have been successfully returned from hermon_icm_alloc().
1093  * Associated dma_info is no longer in use.  Free the ICM backing memory.
1094  */
1095 void
1096 hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
1097     uint32_t index1, uint32_t index2)
1098 {
1099 	hermon_icm_table_t	*icm;
1100 	hermon_dma_info_t	*dma_info;
1101 	int			status;
1102 
1103 	icm = &state->hs_icm[type];
1104 	ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
1105 
1106 	if (hermon_verbose) {
1107 		IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
1108 		    "index (0x%x, 0x%x)", type, index1, index2);
1109 	}
1110 
1111 	dma_info = icm->icm_dma[index1] + index2;
1112 
1113 	/* The following only happens if attach() is failing. */
1114 	if (dma_info == NULL)
1115 		return;
1116 
1117 	/* Unmap the ICM allocation, then free the backing DMA memory */
1118 	status = hermon_unmap_icm_cmd_post(state, dma_info);
1119 	if (status != DDI_SUCCESS) {
1120 		HERMON_WARNING(state, "UNMAP_ICM failure");
1121 	}
1122 	hermon_dma_free(dma_info);
1123 
1124 	/* Clear the bit in the ICM table bitmap */
1125 	HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);
1126 
1127 	switch (type) {
1128 	case HERMON_QPC:
1129 		HERMON_ICM_FREE(HERMON_AUXC);
1130 		HERMON_ICM_FREE(HERMON_ALTC);
1131 		HERMON_ICM_FREE(HERMON_RDB);
1132 		HERMON_ICM_FREE(HERMON_CMPT_QPC);
1133 		break;
1134 	case HERMON_SRQC:
1135 		HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1136 		break;
1137 	case HERMON_CQC:
1138 		HERMON_ICM_FREE(HERMON_CMPT_CQC);
1139 		break;
1140 	case HERMON_EQC:
1141 		HERMON_ICM_FREE(HERMON_CMPT_EQC);
1142 		break;
1143 
1144 	}
1145 }
1146 
1147 /*
1148  * hermon_drv_init()
1149  *    Context: Only called from attach() path context
1150  */
1151 /* ARGSUSED */
1152 static int
1153 hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
1154 {
1155 	int	status;
1156 
1157 	/*
1158 	 * Check and set the operational mode of the device. If the driver is
1159 	 * bound to the Hermon device in "maintenance mode", then this generally
1160 	 * means that either the device has been specifically jumpered to
1161 	 * start in this mode or the firmware boot process has failed to
1162 	 * successfully load either the primary or the secondary firmware
1163 	 * image.
1164 	 */
1165 	if (HERMON_IS_HCA_MODE(state->hs_dip)) {
1166 		state->hs_operational_mode = HERMON_HCA_MODE;
1167 		state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
1168 
1169 	} else if (HERMON_IS_MAINTENANCE_MODE(state->hs_dip)) {
1170 		HERMON_FMANOTE(state, HERMON_FMA_MAINT);
1171 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1172 		return (DDI_FAILURE);
1173 
1174 	} else {
1175 		state->hs_operational_mode = 0;	/* invalid operational mode */
1176 		HERMON_FMANOTE(state, HERMON_FMA_PCIID);
1177 		HERMON_WARNING(state, "unexpected device type detected");
1178 		return (DDI_FAILURE);
1179 	}
1180 
1181 	/*
1182 	 * Initialize the Hermon hardware.
1183 	 *
1184 	 * Note:  If this routine returns an error, it is often a reasonably
1185 	 * good indication that something Hermon firmware-related has caused
1186 	 * the failure or some HW related errors have caused the failure.
1187 	 * (also there are few possibilities that SW (e.g. SW resource
1188 	 * shortage) can cause the failure, but the majority case is due to
1189 	 * either a firmware related error or a HW related one) In order to
1190 	 * give the user an opportunity (if desired) to update or reflash
1191 	 * the Hermon firmware image, we set "hs_operational_mode" flag
1192 	 * (described above) to indicate that we wish to enter maintenance
1193 	 * mode in case of the firmware-related issue.
1194 	 */
1195 	status = hermon_hw_init(state);
1196 	if (status != DDI_SUCCESS) {
1197 		cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
1198 		    state->hs_attach_buf);
1199 		return (DDI_FAILURE);
1200 	}
1201 
1202 	/*
1203 	 * Now that the ISR has been setup, arm all the EQs for event
1204 	 * generation.
1205 	 */
1206 
1207 	status = hermon_eq_arm_all(state);
1208 	if (status != DDI_SUCCESS) {
1209 		cmn_err(CE_NOTE, "EQ Arm All failed\n");
1210 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1211 		return (DDI_FAILURE);
1212 	}
1213 
1214 	/* test interrupts and event queues */
1215 	status = hermon_nop_post(state, 0x0, 0x0);
1216 	if (status != DDI_SUCCESS) {
1217 		cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
1218 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1219 		return (DDI_FAILURE);
1220 	}
1221 
1222 	/* Initialize Hermon softstate */
1223 	status = hermon_soft_state_init(state);
1224 	if (status != DDI_SUCCESS) {
1225 		cmn_err(CE_NOTE, "Failed to init soft state\n");
1226 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1227 		return (DDI_FAILURE);
1228 	}
1229 
1230 	return (DDI_SUCCESS);
1231 }
1232 
1233 
/*
 * hermon_drv_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 *    Undoes the work of hermon_drv_init() in reverse order: the soft state
 *    is torn down first, followed by a full (HERMON_DRV_CLEANUP_ALL)
 *    hardware teardown.
 */
static void
hermon_drv_fini(hermon_state_t *state)
{
	/* Cleanup Hermon softstate */
	hermon_soft_state_fini(state);

	/* Cleanup Hermon resources and shutdown hardware */
	hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
}
1247 
1248 
1249 /*
1250  * hermon_drv_fini2()
1251  *    Context: Only called from attach() and/or detach() path contexts
1252  */
1253 static void
1254 hermon_drv_fini2(hermon_state_t *state)
1255 {
1256 	if (state->hs_fm_poll_thread) {
1257 		ddi_periodic_delete(state->hs_fm_poll_thread);
1258 		state->hs_fm_poll_thread = NULL;
1259 	}
1260 
1261 	/* HERMON_DRV_CLEANUP_LEVEL1 */
1262 	if (state->hs_fm_cmdhdl) {
1263 		hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
1264 		state->hs_fm_cmdhdl = NULL;
1265 	}
1266 
1267 	if (state->hs_reg_cmdhdl) {
1268 		ddi_regs_map_free(&state->hs_reg_cmdhdl);
1269 		state->hs_reg_cmdhdl = NULL;
1270 	}
1271 
1272 	/* HERMON_DRV_CLEANUP_LEVEL0 */
1273 	if (state->hs_msix_tbl_entries) {
1274 		kmem_free(state->hs_msix_tbl_entries,
1275 		    state->hs_msix_tbl_size);
1276 		state->hs_msix_tbl_entries = NULL;
1277 	}
1278 
1279 	if (state->hs_msix_pba_entries) {
1280 		kmem_free(state->hs_msix_pba_entries,
1281 		    state->hs_msix_pba_size);
1282 		state->hs_msix_pba_entries = NULL;
1283 	}
1284 
1285 	if (state->hs_fm_msix_tblhdl) {
1286 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
1287 		state->hs_fm_msix_tblhdl = NULL;
1288 	}
1289 
1290 	if (state->hs_reg_msix_tblhdl) {
1291 		ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
1292 		state->hs_reg_msix_tblhdl = NULL;
1293 	}
1294 
1295 	if (state->hs_fm_msix_pbahdl) {
1296 		hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
1297 		state->hs_fm_msix_pbahdl = NULL;
1298 	}
1299 
1300 	if (state->hs_reg_msix_pbahdl) {
1301 		ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
1302 		state->hs_reg_msix_pbahdl = NULL;
1303 	}
1304 
1305 	if (state->hs_fm_pcihdl) {
1306 		hermon_pci_config_teardown(state, &state->hs_fm_pcihdl);
1307 		state->hs_fm_pcihdl = NULL;
1308 	}
1309 
1310 	if (state->hs_reg_pcihdl) {
1311 		pci_config_teardown(&state->hs_reg_pcihdl);
1312 		state->hs_reg_pcihdl = NULL;
1313 	}
1314 }
1315 
1316 
1317 /*
1318  * hermon_isr_init()
1319  *    Context: Only called from attach() path context
1320  */
1321 static int
1322 hermon_isr_init(hermon_state_t *state)
1323 {
1324 	int	status;
1325 	int	intr;
1326 
1327 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1328 
1329 		/*
1330 		 * Add a handler for the interrupt or MSI
1331 		 */
1332 		status = ddi_intr_add_handler(state->hs_intrmsi_hdl[intr],
1333 		    hermon_isr, (caddr_t)state, (void *)(uintptr_t)intr);
1334 		if (status  != DDI_SUCCESS) {
1335 			return (DDI_FAILURE);
1336 		}
1337 
1338 		/*
1339 		 * Enable the software interrupt.  Note: depending on the value
1340 		 * returned in the capability flag, we have to call either
1341 		 * ddi_intr_block_enable() or ddi_intr_enable().
1342 		 */
1343 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1344 			status = ddi_intr_block_enable(
1345 			    &state->hs_intrmsi_hdl[intr], 1);
1346 			if (status != DDI_SUCCESS) {
1347 				return (DDI_FAILURE);
1348 			}
1349 		} else {
1350 			status = ddi_intr_enable(state->hs_intrmsi_hdl[intr]);
1351 			if (status != DDI_SUCCESS) {
1352 				return (DDI_FAILURE);
1353 			}
1354 		}
1355 	}
1356 
1357 	/*
1358 	 * Now that the ISR has been enabled, defer arm_all  EQs for event
1359 	 * generation until later, in case MSIX is enabled
1360 	 */
1361 	return (DDI_SUCCESS);
1362 }
1363 
1364 
1365 /*
1366  * hermon_isr_fini()
1367  *    Context: Only called from attach() and/or detach() path contexts
1368  */
1369 static void
1370 hermon_isr_fini(hermon_state_t *state)
1371 {
1372 	int	intr;
1373 
1374 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1375 		/* Disable the software interrupt */
1376 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1377 			(void) ddi_intr_block_disable(
1378 			    &state->hs_intrmsi_hdl[intr], 1);
1379 		} else {
1380 			(void) ddi_intr_disable(state->hs_intrmsi_hdl[intr]);
1381 		}
1382 
1383 		/*
1384 		 * Remove the software handler for the interrupt or MSI
1385 		 */
1386 		(void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[intr]);
1387 	}
1388 }
1389 
1390 
1391 /*
1392  * Sum of ICM configured values:
1393  *     cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
1394  *
1395  */
1396 static uint64_t
1397 hermon_size_icm(hermon_state_t *state)
1398 {
1399 	hermon_hw_querydevlim_t	*devlim;
1400 	hermon_cfg_profile_t	*cfg;
1401 	uint64_t		num_cmpts, num_dmpts, num_mtts;
1402 	uint64_t		num_qpcs, num_srqc, num_rdbs;
1403 #ifndef HERMON_FW_WORKAROUND
1404 	uint64_t		num_auxc;
1405 #endif
1406 	uint64_t		num_cqcs, num_altc;
1407 	uint64_t		num_eqcs, num_mcgs;
1408 	uint64_t		size;
1409 
1410 	devlim = &state->hs_devlim;
1411 	cfg = state->hs_cfg_profile;
1412 	/* number of respective entries */
1413 	num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
1414 	num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
1415 	num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
1416 	num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
1417 	num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
1418 	num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
1419 	num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
1420 	num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1421 #ifndef HERMON_FW_WORKAROUND
1422 	num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1423 #endif
1424 	num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
1425 	num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;
1426 
1427 	size =
1428 	    num_cmpts 	* devlim->cmpt_entry_sz +
1429 	    num_dmpts	* devlim->dmpt_entry_sz +
1430 	    num_mtts	* devlim->mtt_entry_sz +
1431 	    num_qpcs	* devlim->qpc_entry_sz +
1432 	    num_srqc	* devlim->srq_entry_sz +
1433 	    num_rdbs	* devlim->rdmardc_entry_sz +
1434 	    num_cqcs	* devlim->cqc_entry_sz +
1435 	    num_altc	* devlim->altc_entry_sz +
1436 #ifdef HERMON_FW_WORKAROUND
1437 	    0x80000000ull +
1438 #else
1439 	    num_auxc	* devlim->aux_entry_sz	+
1440 #endif
1441 	    num_eqcs	* devlim->eqc_entry_sz +
1442 	    num_mcgs	* HERMON_MCGMEM_SZ(state);
1443 	return (size);
1444 }
1445 
1446 
1447 /*
1448  * hermon_hw_init()
1449  *    Context: Only called from attach() path context
1450  */
1451 static int
1452 hermon_hw_init(hermon_state_t *state)
1453 {
1454 	hermon_drv_cleanup_level_t	cleanup;
1455 	sm_nodeinfo_t			nodeinfo;
1456 	uint64_t			clr_intr_offset;
1457 	int				status;
1458 	uint32_t			fw_size;	/* in page */
1459 	uint64_t			offset;
1460 
1461 	/* This is where driver initialization begins */
1462 	cleanup = HERMON_DRV_CLEANUP_LEVEL0;
1463 
1464 	/* Setup device access attributes */
1465 	state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
1466 	state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1467 	state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1468 	state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;
1469 
1470 	/* Setup fma-protected access attributes */
1471 	state->hs_fm_accattr.devacc_attr_version =
1472 	    hermon_devacc_attr_version(state);
1473 	state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1474 	state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1475 	/* set acc err protection type */
1476 	state->hs_fm_accattr.devacc_attr_access =
1477 	    hermon_devacc_attr_access(state);
1478 
1479 	/* Setup for PCI config read/write of HCA device */
1480 	status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
1481 	if (status != DDI_SUCCESS) {
1482 		hermon_hw_fini(state, cleanup);
1483 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1484 		    "hw_init_PCI_config_space_regmap_fail");
1485 		/* This case is not the degraded one */
1486 		return (DDI_FAILURE);
1487 	}
1488 
1489 	/* Map PCI config space and MSI-X tables/pba */
1490 	hermon_set_msix_info(state);
1491 
1492 	/* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
1493 	status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
1494 	    &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
1495 	    &state->hs_fm_cmdhdl);
1496 	if (status != DDI_SUCCESS) {
1497 		hermon_hw_fini(state, cleanup);
1498 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1499 		    "hw_init_CMD_BAR_regmap_fail");
1500 		/* This case is not the degraded one */
1501 		return (DDI_FAILURE);
1502 	}
1503 
1504 	cleanup = HERMON_DRV_CLEANUP_LEVEL1;
1505 	/*
1506 	 * We defer UAR-BAR mapping until later.  Need to know if
1507 	 * blueflame mapping is to be done, and don't know that until after
1508 	 * we get the dev_caps, so do it right after that
1509 	 */
1510 
1511 	/*
1512 	 * There is a third BAR defined for Hermon - it is for MSIX
1513 	 *
1514 	 * Will need to explore it's possible need/use w/ Mellanox
1515 	 * [es] Temporary mapping maybe
1516 	 */
1517 
1518 #ifdef HERMON_SUPPORTS_MSIX_BAR
1519 	status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
1520 	    &state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
1521 	    &state->hs_reg_msihdl);
1522 	if (status != DDI_SUCCESS) {
1523 		hermon_hw_fini(state, cleanup);
1524 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1525 		    "hw_init_MSIX_BAR_regmap_fail");
1526 		/* This case is not the degraded one */
1527 		return (DDI_FAILURE);
1528 	}
1529 #endif
1530 
1531 	cleanup = HERMON_DRV_CLEANUP_LEVEL2;
1532 
1533 	/*
1534 	 * Save interesting registers away. The offsets of the first two
1535 	 * here (HCR and sw_reset) are detailed in the PRM, the others are
1536 	 * derived from values in the QUERY_FW output, so we'll save them
1537 	 * off later.
1538 	 */
1539 	/* Host Command Register (HCR) */
1540 	state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
1541 	    ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
1542 	state->hs_cmd_toggle = 0;	/* initialize it for use */
1543 
1544 	/* Software Reset register (sw_reset) and semaphore */
1545 	state->hs_cmd_regs.sw_reset = (uint32_t *)
1546 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1547 	    HERMON_CMD_SW_RESET_OFFSET);
1548 	state->hs_cmd_regs.sw_semaphore = (uint32_t *)
1549 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1550 	    HERMON_CMD_SW_SEMAPHORE_OFFSET);
1551 
1552 	/* Retrieve PCI device, vendor and rev IDs */
1553 	state->hs_vendor_id	 = HERMON_GET_VENDOR_ID(state->hs_dip);
1554 	state->hs_device_id	 = HERMON_GET_DEVICE_ID(state->hs_dip);
1555 	state->hs_revision_id	 = HERMON_GET_REVISION_ID(state->hs_dip);
1556 
1557 	/* make sure init'd before we start filling things in */
1558 	bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));
1559 
1560 	/* Initialize the Phase1 configuration profile */
1561 	status = hermon_cfg_profile_init_phase1(state);
1562 	if (status != DDI_SUCCESS) {
1563 		hermon_hw_fini(state, cleanup);
1564 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1565 		    "hw_init_cfginit1_fail");
1566 		/* This case is not the degraded one */
1567 		return (DDI_FAILURE);
1568 	}
1569 	cleanup = HERMON_DRV_CLEANUP_LEVEL3;
1570 
1571 	/* Do a software reset of the adapter to ensure proper state */
1572 	status = hermon_sw_reset(state);
1573 	if (status != HERMON_CMD_SUCCESS) {
1574 		hermon_hw_fini(state, cleanup);
1575 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1576 		    "hw_init_sw_reset_fail");
1577 		/* This case is not the degraded one */
1578 		return (DDI_FAILURE);
1579 	}
1580 
1581 	/* Initialize mailboxes */
1582 	status = hermon_rsrc_init_phase1(state);
1583 	if (status != DDI_SUCCESS) {
1584 		hermon_hw_fini(state, cleanup);
1585 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1586 		    "hw_init_rsrcinit1_fail");
1587 		/* This case is not the degraded one */
1588 		return (DDI_FAILURE);
1589 	}
1590 	cleanup = HERMON_DRV_CLEANUP_LEVEL4;
1591 
1592 	/* Post QUERY_FW */
1593 	status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
1594 	    sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
1595 	if (status != HERMON_CMD_SUCCESS) {
1596 		cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
1597 		hermon_hw_fini(state, cleanup);
1598 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1599 		    "hw_init_query_fw_cmd_fail");
1600 		/* This case is not the degraded one */
1601 		return (DDI_FAILURE);
1602 	}
1603 
1604 	/* Validate what/that HERMON FW version is appropriate */
1605 
1606 	status = hermon_fw_version_check(state);
1607 	if (status != DDI_SUCCESS) {
1608 		HERMON_FMANOTE(state, HERMON_FMA_FWVER);
1609 		if (state->hs_operational_mode == HERMON_HCA_MODE) {
1610 			cmn_err(CE_CONT, "Unsupported Hermon FW version: "
1611 			    "expected: %04d.%04d.%04d, "
1612 			    "actual: %04d.%04d.%04d\n",
1613 			    HERMON_FW_VER_MAJOR,
1614 			    HERMON_FW_VER_MINOR,
1615 			    HERMON_FW_VER_SUBMINOR,
1616 			    state->hs_fw.fw_rev_major,
1617 			    state->hs_fw.fw_rev_minor,
1618 			    state->hs_fw.fw_rev_subminor);
1619 		} else {
1620 			cmn_err(CE_CONT, "Unsupported FW version: "
1621 			    "%04d.%04d.%04d\n",
1622 			    state->hs_fw.fw_rev_major,
1623 			    state->hs_fw.fw_rev_minor,
1624 			    state->hs_fw.fw_rev_subminor);
1625 		}
1626 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1627 		hermon_hw_fini(state, cleanup);
1628 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1629 		    "hw_init_checkfwver_fail");
1630 		/* This case is the degraded one */
1631 		return (HERMON_CMD_BAD_NVMEM);
1632 	}
1633 
1634 	/*
1635 	 * Save off the rest of the interesting registers that we'll be using.
1636 	 * Setup the offsets for the other registers.
1637 	 */
1638 
1639 	/*
1640 	 * Hermon does the intr_offset from the BAR - technically should get the
1641 	 * BAR info from the response, but PRM says it's from BAR0-1, which is
1642 	 * for us the CMD BAR
1643 	 */
1644 
1645 	clr_intr_offset	 = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
1646 
1647 	/* Save Clear Interrupt address */
1648 	state->hs_cmd_regs.clr_intr = (uint64_t *)
1649 	    (uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);
1650 
1651 	/*
1652 	 * Set the error buffer also into the structure - used in hermon_event.c
1653 	 * to check for internal error on the HCA, not reported in eqe or
1654 	 * (necessarily) by interrupt
1655 	 */
1656 	state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
1657 	    (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
1658 
1659 	/*
1660 	 * Invoke a polling thread to check the error buffer periodically.
1661 	 */
1662 	state->hs_fm_poll_thread = ddi_periodic_add(hermon_inter_err_chk,
1663 	    (void *)state, FM_POLL_INTERVAL, DDI_IPL_0);
1664 
1665 	cleanup = HERMON_DRV_CLEANUP_LEVEL5;
1666 
1667 	/*
1668 	 * Allocate, map, and run the HCA Firmware.
1669 	 */
1670 
1671 	/* Allocate memory for the firmware to load into and map it */
1672 
1673 	/* get next higher power of 2 */
1674 	fw_size = 1 << highbit(state->hs_fw.fw_pages);
1675 	state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
1676 	status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
1677 	if (status != DDI_SUCCESS) {
1678 		cmn_err(CE_NOTE, "FW alloc failed\n");
1679 		hermon_hw_fini(state, cleanup);
1680 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1681 		    "hw_init_dma_alloc_fw_fail");
1682 		/* This case is not the degraded one */
1683 		return (DDI_FAILURE);
1684 	}
1685 
1686 	cleanup = HERMON_DRV_CLEANUP_LEVEL6;
1687 
1688 	/* Invoke the RUN_FW cmd to run the firmware */
1689 	status = hermon_run_fw_cmd_post(state);
1690 	if (status != DDI_SUCCESS) {
1691 		cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
1692 		if (status == HERMON_CMD_BAD_NVMEM) {
1693 			state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1694 		}
1695 		hermon_hw_fini(state, cleanup);
1696 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
1697 		/*
1698 		 * If the status is HERMON_CMD_BAD_NVMEM, it's likely the
1699 		 * firmware is corrupted, so the mode falls into the
1700 		 * maintenance mode.
1701 		 */
1702 		return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
1703 		    DDI_FAILURE);
1704 	}
1705 
1706 
1707 	/*
1708 	 * QUERY DEVICE LIMITS/CAPABILITIES
1709 	 * NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
1710 	 * but for familiarity we have kept the structure name the
1711 	 * same as Tavor/Arbel
1712 	 */
1713 
1714 	status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
1715 	    &state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
1716 	    HERMON_CMD_NOSLEEP_SPIN);
1717 	if (status != HERMON_CMD_SUCCESS) {
1718 		cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
1719 		    status);
1720 		hermon_hw_fini(state, cleanup);
1721 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
1722 		/* This case is not the degraded one */
1723 		return (DDI_FAILURE);
1724 	}
1725 
1726 	state->hs_devlim.num_rsvd_eq = max(state->hs_devlim.num_rsvd_eq,
1727 	    (4 * state->hs_devlim.num_rsvd_uar));	/* lesser of resvd's */
1728 
1729 	/* now we have enough info to map in the UAR BAR */
1730 	/*
1731 	 * First, we figure out how to map the BAR for UAR - use only half if
1732 	 * BlueFlame is enabled - in that case the mapped length is 1/2 the
1733 	 * log_max_uar_sz (max__uar - 1) * 1MB ( +20).
1734 	 */
1735 
1736 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1737 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1738 	} else {
1739 		offset = 0;	/* a zero length means map the whole thing */
1740 	}
1741 	status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
1742 	    &state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
1743 	    &state->hs_fm_uarhdl);
1744 	if (status != DDI_SUCCESS) {
1745 		HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
1746 		/* This case is not the degraded one */
1747 		return (DDI_FAILURE);
1748 	}
1749 
1750 	/* and if BlueFlame is enabled, map the other half there */
1751 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1752 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1753 		status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
1754 		    &state->hs_reg_bf_baseaddr, offset, offset,
1755 		    &state->hs_reg_accattr, &state->hs_reg_bfhdl);
1756 		if (status != DDI_SUCCESS) {
1757 			HERMON_ATTACH_MSG(state->hs_attach_buf,
1758 			    "BlueFlame BAR mapping");
1759 			/* This case is not the degraded one */
1760 			return (DDI_FAILURE);
1761 		}
1762 		/* This will be used in hw_fini if we fail to init. */
1763 		state->hs_bf_offset = offset;
1764 	}
1765 	cleanup = HERMON_DRV_CLEANUP_LEVEL7;
1766 
1767 	/* Hermon has a couple of things needed for phase 2 in query port */
1768 
1769 	status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
1770 	    &state->hs_queryport, sizeof (hermon_hw_query_port_t),
1771 	    HERMON_CMD_NOSLEEP_SPIN);
1772 	if (status != HERMON_CMD_SUCCESS) {
1773 		cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
1774 		    status);
1775 		hermon_hw_fini(state, cleanup);
1776 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1777 		    "hw_init_queryport_fail");
1778 		/* This case is not the degraded one */
1779 		return (DDI_FAILURE);
1780 	}
1781 
1782 	/* Initialize the Phase2 Hermon configuration profile */
1783 	status = hermon_cfg_profile_init_phase2(state);
1784 	if (status != DDI_SUCCESS) {
1785 		cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
1786 		hermon_hw_fini(state, cleanup);
1787 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1788 		    "hw_init_cfginit2_fail");
1789 		/* This case is not the degraded one */
1790 		return (DDI_FAILURE);
1791 	}
1792 
1793 	/* Determine and set the ICM size */
1794 	state->hs_icm_sz = hermon_size_icm(state);
1795 	status		 = hermon_set_icm_size_cmd_post(state);
1796 	if (status != DDI_SUCCESS) {
1797 		cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
1798 		    status);
1799 		hermon_hw_fini(state, cleanup);
1800 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1801 		    "hw_init_seticmsz_fail");
1802 		/* This case is not the degraded one */
1803 		return (DDI_FAILURE);
1804 	}
1805 	/* alloc icm aux physical memory and map it */
1806 
1807 	state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);
1808 
1809 	status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
1810 	if (status != DDI_SUCCESS) {
1811 		cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
1812 		    (longlong_t)state->hs_icma_dma.length);
1813 		hermon_hw_fini(state, cleanup);
1814 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1815 		    "hw_init_dma_alloc_icm_aux_fail");
1816 		/* This case is not the degraded one */
1817 		return (DDI_FAILURE);
1818 	}
1819 	cleanup = HERMON_DRV_CLEANUP_LEVEL8;
1820 
1821 	cleanup = HERMON_DRV_CLEANUP_LEVEL9;
1822 
1823 	/* Allocate an array of structures to house the ICM tables */
1824 	state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
1825 	    sizeof (hermon_icm_table_t), KM_SLEEP);
1826 
1827 	/* Set up the ICM address space and the INIT_HCA command input */
1828 	status = hermon_icm_config_setup(state, &state->hs_hcaparams);
1829 	if (status != HERMON_CMD_SUCCESS) {
1830 		cmn_err(CE_NOTE, "ICM configuration failed\n");
1831 		hermon_hw_fini(state, cleanup);
1832 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1833 		    "hw_init_icm_config_setup_fail");
1834 		/* This case is not the degraded one */
1835 		return (DDI_FAILURE);
1836 	}
1837 	cleanup = HERMON_DRV_CLEANUP_LEVEL10;
1838 
1839 	/* Initialize the adapter with the INIT_HCA cmd */
1840 	status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
1841 	    HERMON_CMD_NOSLEEP_SPIN);
1842 	if (status != HERMON_CMD_SUCCESS) {
1843 		cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
1844 		hermon_hw_fini(state, cleanup);
1845 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
1846 		/* This case is not the degraded one */
1847 		return (DDI_FAILURE);
1848 	}
1849 	cleanup = HERMON_DRV_CLEANUP_LEVEL11;
1850 
1851 	/* Enter the second phase of init for Hermon configuration/resources */
1852 	status = hermon_rsrc_init_phase2(state);
1853 	if (status != DDI_SUCCESS) {
1854 		hermon_hw_fini(state, cleanup);
1855 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1856 		    "hw_init_rsrcinit2_fail");
1857 		/* This case is not the degraded one */
1858 		return (DDI_FAILURE);
1859 	}
1860 	cleanup = HERMON_DRV_CLEANUP_LEVEL12;
1861 
1862 	/* Query the adapter via QUERY_ADAPTER */
1863 	status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
1864 	    &state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
1865 	    HERMON_CMD_NOSLEEP_SPIN);
1866 	if (status != HERMON_CMD_SUCCESS) {
1867 		cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
1868 		    status);
1869 		hermon_hw_fini(state, cleanup);
1870 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1871 		    "hw_init_query_adapter_fail");
1872 		/* This case is not the degraded one */
1873 		return (DDI_FAILURE);
1874 	}
1875 
1876 	/* Allocate protection domain (PD) for Hermon internal use */
1877 	status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
1878 	    HERMON_SLEEP);
1879 	if (status != DDI_SUCCESS) {
1880 		cmn_err(CE_NOTE, "failed to alloc internal PD\n");
1881 		hermon_hw_fini(state, cleanup);
1882 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1883 		    "hw_init_internal_pd_alloc_fail");
1884 		/* This case is not the degraded one */
1885 		return (DDI_FAILURE);
1886 	}
1887 	cleanup = HERMON_DRV_CLEANUP_LEVEL13;
1888 
1889 	/* Setup UAR page for kernel use */
1890 	status = hermon_internal_uarpg_init(state);
1891 	if (status != DDI_SUCCESS) {
1892 		cmn_err(CE_NOTE, "failed to setup internal UAR\n");
1893 		hermon_hw_fini(state, cleanup);
1894 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1895 		    "hw_init_internal_uarpg_alloc_fail");
1896 		/* This case is not the degraded one */
1897 		return (DDI_FAILURE);
1898 	}
1899 	cleanup = HERMON_DRV_CLEANUP_LEVEL14;
1900 
1901 	/* Query and initialize the Hermon interrupt/MSI information */
1902 	status = hermon_intr_or_msi_init(state);
1903 	if (status != DDI_SUCCESS) {
1904 		cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
1905 		hermon_hw_fini(state, cleanup);
1906 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1907 		    "hw_init_intr_or_msi_init_fail");
1908 		/* This case is not the degraded one */
1909 		return (DDI_FAILURE);
1910 	}
1911 	cleanup = HERMON_DRV_CLEANUP_LEVEL15;
1912 
1913 	status = hermon_isr_init(state);	/* set up the isr */
1914 	if (status != DDI_SUCCESS) {
1915 		cmn_err(CE_NOTE, "failed to init isr\n");
1916 		hermon_hw_fini(state, cleanup);
1917 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1918 		    "hw_init_isrinit_fail");
1919 		/* This case is not the degraded one */
1920 		return (DDI_FAILURE);
1921 	}
1922 	cleanup = HERMON_DRV_CLEANUP_LEVEL16;
1923 
1924 	/* Setup the event queues */
1925 	status = hermon_eq_init_all(state);
1926 	if (status != DDI_SUCCESS) {
1927 		cmn_err(CE_NOTE, "failed to init EQs\n");
1928 		hermon_hw_fini(state, cleanup);
1929 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1930 		    "hw_init_eqinitall_fail");
1931 		/* This case is not the degraded one */
1932 		return (DDI_FAILURE);
1933 	}
1934 	cleanup = HERMON_DRV_CLEANUP_LEVEL17;
1935 
1936 
1937 
1938 	/* Reserve contexts for QP0 and QP1 */
1939 	status = hermon_special_qp_contexts_reserve(state);
1940 	if (status != DDI_SUCCESS) {
1941 		cmn_err(CE_NOTE, "failed to init special QPs\n");
1942 		hermon_hw_fini(state, cleanup);
1943 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1944 		    "hw_init_rsrv_sqp_fail");
1945 		/* This case is not the degraded one */
1946 		return (DDI_FAILURE);
1947 	}
1948 	cleanup = HERMON_DRV_CLEANUP_LEVEL18;
1949 
1950 	/* Initialize for multicast group handling */
1951 	status = hermon_mcg_init(state);
1952 	if (status != DDI_SUCCESS) {
1953 		cmn_err(CE_NOTE, "failed to init multicast\n");
1954 		hermon_hw_fini(state, cleanup);
1955 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1956 		    "hw_init_mcg_init_fail");
1957 		/* This case is not the degraded one */
1958 		return (DDI_FAILURE);
1959 	}
1960 	cleanup = HERMON_DRV_CLEANUP_LEVEL19;
1961 
1962 	/* Initialize the Hermon IB port(s) */
1963 	status = hermon_hca_port_init(state);
1964 	if (status != DDI_SUCCESS) {
1965 		cmn_err(CE_NOTE, "failed to init HCA Port\n");
1966 		hermon_hw_fini(state, cleanup);
1967 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1968 		    "hw_init_hca_port_init_fail");
1969 		/* This case is not the degraded one */
1970 		return (DDI_FAILURE);
1971 	}
1972 
1973 	cleanup = HERMON_DRV_CLEANUP_ALL;
1974 
1975 	/* Determine NodeGUID and SystemImageGUID */
1976 	status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
1977 	    &nodeinfo);
1978 	if (status != HERMON_CMD_SUCCESS) {
1979 		cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
1980 		hermon_hw_fini(state, cleanup);
1981 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1982 		    "hw_init_getnodeinfo_cmd_fail");
1983 		/* This case is not the degraded one */
1984 		return (DDI_FAILURE);
1985 	}
1986 
1987 	/*
1988 	 * If the NodeGUID value was set in OBP properties, then we use that
1989 	 * value.  But we still print a message if the value we queried from
1990 	 * firmware does not match this value.
1991 	 *
1992 	 * Otherwise if OBP value is not set then we use the value from
1993 	 * firmware unconditionally.
1994 	 */
1995 	if (state->hs_cfg_profile->cp_nodeguid) {
1996 		state->hs_nodeguid   = state->hs_cfg_profile->cp_nodeguid;
1997 	} else {
1998 		state->hs_nodeguid = nodeinfo.NodeGUID;
1999 	}
2000 
2001 	if (state->hs_nodeguid != nodeinfo.NodeGUID) {
2002 		cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
2003 		    "does not match value set by device property");
2004 	}
2005 
2006 	/*
2007 	 * If the SystemImageGUID value was set in OBP properties, then we use
2008 	 * that value.  But we still print a message if the value we queried
2009 	 * from firmware does not match this value.
2010 	 *
2011 	 * Otherwise if OBP value is not set then we use the value from
2012 	 * firmware unconditionally.
2013 	 */
2014 	if (state->hs_cfg_profile->cp_sysimgguid) {
2015 		state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
2016 	} else {
2017 		state->hs_sysimgguid = nodeinfo.SystemImageGUID;
2018 	}
2019 
2020 	if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
2021 		cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
2022 		    "does not match value set by device property");
2023 	}
2024 
2025 	/* Get NodeDescription */
2026 	status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
2027 	    (sm_nodedesc_t *)&state->hs_nodedesc);
2028 	if (status != HERMON_CMD_SUCCESS) {
2029 		cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
2030 		hermon_hw_fini(state, cleanup);
2031 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2032 		    "hw_init_getnodedesc_cmd_fail");
2033 		/* This case is not the degraded one */
2034 		return (DDI_FAILURE);
2035 	}
2036 
2037 	return (DDI_SUCCESS);
2038 }
2039 
2040 
2041 /*
2042  * hermon_hw_fini()
2043  *    Context: Only called from attach() and/or detach() path contexts
2044  */
2045 static void
2046 hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
2047 {
2048 	uint_t		num_ports;
2049 	int		i, status;
2050 
2051 
2052 	/*
2053 	 * JBDB - We might not want to run these returns in all cases of
2054 	 * Bad News. We should still attempt to free all of the DMA memory
2055 	 * resources...  This needs to be worked last, after all allocations
2056 	 * are implemented. For now, and possibly for later, this works.
2057 	 */
2058 
2059 	switch (cleanup) {
2060 	/*
2061 	 * If we add more driver initialization steps that should be cleaned
2062 	 * up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
2063 	 * first entry (i.e. corresponds to the last init step).
2064 	 */
2065 	case HERMON_DRV_CLEANUP_ALL:
2066 		/* Shutdown the Hermon IB port(s) */
2067 		num_ports = state->hs_cfg_profile->cp_num_ports;
2068 		(void) hermon_hca_ports_shutdown(state, num_ports);
2069 		/* FALLTHROUGH */
2070 
2071 	case HERMON_DRV_CLEANUP_LEVEL19:
2072 		/* Teardown resources used for multicast group handling */
2073 		hermon_mcg_fini(state);
2074 		/* FALLTHROUGH */
2075 
2076 	case HERMON_DRV_CLEANUP_LEVEL18:
2077 		/* Unreserve the special QP contexts */
2078 		hermon_special_qp_contexts_unreserve(state);
2079 		/* FALLTHROUGH */
2080 
2081 	case HERMON_DRV_CLEANUP_LEVEL17:
2082 		/*
2083 		 * Attempt to teardown all event queues (EQ).  If we fail
2084 		 * here then print a warning message and return.  Something
2085 		 * (either in HW or SW) has gone seriously wrong.
2086 		 */
2087 		status = hermon_eq_fini_all(state);
2088 		if (status != DDI_SUCCESS) {
2089 			HERMON_WARNING(state, "failed to teardown EQs");
2090 			return;
2091 		}
2092 		/* FALLTHROUGH */
2093 	case HERMON_DRV_CLEANUP_LEVEL16:
2094 		/* Teardown Hermon interrupts */
2095 		hermon_isr_fini(state);
2096 		/* FALLTHROUGH */
2097 
2098 	case HERMON_DRV_CLEANUP_LEVEL15:
2099 		status = hermon_intr_or_msi_fini(state);
2100 		if (status != DDI_SUCCESS) {
2101 			HERMON_WARNING(state, "failed to free intr/MSI");
2102 			return;
2103 		}
2104 		/* FALLTHROUGH */
2105 
2106 	case HERMON_DRV_CLEANUP_LEVEL14:
2107 		/* Free the resources for the Hermon internal UAR pages */
2108 		hermon_internal_uarpg_fini(state);
2109 		/* FALLTHROUGH */
2110 
2111 	case HERMON_DRV_CLEANUP_LEVEL13:
2112 		/*
2113 		 * Free the PD that was used internally by Hermon software.  If
2114 		 * we fail here then print a warning and return.  Something
2115 		 * (probably software-related, but perhaps HW) has gone wrong.
2116 		 */
2117 		status = hermon_pd_free(state, &state->hs_pdhdl_internal);
2118 		if (status != DDI_SUCCESS) {
2119 			HERMON_WARNING(state, "failed to free internal PD");
2120 			return;
2121 		}
2122 		/* FALLTHROUGH */
2123 
2124 	case HERMON_DRV_CLEANUP_LEVEL12:
2125 		/* Cleanup all the phase2 resources first */
2126 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
2127 		/* FALLTHROUGH */
2128 
2129 	case HERMON_DRV_CLEANUP_LEVEL11:
2130 		/* LEVEL11 is after INIT_HCA */
2131 		/* FALLTHROUGH */
2132 
2133 
2134 	case HERMON_DRV_CLEANUP_LEVEL10:
2135 		/*
2136 		 * Unmap the ICM memory area with UNMAP_ICM command.
2137 		 */
2138 		status = hermon_unmap_icm_cmd_post(state, NULL);
2139 		if (status != DDI_SUCCESS) {
2140 			cmn_err(CE_WARN,
2141 			    "hermon_hw_fini: failed to unmap ICM\n");
2142 		}
2143 
2144 		/* Free the initial ICM DMA handles */
2145 		hermon_icm_dma_fini(state);
2146 
2147 		/* Free the ICM table structures */
2148 		hermon_icm_tables_fini(state);
2149 
2150 		/* Free the ICM table handles */
2151 		kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
2152 		    sizeof (hermon_icm_table_t));
2153 
2154 		/* FALLTHROUGH */
2155 
2156 	case HERMON_DRV_CLEANUP_LEVEL9:
2157 		/*
2158 		 * Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
2159 		 */
2160 		status = hermon_unmap_icm_aux_cmd_post(state);
2161 		if (status != HERMON_CMD_SUCCESS) {
2162 			cmn_err(CE_NOTE,
2163 			    "hermon_hw_fini: failed to unmap ICMA\n");
2164 		}
2165 		/* FALLTHROUGH */
2166 
2167 	case HERMON_DRV_CLEANUP_LEVEL8:
2168 		/*
2169 		 * Deallocate ICM Aux DMA memory.
2170 		 */
2171 		hermon_dma_free(&state->hs_icma_dma);
2172 		/* FALLTHROUGH */
2173 
2174 	case HERMON_DRV_CLEANUP_LEVEL7:
2175 		if (state->hs_fm_uarhdl) {
2176 			hermon_regs_map_free(state, &state->hs_fm_uarhdl);
2177 			state->hs_fm_uarhdl = NULL;
2178 		}
2179 
2180 		if (state->hs_reg_uarhdl) {
2181 			ddi_regs_map_free(&state->hs_reg_uarhdl);
2182 			state->hs_reg_uarhdl = NULL;
2183 		}
2184 
2185 		if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
2186 			ddi_regs_map_free(&state->hs_reg_bfhdl);
2187 			state->hs_reg_bfhdl = NULL;
2188 		}
2189 
2190 		for (i = 0; i < HERMON_MAX_PORTS; i++) {
2191 			if (state->hs_pkey[i]) {
2192 				kmem_free(state->hs_pkey[i], (1 <<
2193 				    state->hs_cfg_profile->cp_log_max_pkeytbl) *
2194 				    sizeof (ib_pkey_t));
2195 				state->hs_pkey[i] = NULL;
2196 			}
2197 			if (state->hs_guid[i]) {
2198 				kmem_free(state->hs_guid[i], (1 <<
2199 				    state->hs_cfg_profile->cp_log_max_gidtbl) *
2200 				    sizeof (ib_guid_t));
2201 				state->hs_guid[i] = NULL;
2202 			}
2203 		}
2204 		/* FALLTHROUGH */
2205 
2206 	case HERMON_DRV_CLEANUP_LEVEL6:
2207 		/*
2208 		 * Unmap the firmware memory area with UNMAP_FA command.
2209 		 */
2210 		status = hermon_unmap_fa_cmd_post(state);
2211 
2212 		if (status != HERMON_CMD_SUCCESS) {
2213 			cmn_err(CE_NOTE,
2214 			    "hermon_hw_fini: failed to unmap FW\n");
2215 		}
2216 
2217 		/*
2218 		 * Deallocate firmware DMA memory.
2219 		 */
2220 		hermon_dma_free(&state->hs_fw_dma);
2221 		/* FALLTHROUGH */
2222 
2223 	case HERMON_DRV_CLEANUP_LEVEL5:
2224 		/* stop the poll thread */
2225 		if (state->hs_fm_poll_thread) {
2226 			ddi_periodic_delete(state->hs_fm_poll_thread);
2227 			state->hs_fm_poll_thread = NULL;
2228 		}
2229 		/* FALLTHROUGH */
2230 
2231 	case HERMON_DRV_CLEANUP_LEVEL4:
2232 		/* Then cleanup the phase1 resources */
2233 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
2234 		/* FALLTHROUGH */
2235 
2236 	case HERMON_DRV_CLEANUP_LEVEL3:
2237 		/* Teardown any resources allocated for the config profile */
2238 		hermon_cfg_profile_fini(state);
2239 		/* FALLTHROUGH */
2240 
2241 	case HERMON_DRV_CLEANUP_LEVEL2:
2242 #ifdef HERMON_SUPPORTS_MSIX_BAR
2243 		/*
2244 		 * unmap 3rd BAR, MSIX BAR
2245 		 */
2246 		if (state->hs_reg_msihdl) {
2247 			ddi_regs_map_free(&state->hs_reg_msihdl);
2248 			state->hs_reg_msihdl = NULL;
2249 		}
2250 		/* FALLTHROUGH */
2251 #endif
2252 	case HERMON_DRV_CLEANUP_LEVEL1:
2253 	case HERMON_DRV_CLEANUP_LEVEL0:
2254 		/*
2255 		 * LEVEL1 and LEVEL0 resources are freed in
2256 		 * hermon_drv_fini2().
2257 		 */
2258 		break;
2259 
2260 	default:
2261 		HERMON_WARNING(state, "unexpected driver cleanup level");
2262 		return;
2263 	}
2264 }
2265 
2266 
2267 /*
2268  * hermon_soft_state_init()
2269  *    Context: Only called from attach() path context
2270  */
2271 static int
2272 hermon_soft_state_init(hermon_state_t *state)
2273 {
2274 	ibt_hca_attr_t		*hca_attr;
2275 	uint64_t		maxval, val;
2276 	ibt_hca_flags_t		caps = IBT_HCA_NO_FLAGS;
2277 	ibt_hca_flags2_t	caps2 = IBT_HCA2_NO_FLAGS;
2278 	int			status;
2279 	int			max_send_wqe_bytes;
2280 	int			max_recv_wqe_bytes;
2281 
2282 	/*
2283 	 * The ibc_hca_info_t struct is passed to the IBTF.  This is the
2284 	 * routine where we initialize it.  Many of the init values come from
2285 	 * either configuration variables or successful queries of the Hermon
2286 	 * hardware abilities
2287 	 */
2288 	state->hs_ibtfinfo.hca_ci_vers	= IBCI_V3;
2289 	state->hs_ibtfinfo.hca_dip	= state->hs_dip;
2290 	state->hs_ibtfinfo.hca_handle	= (ibc_hca_hdl_t)state;
2291 	state->hs_ibtfinfo.hca_ops	= &hermon_ibc_ops;
2292 
2293 	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
2294 	state->hs_ibtfinfo.hca_attr = hca_attr;
2295 
2296 	hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
2297 	hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
2298 	hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;
2299 
2300 	/* CQ interrupt moderation maximums - each limited to 16 bits */
2301 	hca_attr->hca_max_cq_mod_count = 0xFFFF;
2302 	hca_attr->hca_max_cq_mod_usec = 0xFFFF;
2303 
2304 	/* CQ relocation to other EQs - change when multiple MSI-Xs are used */
2305 	hca_attr->hca_max_cq_handlers = 1;
2306 
2307 	/*
2308 	 * Determine HCA capabilities:
2309 	 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
2310 	 *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
2311 	 *    or IBT_HCA_SHUTDOWN_PORT
2312 	 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
2313 	 *    IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
2314 	 *    IBT_HCA_SRQ, IBT_HCA_RESIZE_SRQ and IBT_HCA_FMR are always
2315 	 *    supported
2316 	 * All other features are conditionally supported, depending on the
2317 	 *    status return by the Hermon HCA in QUERY_DEV_LIM.
2318 	 */
2319 	if (state->hs_devlim.ud_multi) {
2320 		caps |= IBT_HCA_UD_MULTICAST;
2321 	}
2322 	if (state->hs_devlim.atomic) {
2323 		caps |= IBT_HCA_ATOMICS_HCA;
2324 	}
2325 	if (state->hs_devlim.apm) {
2326 		caps |= IBT_HCA_AUTO_PATH_MIG;
2327 	}
2328 	if (state->hs_devlim.pkey_v) {
2329 		caps |= IBT_HCA_PKEY_CNTR;
2330 	}
2331 	if (state->hs_devlim.qkey_v) {
2332 		caps |= IBT_HCA_QKEY_CNTR;
2333 	}
2334 	if (state->hs_devlim.ipoib_cksm) {
2335 		caps |= IBT_HCA_CKSUM_FULL;
2336 		caps2 |= IBT_HCA2_IP_CLASS;
2337 	}
2338 	if (state->hs_devlim.mod_wr_srq) {
2339 		caps |= IBT_HCA_RESIZE_SRQ;
2340 	}
2341 	if (state->hs_devlim.lif) {
2342 		caps |= IBT_HCA_LOCAL_INVAL_FENCE;
2343 	}
2344 	if (state->hs_devlim.reserved_lkey) {
2345 		caps2 |= IBT_HCA2_RES_LKEY;
2346 		hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
2347 	}
2348 	if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
2349 	    state->hs_devlim.fast_reg_wr) {	/* fw needs to be >= 2.6.636 */
2350 		if (state->hs_fw.fw_rev_major > 2)
2351 			caps2 |= IBT_HCA2_MEM_MGT_EXT;
2352 		else if (state->hs_fw.fw_rev_major == 2)
2353 			if (state->hs_fw.fw_rev_minor > 6)
2354 				caps2 |= IBT_HCA2_MEM_MGT_EXT;
2355 			else if (state->hs_fw.fw_rev_minor == 6)
2356 				if (state->hs_fw.fw_rev_subminor >= 636)
2357 					caps2 |= IBT_HCA2_MEM_MGT_EXT;
2358 	}
2359 	if (state->hs_devlim.mps) {
2360 		caps |= IBT_HCA_ZERO_BASED_VA;
2361 	}
2362 	if (state->hs_devlim.zb) {
2363 		caps |= IBT_HCA_MULT_PAGE_SZ_MR;
2364 	}
2365 	caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
2366 	    IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
2367 	    IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
2368 
2369 	if (state->hs_devlim.log_max_gso_sz) {
2370 		hca_attr->hca_max_lso_size =
2371 		    (1 << state->hs_devlim.log_max_gso_sz);
2372 		/* More work needed in hermon_post_send for larger values */
2373 		hca_attr->hca_max_lso_hdr_size = 0x2c;	/* IPv4 only */
2374 	}
2375 
2376 	caps |= IBT_HCA_WQE_SIZE_INFO;
2377 	max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
2378 	max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
2379 	hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
2380 	hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
2381 	hca_attr->hca_conn_rdma_sgl_overhead = 1;
2382 	hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;
2383 
2384 	/* We choose not to support "inline" unless it improves performance */
2385 	hca_attr->hca_max_inline_size = 0;
2386 	hca_attr->hca_ud_send_inline_sz = 0;
2387 	hca_attr->hca_conn_send_inline_sz = 0;
2388 	hca_attr->hca_conn_rdmaw_inline_overhead = 4;
2389 
2390 	hca_attr->hca_flags = caps;
2391 	hca_attr->hca_flags2 = caps2;
2392 
2393 	/*
2394 	 * Set hca_attr's IDs
2395 	 */
2396 	hca_attr->hca_vendor_id	 = state->hs_vendor_id;
2397 	hca_attr->hca_device_id	 = state->hs_device_id;
2398 	hca_attr->hca_version_id = state->hs_revision_id;
2399 
2400 	/*
2401 	 * Determine number of available QPs and max QP size.  Number of
2402 	 * available QPs is determined by subtracting the number of
2403 	 * "reserved QPs" (i.e. reserved for firmware use) from the
2404 	 * total number configured.
2405 	 */
2406 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2407 	hca_attr->hca_max_qp = val - ((uint64_t)1 <<
2408 	    state->hs_devlim.log_rsvd_qp);
2409 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
2410 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
2411 	if (val > maxval) {
2412 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2413 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2414 		    "soft_state_init_maxqpsz_toobig_fail");
2415 		return (DDI_FAILURE);
2416 	}
2417 	/* we need to reduce this by the max space needed for headroom */
2418 	hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
2419 	    HERMON_QP_WQE_LOG_MINIMUM) - 1;
2420 
2421 	/*
2422 	 * Determine max scatter-gather size in WQEs. The HCA has split
2423 	 * the max sgl into rec'v Q and send Q values. Use the least.
2424 	 *
2425 	 * This is mainly useful for legacy clients.  Smart clients
2426 	 * such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
2427 	 */
2428 	if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
2429 		maxval = state->hs_devlim.max_sg_rq;
2430 	} else {
2431 		maxval = state->hs_devlim.max_sg_sq;
2432 	}
2433 	val	= state->hs_cfg_profile->cp_wqe_max_sgl;
2434 	if (val > maxval) {
2435 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2436 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2437 		    "soft_state_init_toomanysgl_fail");
2438 		return (DDI_FAILURE);
2439 	}
2440 	/* If the rounded value for max SGL is too large, cap it */
2441 	if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
2442 		state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
2443 		val = maxval;
2444 	} else {
2445 		val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
2446 	}
2447 
2448 	hca_attr->hca_max_sgl	 = (uint_t)val;
2449 	hca_attr->hca_max_rd_sgl = 0;	/* zero because RD is unsupported */
2450 
2451 	/*
2452 	 * Determine number of available CQs and max CQ size. Number of
2453 	 * available CQs is determined by subtracting the number of
2454 	 * "reserved CQs" (i.e. reserved for firmware use) from the
2455 	 * total number configured.
2456 	 */
2457 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
2458 	hca_attr->hca_max_cq = val - ((uint64_t)1 <<
2459 	    state->hs_devlim.log_rsvd_cq);
2460 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
2461 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
2462 	if (val > maxval) {
2463 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2464 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2465 		    "soft_state_init_maxcqsz_toobig_fail");
2466 		return (DDI_FAILURE);
2467 	}
2468 	hca_attr->hca_max_cq_sz = (uint_t)val;
2469 
2470 	/*
2471 	 * Determine number of available SRQs and max SRQ size. Number of
2472 	 * available SRQs is determined by subtracting the number of
2473 	 * "reserved SRQs" (i.e. reserved for firmware use) from the
2474 	 * total number configured.
2475 	 */
2476 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
2477 	hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
2478 	    state->hs_devlim.log_rsvd_srq);
2479 	maxval  = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
2480 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);
2481 
2482 	if (val > maxval) {
2483 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2484 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2485 		    "soft_state_init_maxsrqsz_toobig_fail");
2486 		return (DDI_FAILURE);
2487 	}
2488 	hca_attr->hca_max_srqs_sz = (uint_t)val;
2489 
2490 	val	= hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
2491 	maxval	= state->hs_devlim.max_sg_rq - 1;
2492 	if (val > maxval) {
2493 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2494 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2495 		    "soft_state_init_toomanysrqsgl_fail");
2496 		return (DDI_FAILURE);
2497 	}
2498 	hca_attr->hca_max_srq_sgl = (uint_t)val;
2499 
2500 	/*
2501 	 * Determine supported HCA page sizes
2502 	 * XXX
2503 	 * For now we simply return the system pagesize as the only supported
2504 	 * pagesize
2505 	 */
2506 	hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
2507 	    IBT_PAGE_4K);
2508 
2509 	/*
2510 	 * Determine number of available MemReg, MemWin, and their max size.
2511 	 * Number of available MRs and MWs is determined by subtracting
2512 	 * the number of "reserved MPTs" (i.e. reserved for firmware use)
2513 	 * from the total number configured for each.
2514 	 */
2515 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
2516 	hca_attr->hca_max_memr	  = val - ((uint64_t)1 <<
2517 	    state->hs_devlim.log_rsvd_dmpt);
2518 	hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
2519 	    ((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
2520 	maxval	= state->hs_devlim.log_max_mrw_sz;
2521 	val	= state->hs_cfg_profile->cp_log_max_mrw_sz;
2522 	if (val > maxval) {
2523 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2524 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2525 		    "soft_state_init_maxmrwsz_toobig_fail");
2526 		return (DDI_FAILURE);
2527 	}
2528 	hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
2529 
2530 	/* Determine RDMA/Atomic properties */
2531 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
2532 	hca_attr->hca_max_rsc = (uint_t)val;
2533 	val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
2534 	hca_attr->hca_max_rdma_in_qp  = (uint8_t)val;
2535 	val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
2536 	hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
2537 	hca_attr->hca_max_rdma_in_ee  = 0;
2538 	hca_attr->hca_max_rdma_out_ee = 0;
2539 
2540 	/*
2541 	 * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
2542 	 * because neither type of raw QP is supported
2543 	 */
2544 	hca_attr->hca_max_ipv6_qp  = 0;
2545 	hca_attr->hca_max_ether_qp = 0;
2546 
2547 	/* Determine max number of MCGs and max QP-per-MCG */
2548 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2549 	hca_attr->hca_max_mcg_qps   = (uint_t)val;
2550 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
2551 	hca_attr->hca_max_mcg	    = (uint_t)val;
2552 	val = state->hs_cfg_profile->cp_num_qp_per_mcg;
2553 	hca_attr->hca_max_qp_per_mcg = (uint_t)val;
2554 
2555 	/* Determine max number partitions (i.e. PKeys) */
2556 	maxval	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2557 	    state->hs_queryport.log_max_pkey);
2558 	val	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2559 	    state->hs_cfg_profile->cp_log_max_pkeytbl);
2560 
2561 	if (val > maxval) {
2562 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2563 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2564 		    "soft_state_init_toomanypkey_fail");
2565 		return (DDI_FAILURE);
2566 	}
2567 	hca_attr->hca_max_partitions = (uint16_t)val;
2568 
2569 	/* Determine number of ports */
2570 	maxval = state->hs_devlim.num_ports;
2571 	val = state->hs_cfg_profile->cp_num_ports;
2572 	if ((val > maxval) || (val == 0)) {
2573 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2574 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2575 		    "soft_state_init_toomanyports_fail");
2576 		return (DDI_FAILURE);
2577 	}
2578 	hca_attr->hca_nports = (uint8_t)val;
2579 
2580 	/* Copy NodeGUID and SystemImageGUID from softstate */
2581 	hca_attr->hca_node_guid = state->hs_nodeguid;
2582 	hca_attr->hca_si_guid	= state->hs_sysimgguid;
2583 
2584 	/*
2585 	 * Determine local ACK delay.  Use the value suggested by the Hermon
2586 	 * hardware (from the QUERY_DEV_CAP command)
2587 	 */
2588 	hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;
2589 
2590 	/* Determine max SGID table and PKey table sizes */
2591 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
2592 	hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
2593 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
2594 	hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;
2595 
2596 	/* Determine max number of PDs */
2597 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_pd);
2598 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
2599 	if (val > maxval) {
2600 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2601 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2602 		    "soft_state_init_toomanypd_fail");
2603 		return (DDI_FAILURE);
2604 	}
2605 	hca_attr->hca_max_pd = (uint_t)val;
2606 
2607 	/* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
2608 	hca_attr->hca_max_ah = 0;
2609 
2610 	/* No RDDs or EECs (since Reliable Datagram is not supported) */
2611 	hca_attr->hca_max_rdd = 0;
2612 	hca_attr->hca_max_eec = 0;
2613 
2614 	/* Initialize lock for reserved UAR page access */
2615 	mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
2616 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2617 
2618 	/* Initialize the flash fields */
2619 	state->hs_fw_flashstarted = 0;
2620 	mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
2621 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2622 
2623 	/* Initialize the lock for the info ioctl */
2624 	mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
2625 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2626 
2627 	/* Initialize the AVL tree for QP number support */
2628 	hermon_qpn_avl_init(state);
2629 
2630 	/* Initialize the kstat info structure */
2631 	status = hermon_kstat_init(state);
2632 	if (status != DDI_SUCCESS) {
2633 		hermon_qpn_avl_fini(state);
2634 		mutex_destroy(&state->hs_info_lock);
2635 		mutex_destroy(&state->hs_fw_flashlock);
2636 		mutex_destroy(&state->hs_uar_lock);
2637 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2638 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2639 		    "soft_state_init_kstatinit_fail");
2640 		return (DDI_FAILURE);
2641 	}
2642 
2643 	return (DDI_SUCCESS);
2644 }
2645 
2646 
2647 /*
2648  * hermon_soft_state_fini()
2649  *    Context: Called only from detach() path context
2650  */
2651 static void
2652 hermon_soft_state_fini(hermon_state_t *state)
2653 {
2654 
2655 	/* Teardown the kstat info */
2656 	hermon_kstat_fini(state);
2657 
2658 	/* Teardown the AVL tree for QP number support */
2659 	hermon_qpn_avl_fini(state);
2660 
2661 	/* Free up info ioctl mutex */
2662 	mutex_destroy(&state->hs_info_lock);
2663 
2664 	/* Free up flash mutex */
2665 	mutex_destroy(&state->hs_fw_flashlock);
2666 
2667 	/* Free up the UAR page access mutex */
2668 	mutex_destroy(&state->hs_uar_lock);
2669 
2670 	/* Free up the hca_attr struct */
2671 	kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
2672 
2673 }
2674 
2675 /*
2676  * hermon_icm_config_setup()
2677  *    Context: Only called from attach() path context
2678  */
2679 static int
2680 hermon_icm_config_setup(hermon_state_t *state,
2681     hermon_hw_initqueryhca_t *inithca)
2682 {
2683 	hermon_hw_querydevlim_t	*devlim;
2684 	hermon_cfg_profile_t	*cfg;
2685 	hermon_icm_table_t	*icm_p[HERMON_NUM_ICM_RESOURCES];
2686 	hermon_icm_table_t	*icm;
2687 	hermon_icm_table_t	*tmp;
2688 	uint64_t		icm_addr;
2689 	uint64_t		icm_size;
2690 	int			status, i, j;
2691 
2692 
2693 	/* Bring in local devlims, cfg_profile and hs_icm table list */
2694 	devlim = &state->hs_devlim;
2695 	cfg = state->hs_cfg_profile;
2696 	icm = state->hs_icm;
2697 
2698 	/*
2699 	 * Assign each ICM table's entry size from data in the devlims,
2700 	 * except for RDB and MCG sizes, which are not returned in devlims
2701 	 * but do have a fixed size, and the UAR context entry size, which
2702 	 * we determine. For this, we use the "cp_num_pgs_per_uce" value
2703 	 * from our hs_cfg_profile.
2704 	 */
2705 	icm[HERMON_CMPT].object_size	= devlim->cmpt_entry_sz;
2706 	icm[HERMON_CMPT_QPC].object_size	= devlim->cmpt_entry_sz;
2707 	icm[HERMON_CMPT_SRQC].object_size	= devlim->cmpt_entry_sz;
2708 	icm[HERMON_CMPT_CQC].object_size	= devlim->cmpt_entry_sz;
2709 	icm[HERMON_CMPT_EQC].object_size	= devlim->cmpt_entry_sz;
2710 	icm[HERMON_MTT].object_size	= devlim->mtt_entry_sz;
2711 	icm[HERMON_DMPT].object_size	= devlim->dmpt_entry_sz;
2712 	icm[HERMON_QPC].object_size	= devlim->qpc_entry_sz;
2713 	icm[HERMON_CQC].object_size	= devlim->cqc_entry_sz;
2714 	icm[HERMON_SRQC].object_size	= devlim->srq_entry_sz;
2715 	icm[HERMON_EQC].object_size	= devlim->eqc_entry_sz;
2716 	icm[HERMON_RDB].object_size	= devlim->rdmardc_entry_sz *
2717 	    cfg->cp_hca_max_rdma_in_qp;
2718 	icm[HERMON_MCG].object_size	= HERMON_MCG_SIZE;
2719 	icm[HERMON_ALTC].object_size	= devlim->altc_entry_sz;
2720 	icm[HERMON_AUXC].object_size	= devlim->aux_entry_sz;
2721 
2722 	/* Assign each ICM table's log2 number of entries */
2723 	icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
2724 	icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
2725 	icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
2726 	icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
2727 	icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
2728 	icm[HERMON_MTT].log_num_entries	= cfg->cp_log_num_mtt;
2729 	icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
2730 	icm[HERMON_QPC].log_num_entries	= cfg->cp_log_num_qp;
2731 	icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
2732 	icm[HERMON_CQC].log_num_entries	= cfg->cp_log_num_cq;
2733 	icm[HERMON_EQC].log_num_entries	= HERMON_NUM_EQ_SHIFT;
2734 	icm[HERMON_RDB].log_num_entries	= cfg->cp_log_num_qp;
2735 	icm[HERMON_MCG].log_num_entries	= cfg->cp_log_num_mcg;
2736 	icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
2737 	icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;
2738 
2739 	/* Initialize the ICM tables */
2740 	hermon_icm_tables_init(state);
2741 
2742 	/*
2743 	 * ICM tables must be aligned on their size in the ICM address
2744 	 * space. So, here we order the tables from largest total table
2745 	 * size to the smallest. All tables are a power of 2 in size, so
2746 	 * this will ensure that all tables are aligned on their own size
2747 	 * without wasting space in the ICM.
2748 	 *
2749 	 * In order to easily set the ICM addresses without needing to
2750 	 * worry about the ordering of our table indices as relates to
2751 	 * the hermon_rsrc_type_t enum, we will use a list of pointers
2752 	 * representing the tables for the sort, then assign ICM addresses
2753 	 * below using it.
2754 	 */
2755 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2756 		icm_p[i] = &icm[i];
2757 	}
2758 	for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
2759 		switch (i) {
2760 		case HERMON_CMPT_QPC:
2761 		case HERMON_CMPT_SRQC:
2762 		case HERMON_CMPT_CQC:
2763 		case HERMON_CMPT_EQC:
2764 			continue;
2765 		}
2766 		for (j = 1; j < i; j++) {
2767 			if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
2768 				tmp		= icm_p[j];
2769 				icm_p[j]	= icm_p[j - 1];
2770 				icm_p[j - 1]	= tmp;
2771 			}
2772 		}
2773 	}
2774 
2775 	/* Initialize the ICM address and ICM size */
2776 	icm_addr = icm_size = 0;
2777 
2778 	/*
2779 	 * Set the ICM base address of each table, using our sorted
2780 	 * list of pointers from above.
2781 	 */
2782 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2783 		j = icm_p[i]->icm_type;
2784 		switch (j) {
2785 		case HERMON_CMPT_QPC:
2786 		case HERMON_CMPT_SRQC:
2787 		case HERMON_CMPT_CQC:
2788 		case HERMON_CMPT_EQC:
2789 			continue;
2790 		}
2791 		if (icm[j].table_size) {
2792 			/*
2793 			 * Set the ICM base address in the table, save the
2794 			 * ICM offset in the rsrc pool and increment the
2795 			 * total ICM allocation.
2796 			 */
2797 			icm[j].icm_baseaddr = icm_addr;
2798 			if (hermon_verbose) {
2799 				IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
2800 				    " size %llx", j, icm[j].icm_baseaddr,
2801 				    icm[j].table_size);
2802 			}
2803 			icm_size += icm[j].table_size;
2804 		}
2805 
2806 		/* Verify that we don't exceed maximum ICM size */
2807 		if (icm_size > devlim->max_icm_size) {
2808 			/* free the ICM table memory resources */
2809 			hermon_icm_tables_fini(state);
2810 			cmn_err(CE_WARN, "ICM configuration exceeds maximum "
2811 			    "configuration: max (0x%lx) requested (0x%lx)\n",
2812 			    (ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
2813 			HERMON_ATTACH_MSG(state->hs_attach_buf,
2814 			    "icm_config_toobig_fail");
2815 			return (DDI_FAILURE);
2816 		}
2817 
2818 		/* assign address to the 4 pieces of the CMPT */
2819 		if (j == HERMON_CMPT) {
2820 			uint64_t cmpt_size = icm[j].table_size >> 2;
2821 #define	init_cmpt_icm_baseaddr(rsrc, indx)				\
2822 	icm[rsrc].icm_baseaddr	= icm_addr + (indx * cmpt_size);
2823 			init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
2824 			init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
2825 			init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
2826 			init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
2827 		}
2828 
2829 		/* Increment the ICM address for the next table */
2830 		icm_addr += icm[j].table_size;
2831 	}
2832 
2833 	/* Populate the structure for the INIT_HCA command */
2834 	hermon_inithca_set(state, inithca);
2835 
2836 	/*
2837 	 * Prior to invoking INIT_HCA, we must have ICM memory in place
2838 	 * for the reserved objects in each table. We will allocate and map
2839 	 * this initial ICM memory here. Note that given the assignment
2840 	 * of span_size above, tables that are smaller or equal in total
2841 	 * size to the default span_size will be mapped in full.
2842 	 */
2843 	status = hermon_icm_dma_init(state);
2844 	if (status != DDI_SUCCESS) {
2845 		/* free the ICM table memory resources */
2846 		hermon_icm_tables_fini(state);
2847 		HERMON_WARNING(state, "Failed to allocate initial ICM");
2848 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2849 		    "icm_config_dma_init_fail");
2850 		return (DDI_FAILURE);
2851 	}
2852 
2853 	return (DDI_SUCCESS);
2854 }
2855 
2856 /*
2857  * hermon_inithca_set()
2858  *    Context: Only called from attach() path context
2859  */
2860 static void
2861 hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
2862 {
2863 	hermon_cfg_profile_t	*cfg;
2864 	hermon_icm_table_t	*icm;
2865 	int			i;
2866 
2867 
2868 	/* Populate the INIT_HCA structure */
2869 	icm = state->hs_icm;
2870 	cfg = state->hs_cfg_profile;
2871 
2872 	/* set version */
2873 	inithca->version = 0x02;	/* PRM 0.36 */
2874 	/* set cacheline - log2 in 16-byte chunks */
2875 	inithca->log2_cacheline = 0x2;	/* optimized for 64 byte cache */
2876 
2877 	/* we need to update the inithca info with thie UAR info too */
2878 	inithca->uar.log_max_uars = highbit(cfg->cp_log_num_uar);
2879 	inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;
2880 
2881 	/* Set endianess */
2882 #ifdef	_LITTLE_ENDIAN
2883 	inithca->big_endian	= 0;
2884 #else
2885 	inithca->big_endian	= 1;
2886 #endif
2887 
2888 	/* Port Checking is on by default */
2889 	inithca->udav_port_chk	= HERMON_UDAV_PORTCHK_ENABLED;
2890 
2891 	/* Enable IPoIB checksum */
2892 	if (state->hs_devlim.ipoib_cksm)
2893 		inithca->chsum_en = 1;
2894 
2895 	/* Set each ICM table's attributes */
2896 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2897 		switch (icm[i].icm_type) {
2898 		case HERMON_CMPT:
2899 			inithca->tpt.cmpt_baseaddr = icm[i].icm_baseaddr;
2900 			break;
2901 
2902 		case HERMON_MTT:
2903 			inithca->tpt.mtt_baseaddr = icm[i].icm_baseaddr;
2904 			break;
2905 
2906 		case HERMON_DMPT:
2907 			inithca->tpt.dmpt_baseaddr = icm[i].icm_baseaddr;
2908 			inithca->tpt.log_dmpt_sz   = icm[i].log_num_entries;
2909 			inithca->tpt.pgfault_rnr_to = 0; /* just in case */
2910 			break;
2911 
2912 		case HERMON_QPC:
2913 			inithca->context.log_num_qp = icm[i].log_num_entries;
2914 			inithca->context.qpc_baseaddr_h =
2915 			    icm[i].icm_baseaddr >> 32;
2916 			inithca->context.qpc_baseaddr_l =
2917 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2918 			break;
2919 
2920 		case HERMON_CQC:
2921 			inithca->context.log_num_cq = icm[i].log_num_entries;
2922 			inithca->context.cqc_baseaddr_h =
2923 			    icm[i].icm_baseaddr >> 32;
2924 			inithca->context.cqc_baseaddr_l =
2925 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2926 			break;
2927 
2928 		case HERMON_SRQC:
2929 			inithca->context.log_num_srq = icm[i].log_num_entries;
2930 			inithca->context.srqc_baseaddr_h =
2931 			    icm[i].icm_baseaddr >> 32;
2932 			inithca->context.srqc_baseaddr_l =
2933 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2934 			break;
2935 
2936 		case HERMON_EQC:
2937 			inithca->context.log_num_eq = icm[i].log_num_entries;
2938 			inithca->context.eqc_baseaddr_h =
2939 			    icm[i].icm_baseaddr >> 32;
2940 			inithca->context.eqc_baseaddr_l =
2941 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2942 			break;
2943 
2944 		case HERMON_RDB:
2945 			inithca->context.rdmardc_baseaddr_h =
2946 			    icm[i].icm_baseaddr >> 32;
2947 			inithca->context.rdmardc_baseaddr_l =
2948 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2949 			inithca->context.log_num_rdmardc =
2950 			    icm[i].log_num_entries;
2951 			break;
2952 
2953 		case HERMON_MCG:
2954 			inithca->multi.mc_baseaddr    = icm[i].icm_baseaddr;
2955 			inithca->multi.log_mc_tbl_sz  = icm[i].log_num_entries;
2956 			inithca->multi.log_mc_tbl_ent =
2957 			    highbit(HERMON_MCGMEM_SZ(state)) - 1;
2958 			inithca->multi.log_mc_tbl_hash_sz =
2959 			    cfg->cp_log_num_mcg_hash;
2960 			inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
2961 			break;
2962 
2963 		case HERMON_ALTC:
2964 			inithca->context.altc_baseaddr = icm[i].icm_baseaddr;
2965 			break;
2966 
2967 		case HERMON_AUXC:
2968 			inithca->context.auxc_baseaddr = icm[i].icm_baseaddr;
2969 			break;
2970 
2971 		default:
2972 			break;
2973 
2974 		}
2975 	}
2976 
2977 }
2978 
2979 /*
2980  * hermon_icm_tables_init()
2981  *    Context: Only called from attach() path context
2982  *
2983  * Dynamic ICM breaks the various ICM tables into "span_size" chunks
2984  * to enable allocation of backing memory on demand.  Arbel used a
2985  * fixed size ARBEL_ICM_SPAN_SIZE (initially was 512KB) as the
2986  * span_size for all ICM chunks.  Hermon has other considerations,
2987  * so the span_size used differs from Arbel.
2988  *
2989  * The basic considerations for why Hermon differs are:
2990  *
2991  *	1) ICM memory is in units of HERMON pages.
2992  *
2993  *	2) The AUXC table is approximately 1 byte per QP.
2994  *
2995  *	3) ICM memory for AUXC, ALTC, and RDB is allocated when
2996  *	the ICM memory for the corresponding QPC is allocated.
2997  *
2998  *	4) ICM memory for the CMPT corresponding to the various primary
2999  *	resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
3000  *	memory for the primary resource is allocated.
3001  *
3002  * One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
3003  * So, the minimum chunk for the various QPC related ICM memory should
3004  * all be allocated to support the 4K QPs.  Currently, this means the
3005  * amount of memory for the various QP chunks is:
3006  *
3007  *	QPC	256*4K bytes
3008  *	RDB	128*4K bytes
3009  *	CMPT	 64*4K bytes
3010  *	ALTC	 64*4K bytes
3011  *	AUXC	  1*4K bytes
3012  *
3013  * The span_size chosen for the QP resource is 4KB of AUXC entries,
3014  * or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
3015  *
3016  * Other ICM resources can have their span_size be more arbitrary.
3017  * This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
3018  */
3019 
/*
 * init_dependent(rsrc, dep)
 *
 * Copy the span geometry (span, num_spans, split_shift, span_mask,
 * span_shift and rsrc_mask) of ICM table "rsrc" into ICM table "dep",
 * then, when hermon_verbose is set, log the resulting values of "dep".
 *
 * An array named "icm" must be in scope where the macro is used.  The
 * body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and remains correct inside an unbraced if/else.
 */
#define	init_dependent(rsrc, dep)	do {				\
	icm[(dep)].span		= icm[(rsrc)].span;			\
	icm[(dep)].num_spans	= icm[(rsrc)].num_spans;		\
	icm[(dep)].split_shift	= icm[(rsrc)].split_shift;		\
	icm[(dep)].span_mask	= icm[(rsrc)].span_mask;		\
	icm[(dep)].span_shift	= icm[(rsrc)].span_shift;		\
	icm[(dep)].rsrc_mask	= icm[(rsrc)].rsrc_mask;		\
	if (hermon_verbose) {						\
		IBTF_DPRINTF_L2("hermon", "tables_init: "		\
		    "rsrc (0x%x) size (0x%lx) span (0x%x) "		\
		    "num_spans (0x%x)", (dep), icm[(dep)].table_size,	\
		    icm[(dep)].span, icm[(dep)].num_spans);		\
		IBTF_DPRINTF_L2("hermon", "tables_init: "		\
		    "span_shift (0x%x) split_shift (0x%x)",		\
		    icm[(dep)].span_shift, icm[(dep)].split_shift);	\
		IBTF_DPRINTF_L2("hermon", "tables_init: "		\
		    "span_mask (0x%x)  rsrc_mask   (0x%x)",		\
		    icm[(dep)].span_mask, icm[(dep)].rsrc_mask);	\
	}								\
} /* CONSTCOND */ while (0)
3040 
3041 static void
3042 hermon_icm_tables_init(hermon_state_t *state)
3043 {
3044 	hermon_icm_table_t	*icm;
3045 	int			i, k;
3046 	uint32_t		per_split;
3047 
3048 
3049 	icm = state->hs_icm;
3050 
3051 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3052 		icm[i].icm_type		= i;
3053 		icm[i].num_entries	= 1 << icm[i].log_num_entries;
3054 		icm[i].log_object_size	= highbit(icm[i].object_size) - 1;
3055 		icm[i].table_size	= icm[i].num_entries <<
3056 		    icm[i].log_object_size;
3057 
3058 		/* deal with "dependent" resource types */
3059 		switch (i) {
3060 		case HERMON_AUXC:
3061 #ifdef HERMON_FW_WORKAROUND
3062 			icm[i].table_size = 0x80000000ull;
3063 			/* FALLTHROUGH */
3064 #endif
3065 		case HERMON_CMPT_QPC:
3066 		case HERMON_RDB:
3067 		case HERMON_ALTC:
3068 			init_dependent(HERMON_QPC, i);
3069 			continue;
3070 		case HERMON_CMPT_SRQC:
3071 			init_dependent(HERMON_SRQC, i);
3072 			continue;
3073 		case HERMON_CMPT_CQC:
3074 			init_dependent(HERMON_CQC, i);
3075 			continue;
3076 		case HERMON_CMPT_EQC:
3077 			init_dependent(HERMON_EQC, i);
3078 			continue;
3079 		}
3080 
3081 		icm[i].span = HERMON_ICM_SPAN;	/* default #rsrc's in 1 span */
3082 		if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
3083 			icm[i].span = HERMON_ICM_SPAN * 16;
3084 		icm[i].num_spans = icm[i].num_entries / icm[i].span;
3085 		if (icm[i].num_spans == 0) {
3086 			icm[i].span = icm[i].num_entries;
3087 			per_split = 1;
3088 			icm[i].num_spans = icm[i].num_entries / icm[i].span;
3089 		} else {
3090 			per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
3091 			if (per_split == 0) {
3092 				per_split = 1;
3093 			}
3094 		}
3095 		if (hermon_verbose)
3096 			IBTF_DPRINTF_L2("ICM", "rsrc %x  span %x  num_spans %x",
3097 			    i, icm[i].span, icm[i].num_spans);
3098 
3099 		/*
3100 		 * Ensure a minimum table size of an ICM page, and a
3101 		 * maximum span size of the ICM table size.  This ensures
3102 		 * that we don't have less than an ICM page to map, which is
3103 		 * impossible, and that we will map an entire table at
3104 		 * once if it's total size is less than the span size.
3105 		 */
3106 		icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);
3107 
3108 		icm[i].span_shift = 0;
3109 		for (k = icm[i].span; k != 1; k >>= 1)
3110 			icm[i].span_shift++;
3111 		icm[i].split_shift = icm[i].span_shift;
3112 		for (k = per_split; k != 1; k >>= 1)
3113 			icm[i].split_shift++;
3114 		icm[i].span_mask = (1 << icm[i].split_shift) -
3115 		    (1 << icm[i].span_shift);
3116 		icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;
3117 
3118 
3119 		/* Initialize the table lock */
3120 		mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
3121 		    DDI_INTR_PRI(state->hs_intrmsi_pri));
3122 		cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);
3123 
3124 		if (hermon_verbose) {
3125 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3126 			    "rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
3127 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3128 			    "span (0x%x) num_spans (0x%x)",
3129 			    icm[i].span, icm[i].num_spans);
3130 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3131 			    "span_shift (0x%x) split_shift (0x%x)",
3132 			    icm[i].span_shift, icm[i].split_shift);
3133 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3134 			    "span_mask (0x%x)  rsrc_mask   (0x%x)",
3135 			    icm[i].span_mask, icm[i].rsrc_mask);
3136 		}
3137 	}
3138 
3139 }
3140 
3141 /*
3142  * hermon_icm_tables_fini()
3143  *    Context: Only called from attach() path context
3144  *
3145  * Clean up all icm_tables.  Free the bitmap and dma_info arrays.
3146  */
3147 static void
3148 hermon_icm_tables_fini(hermon_state_t *state)
3149 {
3150 	hermon_icm_table_t	*icm;
3151 	int			nspans;
3152 	int			i, j;
3153 
3154 
3155 	icm = state->hs_icm;
3156 
3157 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3158 
3159 		mutex_enter(&icm[i].icm_table_lock);
3160 		nspans = icm[i].num_spans;
3161 
3162 		for (j = 0; j < HERMON_ICM_SPLIT; j++) {
3163 			if (icm[i].icm_dma[j])
3164 				/* Free the ICM DMA slots */
3165 				kmem_free(icm[i].icm_dma[j],
3166 				    nspans * sizeof (hermon_dma_info_t));
3167 
3168 			if (icm[i].icm_bitmap[j])
3169 				/* Free the table bitmap */
3170 				kmem_free(icm[i].icm_bitmap[j],
3171 				    (nspans + 7) / 8);
3172 		}
3173 		/* Destroy the table lock */
3174 		cv_destroy(&icm[i].icm_table_cv);
3175 		mutex_exit(&icm[i].icm_table_lock);
3176 		mutex_destroy(&icm[i].icm_table_lock);
3177 	}
3178 
3179 }
3180 
3181 /*
3182  * hermon_icm_dma_init()
3183  *    Context: Only called from attach() path context
3184  */
3185 static int
3186 hermon_icm_dma_init(hermon_state_t *state)
3187 {
3188 	hermon_icm_table_t	*icm;
3189 	hermon_rsrc_type_t	type;
3190 	int			status;
3191 
3192 
3193 	/*
3194 	 * This routine will allocate initial ICM DMA resources for ICM
3195 	 * tables that have reserved ICM objects. This is the only routine
3196 	 * where we should have to allocate ICM outside of hermon_rsrc_alloc().
3197 	 * We need to allocate ICM here explicitly, rather than in
3198 	 * hermon_rsrc_alloc(), because we've not yet completed the resource
3199 	 * pool initialization. When the resource pools are initialized
3200 	 * (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
3201 	 * information), resource preallocations will be invoked to match
3202 	 * the ICM allocations seen here. We will then be able to use the
3203 	 * normal allocation path.  Note we don't need to set a refcnt on
3204 	 * these initial allocations because that will be done in the calls
3205 	 * to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
3206 	 * "prealloc" objects (see hermon_rsrc.c for more information).
3207 	 */
3208 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3209 
3210 		/* ICM for these is allocated within hermon_icm_alloc() */
3211 		switch (type) {
3212 		case HERMON_CMPT:
3213 		case HERMON_CMPT_QPC:
3214 		case HERMON_CMPT_SRQC:
3215 		case HERMON_CMPT_CQC:
3216 		case HERMON_CMPT_EQC:
3217 		case HERMON_AUXC:
3218 		case HERMON_ALTC:
3219 		case HERMON_RDB:
3220 			continue;
3221 		}
3222 
3223 		icm = &state->hs_icm[type];
3224 
3225 		mutex_enter(&icm->icm_table_lock);
3226 		status = hermon_icm_alloc(state, type, 0, 0);
3227 		mutex_exit(&icm->icm_table_lock);
3228 		if (status != DDI_SUCCESS) {
3229 			while (type--) {
3230 				icm = &state->hs_icm[type];
3231 				mutex_enter(&icm->icm_table_lock);
3232 				hermon_icm_free(state, type, 0, 0);
3233 				mutex_exit(&icm->icm_table_lock);
3234 			}
3235 			return (DDI_FAILURE);
3236 		}
3237 
3238 		if (hermon_verbose) {
3239 			IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
3240 			    "table (0x%x) index (0x%x) allocated", type, 0);
3241 		}
3242 	}
3243 
3244 	return (DDI_SUCCESS);
3245 }
3246 
3247 /*
3248  * hermon_icm_dma_fini()
3249  *    Context: Only called from attach() path context
3250  *
3251  * ICM has been completely unmapped.  We just free the memory here.
3252  */
3253 static void
3254 hermon_icm_dma_fini(hermon_state_t *state)
3255 {
3256 	hermon_icm_table_t	*icm;
3257 	hermon_dma_info_t	*dma_info;
3258 	hermon_rsrc_type_t	type;
3259 	int			index1, index2;
3260 
3261 
3262 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3263 		icm = &state->hs_icm[type];
3264 		for (index1 = 0; index1 < HERMON_ICM_SPLIT; index1++) {
3265 			dma_info = icm->icm_dma[index1];
3266 			if (dma_info == NULL)
3267 				continue;
3268 			for (index2 = 0; index2 < icm->num_spans; index2++) {
3269 				if (dma_info[index2].dma_hdl)
3270 					hermon_dma_free(&dma_info[index2]);
3271 				dma_info[index2].dma_hdl = NULL;
3272 			}
3273 		}
3274 	}
3275 
3276 }
3277 
3278 /*
3279  * hermon_hca_port_init()
3280  *    Context: Only called from attach() path context
3281  */
3282 static int
3283 hermon_hca_port_init(hermon_state_t *state)
3284 {
3285 	hermon_hw_set_port_t	*portinits, *initport;
3286 	hermon_cfg_profile_t	*cfgprof;
3287 	uint_t			num_ports;
3288 	int			i = 0, status;
3289 	uint64_t		maxval, val;
3290 	uint64_t		sysimgguid, nodeguid, portguid;
3291 
3292 
3293 	cfgprof = state->hs_cfg_profile;
3294 
3295 	/* Get number of HCA ports */
3296 	num_ports = cfgprof->cp_num_ports;
3297 
3298 	/* Allocate space for Hermon set port  struct(s) */
3299 	portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
3300 	    sizeof (hermon_hw_set_port_t), KM_SLEEP);
3301 
3302 
3303 
3304 	/* Post commands to initialize each Hermon HCA port */
3305 	/*
3306 	 * In Hermon, the process is different than in previous HCAs.
3307 	 * Here, you have to:
3308 	 *	QUERY_PORT - to get basic information from the HCA
3309 	 *	set the fields accordingly
3310 	 *	SET_PORT - to change/set everything as desired
3311 	 *	INIT_PORT - to bring the port up
3312 	 *
3313 	 * Needs to be done for each port in turn
3314 	 */
3315 
3316 	for (i = 0; i < num_ports; i++) {
3317 		bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
3318 		status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
3319 		    (i + 1), &state->hs_queryport,
3320 		    sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
3321 		if (status != HERMON_CMD_SUCCESS) {
3322 			cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
3323 			    "command failed: %08x\n", i + 1, status);
3324 			goto init_ports_fail;
3325 		}
3326 		initport = &portinits[i];
3327 		state->hs_initport = &portinits[i];
3328 
3329 		bzero(initport, sizeof (hermon_hw_query_port_t));
3330 
3331 		/*
3332 		 * Determine whether we need to override the firmware's
3333 		 * default SystemImageGUID setting.
3334 		 */
3335 		sysimgguid = cfgprof->cp_sysimgguid;
3336 		if (sysimgguid != 0) {
3337 			initport->sig		= 1;
3338 			initport->sys_img_guid	= sysimgguid;
3339 		}
3340 
3341 		/*
3342 		 * Determine whether we need to override the firmware's
3343 		 * default NodeGUID setting.
3344 		 */
3345 		nodeguid = cfgprof->cp_nodeguid;
3346 		if (nodeguid != 0) {
3347 			initport->ng		= 1;
3348 			initport->node_guid	= nodeguid;
3349 		}
3350 
3351 		/*
3352 		 * Determine whether we need to override the firmware's
3353 		 * default PortGUID setting.
3354 		 */
3355 		portguid = cfgprof->cp_portguid[i];
3356 		if (portguid != 0) {
3357 			initport->g0		= 1;
3358 			initport->guid0		= portguid;
3359 		}
3360 
3361 		/* Validate max MTU size */
3362 		maxval  = state->hs_queryport.ib_mtu;
3363 		val	= cfgprof->cp_max_mtu;
3364 		if (val > maxval) {
3365 			goto init_ports_fail;
3366 		}
3367 
3368 		/* Validate the max port width */
3369 		maxval  = state->hs_queryport.ib_port_wid;
3370 		val	= cfgprof->cp_max_port_width;
3371 		if (val > maxval) {
3372 			goto init_ports_fail;
3373 		}
3374 
3375 		/* Validate max VL cap size */
3376 		maxval  = state->hs_queryport.max_vl;
3377 		val	= cfgprof->cp_max_vlcap;
3378 		if (val > maxval) {
3379 			goto init_ports_fail;
3380 		}
3381 
3382 		/* Validate max GID table size */
3383 		maxval  = ((uint64_t)1 << state->hs_queryport.log_max_gid);
3384 		val	= ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
3385 		if (val > maxval) {
3386 			goto init_ports_fail;
3387 		}
3388 		initport->max_guid = (uint16_t)val;
3389 		initport->mg = 1;
3390 
3391 		/* Validate max PKey table size */
3392 		maxval	= ((uint64_t)1 << state->hs_queryport.log_max_pkey);
3393 		val	= ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
3394 		if (val > maxval) {
3395 			goto init_ports_fail;
3396 		}
3397 		initport->max_pkey = (uint16_t)val;
3398 		initport->mp = 1;
3399 		/*
3400 		 * Post the SET_PORT cmd to Hermon firmware. This sets
3401 		 * the parameters of the port.
3402 		 */
3403 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3404 		    HERMON_CMD_NOSLEEP_SPIN);
3405 		if (status != HERMON_CMD_SUCCESS) {
3406 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3407 			    "failed: %08x\n", i + 1, status);
3408 			goto init_ports_fail;
3409 		}
3410 		/* issue another SET_PORT cmd - performance fix/workaround */
3411 		/* XXX - need to discuss with Mellanox */
3412 		bzero(initport, sizeof (hermon_hw_query_port_t));
3413 		initport->cap_mask = 0x02500868;
3414 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3415 		    HERMON_CMD_NOSLEEP_SPIN);
3416 		if (status != HERMON_CMD_SUCCESS) {
3417 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3418 			    "failed: %08x\n", i + 1, status);
3419 			goto init_ports_fail;
3420 		}
3421 	}
3422 
3423 	/*
3424 	 * Finally, do the INIT_PORT for each port in turn
3425 	 * When this command completes, the corresponding Hermon port
3426 	 * will be physically "Up" and initialized.
3427 	 */
3428 	for (i = 0; i < num_ports; i++) {
3429 		status = hermon_init_port_cmd_post(state, i + 1,
3430 		    HERMON_CMD_NOSLEEP_SPIN);
3431 		if (status != HERMON_CMD_SUCCESS) {
3432 			cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
3433 			    "comman failed: %08x\n", i + 1, status);
3434 			goto init_ports_fail;
3435 		}
3436 	}
3437 
3438 	/* Free up the memory for Hermon port init struct(s), return success */
3439 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3440 	return (DDI_SUCCESS);
3441 
3442 init_ports_fail:
3443 	/*
3444 	 * Free up the memory for Hermon port init struct(s), shutdown any
3445 	 * successfully initialized ports, and return failure
3446 	 */
3447 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3448 	(void) hermon_hca_ports_shutdown(state, i);
3449 
3450 	return (DDI_FAILURE);
3451 }
3452 
3453 
3454 /*
3455  * hermon_hca_ports_shutdown()
3456  *    Context: Only called from attach() and/or detach() path contexts
3457  */
3458 static int
3459 hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
3460 {
3461 	int	i, status;
3462 
3463 	/*
3464 	 * Post commands to shutdown all init'd Hermon HCA ports.  Note: if
3465 	 * any of these commands fail for any reason, it would be entirely
3466 	 * unexpected and probably indicative a serious problem (HW or SW).
3467 	 * Although we do return void from this function, this type of failure
3468 	 * should not go unreported.  That is why we have the warning message.
3469 	 */
3470 	for (i = 0; i < num_init; i++) {
3471 		status = hermon_close_port_cmd_post(state, i + 1,
3472 		    HERMON_CMD_NOSLEEP_SPIN);
3473 		if (status != HERMON_CMD_SUCCESS) {
3474 			HERMON_WARNING(state, "failed to shutdown HCA port");
3475 			return (status);
3476 		}
3477 	}
3478 	return (HERMON_CMD_SUCCESS);
3479 }
3480 
3481 
3482 /*
3483  * hermon_internal_uarpg_init
3484  *    Context: Only called from attach() path context
3485  */
3486 static int
3487 hermon_internal_uarpg_init(hermon_state_t *state)
3488 {
3489 	int	status;
3490 	hermon_dbr_info_t 	*info;
3491 
3492 
3493 	/*
3494 	 * Allocate the UAR page for kernel use. This UAR page is
3495 	 * the privileged UAR page through which all kernel generated
3496 	 * doorbells will be rung. There are a number of UAR pages
3497 	 * reserved by hardware at the front of the UAR BAR, indicated
3498 	 * by DEVCAP.num_rsvd_uar, which we have already allocated. So,
3499 	 * the kernel page, or UAR page index num_rsvd_uar, will be
3500 	 * allocated here for kernel use.
3501 	 */
3502 
3503 	status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
3504 	    &state->hs_uarkpg_rsrc);
3505 	if (status != DDI_SUCCESS) {
3506 		return (DDI_FAILURE);
3507 	}
3508 
3509 	/* Setup pointer to kernel UAR page */
3510 	state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
3511 
3512 	/* need to set up DBr tracking as well */
3513 	status = hermon_dbr_page_alloc(state, &info);
3514 	if (status != DDI_SUCCESS) {
3515 		return (DDI_FAILURE);
3516 	}
3517 
3518 	/* store the page pointer in the private area - the rest s/b done */
3519 	state->hs_kern_dbr = info->dbr_page;
3520 	return (DDI_SUCCESS);
3521 }
3522 
3523 
3524 /*
3525  * hermon_internal_uarpg_fini
3526  *    Context: Only called from attach() and/or detach() path contexts
3527  */
3528 static void
3529 hermon_internal_uarpg_fini(hermon_state_t *state)
3530 {
3531 
3532 	/* Free up Hermon UAR page #1 (kernel driver doorbells) */
3533 	hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
3534 
3535 }
3536 
3537 
3538 /*
3539  * hermon_special_qp_contexts_reserve()
3540  *    Context: Only called from attach() path context
3541  */
3542 static int
3543 hermon_special_qp_contexts_reserve(hermon_state_t *state)
3544 {
3545 	hermon_rsrc_t	*qp0_rsrc, *qp1_rsrc, *qp_resvd;
3546 	int		status;
3547 
3548 
3549 	/* Initialize the lock used for special QP rsrc management */
3550 	mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
3551 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3552 
3553 	/*
3554 	 * Reserve contexts for QP0.  These QP contexts will be setup to
3555 	 * act as aliases for the real QP0.  Note: We are required to grab
3556 	 * two QPs (one per port) even if we are operating in single-port
3557 	 * mode.
3558 	 */
3559 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3560 	    HERMON_SLEEP, &qp0_rsrc);
3561 	if (status != DDI_SUCCESS) {
3562 		mutex_destroy(&state->hs_spec_qplock);
3563 		return (DDI_FAILURE);
3564 	}
3565 	state->hs_spec_qp0 = qp0_rsrc;
3566 
3567 	/*
3568 	 * Reserve contexts for QP1.  These QP contexts will be setup to
3569 	 * act as aliases for the real QP1.  Note: We are required to grab
3570 	 * two QPs (one per port) even if we are operating in single-port
3571 	 * mode.
3572 	 */
3573 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3574 	    HERMON_SLEEP, &qp1_rsrc);
3575 	if (status != DDI_SUCCESS) {
3576 		hermon_rsrc_free(state, &qp0_rsrc);
3577 		mutex_destroy(&state->hs_spec_qplock);
3578 		return (DDI_FAILURE);
3579 	}
3580 	state->hs_spec_qp1 = qp1_rsrc;
3581 
3582 	status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
3583 	    HERMON_SLEEP, &qp_resvd);
3584 	if (status != DDI_SUCCESS) {
3585 		hermon_rsrc_free(state, &qp1_rsrc);
3586 		hermon_rsrc_free(state, &qp0_rsrc);
3587 		mutex_destroy(&state->hs_spec_qplock);
3588 		return (DDI_FAILURE);
3589 	}
3590 	state->hs_spec_qp_unused = qp_resvd;
3591 
3592 	return (DDI_SUCCESS);
3593 }
3594 
3595 
3596 /*
3597  * hermon_special_qp_contexts_unreserve()
3598  *    Context: Only called from attach() and/or detach() path contexts
3599  */
3600 static void
3601 hermon_special_qp_contexts_unreserve(hermon_state_t *state)
3602 {
3603 
3604 	/* Unreserve contexts for spec_qp_unused */
3605 	hermon_rsrc_free(state, &state->hs_spec_qp_unused);
3606 
3607 	/* Unreserve contexts for QP1 */
3608 	hermon_rsrc_free(state, &state->hs_spec_qp1);
3609 
3610 	/* Unreserve contexts for QP0 */
3611 	hermon_rsrc_free(state, &state->hs_spec_qp0);
3612 
3613 	/* Destroy the lock used for special QP rsrc management */
3614 	mutex_destroy(&state->hs_spec_qplock);
3615 
3616 }
3617 
3618 
3619 /*
3620  * hermon_sw_reset()
3621  *    Context: Currently called only from attach() path context
3622  */
3623 static int
3624 hermon_sw_reset(hermon_state_t *state)
3625 {
3626 	ddi_acc_handle_t	hdl = hermon_get_pcihdl(state);
3627 	ddi_acc_handle_t	cmdhdl = hermon_get_cmdhdl(state);
3628 	uint32_t		reset_delay;
3629 	int			status, i;
3630 	uint32_t		sem;
3631 	uint_t			offset;
3632 	uint32_t		data32;		/* for devctl & linkctl */
3633 	int			loopcnt;
3634 
3635 	/* initialize the FMA retry loop */
3636 	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
3637 	hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);
3638 
3639 	/*
3640 	 * If the configured software reset delay is set to zero, then we
3641 	 * will not attempt a software reset of the Hermon device.
3642 	 */
3643 	reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
3644 	if (reset_delay == 0) {
3645 		return (DDI_SUCCESS);
3646 	}
3647 
3648 	/* the FMA retry loop starts. */
3649 	hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3650 	    fm_test);
3651 	hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3652 	    fm_test2);
3653 
3654 	/* Query the PCI capabilities of the HCA device */
3655 	/* but don't process the VPD until after reset */
3656 	status = hermon_pci_capability_list(state, hdl);
3657 	if (status != DDI_SUCCESS) {
3658 		cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
3659 		    status);
3660 		return (DDI_FAILURE);
3661 	}
3662 
3663 	/*
3664 	 * Read all PCI config info (reg0...reg63).  Note: According to the
3665 	 * Hermon software reset application note, we should not read or
3666 	 * restore the values in reg22 and reg23.
3667 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
3668 	 * register LAST, and technically, you need to restore the
3669 	 * PCIE Capability "device control" and "link control" (word-sized,
3670 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
3671 	 * We hold off restoring the command register - offset 0x4 - till last
3672 	 */
3673 
3674 	/* 1st, wait for the semaphore assure accessibility - per PRM */
3675 	status = -1;
3676 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
3677 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
3678 		if (sem == 0) {
3679 			status = 0;
3680 			break;
3681 		}
3682 		drv_usecwait(1);
3683 	}
3684 
3685 	/* Check if timeout happens */
3686 	if (status == -1) {
3687 		/*
3688 		 * Remove this acc handle from Hermon, then log
3689 		 * the error.
3690 		 */
3691 		hermon_pci_config_teardown(state, &hdl);
3692 
3693 		cmn_err(CE_WARN, "hermon_sw_reset timeout: "
3694 		    "failed to get the semaphore(0x%p)\n",
3695 		    (void *)state->hs_cmd_regs.sw_semaphore);
3696 
3697 		hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
3698 		return (DDI_FAILURE);
3699 	}
3700 
3701 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3702 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3703 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3704 			state->hs_cfg_data[i]  = pci_config_get32(hdl, i << 2);
3705 		}
3706 	}
3707 
3708 	/*
3709 	 * Perform the software reset (by writing 1 at offset 0xF0010)
3710 	 */
3711 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
3712 
3713 	/*
3714 	 * This delay is required so as not to cause a panic here. If the
3715 	 * device is accessed too soon after reset it will not respond to
3716 	 * config cycles, causing a Master Abort and panic.
3717 	 */
3718 	drv_usecwait(reset_delay);
3719 
3720 	/*
3721 	 * Poll waiting for the device to finish resetting.
3722 	 */
3723 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
3724 	while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
3725 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
3726 		if (--loopcnt == 0)
3727 			break;	/* just in case, break and go on */
3728 	}
3729 	if (loopcnt == 0)
3730 		cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
3731 		    pci_config_get32(hdl, 0));
3732 
3733 	/*
3734 	 * Restore the config info
3735 	 */
3736 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3737 		if (i == 1) continue;	/* skip the status/ctrl reg */
3738 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3739 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3740 			pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
3741 		}
3742 	}
3743 
3744 	/*
3745 	 * PCI Express Capability - we saved during capability list, and
3746 	 * we'll restore them here.
3747 	 */
3748 	offset = state->hs_pci_cap_offset;
3749 	data32 = state->hs_pci_cap_devctl;
3750 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
3751 	data32 = state->hs_pci_cap_lnkctl;
3752 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
3753 
3754 	pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
3755 
3756 	/* the FMA retry loop ends. */
3757 	hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3758 	    fm_test2);
3759 	hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3760 	    fm_test);
3761 
3762 	return (DDI_SUCCESS);
3763 
3764 pio_error2:
3765 	/* fall through */
3766 pio_error:
3767 	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
3768 	return (DDI_FAILURE);
3769 }
3770 
3771 
3772 /*
3773  * hermon_mcg_init()
3774  *    Context: Only called from attach() path context
3775  */
3776 static int
3777 hermon_mcg_init(hermon_state_t *state)
3778 {
3779 	uint_t		mcg_tmp_sz;
3780 
3781 
3782 	/*
3783 	 * Allocate space for the MCG temporary copy buffer.  This is
3784 	 * used by the Attach/Detach Multicast Group code
3785 	 */
3786 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3787 	state->hs_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
3788 
3789 	/*
3790 	 * Initialize the multicast group mutex.  This ensures atomic
3791 	 * access to add, modify, and remove entries in the multicast
3792 	 * group hash lists.
3793 	 */
3794 	mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
3795 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3796 
3797 	return (DDI_SUCCESS);
3798 }
3799 
3800 
3801 /*
3802  * hermon_mcg_fini()
3803  *    Context: Only called from attach() and/or detach() path contexts
3804  */
3805 static void
3806 hermon_mcg_fini(hermon_state_t *state)
3807 {
3808 	uint_t		mcg_tmp_sz;
3809 
3810 
3811 	/* Free up the space used for the MCG temporary copy buffer */
3812 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3813 	kmem_free(state->hs_mcgtmp, mcg_tmp_sz);
3814 
3815 	/* Destroy the multicast group mutex */
3816 	mutex_destroy(&state->hs_mcglock);
3817 
3818 }
3819 
3820 
3821 /*
3822  * hermon_fw_version_check()
3823  *    Context: Only called from attach() path context
3824  */
3825 static int
3826 hermon_fw_version_check(hermon_state_t *state)
3827 {
3828 
3829 	uint_t	hermon_fw_ver_major;
3830 	uint_t	hermon_fw_ver_minor;
3831 	uint_t	hermon_fw_ver_subminor;
3832 
3833 #ifdef FMA_TEST
3834 	if (hermon_test_num == -1) {
3835 		return (DDI_FAILURE);
3836 	}
3837 #endif
3838 
3839 	/*
3840 	 * Depending on which version of driver we have attached, and which
3841 	 * HCA we've attached, the firmware version checks will be different.
3842 	 * We set up the comparison values for both Arbel and Sinai HCAs.
3843 	 */
3844 	switch (state->hs_operational_mode) {
3845 	case HERMON_HCA_MODE:
3846 		hermon_fw_ver_major = HERMON_FW_VER_MAJOR;
3847 		hermon_fw_ver_minor = HERMON_FW_VER_MINOR;
3848 		hermon_fw_ver_subminor = HERMON_FW_VER_SUBMINOR;
3849 		break;
3850 
3851 	default:
3852 		return (DDI_FAILURE);
3853 	}
3854 
3855 	/*
3856 	 * If FW revision major number is less than acceptable,
3857 	 * return failure, else if greater return success.  If
3858 	 * the major numbers are equal than check the minor number
3859 	 */
3860 	if (state->hs_fw.fw_rev_major < hermon_fw_ver_major) {
3861 		return (DDI_FAILURE);
3862 	} else if (state->hs_fw.fw_rev_major > hermon_fw_ver_major) {
3863 		return (DDI_SUCCESS);
3864 	}
3865 
3866 	/*
3867 	 * Do the same check as above, except for minor revision numbers
3868 	 * If the minor numbers are equal than check the subminor number
3869 	 */
3870 	if (state->hs_fw.fw_rev_minor < hermon_fw_ver_minor) {
3871 		return (DDI_FAILURE);
3872 	} else if (state->hs_fw.fw_rev_minor > hermon_fw_ver_minor) {
3873 		return (DDI_SUCCESS);
3874 	}
3875 
3876 	/*
3877 	 * Once again we do the same check as above, except for the subminor
3878 	 * revision number.  If the subminor numbers are equal here, then
3879 	 * these are the same firmware version, return success
3880 	 */
3881 	if (state->hs_fw.fw_rev_subminor < hermon_fw_ver_subminor) {
3882 		return (DDI_FAILURE);
3883 	} else if (state->hs_fw.fw_rev_subminor > hermon_fw_ver_subminor) {
3884 		return (DDI_SUCCESS);
3885 	}
3886 
3887 	return (DDI_SUCCESS);
3888 }
3889 
3890 
3891 /*
3892  * hermon_device_info_report()
3893  *    Context: Only called from attach() path context
3894  */
3895 static void
3896 hermon_device_info_report(hermon_state_t *state)
3897 {
3898 
3899 	cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
3900 	    "HW rev: %02d\n", state->hs_instance, state->hs_fw.fw_rev_major,
3901 	    state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
3902 	    state->hs_revision_id);
3903 	cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
3904 	    state->hs_instance, state->hs_nodedesc, state->hs_nodeguid);
3905 
3906 }
3907 
3908 
3909 /*
3910  * hermon_pci_capability_list()
3911  *    Context: Only called from attach() path context
3912  */
3913 static int
3914 hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
3915 {
3916 	uint_t		offset, data;
3917 	uint32_t	data32;
3918 
3919 	state->hs_pci_cap_offset = 0;		/* make sure it's cleared */
3920 
3921 	/*
3922 	 * Check for the "PCI Capabilities" bit in the "Status Register".
3923 	 * Bit 4 in this register indicates the presence of a "PCI
3924 	 * Capabilities" list.
3925 	 *
3926 	 * PCI-Express requires this bit to be set to 1.
3927 	 */
3928 	data = pci_config_get16(hdl, 0x06);
3929 	if ((data & 0x10) == 0) {
3930 		return (DDI_FAILURE);
3931 	}
3932 
3933 	/*
3934 	 * Starting from offset 0x34 in PCI config space, find the
3935 	 * head of "PCI capabilities" list, and walk the list.  If
3936 	 * capabilities of a known type are encountered (e.g.
3937 	 * "PCI-X Capability"), then call the appropriate handler
3938 	 * function.
3939 	 */
3940 	offset = pci_config_get8(hdl, 0x34);
3941 	while (offset != 0x0) {
3942 		data = pci_config_get8(hdl, offset);
3943 		/*
3944 		 * Check for known capability types.  Hermon has the
3945 		 * following:
3946 		 *    o Power Mgmt	 (0x02)
3947 		 *    o VPD Capability   (0x03)
3948 		 *    o PCI-E Capability (0x10)
3949 		 *    o MSIX Capability  (0x11)
3950 		 */
3951 		switch (data) {
3952 		case 0x01:
3953 			/* power mgmt handling */
3954 			break;
3955 		case 0x03:
3956 
3957 /*
3958  * Reading the PCIe VPD is inconsistent - that is, sometimes causes
3959  * problems on (mostly) X64, though we've also seen problems w/ Sparc
3960  * and Tavor --- so, for now until it's root caused, don't try and
3961  * read it
3962  */
3963 #ifdef HERMON_VPD_WORKS
3964 			hermon_pci_capability_vpd(state, hdl, offset);
3965 #else
3966 			delay(100);
3967 			hermon_pci_capability_vpd(state, hdl, offset);
3968 #endif
3969 			break;
3970 		case 0x10:
3971 			/*
3972 			 * PCI Express Capability - save offset & contents
3973 			 * for later in reset
3974 			 */
3975 			state->hs_pci_cap_offset = offset;
3976 			data32 = pci_config_get32(hdl,
3977 			    offset + HERMON_PCI_CAP_DEV_OFFS);
3978 			state->hs_pci_cap_devctl = data32;
3979 			data32 = pci_config_get32(hdl,
3980 			    offset + HERMON_PCI_CAP_LNK_OFFS);
3981 			state->hs_pci_cap_lnkctl = data32;
3982 			break;
3983 		case 0x11:
3984 			/*
3985 			 * MSIX support - nothing to do, taken care of in the
3986 			 * MSI/MSIX interrupt frameworkd
3987 			 */
3988 			break;
3989 		default:
3990 			/* just go on to the next */
3991 			break;
3992 		}
3993 
3994 		/* Get offset of next entry in list */
3995 		offset = pci_config_get8(hdl, offset + 1);
3996 	}
3997 
3998 	return (DDI_SUCCESS);
3999 }
4000 
4001 /*
4002  * hermon_pci_read_vpd()
4003  *    Context: Only called from attach() path context
4004  *    utility routine for hermon_pci_capability_vpd()
4005  */
4006 static int
4007 hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
4008     uint32_t *data)
4009 {
4010 	int		retry = 40;  /* retry counter for EEPROM poll */
4011 	uint32_t	val;
4012 	int		vpd_addr = offset + 2;
4013 	int		vpd_data = offset + 4;
4014 
4015 	/*
4016 	 * In order to read a 32-bit value from VPD, we are to write down
4017 	 * the address (offset in the VPD itself) to the address register.
4018 	 * To signal the read, we also clear bit 31.  We then poll on bit 31
4019 	 * and when it is set, we can then read our 4 bytes from the data
4020 	 * register.
4021 	 */
4022 	(void) pci_config_put32(hdl, offset, addr << 16);
4023 	do {
4024 		drv_usecwait(1000);
4025 		val = pci_config_get16(hdl, vpd_addr);
4026 		if (val & 0x8000) {		/* flag bit set */
4027 			*data = pci_config_get32(hdl, vpd_data);
4028 			return (DDI_SUCCESS);
4029 		}
4030 	} while (--retry);
4031 	/* read of flag failed write one message but count the failures */
4032 	if (debug_vpd == 0)
4033 		cmn_err(CE_NOTE,
4034 		    "!Failed to see flag bit after VPD addr write\n");
4035 	debug_vpd++;
4036 
4037 
4038 vpd_read_fail:
4039 	return (DDI_FAILURE);
4040 }
4041 
4042 
4043 
4044 /*
4045  *   hermon_pci_capability_vpd()
4046  *    Context: Only called from attach() path context
4047  */
4048 static void
4049 hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
4050     uint_t offset)
4051 {
4052 	uint8_t			name_length;
4053 	uint8_t			pn_length;
4054 	int			i, err = 0;
4055 	int			vpd_str_id = 0;
4056 	int			vpd_ro_desc;
4057 	int			vpd_ro_pn_desc;
4058 #ifdef _BIG_ENDIAN
4059 	uint32_t		data32;
4060 #endif /* _BIG_ENDIAN */
4061 	union {
4062 		uint32_t	vpd_int[HERMON_VPD_HDR_DWSIZE];
4063 		uchar_t		vpd_char[HERMON_VPD_HDR_BSIZE];
4064 	} vpd;
4065 
4066 
4067 	/*
4068 	 * Read in the Vital Product Data (VPD) to the extend needed
4069 	 * by the fwflash utility
4070 	 */
4071 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4072 		err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
4073 		if (err != DDI_SUCCESS) {
4074 			cmn_err(CE_NOTE, "!VPD read failed\n");
4075 			goto out;
4076 		}
4077 	}
4078 
4079 #ifdef _BIG_ENDIAN
4080 	/* Need to swap bytes for big endian. */
4081 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4082 		data32 = vpd.vpd_int[i];
4083 		vpd.vpd_char[(i << 2) + 3] =
4084 		    (uchar_t)((data32 & 0xFF000000) >> 24);
4085 		vpd.vpd_char[(i << 2) + 2] =
4086 		    (uchar_t)((data32 & 0x00FF0000) >> 16);
4087 		vpd.vpd_char[(i << 2) + 1] =
4088 		    (uchar_t)((data32 & 0x0000FF00) >> 8);
4089 		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
4090 	}
4091 #endif	/* _BIG_ENDIAN */
4092 
4093 	/* Check for VPD String ID Tag */
4094 	if (vpd.vpd_char[vpd_str_id] == 0x82) {
4095 		/* get the product name */
4096 		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
4097 		if (name_length > sizeof (state->hs_hca_name)) {
4098 			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
4099 			    name_length);
4100 			goto out;
4101 		}
4102 		(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
4103 		    name_length);
4104 		state->hs_hca_name[name_length] = 0;
4105 
4106 		/* get the part number */
4107 		vpd_ro_desc = name_length + 3; /* read-only tag location */
4108 		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
4109 
4110 		/* Verify read-only tag and Part Number keyword. */
4111 		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
4112 		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
4113 		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
4114 			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
4115 			goto out;
4116 		}
4117 
4118 		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
4119 		if (pn_length > sizeof (state->hs_hca_pn)) {
4120 			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
4121 			    name_length);
4122 			goto out;
4123 		}
4124 		(void) memcpy(state->hs_hca_pn,
4125 		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
4126 		    pn_length);
4127 		state->hs_hca_pn[pn_length] = 0;
4128 		state->hs_hca_pn_len = pn_length;
4129 		cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
4130 	} else {
4131 		/* Wrong VPD String ID Tag */
4132 		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
4133 		    vpd.vpd_char[0]);
4134 		goto out;
4135 	}
4136 	return;
4137 out:
4138 	state->hs_hca_pn_len = 0;
4139 }
4140 
4141 
4142 
4143 /*
4144  * hermon_intr_or_msi_init()
4145  *    Context: Only called from attach() path context
4146  */
4147 static int
4148 hermon_intr_or_msi_init(hermon_state_t *state)
4149 {
4150 	int	status;
4151 
4152 
4153 	/* Query for the list of supported interrupt event types */
4154 	status = ddi_intr_get_supported_types(state->hs_dip,
4155 	    &state->hs_intr_types_avail);
4156 	if (status != DDI_SUCCESS) {
4157 		return (DDI_FAILURE);
4158 	}
4159 
4160 	/*
4161 	 * If Hermon supports MSI-X in this system (and, if it
4162 	 * hasn't been overridden by a configuration variable), then
4163 	 * the default behavior is to use a single MSI-X.  Otherwise,
4164 	 * fallback to using legacy interrupts.  Also, if MSI-X is chosen,
4165 	 * but fails for whatever reasons, then next try MSI
4166 	 */
4167 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4168 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4169 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSIX);
4170 		if (status == DDI_SUCCESS) {
4171 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSIX;
4172 			return (DDI_SUCCESS);
4173 		}
4174 	}
4175 
4176 	/*
4177 	 * If Hermon supports MSI in this system (and, if it
4178 	 * hasn't been overridden by a configuration variable), then
4179 	 * the default behavior is to use a single MSIX.  Otherwise,
4180 	 * fallback to using legacy interrupts.  Also, if MSI is chosen,
4181 	 * but fails for whatever reasons, then fallback to using legacy
4182 	 * interrupts.
4183 	 */
4184 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4185 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSI)) {
4186 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSI);
4187 		if (status == DDI_SUCCESS) {
4188 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSI;
4189 			return (DDI_SUCCESS);
4190 		}
4191 	}
4192 
4193 	/*
4194 	 * MSI interrupt allocation failed, or was not available.  Fallback to
4195 	 * legacy interrupt support.
4196 	 */
4197 	if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
4198 		status = hermon_add_intrs(state, DDI_INTR_TYPE_FIXED);
4199 		if (status == DDI_SUCCESS) {
4200 			state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
4201 			return (DDI_SUCCESS);
4202 		}
4203 	}
4204 
4205 	/*
4206 	 * None of MSI, MSI-X, nor legacy interrupts were successful.
4207 	 * Return failure.
4208 	 */
4209 	return (DDI_FAILURE);
4210 }
4211 
4212 /*
4213  * hermon_add_intrs()
4214  *    Context: Only called from attach() patch context
4215  */
4216 static int
4217 hermon_add_intrs(hermon_state_t *state, int intr_type)
4218 {
4219 	int	status;
4220 
4221 
4222 	/* Get number of interrupts/MSI supported */
4223 	status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
4224 	    &state->hs_intrmsi_count);
4225 	if (status != DDI_SUCCESS) {
4226 		return (DDI_FAILURE);
4227 	}
4228 
4229 	/* Get number of available interrupts/MSI */
4230 	status = ddi_intr_get_navail(state->hs_dip, intr_type,
4231 	    &state->hs_intrmsi_avail);
4232 	if (status != DDI_SUCCESS) {
4233 		return (DDI_FAILURE);
4234 	}
4235 
4236 	/* Ensure that we have at least one (1) usable MSI or interrupt */
4237 	if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) {
4238 		return (DDI_FAILURE);
4239 	}
4240 
4241 	/* Attempt to allocate the maximum #interrupt/MSI handles */
4242 	status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
4243 	    intr_type, 0, min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
4244 	    &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
4245 	if (status != DDI_SUCCESS) {
4246 		return (DDI_FAILURE);
4247 	}
4248 
4249 	/* Ensure that we have allocated at least one (1) MSI or interrupt */
4250 	if (state->hs_intrmsi_allocd < 1) {
4251 		return (DDI_FAILURE);
4252 	}
4253 	state->hs_eq_dist = state->hs_intrmsi_allocd - 1; /* start at 0 */
4254 
4255 	/*
4256 	 * Extract the priority for the allocated interrupt/MSI.  This
4257 	 * will be used later when initializing certain mutexes.
4258 	 */
4259 	status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
4260 	    &state->hs_intrmsi_pri);
4261 	if (status != DDI_SUCCESS) {
4262 		/* Free the allocated interrupt/MSI handle */
4263 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4264 
4265 		return (DDI_FAILURE);
4266 	}
4267 
4268 	/* Make sure the interrupt/MSI priority is below 'high level' */
4269 	if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
4270 		/* Free the allocated interrupt/MSI handle */
4271 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4272 
4273 		return (DDI_FAILURE);
4274 	}
4275 
4276 	/* Get add'l capability information regarding interrupt/MSI */
4277 	status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
4278 	    &state->hs_intrmsi_cap);
4279 	if (status != DDI_SUCCESS) {
4280 		/* Free the allocated interrupt/MSI handle */
4281 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4282 
4283 		return (DDI_FAILURE);
4284 	}
4285 
4286 	return (DDI_SUCCESS);
4287 }
4288 
4289 
4290 /*
4291  * hermon_intr_or_msi_fini()
4292  *    Context: Only called from attach() and/or detach() path contexts
4293  */
4294 static int
4295 hermon_intr_or_msi_fini(hermon_state_t *state)
4296 {
4297 	int	status;
4298 	int	intr;
4299 
4300 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
4301 
4302 		/* Free the allocated interrupt/MSI handle */
4303 		status = ddi_intr_free(state->hs_intrmsi_hdl[intr]);
4304 		if (status != DDI_SUCCESS) {
4305 			return (DDI_FAILURE);
4306 		}
4307 	}
4308 	return (DDI_SUCCESS);
4309 }
4310 
4311 
4312 /*ARGSUSED*/
4313 void
4314 hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
4315     uint_t offset)
4316 {
4317 	uint32_t	msix_data;
4318 	uint16_t	msg_cntr;
4319 	uint32_t	t_offset;	/* table offset */
4320 	uint32_t	t_bir;
4321 	uint32_t	p_offset;	/* pba */
4322 	uint32_t	p_bir;
4323 	int		t_size;		/* size in entries - each is 4 dwords */
4324 
4325 	/* come in with offset pointing at the capability structure */
4326 
4327 	msix_data = pci_config_get32(hdl, offset);
4328 	cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
4329 	msg_cntr =  pci_config_get16(hdl, offset+2);
4330 	cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
4331 	offset += 4;
4332 	msix_data = pci_config_get32(hdl, offset);	/* table info */
4333 	t_offset = (msix_data & 0xFFF8) >> 3;
4334 	t_bir = msix_data & 0x07;
4335 	offset += 4;
4336 	cmn_err(CE_CONT, "  table %X --offset = %X, bir(bar) = %X\n",
4337 	    msix_data, t_offset, t_bir);
4338 	msix_data = pci_config_get32(hdl, offset);	/* PBA info */
4339 	p_offset = (msix_data & 0xFFF8) >> 3;
4340 	p_bir = msix_data & 0x07;
4341 
4342 	cmn_err(CE_CONT, "  PBA   %X --offset = %X, bir(bar) = %X\n",
4343 	    msix_data, p_offset, p_bir);
4344 	t_size = msg_cntr & 0x7FF;		/* low eleven bits */
4345 	cmn_err(CE_CONT, "    table size = %X entries\n", t_size);
4346 
4347 	offset = t_offset;		/* reuse this for offset from BAR */
4348 #ifdef HERMON_SUPPORTS_MSIX_BAR
4349 	cmn_err(CE_CONT, "First 2 table entries behind BAR2 \n");
4350 	for (i = 0; i < 2; i++) {
4351 		for (j = 0; j < 4; j++, offset += 4) {
4352 			msix_data = ddi_get32(state->hs_reg_msihdl,
4353 			    (uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
4354 			    + offset));
4355 			cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
4356 			    i, j, msix_data);
4357 		}
4358 	}
4359 #endif
4360 
4361 }
4362 
4363 /*
4364  * X86 fastreboot support functions.
4365  * These functions are used to save/restore MSI-X table/PBA and also
4366  * to disable MSI-X interrupts in hermon_quiesce().
4367  */
4368 
4369 /* Return the message control for MSI-X */
4370 static ushort_t
4371 get_msix_ctrl(dev_info_t *dip)
4372 {
4373 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4374 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4375 	    DEVI(dip)->devi_instance);
4376 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4377 	ASSERT(pci_cfg_hdl != NULL);
4378 
4379 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4380 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4381 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4382 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4383 			return (0);
4384 	}
4385 	ASSERT(msix_ctrl != 0);
4386 
4387 	return (msix_ctrl);
4388 }
4389 
4390 /* Return the MSI-X table size */
4391 static size_t
4392 get_msix_tbl_size(dev_info_t *dip)
4393 {
4394 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4395 	ASSERT(msix_ctrl != 0);
4396 
4397 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4398 	    PCI_MSIX_VECTOR_SIZE);
4399 }
4400 
4401 /* Return the MSI-X PBA size */
4402 static size_t
4403 get_msix_pba_size(dev_info_t *dip)
4404 {
4405 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4406 	ASSERT(msix_ctrl != 0);
4407 
4408 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8);
4409 }
4410 
4411 /* Set up the MSI-X table/PBA save area */
4412 static void
4413 hermon_set_msix_info(hermon_state_t *state)
4414 {
4415 	uint_t			rnumber, breg, nregs;
4416 	ushort_t		caps_ctrl, msix_ctrl;
4417 	pci_regspec_t		*rp;
4418 	int			reg_size, addr_space, offset, *regs_list, i;
4419 
4420 	/*
4421 	 * MSI-X BIR Index Table:
4422 	 * BAR indicator register (BIR) to Base Address register.
4423 	 */
4424 	uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
4425 	    0x20, 0x24, 0xff, 0xff};
4426 
4427 	/* Fastreboot data access  attribute */
4428 	ddi_device_acc_attr_t	dev_attr = {
4429 		0,				/* version */
4430 		DDI_STRUCTURE_LE_ACC,
4431 		DDI_STRICTORDER_ACC,		/* attr access */
4432 		0
4433 	};
4434 
4435 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4436 	ASSERT(pci_cfg_hdl != NULL);
4437 
4438 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4439 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4440 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4441 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4442 			return;
4443 	}
4444 	ASSERT(msix_ctrl != 0);
4445 
4446 	state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4447 	    PCI_MSIX_TBL_OFFSET);
4448 
4449 	/* Get the BIR for MSI-X table */
4450 	breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
4451 	    PCI_MSIX_TBL_BIR_MASK];
4452 	ASSERT(breg != 0xFF);
4453 
4454 	/* Set the MSI-X table offset */
4455 	state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
4456 	    ~PCI_MSIX_TBL_BIR_MASK;
4457 
4458 	/* Set the MSI-X table size */
4459 	state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4460 	    PCI_MSIX_VECTOR_SIZE;
4461 
4462 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
4463 	    DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
4464 	    DDI_PROP_SUCCESS) {
4465 		return;
4466 	}
4467 	reg_size = sizeof (pci_regspec_t) / sizeof (int);
4468 
4469 	/* Check the register number for MSI-X table */
4470 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4471 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4472 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4473 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4474 
4475 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4476 		    (addr_space == PCI_ADDR_MEM64))) {
4477 			rnumber = i;
4478 			break;
4479 		}
4480 	}
4481 	ASSERT(rnumber != 0);
4482 	state->hs_msix_tbl_rnumber = rnumber;
4483 
4484 	/* Set device attribute version and access according to Hermon FM */
4485 	dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
4486 	dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);
4487 
4488 	/* Map the entire MSI-X vector table */
4489 	if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
4490 	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
4491 	    state->hs_msix_tbl_size, &dev_attr,
4492 	    &state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
4493 		return;
4494 	}
4495 
4496 	state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4497 	    PCI_MSIX_PBA_OFFSET);
4498 
4499 	/* Get the BIR for MSI-X PBA */
4500 	breg = pci_msix_bir_index[state->hs_msix_pba_offset &
4501 	    PCI_MSIX_PBA_BIR_MASK];
4502 	ASSERT(breg != 0xFF);
4503 
4504 	/* Set the MSI-X PBA offset */
4505 	state->hs_msix_pba_offset = state->hs_msix_pba_offset &
4506 	    ~PCI_MSIX_PBA_BIR_MASK;
4507 
4508 	/* Set the MSI-X PBA size */
4509 	state->hs_msix_pba_size =
4510 	    ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;
4511 
4512 	/* Check the register number for MSI-X PBA */
4513 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4514 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4515 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4516 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4517 
4518 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4519 		    (addr_space == PCI_ADDR_MEM64))) {
4520 			rnumber = i;
4521 			break;
4522 		}
4523 	}
4524 	ASSERT(rnumber != 0);
4525 	state->hs_msix_pba_rnumber = rnumber;
4526 
4527 	/* Map in the MSI-X Pending Bit Array */
4528 	if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
4529 	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
4530 	    state->hs_msix_pba_size, &dev_attr,
4531 	    &state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
4532 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
4533 		state->hs_fm_msix_tblhdl = NULL;
4534 		return;
4535 	}
4536 
4537 	/* Set the MSI-X table save area */
4538 	state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
4539 	    KM_SLEEP);
4540 
4541 	/* Set the MSI-X PBA save area */
4542 	state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
4543 	    KM_SLEEP);
4544 }
4545 
4546 /* Disable Hermon interrupts */
4547 static int
4548 hermon_intr_disable(hermon_state_t *state)
4549 {
4550 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4551 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4552 	ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
4553 	int i, j;
4554 	ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
4555 	ASSERT(state->hs_intr_types_avail &
4556 	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));
4557 
4558 	/*
4559 	 * Check if MSI-X interrupts are used. If so, disable MSI-X interupts.
4560 	 * If not, since Hermon doesn't support MSI interrupts, assuming the
4561 	 * legacy interrupt is used instead, disable the legacy interrupt.
4562 	 */
4563 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4564 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4565 
4566 		if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4567 		    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4568 			if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL,
4569 			    caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4570 				return (DDI_FAILURE);
4571 		}
4572 		ASSERT(msix_ctrl != 0);
4573 
4574 		if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
4575 			return (DDI_SUCCESS);
4576 
4577 		/* Clear all inums in MSI-X table */
4578 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4579 		    i += PCI_MSIX_VECTOR_SIZE) {
4580 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4581 				char *addr = state->hs_msix_tbl_addr + i + j;
4582 				ddi_put32(msix_tblhdl,
4583 				    (uint32_t *)(uintptr_t)addr, 0x0);
4584 			}
4585 		}
4586 
4587 		/* Disable MSI-X interrupts */
4588 		msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
4589 		PCI_CAP_PUT16(pci_cfg_hdl, NULL, caps_ctrl, PCI_MSIX_CTRL,
4590 		    msix_ctrl);
4591 
4592 	} else {
4593 		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
4594 		ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);
4595 
4596 		/* Disable the legacy interrupts */
4597 		cmdreg |= PCI_COMM_INTX_DISABLE;
4598 		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
4599 	}
4600 
4601 	return (DDI_SUCCESS);
4602 }
4603 
4604 /* Hermon quiesce(9F) entry */
4605 static int
4606 hermon_quiesce(dev_info_t *dip)
4607 {
4608 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4609 	    DEVI(dip)->devi_instance);
4610 	ddi_acc_handle_t pcihdl = hermon_get_pcihdl(state);
4611 	ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
4612 	ddi_acc_handle_t msix_tbl_hdl = hermon_get_msix_tblhdl(state);
4613 	ddi_acc_handle_t msix_pba_hdl = hermon_get_msix_pbahdl(state);
4614 	uint32_t sem, reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
4615 	uint64_t data64;
4616 	uint32_t data32;
4617 	int status, i, j, loopcnt;
4618 	uint_t offset;
4619 
4620 	ASSERT(state != NULL);
4621 
4622 	/* start fastreboot */
4623 	state->hs_quiescing = B_TRUE;
4624 
4625 	/* suppress Hermon FM ereports */
4626 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
4627 		hermon_clr_state_nolock(state, HCA_EREPORT_FM);
4628 	}
4629 
4630 	/* Shutdown HCA ports */
4631 	if (hermon_hca_ports_shutdown(state,
4632 	    state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
4633 		state->hs_quiescing = B_FALSE;
4634 		return (DDI_FAILURE);
4635 	}
4636 
4637 	/* Close HCA */
4638 	if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
4639 	    HERMON_CMD_SUCCESS) {
4640 		state->hs_quiescing = B_FALSE;
4641 		return (DDI_FAILURE);
4642 	}
4643 
4644 	/* Disable interrupts */
4645 	if (hermon_intr_disable(state) != DDI_SUCCESS) {
4646 		state->hs_quiescing = B_FALSE;
4647 		return (DDI_FAILURE);
4648 	}
4649 
4650 	/*
4651 	 * Query the PCI capabilities of the HCA device, but don't process
4652 	 * the VPD until after reset.
4653 	 */
4654 	if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
4655 		state->hs_quiescing = B_FALSE;
4656 		return (DDI_FAILURE);
4657 	}
4658 
4659 	/*
4660 	 * Read all PCI config info (reg0...reg63).  Note: According to the
4661 	 * Hermon software reset application note, we should not read or
4662 	 * restore the values in reg22 and reg23.
4663 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
4664 	 * register LAST, and technically, you need to restore the
4665 	 * PCIE Capability "device control" and "link control" (word-sized,
4666 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
4667 	 * We hold off restoring the command register - offset 0x4 - till last
4668 	 */
4669 
4670 	/* 1st, wait for the semaphore assure accessibility - per PRM */
4671 	status = -1;
4672 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
4673 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
4674 		if (sem == 0) {
4675 			status = 0;
4676 			break;
4677 		}
4678 		drv_usecwait(1);
4679 	}
4680 
4681 	/* Check if timeout happens */
4682 	if (status == -1) {
4683 		state->hs_quiescing = B_FALSE;
4684 		return (DDI_FAILURE);
4685 	}
4686 
4687 	/* MSI-X interrupts are used, save the MSI-X table */
4688 	if (msix_tbl_hdl && msix_pba_hdl) {
4689 		/* save MSI-X table */
4690 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4691 		    i += PCI_MSIX_VECTOR_SIZE) {
4692 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4693 				char *addr = state->hs_msix_tbl_addr + i + j;
4694 				data32 = ddi_get32(msix_tbl_hdl,
4695 				    (uint32_t *)(uintptr_t)addr);
4696 				*(uint32_t *)(uintptr_t)(state->
4697 				    hs_msix_tbl_entries + i + j) = data32;
4698 			}
4699 		}
4700 		/* save MSI-X PBA */
4701 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4702 			char *addr = state->hs_msix_pba_addr + i;
4703 			data64 = ddi_get64(msix_pba_hdl,
4704 			    (uint64_t *)(uintptr_t)addr);
4705 			*(uint64_t *)(uintptr_t)(state->
4706 			    hs_msix_pba_entries + i) = data64;
4707 		}
4708 	}
4709 
4710 	/* save PCI config space */
4711 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4712 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4713 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4714 			state->hs_cfg_data[i]  =
4715 			    pci_config_get32(pcihdl, i << 2);
4716 		}
4717 	}
4718 
4719 	/* SW-reset HCA */
4720 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
4721 
4722 	/*
4723 	 * This delay is required so as not to cause a panic here. If the
4724 	 * device is accessed too soon after reset it will not respond to
4725 	 * config cycles, causing a Master Abort and panic.
4726 	 */
4727 	drv_usecwait(reset_delay);
4728 
4729 	/* Poll waiting for the device to finish resetting */
4730 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
4731 	while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
4732 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
4733 		if (--loopcnt == 0)
4734 			break;	/* just in case, break and go on */
4735 	}
4736 	if (loopcnt == 0) {
4737 		state->hs_quiescing = B_FALSE;
4738 		return (DDI_FAILURE);
4739 	}
4740 
4741 	/* Restore the config info */
4742 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4743 		if (i == 1) continue;	/* skip the status/ctrl reg */
4744 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4745 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4746 			pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
4747 		}
4748 	}
4749 
4750 	/* If MSI-X interrupts are used, restore the MSI-X table */
4751 	if (msix_tbl_hdl && msix_pba_hdl) {
4752 		/* restore MSI-X PBA */
4753 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4754 			char *addr = state->hs_msix_pba_addr + i;
4755 			data64 = *(uint64_t *)(uintptr_t)
4756 			    (state->hs_msix_pba_entries + i);
4757 			ddi_put64(msix_pba_hdl,
4758 			    (uint64_t *)(uintptr_t)addr, data64);
4759 		}
4760 		/* restore MSI-X table */
4761 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4762 		    i += PCI_MSIX_VECTOR_SIZE) {
4763 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4764 				char *addr = state->hs_msix_tbl_addr + i + j;
4765 				data32 = *(uint32_t *)(uintptr_t)
4766 				    (state->hs_msix_tbl_entries + i + j);
4767 				ddi_put32(msix_tbl_hdl,
4768 				    (uint32_t *)(uintptr_t)addr, data32);
4769 			}
4770 		}
4771 	}
4772 
4773 	/*
4774 	 * PCI Express Capability - we saved during capability list, and
4775 	 * we'll restore them here.
4776 	 */
4777 	offset = state->hs_pci_cap_offset;
4778 	data32 = state->hs_pci_cap_devctl;
4779 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
4780 	data32 = state->hs_pci_cap_lnkctl;
4781 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
4782 
4783 	/* restore the command register */
4784 	pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
4785 
4786 	return (DDI_SUCCESS);
4787 }
4788