xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon.c (revision 9e39c5ba00a55fa05777cc94b148296af305e135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon.c
29  *    Hermon (InfiniBand) HCA Driver attach/detach Routines
30  *
31  *    Implements all the routines necessary for the attach, setup,
32  *    initialization (and subsequent possible teardown and detach) of the
33  *    Hermon InfiniBand HCA driver.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/file.h>
38 #include <sys/open.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/modctl.h>
43 #include <sys/stat.h>
44 #include <sys/pci.h>
45 #include <sys/pci_cap.h>
46 #include <sys/bitmap.h>
47 #include <sys/policy.h>
48 
49 #include <sys/ib/adapters/hermon/hermon.h>
50 
51 /* The following works around a problem in pre-2_7_000 firmware. */
52 #define	HERMON_FW_WORKAROUND
53 
54 int hermon_verbose = 0;
55 
56 /* Hermon HCA State Pointer */
57 void *hermon_statep;
58 
59 int	debug_vpd = 0;
60 
61 
62 /*
63  * The Hermon "userland resource database" is common to instances of the
64  * Hermon HCA driver.  This structure "hermon_userland_rsrc_db" contains all
65  * the necessary information to maintain it.
66  */
67 hermon_umap_db_t hermon_userland_rsrc_db;
68 
69 static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
70 static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
71 static int hermon_open(dev_t *, int, int, cred_t *);
72 static int hermon_close(dev_t, int, int, cred_t *);
73 static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
74 
75 static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
76     int instance);
77 static void hermon_drv_fini(hermon_state_t *state);
78 static void hermon_drv_fini2(hermon_state_t *state);
79 static int hermon_isr_init(hermon_state_t *state);
80 static void hermon_isr_fini(hermon_state_t *state);
81 
82 static int hermon_hw_init(hermon_state_t *state);
83 
84 static void hermon_hw_fini(hermon_state_t *state,
85     hermon_drv_cleanup_level_t cleanup);
86 static int hermon_soft_state_init(hermon_state_t *state);
87 static void hermon_soft_state_fini(hermon_state_t *state);
88 static int hermon_icm_config_setup(hermon_state_t *state,
89     hermon_hw_initqueryhca_t *inithca);
90 static void hermon_icm_tables_init(hermon_state_t *state);
91 static void hermon_icm_tables_fini(hermon_state_t *state);
92 static int hermon_icm_dma_init(hermon_state_t *state);
93 static void hermon_icm_dma_fini(hermon_state_t *state);
94 static void hermon_inithca_set(hermon_state_t *state,
95     hermon_hw_initqueryhca_t *inithca);
96 static int hermon_hca_port_init(hermon_state_t *state);
97 static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
98 static int hermon_internal_uarpg_init(hermon_state_t *state);
99 static void hermon_internal_uarpg_fini(hermon_state_t *state);
100 static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
101 static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
102 static int hermon_sw_reset(hermon_state_t *state);
103 static int hermon_mcg_init(hermon_state_t *state);
104 static void hermon_mcg_fini(hermon_state_t *state);
105 static int hermon_fw_version_check(hermon_state_t *state);
106 static void hermon_device_info_report(hermon_state_t *state);
107 static int hermon_pci_capability_list(hermon_state_t *state,
108     ddi_acc_handle_t hdl);
109 static void hermon_pci_capability_vpd(hermon_state_t *state,
110     ddi_acc_handle_t hdl, uint_t offset);
111 static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
112     uint32_t addr, uint32_t *data);
113 static int hermon_intr_or_msi_init(hermon_state_t *state);
114 static int hermon_add_intrs(hermon_state_t *state, int intr_type);
115 static int hermon_intr_or_msi_fini(hermon_state_t *state);
116 void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
117     uint_t offset);
118 
119 static uint64_t hermon_size_icm(hermon_state_t *state);
120 
121 /* X86 fastreboot support */
122 static ushort_t get_msix_ctrl(dev_info_t *);
123 static size_t get_msix_tbl_size(dev_info_t *);
124 static size_t get_msix_pba_size(dev_info_t *);
125 static void hermon_set_msix_info(hermon_state_t *);
126 static int hermon_intr_disable(hermon_state_t *);
127 static int hermon_quiesce(dev_info_t *);
128 
129 
130 /* Character/Block Operations */
131 static struct cb_ops hermon_cb_ops = {
132 	hermon_open,		/* open */
133 	hermon_close,		/* close */
134 	nodev,			/* strategy (block) */
135 	nodev,			/* print (block) */
136 	nodev,			/* dump (block) */
137 	nodev,			/* read */
138 	nodev,			/* write */
139 	hermon_ioctl,		/* ioctl */
140 	hermon_devmap,		/* devmap */
141 	NULL,			/* mmap */
142 	nodev,			/* segmap */
143 	nochpoll,		/* chpoll */
144 	ddi_prop_op,		/* prop_op */
145 	NULL,			/* streams */
146 	D_NEW | D_MP |
147 	D_64BIT | /* D_HOTPLUG | */
148 	D_DEVMAP,		/* flags */
149 	CB_REV			/* rev */
150 };
151 
152 /* Driver Operations */
153 static struct dev_ops hermon_ops = {
154 	DEVO_REV,		/* struct rev */
155 	0,			/* refcnt */
156 	hermon_getinfo,		/* getinfo */
157 	nulldev,		/* identify */
158 	nulldev,		/* probe */
159 	hermon_attach,		/* attach */
160 	hermon_detach,		/* detach */
161 	nodev,			/* reset */
162 	&hermon_cb_ops,		/* cb_ops */
163 	NULL,			/* bus_ops */
164 	nodev,			/* power */
165 	hermon_quiesce,		/* devo_quiesce */
166 };
167 
168 /* Module Driver Info */
169 static struct modldrv hermon_modldrv = {
170 	&mod_driverops,
171 	"ConnectX IB Driver",
172 	&hermon_ops
173 };
174 
175 /* Module Linkage */
176 static struct modlinkage hermon_modlinkage = {
177 	MODREV_1,
178 	&hermon_modldrv,
179 	NULL
180 };
181 
182 /*
183  * This extern refers to the ibc_operations_t function vector that is defined
184  * in the hermon_ci.c file.
185  */
186 extern ibc_operations_t	hermon_ibc_ops;
187 
188 /*
189  * _init()
190  */
191 int
192 _init()
193 {
194 	int	status;
195 
196 	status = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
197 	    (size_t)HERMON_INITIAL_STATES);
198 	if (status != 0) {
199 		return (status);
200 	}
201 
202 	status = ibc_init(&hermon_modlinkage);
203 	if (status != 0) {
204 		ddi_soft_state_fini(&hermon_statep);
205 		return (status);
206 	}
207 
208 	status = mod_install(&hermon_modlinkage);
209 	if (status != 0) {
210 		ibc_fini(&hermon_modlinkage);
211 		ddi_soft_state_fini(&hermon_statep);
212 		return (status);
213 	}
214 
215 	/* Initialize the Hermon "userland resources database" */
216 	hermon_umap_db_init();
217 
218 	return (status);
219 }
220 
221 
222 /*
223  * _info()
224  */
225 int
226 _info(struct modinfo *modinfop)
227 {
228 	int	status;
229 
230 	status = mod_info(&hermon_modlinkage, modinfop);
231 	return (status);
232 }
233 
234 
235 /*
236  * _fini()
237  */
238 int
239 _fini()
240 {
241 	int	status;
242 
243 	status = mod_remove(&hermon_modlinkage);
244 	if (status != 0) {
245 		return (status);
246 	}
247 
248 	/* Destroy the Hermon "userland resources database" */
249 	hermon_umap_db_fini();
250 
251 	ibc_fini(&hermon_modlinkage);
252 	ddi_soft_state_fini(&hermon_statep);
253 
254 	return (status);
255 }
256 
257 
258 /*
259  * hermon_getinfo()
260  */
261 /* ARGSUSED */
262 static int
263 hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
264 {
265 	dev_t		dev;
266 	hermon_state_t 	*state;
267 	minor_t		instance;
268 
269 	switch (cmd) {
270 	case DDI_INFO_DEVT2DEVINFO:
271 		dev = (dev_t)arg;
272 		instance = HERMON_DEV_INSTANCE(dev);
273 		state = ddi_get_soft_state(hermon_statep, instance);
274 		if (state == NULL) {
275 			return (DDI_FAILURE);
276 		}
277 		*result = (void *)state->hs_dip;
278 		return (DDI_SUCCESS);
279 
280 	case DDI_INFO_DEVT2INSTANCE:
281 		dev = (dev_t)arg;
282 		instance = HERMON_DEV_INSTANCE(dev);
283 		*result = (void *)(uintptr_t)instance;
284 		return (DDI_SUCCESS);
285 
286 	default:
287 		break;
288 	}
289 
290 	return (DDI_FAILURE);
291 }
292 
293 
294 /*
295  * hermon_open()
296  */
297 /* ARGSUSED */
298 static int
299 hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
300 {
301 	hermon_state_t		*state;
302 	hermon_rsrc_t 		*rsrcp;
303 	hermon_umap_db_entry_t	*umapdb, *umapdb2;
304 	minor_t			instance;
305 	uint64_t		key, value;
306 	uint_t			hr_indx;
307 	dev_t			dev;
308 	int			status;
309 
310 	instance = HERMON_DEV_INSTANCE(*devp);
311 	state = ddi_get_soft_state(hermon_statep, instance);
312 	if (state == NULL) {
313 		return (ENXIO);
314 	}
315 
316 	/*
317 	 * Only allow driver to be opened for character access, and verify
318 	 * whether exclusive access is allowed.
319 	 */
320 	if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
321 	    secpolicy_excl_open(credp) != 0)) {
322 		return (EINVAL);
323 	}
324 
325 	/*
326 	 * Search for the current process PID in the "userland resources
327 	 * database".  If it is not found, then attempt to allocate a UAR
328 	 * page and add the ("key", "value") pair to the database.
329 	 * Note:  As a last step we always return a devp appropriate for
330 	 * the open.  Either we return a new minor number (based on the
331 	 * instance and the UAR page index) or we return the current minor
332 	 * number for the given client process.
333 	 *
334 	 * We also add an entry to the database to allow for lookup from
335 	 * "dev_t" to the current process PID.  This is necessary because,
336 	 * under certain circumstance, the process PID that calls the Hermon
337 	 * close() entry point may not be the same as the one who called
338 	 * open().  Specifically, this can happen if a child process calls
339 	 * the Hermon's open() entry point, gets a UAR page, maps it out (using
340 	 * mmap()), and then exits without calling munmap().  Because mmap()
341 	 * adds a reference to the file descriptor, at the exit of the child
342 	 * process the file descriptor is "inherited" by the parent (and will
343 	 * be close()'d by the parent's PID only when it exits).
344 	 *
345 	 * Note: We use the hermon_umap_db_find_nolock() and
346 	 * hermon_umap_db_add_nolock() database access routines below (with
347 	 * an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
348 	 * to ensure that the multiple accesses (in this case searching for,
349 	 * and then adding _two_ database entries) can be done atomically.
350 	 */
351 	key = ddi_get_pid();
352 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
353 	status = hermon_umap_db_find_nolock(instance, key,
354 	    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
355 	if (status != DDI_SUCCESS) {
356 		/*
357 		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
358 		 * But we still need some rsrcp value, and a mostly unique
359 		 * hr_indx value.  So we set rsrcp to NULL for maintenance
360 		 * mode, and use a rolling count for hr_indx.  The field
361 		 * 'hs_open_hr_indx' is used only in this maintenance mode
362 		 * condition.
363 		 *
364 		 * Otherwise, if we are in operational mode then we allocate
365 		 * the UAR page as normal, and use the rsrcp value and tr_indx
366 		 * value from that allocation.
367 		 */
368 		if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
369 			rsrcp = NULL;
370 			hr_indx = state->hs_open_ar_indx++;
371 		} else {
372 			/* Allocate a new UAR page for this process */
373 			status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
374 			    HERMON_NOSLEEP, &rsrcp);
375 			if (status != DDI_SUCCESS) {
376 				mutex_exit(
377 				    &hermon_userland_rsrc_db.hdl_umapdb_lock);
378 				return (EAGAIN);
379 			}
380 
381 			hr_indx = rsrcp->hr_indx;
382 		}
383 
384 		/*
385 		 * Allocate an entry to track the UAR page resource in the
386 		 * "userland resources database".
387 		 */
388 		umapdb = hermon_umap_db_alloc(instance, key,
389 		    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
390 		if (umapdb == NULL) {
391 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
392 			/* If in "maintenance mode", don't free the rsrc */
393 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
394 				hermon_rsrc_free(state, &rsrcp);
395 			}
396 			return (EAGAIN);
397 		}
398 
399 		/*
400 		 * Create a new device number.  Minor number is a function of
401 		 * the UAR page index (15 bits) and the device instance number
402 		 * (3 bits).
403 		 */
404 		dev = makedevice(getmajor(*devp), (hr_indx <<
405 		    HERMON_MINORNUM_SHIFT) | instance);
406 
407 		/*
408 		 * Allocate another entry in the "userland resources database"
409 		 * to track the association of the device number (above) to
410 		 * the current process ID (in "key").
411 		 */
412 		umapdb2 = hermon_umap_db_alloc(instance, dev,
413 		    MLNX_UMAP_PID_RSRC, (uint64_t)key);
414 		if (umapdb2 == NULL) {
415 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
416 			hermon_umap_db_free(umapdb);
417 			/* If in "maintenance mode", don't free the rsrc */
418 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
419 				hermon_rsrc_free(state, &rsrcp);
420 			}
421 			return (EAGAIN);
422 		}
423 
424 		/* Add the entries to the database */
425 		hermon_umap_db_add_nolock(umapdb);
426 		hermon_umap_db_add_nolock(umapdb2);
427 
428 	} else {
429 		/*
430 		 * Return the same device number as on the original open()
431 		 * call.  This was calculated as a function of the UAR page
432 		 * index (top 16 bits) and the device instance number
433 		 */
434 		rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
435 		dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
436 		    HERMON_MINORNUM_SHIFT) | instance);
437 	}
438 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
439 
440 	*devp = dev;
441 
442 	return (0);
443 }
444 
445 
446 /*
447  * hermon_close()
448  */
449 /* ARGSUSED */
450 static int
451 hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
452 {
453 	hermon_state_t		*state;
454 	hermon_rsrc_t		*rsrcp;
455 	hermon_umap_db_entry_t	*umapdb;
456 	hermon_umap_db_priv_t	*priv;
457 	minor_t			instance;
458 	uint64_t		key, value;
459 	int			status, reset_status = 0;
460 
461 	instance = HERMON_DEV_INSTANCE(dev);
462 	state = ddi_get_soft_state(hermon_statep, instance);
463 	if (state == NULL) {
464 		return (ENXIO);
465 	}
466 
467 	/*
468 	 * Search for "dev_t" in the "userland resources database".  As
469 	 * explained above in hermon_open(), we can't depend on using the
470 	 * current process ID here to do the lookup because the process
471 	 * that ultimately closes may not be the same one who opened
472 	 * (because of inheritance).
473 	 * So we lookup the "dev_t" (which points to the PID of the process
474 	 * that opened), and we remove the entry from the database (and free
475 	 * it up).  Then we do another query based on the PID value.  And when
476 	 * we find that database entry, we free it up too and then free the
477 	 * Hermon UAR page resource.
478 	 *
479 	 * Note: We use the hermon_umap_db_find_nolock() database access
480 	 * routine below (with an explicit mutex_enter of the database lock)
481 	 * to ensure that the multiple accesses (which attempt to remove the
482 	 * two database entries) can be done atomically.
483 	 *
484 	 * This works the same in both maintenance mode and HCA mode, except
485 	 * for the call to hermon_rsrc_free().  In the case of maintenance mode,
486 	 * this call is not needed, as it was not allocated in hermon_open()
487 	 * above.
488 	 */
489 	key = dev;
490 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
491 	status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
492 	    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
493 	if (status == DDI_SUCCESS) {
494 		/*
495 		 * If the "hdb_priv" field is non-NULL, it indicates that
496 		 * some "on close" handling is still necessary.  Call
497 		 * hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
498 		 * to invoke all the registered callbacks).  Then free up
499 		 * the resources associated with "hdb_priv" and continue
500 		 * closing.
501 		 */
502 		priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
503 		if (priv != NULL) {
504 			reset_status = hermon_umap_db_handle_onclose_cb(priv);
505 			kmem_free(priv, sizeof (hermon_umap_db_priv_t));
506 			umapdb->hdbe_common.hdb_priv = (void *)NULL;
507 		}
508 
509 		hermon_umap_db_free(umapdb);
510 
511 		/*
512 		 * Now do another lookup using PID as the key (copy it from
513 		 * "value").  When this lookup is complete, the "value" field
514 		 * will contain the hermon_rsrc_t pointer for the UAR page
515 		 * resource.
516 		 */
517 		key = value;
518 		status = hermon_umap_db_find_nolock(instance, key,
519 		    MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
520 		    &umapdb);
521 		if (status == DDI_SUCCESS) {
522 			hermon_umap_db_free(umapdb);
523 			/* If in "maintenance mode", don't free the rsrc */
524 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
525 				rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
526 				hermon_rsrc_free(state, &rsrcp);
527 			}
528 		}
529 	}
530 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
531 	return (reset_status);
532 }
533 
534 
535 /*
536  * hermon_attach()
537  *    Context: Only called from attach() path context
538  */
539 static int
540 hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
541 {
542 	hermon_state_t	*state;
543 	ibc_clnt_hdl_t	tmp_ibtfpriv;
544 	ibc_status_t	ibc_status;
545 	int		instance;
546 	int		status;
547 
548 #ifdef __lock_lint
549 	(void) hermon_quiesce(dip);
550 #endif
551 
552 	switch (cmd) {
553 	case DDI_ATTACH:
554 		instance = ddi_get_instance(dip);
555 		status = ddi_soft_state_zalloc(hermon_statep, instance);
556 		if (status != DDI_SUCCESS) {
557 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
558 			    "attach_ssz_fail", instance);
559 			goto fail_attach_nomsg;
560 
561 		}
562 		state = ddi_get_soft_state(hermon_statep, instance);
563 		if (state == NULL) {
564 			ddi_soft_state_free(hermon_statep, instance);
565 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
566 			    "attach_gss_fail", instance);
567 			goto fail_attach_nomsg;
568 		}
569 
570 		/* clear the attach error buffer */
571 		HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);
572 
573 		/* Save away devinfo and instance before hermon_fm_init() */
574 		state->hs_dip = dip;
575 		state->hs_instance = instance;
576 
577 		hermon_fm_init(state);
578 
579 		/*
580 		 * Initialize Hermon driver and hardware.
581 		 *
582 		 * Note: If this initialization fails we may still wish to
583 		 * create a device node and remain operational so that Hermon
584 		 * firmware can be updated/flashed (i.e. "maintenance mode").
585 		 * If this is the case, then "hs_operational_mode" will be
586 		 * equal to HERMON_MAINTENANCE_MODE.  We will not attempt to
587 		 * attach to the IBTF or register with the IBMF (i.e. no
588 		 * InfiniBand interfaces will be enabled).
589 		 */
590 		status = hermon_drv_init(state, dip, instance);
591 		if ((status != DDI_SUCCESS) &&
592 		    (HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
593 			goto fail_attach;
594 		}
595 
596 		/*
597 		 * Change the Hermon FM mode
598 		 */
599 		if ((hermon_get_state(state) & HCA_PIO_FM) &&
600 		    HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
601 			/*
602 			 * Now we wait for 50ms to give an opportunity
603 			 * to Solaris FMA so that HW errors can be notified.
604 			 * Then check if there are HW errors or not. If
605 			 * a HW error is detected, the Hermon attachment
606 			 * must be failed.
607 			 */
608 			delay(drv_usectohz(50000));
609 			if (hermon_init_failure(state)) {
610 				hermon_drv_fini(state);
611 				HERMON_WARNING(state, "unable to "
612 				    "attach Hermon due to a HW error");
613 				HERMON_ATTACH_MSG(state->hs_attach_buf,
614 				    "hermon_attach_failure");
615 				goto fail_attach;
616 			}
617 
618 			/*
619 			 * There seems no HW errors during the attachment,
620 			 * so let's change the Hermon FM state to the
621 			 * ereport only mode.
622 			 */
623 			if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
624 				/* unwind the resources */
625 				hermon_drv_fini(state);
626 				HERMON_ATTACH_MSG(state->hs_attach_buf,
627 				    "hermon_attach_failure");
628 				goto fail_attach;
629 			}
630 		}
631 
632 		/* Create the minor node for device */
633 		status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
634 		    DDI_PSEUDO, 0);
635 		if (status != DDI_SUCCESS) {
636 			hermon_drv_fini(state);
637 			HERMON_ATTACH_MSG(state->hs_attach_buf,
638 			    "attach_create_mn_fail");
639 			goto fail_attach;
640 		}
641 
642 		/*
643 		 * If we are in "maintenance mode", then we don't want to
644 		 * register with the IBTF.  All InfiniBand interfaces are
645 		 * uninitialized, and the device is only capable of handling
646 		 * requests to update/flash firmware (or test/debug requests).
647 		 */
648 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
649 			cmn_err(CE_NOTE, "!Hermon is operational\n");
650 
651 			/* Attach to InfiniBand Transport Framework (IBTF) */
652 			ibc_status = ibc_attach(&tmp_ibtfpriv,
653 			    &state->hs_ibtfinfo);
654 			if (ibc_status != IBC_SUCCESS) {
655 				cmn_err(CE_CONT, "hermon_attach: ibc_attach "
656 				    "failed\n");
657 				ddi_remove_minor_node(dip, "devctl");
658 				hermon_drv_fini(state);
659 				HERMON_ATTACH_MSG(state->hs_attach_buf,
660 				    "attach_ibcattach_fail");
661 				goto fail_attach;
662 			}
663 
664 			/*
665 			 * Now that we've successfully attached to the IBTF,
666 			 * we enable all appropriate asynch and CQ events to
667 			 * be forwarded to the IBTF.
668 			 */
669 			HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
670 
671 			ibc_post_attach(state->hs_ibtfpriv);
672 
673 			/* Register agents with IB Mgmt Framework (IBMF) */
674 			status = hermon_agent_handlers_init(state);
675 			if (status != DDI_SUCCESS) {
676 				(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
677 				HERMON_QUIESCE_IBTF_CALLB(state);
678 				if (state->hs_in_evcallb != 0) {
679 					HERMON_WARNING(state, "unable to "
680 					    "quiesce Hermon IBTF callbacks");
681 				}
682 				ibc_detach(tmp_ibtfpriv);
683 				ddi_remove_minor_node(dip, "devctl");
684 				hermon_drv_fini(state);
685 				HERMON_ATTACH_MSG(state->hs_attach_buf,
686 				    "attach_agentinit_fail");
687 				goto fail_attach;
688 			}
689 		}
690 
691 		/* Report attach in maintenance mode, if appropriate */
692 		if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
693 			cmn_err(CE_NOTE, "hermon%d: driver attached "
694 			    "(for maintenance mode only)", state->hs_instance);
695 			hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
696 		}
697 
698 		/* Report that driver was loaded */
699 		ddi_report_dev(dip);
700 
701 		/* Send device information to log file */
702 		hermon_device_info_report(state);
703 
704 		/* DEBUG PRINT */
705 		cmn_err(CE_CONT, "!Hermon attach complete\n");
706 		return (DDI_SUCCESS);
707 
708 	case DDI_RESUME:
709 		/* Add code here for DDI_RESUME XXX */
710 		return (DDI_FAILURE);
711 
712 	default:
713 		cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
714 		break;
715 	}
716 
717 fail_attach:
718 	cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
719 	    state->hs_attach_buf);
720 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
721 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
722 	}
723 	hermon_fm_fini(state);
724 	ddi_soft_state_free(hermon_statep, instance);
725 
726 fail_attach_nomsg:
727 	return (DDI_FAILURE);
728 }
729 
730 
731 /*
732  * hermon_detach()
733  *    Context: Only called from detach() path context
734  */
735 static int
736 hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
737 {
738 	hermon_state_t	*state;
739 	ibc_clnt_hdl_t	tmp_ibtfpriv;
740 	ibc_status_t	ibc_status;
741 	int		instance, status;
742 
743 	instance = ddi_get_instance(dip);
744 	state = ddi_get_soft_state(hermon_statep, instance);
745 	if (state == NULL) {
746 		return (DDI_FAILURE);
747 	}
748 
749 	switch (cmd) {
750 	case DDI_DETACH:
751 		/*
752 		 * If we are in "maintenance mode", then we do not want to
753 		 * do teardown for any of the InfiniBand interfaces.
754 		 * Specifically, this means not detaching from IBTF (we never
755 		 * attached to begin with) and not deregistering from IBMF.
756 		 */
757 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
758 			/* Unregister agents from IB Mgmt Framework (IBMF) */
759 			status = hermon_agent_handlers_fini(state);
760 			if (status != DDI_SUCCESS) {
761 				return (DDI_FAILURE);
762 			}
763 
764 			/*
765 			 * Attempt the "pre-detach" from InfiniBand Transport
766 			 * Framework (IBTF).  At this point the IBTF is still
767 			 * capable of handling incoming asynch and completion
768 			 * events.  This "pre-detach" is primarily a mechanism
769 			 * to notify the appropriate IBTF clients that the
770 			 * HCA is being removed/offlined.
771 			 */
772 			ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
773 			if (ibc_status != IBC_SUCCESS) {
774 				status = hermon_agent_handlers_init(state);
775 				if (status != DDI_SUCCESS) {
776 					HERMON_WARNING(state, "failed to "
777 					    "restart Hermon agents");
778 				}
779 				return (DDI_FAILURE);
780 			}
781 
782 			/*
783 			 * Before we can fully detach from the IBTF we need to
784 			 * ensure that we have handled all outstanding event
785 			 * callbacks.  This is accomplished by quiescing the
786 			 * event callback mechanism.  Note: if we are unable
787 			 * to successfully quiesce the callbacks, then this is
788 			 * an indication that something has probably gone
789 			 * seriously wrong.  We print out a warning, but
790 			 * continue.
791 			 */
792 			tmp_ibtfpriv = state->hs_ibtfpriv;
793 			HERMON_QUIESCE_IBTF_CALLB(state);
794 			if (state->hs_in_evcallb != 0) {
795 				HERMON_WARNING(state, "unable to quiesce "
796 				    "Hermon IBTF callbacks");
797 			}
798 
799 			/* Complete the detach from the IBTF */
800 			ibc_detach(tmp_ibtfpriv);
801 		}
802 
803 		/* Remove the minor node for device */
804 		ddi_remove_minor_node(dip, "devctl");
805 
806 		/*
807 		 * Only call hermon_drv_fini() if we are in Hermon HCA mode.
808 		 * (Because if we are in "maintenance mode", then we never
809 		 * successfully finished init.)  Only report successful
810 		 * detach for normal HCA mode.
811 		 */
812 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
813 			/* Cleanup driver resources and shutdown hardware */
814 			hermon_drv_fini(state);
815 			cmn_err(CE_CONT, "!Hermon driver successfully "
816 			    "detached\n");
817 		}
818 
819 		hermon_drv_fini2(state);
820 		hermon_fm_fini(state);
821 		ddi_soft_state_free(hermon_statep, instance);
822 
823 		return (DDI_SUCCESS);
824 
825 	case DDI_SUSPEND:
826 		/* Add code here for DDI_SUSPEND XXX */
827 		return (DDI_FAILURE);
828 
829 	default:
830 		cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
831 		break;
832 	}
833 
834 	return (DDI_FAILURE);
835 }
836 
837 /*
838  * hermon_dma_attr_init()
839  *    Context: Can be called from interrupt or base context.
840  */
841 
842 /* ARGSUSED */
843 void
844 hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
845 {
846 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
847 
848 	dma_attr->dma_attr_version	= DMA_ATTR_V0;
849 	dma_attr->dma_attr_addr_lo	= 0;
850 	dma_attr->dma_attr_addr_hi	= 0xFFFFFFFFFFFFFFFFull;
851 	dma_attr->dma_attr_count_max	= 0xFFFFFFFFFFFFFFFFull;
852 	dma_attr->dma_attr_align	= HERMON_PAGESIZE;  /* default 4K */
853 	dma_attr->dma_attr_burstsizes	= 0x3FF;
854 	dma_attr->dma_attr_minxfer	= 1;
855 	dma_attr->dma_attr_maxxfer	= 0xFFFFFFFFFFFFFFFFull;
856 	dma_attr->dma_attr_seg		= 0xFFFFFFFFFFFFFFFFull;
857 	dma_attr->dma_attr_sgllen	= 0x7FFFFFFF;
858 	dma_attr->dma_attr_granular	= 1;
859 	dma_attr->dma_attr_flags	= 0;
860 }
861 
862 /*
863  * hermon_dma_alloc()
864  *    Context: Can be called from base context.
865  */
866 int
867 hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
868     uint16_t opcode)
869 {
870 	ddi_dma_handle_t	dma_hdl;
871 	ddi_dma_attr_t		dma_attr;
872 	ddi_acc_handle_t	acc_hdl;
873 	ddi_dma_cookie_t	cookie;
874 	uint64_t		kaddr;
875 	uint64_t		real_len;
876 	uint_t			ccount;
877 	int			status;
878 
879 	hermon_dma_attr_init(state, &dma_attr);
880 
881 	/* Allocate a DMA handle */
882 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
883 	    NULL, &dma_hdl);
884 	if (status != DDI_SUCCESS) {
885 		IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
886 		cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
887 		return (DDI_FAILURE);
888 	}
889 
890 	/* Allocate DMA memory */
891 	status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
892 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
893 	    (caddr_t *)&kaddr, (size_t *)&real_len, &acc_hdl);
894 	if (status != DDI_SUCCESS) {
895 		ddi_dma_free_handle(&dma_hdl);
896 		IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
897 		cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
898 		return (DDI_FAILURE);
899 	}
900 	bzero((caddr_t)(uintptr_t)kaddr, real_len);
901 
902 	/* Bind the memory to the handle */
903 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
904 	    (caddr_t)(uintptr_t)kaddr, (size_t)real_len, DDI_DMA_RDWR |
905 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
906 	if (status != DDI_SUCCESS) {
907 		ddi_dma_mem_free(&acc_hdl);
908 		ddi_dma_free_handle(&dma_hdl);
909 		IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
910 		cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
911 		return (DDI_FAILURE);
912 	}
913 
914 	/* Package the hermon_dma_info contents and return */
915 	dma_info->vaddr   = kaddr;
916 	dma_info->dma_hdl = dma_hdl;
917 	dma_info->acc_hdl = acc_hdl;
918 
919 	/* Pass the mapping information to the firmware */
920 	status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
921 	if (status != DDI_SUCCESS) {
922 		char *s;
923 		hermon_dma_free(dma_info);
924 		switch (opcode) {
925 		case MAP_ICM:
926 			s = "MAP_ICM";
927 			break;
928 		case MAP_FA:
929 			s = "MAP_FA";
930 			break;
931 		case MAP_ICM_AUX:
932 			s = "MAP_ICM_AUX";
933 			break;
934 		default:
935 			s = "UNKNOWN";
936 		}
937 		cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
938 		    s, status);
939 		return (DDI_FAILURE);
940 	}
941 
942 	return (DDI_SUCCESS);
943 }
944 
945 /*
946  * hermon_dma_free()
947  *    Context: Can be called from base context.
948  */
949 void
950 hermon_dma_free(hermon_dma_info_t *info)
951 {
952 	/* Unbind the handles and free the memory */
953 	(void) ddi_dma_unbind_handle(info->dma_hdl);
954 	ddi_dma_mem_free(&info->acc_hdl);
955 	ddi_dma_free_handle(&info->dma_hdl);
956 }
957 
/*
 * Shorthand for the recursive calls that hermon_icm_alloc() and
 * hermon_icm_free() make on a companion resource type.  These macros are
 * valid for use only in those two routines: they expand to references to
 * the enclosing function's "state", "index1" and "index2".
 */
#define	HERMON_ICM_ALLOC(rsrc)	\
	hermon_icm_alloc(state, (rsrc), index1, index2)
#define	HERMON_ICM_FREE(rsrc)	\
	hermon_icm_free(state, (rsrc), index1, index2)
963 
964 /*
965  * hermon_icm_alloc()
966  *    Context: Can be called from base context.
967  *
968  * Only one thread can be here for a given hermon_rsrc_type_t "type".
969  */
970 int
971 hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
972     uint32_t index1, uint32_t index2)
973 {
974 	hermon_icm_table_t	*icm;
975 	hermon_dma_info_t	*dma_info;
976 	uint8_t			*bitmap;
977 	int			status;
978 
979 	if (hermon_verbose) {
980 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
981 		    "index1/2 (0x%x/0x%x)", type, index1, index2);
982 	}
983 
984 	icm = &state->hs_icm[type];
985 
986 	switch (type) {
987 	case HERMON_QPC:
988 		status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
989 		if (status != DDI_SUCCESS) {
990 			return (status);
991 		}
992 		status = HERMON_ICM_ALLOC(HERMON_RDB);
993 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
994 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
995 			return (status);
996 		}
997 		status = HERMON_ICM_ALLOC(HERMON_ALTC);
998 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
999 			HERMON_ICM_FREE(HERMON_RDB);
1000 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1001 			return (status);
1002 		}
1003 		status = HERMON_ICM_ALLOC(HERMON_AUXC);
1004 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1005 			HERMON_ICM_FREE(HERMON_ALTC);
1006 			HERMON_ICM_FREE(HERMON_RDB);
1007 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1008 			return (status);
1009 		}
1010 		break;
1011 	case HERMON_SRQC:
1012 		status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
1013 		if (status != DDI_SUCCESS) {
1014 			return (status);
1015 		}
1016 		break;
1017 	case HERMON_CQC:
1018 		status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
1019 		if (status != DDI_SUCCESS) {
1020 			return (status);
1021 		}
1022 		break;
1023 	case HERMON_EQC:
1024 		status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
1025 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1026 			return (status);
1027 		}
1028 		break;
1029 	}
1030 
1031 	/* ensure existence of bitmap and dmainfo, sets "dma_info" */
1032 	hermon_bitmap(bitmap, dma_info, icm, index1);
1033 
1034 	/* Set up the DMA handle for allocation and mapping */
1035 	dma_info = icm->icm_dma[index1] + index2;
1036 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
1037 	dma_info->length  = icm->span << icm->log_object_size;
1038 	dma_info->icmaddr = icm->icm_baseaddr +
1039 	    (((index1 << icm->split_shift) +
1040 	    (index2 << icm->span_shift)) << icm->log_object_size);
1041 
1042 	if (hermon_verbose) {
1043 		IBTF_DPRINTF_L2("hermon", "alloc DMA: "
1044 		    "rsrc (0x%x) index (%x, %x) "
1045 		    "icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
1046 		    (longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
1047 	}
1048 
1049 	/* Allocate and map memory for this span */
1050 	status = hermon_dma_alloc(state, dma_info, MAP_ICM);
1051 	if (status != DDI_SUCCESS) {
1052 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
1053 		    "allocation failed, status 0x%x", status);
1054 		switch (type) {
1055 		case HERMON_QPC:
1056 			HERMON_ICM_FREE(HERMON_AUXC);
1057 			HERMON_ICM_FREE(HERMON_ALTC);
1058 			HERMON_ICM_FREE(HERMON_RDB);
1059 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1060 			break;
1061 		case HERMON_SRQC:
1062 			HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1063 			break;
1064 		case HERMON_CQC:
1065 			HERMON_ICM_FREE(HERMON_CMPT_CQC);
1066 			break;
1067 		case HERMON_EQC:
1068 			HERMON_ICM_FREE(HERMON_CMPT_EQC);
1069 			break;
1070 		}
1071 
1072 		return (DDI_FAILURE);
1073 	}
1074 	if (hermon_verbose) {
1075 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
1076 		    "rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
1077 		    "icm_addr (0x%lx)", type, index1, index2, dma_info->length,
1078 		    dma_info->icmaddr);
1079 	}
1080 
1081 	/* Set the bit for this slot in the table bitmap */
1082 	HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);
1083 
1084 	return (DDI_SUCCESS);
1085 }
1086 
1087 /*
1088  * hermon_icm_free()
1089  *    Context: Can be called from base context.
1090  *
1091  * ICM resources have been successfully returned from hermon_icm_alloc().
1092  * Associated dma_info is no longer in use.  Free the ICM backing memory.
1093  */
1094 void
1095 hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
1096     uint32_t index1, uint32_t index2)
1097 {
1098 	hermon_icm_table_t	*icm;
1099 	hermon_dma_info_t	*dma_info;
1100 	int			status;
1101 
1102 	icm = &state->hs_icm[type];
1103 	ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
1104 
1105 	if (hermon_verbose) {
1106 		IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
1107 		    "index (0x%x, 0x%x)", type, index1, index2);
1108 	}
1109 
1110 	dma_info = icm->icm_dma[index1] + index2;
1111 
1112 	/* The following only happens if attach() is failing. */
1113 	if (dma_info == NULL)
1114 		return;
1115 
1116 	/* Unmap the ICM allocation, then free the backing DMA memory */
1117 	status = hermon_unmap_icm_cmd_post(state, dma_info);
1118 	if (status != DDI_SUCCESS) {
1119 		HERMON_WARNING(state, "UNMAP_ICM failure");
1120 	}
1121 	hermon_dma_free(dma_info);
1122 
1123 	/* Clear the bit in the ICM table bitmap */
1124 	HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);
1125 
1126 	switch (type) {
1127 	case HERMON_QPC:
1128 		HERMON_ICM_FREE(HERMON_AUXC);
1129 		HERMON_ICM_FREE(HERMON_ALTC);
1130 		HERMON_ICM_FREE(HERMON_RDB);
1131 		HERMON_ICM_FREE(HERMON_CMPT_QPC);
1132 		break;
1133 	case HERMON_SRQC:
1134 		HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1135 		break;
1136 	case HERMON_CQC:
1137 		HERMON_ICM_FREE(HERMON_CMPT_CQC);
1138 		break;
1139 	case HERMON_EQC:
1140 		HERMON_ICM_FREE(HERMON_CMPT_EQC);
1141 		break;
1142 
1143 	}
1144 }
1145 
1146 /*
1147  * hermon_drv_init()
1148  *    Context: Only called from attach() path context
1149  */
1150 /* ARGSUSED */
1151 static int
1152 hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
1153 {
1154 	int	status;
1155 
1156 	/*
1157 	 * Check and set the operational mode of the device. If the driver is
1158 	 * bound to the Hermon device in "maintenance mode", then this generally
1159 	 * means that either the device has been specifically jumpered to
1160 	 * start in this mode or the firmware boot process has failed to
1161 	 * successfully load either the primary or the secondary firmware
1162 	 * image.
1163 	 */
1164 	if (HERMON_IS_HCA_MODE(state->hs_dip)) {
1165 		state->hs_operational_mode = HERMON_HCA_MODE;
1166 		state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
1167 
1168 	} else if (HERMON_IS_MAINTENANCE_MODE(state->hs_dip)) {
1169 		HERMON_FMANOTE(state, HERMON_FMA_MAINT);
1170 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1171 		return (DDI_FAILURE);
1172 
1173 	} else {
1174 		state->hs_operational_mode = 0;	/* invalid operational mode */
1175 		HERMON_FMANOTE(state, HERMON_FMA_PCIID);
1176 		HERMON_WARNING(state, "unexpected device type detected");
1177 		return (DDI_FAILURE);
1178 	}
1179 
1180 	/*
1181 	 * Initialize the Hermon hardware.
1182 	 *
1183 	 * Note:  If this routine returns an error, it is often a reasonably
1184 	 * good indication that something Hermon firmware-related has caused
1185 	 * the failure or some HW related errors have caused the failure.
1186 	 * (also there are few possibilities that SW (e.g. SW resource
1187 	 * shortage) can cause the failure, but the majority case is due to
1188 	 * either a firmware related error or a HW related one) In order to
1189 	 * give the user an opportunity (if desired) to update or reflash
1190 	 * the Hermon firmware image, we set "hs_operational_mode" flag
1191 	 * (described above) to indicate that we wish to enter maintenance
1192 	 * mode in case of the firmware-related issue.
1193 	 */
1194 	status = hermon_hw_init(state);
1195 	if (status != DDI_SUCCESS) {
1196 		cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
1197 		    state->hs_attach_buf);
1198 		return (DDI_FAILURE);
1199 	}
1200 
1201 	/*
1202 	 * Now that the ISR has been setup, arm all the EQs for event
1203 	 * generation.
1204 	 */
1205 
1206 	status = hermon_eq_arm_all(state);
1207 	if (status != DDI_SUCCESS) {
1208 		cmn_err(CE_NOTE, "EQ Arm All failed\n");
1209 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1210 		return (DDI_FAILURE);
1211 	}
1212 
1213 	/* test interrupts and event queues */
1214 	status = hermon_nop_post(state, 0x0, 0x0);
1215 	if (status != DDI_SUCCESS) {
1216 		cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
1217 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1218 		return (DDI_FAILURE);
1219 	}
1220 
1221 	/* Initialize Hermon softstate */
1222 	status = hermon_soft_state_init(state);
1223 	if (status != DDI_SUCCESS) {
1224 		cmn_err(CE_NOTE, "Failed to init soft state\n");
1225 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1226 		return (DDI_FAILURE);
1227 	}
1228 
1229 	return (DDI_SUCCESS);
1230 }
1231 
1232 
1233 /*
1234  * hermon_drv_fini()
1235  *    Context: Only called from attach() and/or detach() path contexts
1236  */
1237 static void
1238 hermon_drv_fini(hermon_state_t *state)
1239 {
1240 	/* Cleanup Hermon softstate */
1241 	hermon_soft_state_fini(state);
1242 
1243 	/* Cleanup Hermon resources and shutdown hardware */
1244 	hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1245 }
1246 
1247 
1248 /*
1249  * hermon_drv_fini2()
1250  *    Context: Only called from or detach() path context
1251  */
1252 static void
1253 hermon_drv_fini2(hermon_state_t *state)
1254 {
1255 	if (state->hs_fm_poll_thread) {
1256 		ddi_periodic_delete(state->hs_fm_poll_thread);
1257 		state->hs_fm_poll_thread = NULL;
1258 	}
1259 	if (state->hs_fm_cmdhdl) {
1260 		hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
1261 		state->hs_fm_cmdhdl = NULL;
1262 	}
1263 	if (state->hs_reg_cmdhdl) {
1264 		ddi_regs_map_free(&state->hs_reg_cmdhdl);
1265 		state->hs_reg_cmdhdl = NULL;
1266 	}
1267 }
1268 
1269 
1270 /*
1271  * hermon_isr_init()
1272  *    Context: Only called from attach() path context
1273  */
1274 static int
1275 hermon_isr_init(hermon_state_t *state)
1276 {
1277 	int	status;
1278 	int	intr;
1279 
1280 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1281 
1282 		/*
1283 		 * Add a handler for the interrupt or MSI
1284 		 */
1285 		status = ddi_intr_add_handler(state->hs_intrmsi_hdl[intr],
1286 		    hermon_isr, (caddr_t)state, (void *)(uintptr_t)intr);
1287 		if (status  != DDI_SUCCESS) {
1288 			return (DDI_FAILURE);
1289 		}
1290 
1291 		/*
1292 		 * Enable the software interrupt.  Note: depending on the value
1293 		 * returned in the capability flag, we have to call either
1294 		 * ddi_intr_block_enable() or ddi_intr_enable().
1295 		 */
1296 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1297 			status = ddi_intr_block_enable(
1298 			    &state->hs_intrmsi_hdl[intr], 1);
1299 			if (status != DDI_SUCCESS) {
1300 				return (DDI_FAILURE);
1301 			}
1302 		} else {
1303 			status = ddi_intr_enable(state->hs_intrmsi_hdl[intr]);
1304 			if (status != DDI_SUCCESS) {
1305 				return (DDI_FAILURE);
1306 			}
1307 		}
1308 	}
1309 
1310 	/*
1311 	 * Now that the ISR has been enabled, defer arm_all  EQs for event
1312 	 * generation until later, in case MSIX is enabled
1313 	 */
1314 	return (DDI_SUCCESS);
1315 }
1316 
1317 
1318 /*
1319  * hermon_isr_fini()
1320  *    Context: Only called from attach() and/or detach() path contexts
1321  */
1322 static void
1323 hermon_isr_fini(hermon_state_t *state)
1324 {
1325 	int	intr;
1326 
1327 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1328 		/* Disable the software interrupt */
1329 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1330 			(void) ddi_intr_block_disable(
1331 			    &state->hs_intrmsi_hdl[intr], 1);
1332 		} else {
1333 			(void) ddi_intr_disable(state->hs_intrmsi_hdl[intr]);
1334 		}
1335 
1336 		/*
1337 		 * Remove the software handler for the interrupt or MSI
1338 		 */
1339 		(void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[intr]);
1340 	}
1341 }
1342 
1343 
1344 /*
1345  * Sum of ICM configured values:
1346  *     cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
1347  *
1348  */
1349 static uint64_t
1350 hermon_size_icm(hermon_state_t *state)
1351 {
1352 	hermon_hw_querydevlim_t	*devlim;
1353 	hermon_cfg_profile_t	*cfg;
1354 	uint64_t		num_cmpts, num_dmpts, num_mtts;
1355 	uint64_t		num_qpcs, num_srqc, num_rdbs;
1356 #ifndef HERMON_FW_WORKAROUND
1357 	uint64_t		num_auxc;
1358 #endif
1359 	uint64_t		num_cqcs, num_altc;
1360 	uint64_t		num_eqcs, num_mcgs;
1361 	uint64_t		size;
1362 
1363 	devlim = &state->hs_devlim;
1364 	cfg = state->hs_cfg_profile;
1365 	/* number of respective entries */
1366 	num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
1367 	num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
1368 	num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
1369 	num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
1370 	num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
1371 	num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
1372 	num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
1373 	num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1374 #ifndef HERMON_FW_WORKAROUND
1375 	num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1376 #endif
1377 	num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
1378 	num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;
1379 
1380 	size =
1381 	    num_cmpts 	* devlim->cmpt_entry_sz +
1382 	    num_dmpts	* devlim->dmpt_entry_sz +
1383 	    num_mtts	* devlim->mtt_entry_sz +
1384 	    num_qpcs	* devlim->qpc_entry_sz +
1385 	    num_srqc	* devlim->srq_entry_sz +
1386 	    num_rdbs	* devlim->rdmardc_entry_sz +
1387 	    num_cqcs	* devlim->cqc_entry_sz +
1388 	    num_altc	* devlim->altc_entry_sz +
1389 #ifdef HERMON_FW_WORKAROUND
1390 	    0x80000000ull +
1391 #else
1392 	    num_auxc	* devlim->aux_entry_sz	+
1393 #endif
1394 	    num_eqcs	* devlim->eqc_entry_sz +
1395 	    num_mcgs	* HERMON_MCGMEM_SZ(state);
1396 	return (size);
1397 }
1398 
1399 
1400 /*
1401  * hermon_hw_init()
1402  *    Context: Only called from attach() path context
1403  */
1404 static int
1405 hermon_hw_init(hermon_state_t *state)
1406 {
1407 	hermon_drv_cleanup_level_t	cleanup;
1408 	sm_nodeinfo_t			nodeinfo;
1409 	uint64_t			clr_intr_offset;
1410 	int				status;
1411 	uint32_t			fw_size;	/* in page */
1412 	uint64_t			offset;
1413 
1414 	/* This is where driver initialization begins */
1415 	cleanup = HERMON_DRV_CLEANUP_LEVEL0;
1416 
1417 	/* Setup device access attributes */
1418 	state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
1419 	state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1420 	state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1421 	state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;
1422 
1423 	/* Setup fma-protected access attributes */
1424 	state->hs_fm_accattr.devacc_attr_version =
1425 	    hermon_devacc_attr_version(state);
1426 	state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1427 	state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1428 	/* set acc err protection type */
1429 	state->hs_fm_accattr.devacc_attr_access =
1430 	    hermon_devacc_attr_access(state);
1431 
1432 	/* Setup for PCI config read/write of HCA device */
1433 	status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
1434 	if (status != DDI_SUCCESS) {
1435 		hermon_hw_fini(state, cleanup);
1436 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1437 		    "hw_init_PCI_config_space_regmap_fail");
1438 		/* This case is not the degraded one */
1439 		return (DDI_FAILURE);
1440 	}
1441 
1442 	/* Map PCI config space and MSI-X tables/pba */
1443 	hermon_set_msix_info(state);
1444 
1445 	/* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
1446 	status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
1447 	    &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
1448 	    &state->hs_fm_cmdhdl);
1449 	if (status != DDI_SUCCESS) {
1450 		hermon_hw_fini(state, cleanup);
1451 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1452 		    "hw_init_CMD_BAR_regmap_fail");
1453 		/* This case is not the degraded one */
1454 		return (DDI_FAILURE);
1455 	}
1456 
1457 	cleanup = HERMON_DRV_CLEANUP_LEVEL1;
1458 	/*
1459 	 * We defer UAR-BAR mapping until later.  Need to know if
1460 	 * blueflame mapping is to be done, and don't know that until after
1461 	 * we get the dev_caps, so do it right after that
1462 	 */
1463 
1464 	/*
1465 	 * There is a third BAR defined for Hermon - it is for MSIX
1466 	 *
1467 	 * Will need to explore it's possible need/use w/ Mellanox
1468 	 * [es] Temporary mapping maybe
1469 	 */
1470 
1471 #ifdef HERMON_SUPPORTS_MSIX_BAR
1472 	status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
1473 	    &state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
1474 	    &state->hs_reg_msihdl);
1475 	if (status != DDI_SUCCESS) {
1476 		hermon_hw_fini(state, cleanup);
1477 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1478 		    "hw_init_MSIX_BAR_regmap_fail");
1479 		/* This case is not the degraded one */
1480 		return (DDI_FAILURE);
1481 	}
1482 #endif
1483 
1484 	cleanup = HERMON_DRV_CLEANUP_LEVEL2;
1485 
1486 	/*
1487 	 * Save interesting registers away. The offsets of the first two
1488 	 * here (HCR and sw_reset) are detailed in the PRM, the others are
1489 	 * derived from values in the QUERY_FW output, so we'll save them
1490 	 * off later.
1491 	 */
1492 	/* Host Command Register (HCR) */
1493 	state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
1494 	    ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
1495 	state->hs_cmd_toggle = 0;	/* initialize it for use */
1496 
1497 	/* Software Reset register (sw_reset) and semaphore */
1498 	state->hs_cmd_regs.sw_reset = (uint32_t *)
1499 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1500 	    HERMON_CMD_SW_RESET_OFFSET);
1501 	state->hs_cmd_regs.sw_semaphore = (uint32_t *)
1502 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1503 	    HERMON_CMD_SW_SEMAPHORE_OFFSET);
1504 
1505 	/* Retrieve PCI device, vendor and rev IDs */
1506 	state->hs_vendor_id	 = HERMON_GET_VENDOR_ID(state->hs_dip);
1507 	state->hs_device_id	 = HERMON_GET_DEVICE_ID(state->hs_dip);
1508 	state->hs_revision_id	 = HERMON_GET_REVISION_ID(state->hs_dip);
1509 
1510 	/* make sure init'd before we start filling things in */
1511 	bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));
1512 
1513 	/* Initialize the Phase1 configuration profile */
1514 	status = hermon_cfg_profile_init_phase1(state);
1515 	if (status != DDI_SUCCESS) {
1516 		hermon_hw_fini(state, cleanup);
1517 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1518 		    "hw_init_cfginit1_fail");
1519 		/* This case is not the degraded one */
1520 		return (DDI_FAILURE);
1521 	}
1522 	cleanup = HERMON_DRV_CLEANUP_LEVEL3;
1523 
1524 	/* Do a software reset of the adapter to ensure proper state */
1525 	status = hermon_sw_reset(state);
1526 	if (status != HERMON_CMD_SUCCESS) {
1527 		hermon_hw_fini(state, cleanup);
1528 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1529 		    "hw_init_sw_reset_fail");
1530 		/* This case is not the degraded one */
1531 		return (DDI_FAILURE);
1532 	}
1533 
1534 	/* Initialize mailboxes */
1535 	status = hermon_rsrc_init_phase1(state);
1536 	if (status != DDI_SUCCESS) {
1537 		hermon_hw_fini(state, cleanup);
1538 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1539 		    "hw_init_rsrcinit1_fail");
1540 		/* This case is not the degraded one */
1541 		return (DDI_FAILURE);
1542 	}
1543 	cleanup = HERMON_DRV_CLEANUP_LEVEL4;
1544 
1545 	/* Post QUERY_FW */
1546 	status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
1547 	    sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
1548 	if (status != HERMON_CMD_SUCCESS) {
1549 		cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
1550 		hermon_hw_fini(state, cleanup);
1551 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1552 		    "hw_init_query_fw_cmd_fail");
1553 		/* This case is not the degraded one */
1554 		return (DDI_FAILURE);
1555 	}
1556 
1557 	/* Validate what/that HERMON FW version is appropriate */
1558 
1559 	status = hermon_fw_version_check(state);
1560 	if (status != DDI_SUCCESS) {
1561 		HERMON_FMANOTE(state, HERMON_FMA_FWVER);
1562 		if (state->hs_operational_mode == HERMON_HCA_MODE) {
1563 			cmn_err(CE_CONT, "Unsupported Hermon FW version: "
1564 			    "expected: %04d.%04d.%04d, "
1565 			    "actual: %04d.%04d.%04d\n",
1566 			    HERMON_FW_VER_MAJOR,
1567 			    HERMON_FW_VER_MINOR,
1568 			    HERMON_FW_VER_SUBMINOR,
1569 			    state->hs_fw.fw_rev_major,
1570 			    state->hs_fw.fw_rev_minor,
1571 			    state->hs_fw.fw_rev_subminor);
1572 		} else {
1573 			cmn_err(CE_CONT, "Unsupported FW version: "
1574 			    "%04d.%04d.%04d\n",
1575 			    state->hs_fw.fw_rev_major,
1576 			    state->hs_fw.fw_rev_minor,
1577 			    state->hs_fw.fw_rev_subminor);
1578 		}
1579 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1580 		hermon_hw_fini(state, cleanup);
1581 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1582 		    "hw_init_checkfwver_fail");
1583 		/* This case is the degraded one */
1584 		return (HERMON_CMD_BAD_NVMEM);
1585 	}
1586 
1587 	/*
1588 	 * Save off the rest of the interesting registers that we'll be using.
1589 	 * Setup the offsets for the other registers.
1590 	 */
1591 
1592 	/*
1593 	 * Hermon does the intr_offset from the BAR - technically should get the
1594 	 * BAR info from the response, but PRM says it's from BAR0-1, which is
1595 	 * for us the CMD BAR
1596 	 */
1597 
1598 	clr_intr_offset	 = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
1599 
1600 	/* Save Clear Interrupt address */
1601 	state->hs_cmd_regs.clr_intr = (uint64_t *)
1602 	    (uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);
1603 
1604 	/*
1605 	 * Set the error buffer also into the structure - used in hermon_event.c
1606 	 * to check for internal error on the HCA, not reported in eqe or
1607 	 * (necessarily) by interrupt
1608 	 */
1609 	state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
1610 	    (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
1611 
1612 	/*
1613 	 * Invoke a polling thread to check the error buffer periodically.
1614 	 */
1615 	state->hs_fm_poll_thread = ddi_periodic_add(hermon_inter_err_chk,
1616 	    (void *)state, FM_POLL_INTERVAL, DDI_IPL_0);
1617 
1618 	cleanup = HERMON_DRV_CLEANUP_LEVEL5;
1619 
1620 	/*
1621 	 * Allocate, map, and run the HCA Firmware.
1622 	 */
1623 
1624 	/* Allocate memory for the firmware to load into and map it */
1625 
1626 	/* get next higher power of 2 */
1627 	fw_size = 1 << highbit(state->hs_fw.fw_pages);
1628 	state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
1629 	status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
1630 	if (status != DDI_SUCCESS) {
1631 		cmn_err(CE_NOTE, "FW alloc failed\n");
1632 		hermon_hw_fini(state, cleanup);
1633 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1634 		    "hw_init_dma_alloc_fw_fail");
1635 		/* This case is not the degraded one */
1636 		return (DDI_FAILURE);
1637 	}
1638 
1639 	cleanup = HERMON_DRV_CLEANUP_LEVEL6;
1640 
1641 	/* Invoke the RUN_FW cmd to run the firmware */
1642 	status = hermon_run_fw_cmd_post(state);
1643 	if (status != DDI_SUCCESS) {
1644 		cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
1645 		if (status == HERMON_CMD_BAD_NVMEM) {
1646 			state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1647 		}
1648 		hermon_hw_fini(state, cleanup);
1649 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
1650 		/*
1651 		 * If the status is HERMON_CMD_BAD_NVMEM, it's likely the
1652 		 * firmware is corrupted, so the mode falls into the
1653 		 * maintenance mode.
1654 		 */
1655 		return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
1656 		    DDI_FAILURE);
1657 	}
1658 
1659 
1660 	/*
1661 	 * QUERY DEVICE LIMITS/CAPABILITIES
1662 	 * NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
1663 	 * but for familiarity we have kept the structure name the
1664 	 * same as Tavor/Arbel
1665 	 */
1666 
1667 	status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
1668 	    &state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
1669 	    HERMON_CMD_NOSLEEP_SPIN);
1670 	if (status != HERMON_CMD_SUCCESS) {
1671 		cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
1672 		    status);
1673 		hermon_hw_fini(state, cleanup);
1674 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
1675 		/* This case is not the degraded one */
1676 		return (DDI_FAILURE);
1677 	}
1678 
1679 	state->hs_devlim.num_rsvd_eq = max(state->hs_devlim.num_rsvd_eq,
1680 	    (4 * state->hs_devlim.num_rsvd_uar));	/* lesser of resvd's */
1681 
1682 	/* now we have enough info to map in the UAR BAR */
1683 	/*
1684 	 * First, we figure out how to map the BAR for UAR - use only half if
1685 	 * BlueFlame is enabled - in that case the mapped length is 1/2 the
1686 	 * log_max_uar_sz (max__uar - 1) * 1MB ( +20).
1687 	 */
1688 
1689 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1690 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1691 	} else {
1692 		offset = 0;	/* a zero length means map the whole thing */
1693 	}
1694 	status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
1695 	    &state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
1696 	    &state->hs_fm_uarhdl);
1697 	if (status != DDI_SUCCESS) {
1698 		HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
1699 		/* This case is not the degraded one */
1700 		return (DDI_FAILURE);
1701 	}
1702 
1703 	/* and if BlueFlame is enabled, map the other half there */
1704 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1705 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1706 		status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
1707 		    &state->hs_reg_bf_baseaddr, offset, offset,
1708 		    &state->hs_reg_accattr, &state->hs_reg_bfhdl);
1709 		if (status != DDI_SUCCESS) {
1710 			HERMON_ATTACH_MSG(state->hs_attach_buf,
1711 			    "BlueFlame BAR mapping");
1712 			/* This case is not the degraded one */
1713 			return (DDI_FAILURE);
1714 		}
1715 		/* This will be used in hw_fini if we fail to init. */
1716 		state->hs_bf_offset = offset;
1717 	}
1718 	cleanup = HERMON_DRV_CLEANUP_LEVEL7;
1719 
1720 	/* Hermon has a couple of things needed for phase 2 in query port */
1721 
1722 	status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
1723 	    &state->hs_queryport, sizeof (hermon_hw_query_port_t),
1724 	    HERMON_CMD_NOSLEEP_SPIN);
1725 	if (status != HERMON_CMD_SUCCESS) {
1726 		cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
1727 		    status);
1728 		hermon_hw_fini(state, cleanup);
1729 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1730 		    "hw_init_queryport_fail");
1731 		/* This case is not the degraded one */
1732 		return (DDI_FAILURE);
1733 	}
1734 
1735 	/* Initialize the Phase2 Hermon configuration profile */
1736 	status = hermon_cfg_profile_init_phase2(state);
1737 	if (status != DDI_SUCCESS) {
1738 		cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
1739 		hermon_hw_fini(state, cleanup);
1740 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1741 		    "hw_init_cfginit2_fail");
1742 		/* This case is not the degraded one */
1743 		return (DDI_FAILURE);
1744 	}
1745 
1746 	/* Determine and set the ICM size */
1747 	state->hs_icm_sz = hermon_size_icm(state);
1748 	status		 = hermon_set_icm_size_cmd_post(state);
1749 	if (status != DDI_SUCCESS) {
1750 		cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
1751 		    status);
1752 		hermon_hw_fini(state, cleanup);
1753 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1754 		    "hw_init_seticmsz_fail");
1755 		/* This case is not the degraded one */
1756 		return (DDI_FAILURE);
1757 	}
1758 	/* alloc icm aux physical memory and map it */
1759 
1760 	state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);
1761 
1762 	status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
1763 	if (status != DDI_SUCCESS) {
1764 		cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
1765 		    (longlong_t)state->hs_icma_dma.length);
1766 		hermon_hw_fini(state, cleanup);
1767 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1768 		    "hw_init_dma_alloc_icm_aux_fail");
1769 		/* This case is not the degraded one */
1770 		return (DDI_FAILURE);
1771 	}
1772 	cleanup = HERMON_DRV_CLEANUP_LEVEL8;
1773 
1774 	cleanup = HERMON_DRV_CLEANUP_LEVEL9;
1775 
1776 	/* Allocate an array of structures to house the ICM tables */
1777 	state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
1778 	    sizeof (hermon_icm_table_t), KM_SLEEP);
1779 
1780 	/* Set up the ICM address space and the INIT_HCA command input */
1781 	status = hermon_icm_config_setup(state, &state->hs_hcaparams);
1782 	if (status != HERMON_CMD_SUCCESS) {
1783 		cmn_err(CE_NOTE, "ICM configuration failed\n");
1784 		hermon_hw_fini(state, cleanup);
1785 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1786 		    "hw_init_icm_config_setup_fail");
1787 		/* This case is not the degraded one */
1788 		return (DDI_FAILURE);
1789 	}
1790 	cleanup = HERMON_DRV_CLEANUP_LEVEL10;
1791 
1792 	/* Initialize the adapter with the INIT_HCA cmd */
1793 	status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
1794 	    HERMON_CMD_NOSLEEP_SPIN);
1795 	if (status != HERMON_CMD_SUCCESS) {
1796 		cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
1797 		hermon_hw_fini(state, cleanup);
1798 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
1799 		/* This case is not the degraded one */
1800 		return (DDI_FAILURE);
1801 	}
1802 	cleanup = HERMON_DRV_CLEANUP_LEVEL11;
1803 
1804 	/* Enter the second phase of init for Hermon configuration/resources */
1805 	status = hermon_rsrc_init_phase2(state);
1806 	if (status != DDI_SUCCESS) {
1807 		hermon_hw_fini(state, cleanup);
1808 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1809 		    "hw_init_rsrcinit2_fail");
1810 		/* This case is not the degraded one */
1811 		return (DDI_FAILURE);
1812 	}
1813 	cleanup = HERMON_DRV_CLEANUP_LEVEL12;
1814 
1815 	/* Query the adapter via QUERY_ADAPTER */
1816 	status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
1817 	    &state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
1818 	    HERMON_CMD_NOSLEEP_SPIN);
1819 	if (status != HERMON_CMD_SUCCESS) {
1820 		cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
1821 		    status);
1822 		hermon_hw_fini(state, cleanup);
1823 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1824 		    "hw_init_query_adapter_fail");
1825 		/* This case is not the degraded one */
1826 		return (DDI_FAILURE);
1827 	}
1828 
1829 	/* Allocate protection domain (PD) for Hermon internal use */
1830 	status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
1831 	    HERMON_SLEEP);
1832 	if (status != DDI_SUCCESS) {
1833 		cmn_err(CE_NOTE, "failed to alloc internal PD\n");
1834 		hermon_hw_fini(state, cleanup);
1835 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1836 		    "hw_init_internal_pd_alloc_fail");
1837 		/* This case is not the degraded one */
1838 		return (DDI_FAILURE);
1839 	}
1840 	cleanup = HERMON_DRV_CLEANUP_LEVEL13;
1841 
1842 	/* Setup UAR page for kernel use */
1843 	status = hermon_internal_uarpg_init(state);
1844 	if (status != DDI_SUCCESS) {
1845 		cmn_err(CE_NOTE, "failed to setup internal UAR\n");
1846 		hermon_hw_fini(state, cleanup);
1847 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1848 		    "hw_init_internal_uarpg_alloc_fail");
1849 		/* This case is not the degraded one */
1850 		return (DDI_FAILURE);
1851 	}
1852 	cleanup = HERMON_DRV_CLEANUP_LEVEL14;
1853 
1854 	/* Query and initialize the Hermon interrupt/MSI information */
1855 	status = hermon_intr_or_msi_init(state);
1856 	if (status != DDI_SUCCESS) {
1857 		cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
1858 		hermon_hw_fini(state, cleanup);
1859 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1860 		    "hw_init_intr_or_msi_init_fail");
1861 		/* This case is not the degraded one */
1862 		return (DDI_FAILURE);
1863 	}
1864 	cleanup = HERMON_DRV_CLEANUP_LEVEL15;
1865 
1866 	status = hermon_isr_init(state);	/* set up the isr */
1867 	if (status != DDI_SUCCESS) {
1868 		cmn_err(CE_NOTE, "failed to init isr\n");
1869 		hermon_hw_fini(state, cleanup);
1870 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1871 		    "hw_init_isrinit_fail");
1872 		/* This case is not the degraded one */
1873 		return (DDI_FAILURE);
1874 	}
1875 	cleanup = HERMON_DRV_CLEANUP_LEVEL16;
1876 
1877 	/* Setup the event queues */
1878 	status = hermon_eq_init_all(state);
1879 	if (status != DDI_SUCCESS) {
1880 		cmn_err(CE_NOTE, "failed to init EQs\n");
1881 		hermon_hw_fini(state, cleanup);
1882 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1883 		    "hw_init_eqinitall_fail");
1884 		/* This case is not the degraded one */
1885 		return (DDI_FAILURE);
1886 	}
1887 	cleanup = HERMON_DRV_CLEANUP_LEVEL17;
1888 
1889 
1890 
1891 	/* Reserve contexts for QP0 and QP1 */
1892 	status = hermon_special_qp_contexts_reserve(state);
1893 	if (status != DDI_SUCCESS) {
1894 		cmn_err(CE_NOTE, "failed to init special QPs\n");
1895 		hermon_hw_fini(state, cleanup);
1896 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1897 		    "hw_init_rsrv_sqp_fail");
1898 		/* This case is not the degraded one */
1899 		return (DDI_FAILURE);
1900 	}
1901 	cleanup = HERMON_DRV_CLEANUP_LEVEL18;
1902 
1903 	/* Initialize for multicast group handling */
1904 	status = hermon_mcg_init(state);
1905 	if (status != DDI_SUCCESS) {
1906 		cmn_err(CE_NOTE, "failed to init multicast\n");
1907 		hermon_hw_fini(state, cleanup);
1908 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1909 		    "hw_init_mcg_init_fail");
1910 		/* This case is not the degraded one */
1911 		return (DDI_FAILURE);
1912 	}
1913 	cleanup = HERMON_DRV_CLEANUP_LEVEL19;
1914 
1915 	/* Initialize the Hermon IB port(s) */
1916 	status = hermon_hca_port_init(state);
1917 	if (status != DDI_SUCCESS) {
1918 		cmn_err(CE_NOTE, "failed to init HCA Port\n");
1919 		hermon_hw_fini(state, cleanup);
1920 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1921 		    "hw_init_hca_port_init_fail");
1922 		/* This case is not the degraded one */
1923 		return (DDI_FAILURE);
1924 	}
1925 
1926 	cleanup = HERMON_DRV_CLEANUP_ALL;
1927 
1928 	/* Determine NodeGUID and SystemImageGUID */
1929 	status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
1930 	    &nodeinfo);
1931 	if (status != HERMON_CMD_SUCCESS) {
1932 		cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
1933 		hermon_hw_fini(state, cleanup);
1934 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1935 		    "hw_init_getnodeinfo_cmd_fail");
1936 		/* This case is not the degraded one */
1937 		return (DDI_FAILURE);
1938 	}
1939 
1940 	/*
1941 	 * If the NodeGUID value was set in OBP properties, then we use that
1942 	 * value.  But we still print a message if the value we queried from
1943 	 * firmware does not match this value.
1944 	 *
1945 	 * Otherwise if OBP value is not set then we use the value from
1946 	 * firmware unconditionally.
1947 	 */
1948 	if (state->hs_cfg_profile->cp_nodeguid) {
1949 		state->hs_nodeguid   = state->hs_cfg_profile->cp_nodeguid;
1950 	} else {
1951 		state->hs_nodeguid = nodeinfo.NodeGUID;
1952 	}
1953 
1954 	if (state->hs_nodeguid != nodeinfo.NodeGUID) {
1955 		cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
1956 		    "does not match value set by device property");
1957 	}
1958 
1959 	/*
1960 	 * If the SystemImageGUID value was set in OBP properties, then we use
1961 	 * that value.  But we still print a message if the value we queried
1962 	 * from firmware does not match this value.
1963 	 *
1964 	 * Otherwise if OBP value is not set then we use the value from
1965 	 * firmware unconditionally.
1966 	 */
1967 	if (state->hs_cfg_profile->cp_sysimgguid) {
1968 		state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
1969 	} else {
1970 		state->hs_sysimgguid = nodeinfo.SystemImageGUID;
1971 	}
1972 
1973 	if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
1974 		cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
1975 		    "does not match value set by device property");
1976 	}
1977 
1978 	/* Get NodeDescription */
1979 	status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
1980 	    (sm_nodedesc_t *)&state->hs_nodedesc);
1981 	if (status != HERMON_CMD_SUCCESS) {
1982 		cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
1983 		hermon_hw_fini(state, cleanup);
1984 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1985 		    "hw_init_getnodedesc_cmd_fail");
1986 		/* This case is not the degraded one */
1987 		return (DDI_FAILURE);
1988 	}
1989 
1990 	return (DDI_SUCCESS);
1991 }
1992 
1993 
1994 /*
1995  * hermon_hw_fini()
1996  *    Context: Only called from attach() and/or detach() path contexts
1997  */
1998 static void
1999 hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
2000 {
2001 	uint_t		num_ports;
2002 	int		i, status;
2003 
2004 
2005 	/*
2006 	 * JBDB - We might not want to run these returns in all cases of
2007 	 * Bad News. We should still attempt to free all of the DMA memory
2008 	 * resources...  This needs to be worked last, after all allocations
2009 	 * are implemented. For now, and possibly for later, this works.
2010 	 */
2011 
2012 	switch (cleanup) {
2013 	/*
2014 	 * If we add more driver initialization steps that should be cleaned
2015 	 * up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
2016 	 * first entry (i.e. corresponds to the last init step).
2017 	 */
2018 	case HERMON_DRV_CLEANUP_ALL:
2019 		/* Shutdown the Hermon IB port(s) */
2020 		num_ports = state->hs_cfg_profile->cp_num_ports;
2021 		(void) hermon_hca_ports_shutdown(state, num_ports);
2022 		/* FALLTHROUGH */
2023 
2024 	case HERMON_DRV_CLEANUP_LEVEL19:
2025 		/* Teardown resources used for multicast group handling */
2026 		hermon_mcg_fini(state);
2027 		/* FALLTHROUGH */
2028 
2029 	case HERMON_DRV_CLEANUP_LEVEL18:
2030 		/* Unreserve the special QP contexts */
2031 		hermon_special_qp_contexts_unreserve(state);
2032 		/* FALLTHROUGH */
2033 
2034 	case HERMON_DRV_CLEANUP_LEVEL17:
2035 		/*
2036 		 * Attempt to teardown all event queues (EQ).  If we fail
2037 		 * here then print a warning message and return.  Something
2038 		 * (either in HW or SW) has gone seriously wrong.
2039 		 */
2040 		status = hermon_eq_fini_all(state);
2041 		if (status != DDI_SUCCESS) {
2042 			HERMON_WARNING(state, "failed to teardown EQs");
2043 			return;
2044 		}
2045 		/* FALLTHROUGH */
2046 	case HERMON_DRV_CLEANUP_LEVEL16:
2047 		/* Teardown Hermon interrupts */
2048 		hermon_isr_fini(state);
2049 		/* FALLTHROUGH */
2050 
2051 	case HERMON_DRV_CLEANUP_LEVEL15:
2052 		status = hermon_intr_or_msi_fini(state);
2053 		if (status != DDI_SUCCESS) {
2054 			HERMON_WARNING(state, "failed to free intr/MSI");
2055 			return;
2056 		}
2057 		/* FALLTHROUGH */
2058 
2059 	case HERMON_DRV_CLEANUP_LEVEL14:
2060 		/* Free the resources for the Hermon internal UAR pages */
2061 		hermon_internal_uarpg_fini(state);
2062 		/* FALLTHROUGH */
2063 
2064 	case HERMON_DRV_CLEANUP_LEVEL13:
2065 		/*
2066 		 * Free the PD that was used internally by Hermon software.  If
2067 		 * we fail here then print a warning and return.  Something
2068 		 * (probably software-related, but perhaps HW) has gone wrong.
2069 		 */
2070 		status = hermon_pd_free(state, &state->hs_pdhdl_internal);
2071 		if (status != DDI_SUCCESS) {
2072 			HERMON_WARNING(state, "failed to free internal PD");
2073 			return;
2074 		}
2075 		/* FALLTHROUGH */
2076 
2077 	case HERMON_DRV_CLEANUP_LEVEL12:
2078 		/* Cleanup all the phase2 resources first */
2079 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
2080 		/* FALLTHROUGH */
2081 
2082 	case HERMON_DRV_CLEANUP_LEVEL11:
2083 		/* LEVEL11 is after INIT_HCA */
2084 		/* FALLTHROUGH */
2085 
2086 
2087 	case HERMON_DRV_CLEANUP_LEVEL10:
2088 		/*
2089 		 * Unmap the ICM memory area with UNMAP_ICM command.
2090 		 */
2091 		status = hermon_unmap_icm_cmd_post(state, NULL);
2092 		if (status != DDI_SUCCESS) {
2093 			cmn_err(CE_WARN,
2094 			    "hermon_hw_fini: failed to unmap ICM\n");
2095 		}
2096 
2097 		/* Free the initial ICM DMA handles */
2098 		hermon_icm_dma_fini(state);
2099 
2100 		/* Free the ICM table structures */
2101 		hermon_icm_tables_fini(state);
2102 
2103 		/* Free the ICM table handles */
2104 		kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
2105 		    sizeof (hermon_icm_table_t));
2106 
2107 		/* FALLTHROUGH */
2108 
2109 	case HERMON_DRV_CLEANUP_LEVEL9:
2110 		/*
2111 		 * Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
2112 		 */
2113 		status = hermon_unmap_icm_aux_cmd_post(state);
2114 		if (status != HERMON_CMD_SUCCESS) {
2115 			cmn_err(CE_NOTE,
2116 			    "hermon_hw_fini: failed to unmap ICMA\n");
2117 		}
2118 		/* FALLTHROUGH */
2119 
2120 	case HERMON_DRV_CLEANUP_LEVEL8:
2121 		/*
2122 		 * Deallocate ICM Aux DMA memory.
2123 		 */
2124 		hermon_dma_free(&state->hs_icma_dma);
2125 		/* FALLTHROUGH */
2126 
2127 	case HERMON_DRV_CLEANUP_LEVEL7:
2128 		if (state->hs_fm_uarhdl) {
2129 			hermon_regs_map_free(state, &state->hs_fm_uarhdl);
2130 			state->hs_fm_uarhdl = NULL;
2131 		}
2132 
2133 		if (state->hs_reg_uarhdl) {
2134 			ddi_regs_map_free(&state->hs_reg_uarhdl);
2135 			state->hs_reg_uarhdl = NULL;
2136 		}
2137 
2138 		if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
2139 			ddi_regs_map_free(&state->hs_reg_bfhdl);
2140 			state->hs_reg_bfhdl = NULL;
2141 		}
2142 
2143 		for (i = 0; i < HERMON_MAX_PORTS; i++) {
2144 			if (state->hs_pkey[i]) {
2145 				kmem_free(state->hs_pkey[i], (1 <<
2146 				    state->hs_cfg_profile->cp_log_max_pkeytbl) *
2147 				    sizeof (ib_pkey_t));
2148 				state->hs_pkey[i] = NULL;
2149 			}
2150 			if (state->hs_guid[i]) {
2151 				kmem_free(state->hs_guid[i], (1 <<
2152 				    state->hs_cfg_profile->cp_log_max_gidtbl) *
2153 				    sizeof (ib_guid_t));
2154 				state->hs_guid[i] = NULL;
2155 			}
2156 		}
2157 		/* FALLTHROUGH */
2158 
2159 	case HERMON_DRV_CLEANUP_LEVEL6:
2160 		/*
2161 		 * Unmap the firmware memory area with UNMAP_FA command.
2162 		 */
2163 		status = hermon_unmap_fa_cmd_post(state);
2164 
2165 		if (status != HERMON_CMD_SUCCESS) {
2166 			cmn_err(CE_NOTE,
2167 			    "hermon_hw_fini: failed to unmap FW\n");
2168 		}
2169 
2170 		/*
2171 		 * Deallocate firmware DMA memory.
2172 		 */
2173 		hermon_dma_free(&state->hs_fw_dma);
2174 		/* FALLTHROUGH */
2175 
2176 	case HERMON_DRV_CLEANUP_LEVEL5:
2177 		/* stop the poll thread */
2178 		if (state->hs_fm_poll_thread) {
2179 			ddi_periodic_delete(state->hs_fm_poll_thread);
2180 			state->hs_fm_poll_thread = NULL;
2181 		}
2182 		/* FALLTHROUGH */
2183 
2184 	case HERMON_DRV_CLEANUP_LEVEL4:
2185 		/* Then cleanup the phase1 resources */
2186 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
2187 		/* FALLTHROUGH */
2188 
2189 	case HERMON_DRV_CLEANUP_LEVEL3:
2190 		/* Teardown any resources allocated for the config profile */
2191 		hermon_cfg_profile_fini(state);
2192 		/* FALLTHROUGH */
2193 
2194 	case HERMON_DRV_CLEANUP_LEVEL2:
2195 #ifdef HERMON_SUPPORTS_MSIX_BAR
2196 		/*
2197 		 * unmap 3rd BAR, MSIX BAR
2198 		 */
2199 		if (state->hs_reg_msihdl) {
2200 			ddi_regs_map_free(&state->hs_reg_msihdl);
2201 			state->hs_reg_msihdl = NULL;
2202 		}
2203 		/* FALLTHROUGH */
2204 #endif
2205 	case HERMON_DRV_CLEANUP_LEVEL1:
2206 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode) &&
2207 		    state->hs_fm_cmdhdl) {
2208 			hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
2209 			state->hs_fm_cmdhdl = NULL;
2210 		}
2211 
2212 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode) &&
2213 		    state->hs_reg_cmdhdl) {
2214 			ddi_regs_map_free(&state->hs_reg_cmdhdl);
2215 			state->hs_reg_cmdhdl = NULL;
2216 		}
2217 		/* FALLTHROUGH */
2218 
2219 	case HERMON_DRV_CLEANUP_LEVEL0:
2220 		if (state->hs_msix_tbl_entries) {
2221 			kmem_free(state->hs_msix_tbl_entries,
2222 			    state->hs_msix_tbl_size);
2223 			state->hs_msix_tbl_entries = NULL;
2224 		}
2225 
2226 		if (state->hs_msix_pba_entries) {
2227 			kmem_free(state->hs_msix_pba_entries,
2228 			    state->hs_msix_pba_size);
2229 			state->hs_msix_pba_entries = NULL;
2230 		}
2231 
2232 		if (state->hs_fm_msix_tblhdl) {
2233 			hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
2234 			state->hs_fm_msix_tblhdl = NULL;
2235 		}
2236 
2237 		if (state->hs_reg_msix_tblhdl) {
2238 			ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
2239 			state->hs_reg_msix_tblhdl = NULL;
2240 		}
2241 
2242 		if (state->hs_fm_msix_pbahdl) {
2243 			hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
2244 			state->hs_fm_msix_pbahdl = NULL;
2245 		}
2246 
2247 		if (state->hs_reg_msix_pbahdl) {
2248 			ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
2249 			state->hs_reg_msix_pbahdl = NULL;
2250 		}
2251 
2252 		if (state->hs_fm_pcihdl) {
2253 			hermon_regs_map_free(state, &state->hs_fm_pcihdl);
2254 			state->hs_fm_pcihdl = NULL;
2255 		}
2256 
2257 		if (state->hs_reg_pcihdl) {
2258 			ddi_regs_map_free(&state->hs_reg_pcihdl);
2259 			state->hs_reg_pcihdl = NULL;
2260 		}
2261 		break;
2262 
2263 	default:
2264 		HERMON_WARNING(state, "unexpected driver cleanup level");
2265 		return;
2266 	}
2267 }
2268 
2269 
2270 /*
2271  * hermon_soft_state_init()
2272  *    Context: Only called from attach() path context
2273  */
2274 static int
2275 hermon_soft_state_init(hermon_state_t *state)
2276 {
2277 	ibt_hca_attr_t		*hca_attr;
2278 	uint64_t		maxval, val;
2279 	ibt_hca_flags_t		caps = IBT_HCA_NO_FLAGS;
2280 	ibt_hca_flags2_t	caps2 = IBT_HCA2_NO_FLAGS;
2281 	int			status;
2282 	int			max_send_wqe_bytes;
2283 	int			max_recv_wqe_bytes;
2284 
2285 	/*
2286 	 * The ibc_hca_info_t struct is passed to the IBTF.  This is the
2287 	 * routine where we initialize it.  Many of the init values come from
2288 	 * either configuration variables or successful queries of the Hermon
2289 	 * hardware abilities
2290 	 */
2291 	state->hs_ibtfinfo.hca_ci_vers	= IBCI_V3;
2292 	state->hs_ibtfinfo.hca_dip	= state->hs_dip;
2293 	state->hs_ibtfinfo.hca_handle	= (ibc_hca_hdl_t)state;
2294 	state->hs_ibtfinfo.hca_ops	= &hermon_ibc_ops;
2295 
2296 	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
2297 	state->hs_ibtfinfo.hca_attr = hca_attr;
2298 
2299 	hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
2300 	hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
2301 	hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;
2302 
2303 	/* CQ interrupt moderation maximums - each limited to 16 bits */
2304 	hca_attr->hca_max_cq_mod_count = 0xFFFF;
2305 	hca_attr->hca_max_cq_mod_usec = 0xFFFF;
2306 
2307 	/* CQ relocation to other EQs - change when multiple MSI-Xs are used */
2308 	hca_attr->hca_max_cq_handlers = 1;
2309 
2310 	/*
2311 	 * Determine HCA capabilities:
2312 	 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
2313 	 *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
2314 	 *    or IBT_HCA_SHUTDOWN_PORT
2315 	 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
2316 	 *    IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
2317 	 *    IBT_HCA_SRQ, IBT_HCA_RESIZE_SRQ and IBT_HCA_FMR are always
2318 	 *    supported
2319 	 * All other features are conditionally supported, depending on the
2320 	 *    status return by the Hermon HCA in QUERY_DEV_LIM.
2321 	 */
2322 	if (state->hs_devlim.ud_multi) {
2323 		caps |= IBT_HCA_UD_MULTICAST;
2324 	}
2325 	if (state->hs_devlim.atomic) {
2326 		caps |= IBT_HCA_ATOMICS_HCA;
2327 	}
2328 	if (state->hs_devlim.apm) {
2329 		caps |= IBT_HCA_AUTO_PATH_MIG;
2330 	}
2331 	if (state->hs_devlim.pkey_v) {
2332 		caps |= IBT_HCA_PKEY_CNTR;
2333 	}
2334 	if (state->hs_devlim.qkey_v) {
2335 		caps |= IBT_HCA_QKEY_CNTR;
2336 	}
2337 	if (state->hs_devlim.ipoib_cksm) {
2338 		caps |= IBT_HCA_CKSUM_FULL;
2339 		caps2 |= IBT_HCA2_IP_CLASS;
2340 	}
2341 	if (state->hs_devlim.mod_wr_srq) {
2342 		caps |= IBT_HCA_RESIZE_SRQ;
2343 	}
2344 	if (state->hs_devlim.lif) {
2345 		caps |= IBT_HCA_LOCAL_INVAL_FENCE;
2346 	}
2347 	if (state->hs_devlim.reserved_lkey) {
2348 		caps2 |= IBT_HCA2_RES_LKEY;
2349 		hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
2350 	}
2351 	if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
2352 	    state->hs_devlim.fast_reg_wr) {	/* fw needs to be >= 2.6.636 */
2353 		if (state->hs_fw.fw_rev_major > 2)
2354 			caps2 |= IBT_HCA2_MEM_MGT_EXT;
2355 		else if (state->hs_fw.fw_rev_major == 2)
2356 			if (state->hs_fw.fw_rev_minor > 6)
2357 				caps2 |= IBT_HCA2_MEM_MGT_EXT;
2358 			else if (state->hs_fw.fw_rev_minor == 6)
2359 				if (state->hs_fw.fw_rev_subminor >= 636)
2360 					caps2 |= IBT_HCA2_MEM_MGT_EXT;
2361 	}
2362 	if (state->hs_devlim.mps) {
2363 		caps |= IBT_HCA_ZERO_BASED_VA;
2364 	}
2365 	if (state->hs_devlim.zb) {
2366 		caps |= IBT_HCA_MULT_PAGE_SZ_MR;
2367 	}
2368 	caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
2369 	    IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
2370 	    IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
2371 
2372 	if (state->hs_devlim.log_max_gso_sz) {
2373 		hca_attr->hca_max_lso_size =
2374 		    (1 << state->hs_devlim.log_max_gso_sz);
2375 		/* More work needed in hermon_post_send for larger values */
2376 		hca_attr->hca_max_lso_hdr_size = 0x2c;	/* IPv4 only */
2377 	}
2378 
2379 	caps |= IBT_HCA_WQE_SIZE_INFO;
2380 	max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
2381 	max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
2382 	hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
2383 	hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
2384 	hca_attr->hca_conn_rdma_sgl_overhead = 1;
2385 	hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;
2386 
2387 	/* We choose not to support "inline" unless it improves performance */
2388 	hca_attr->hca_max_inline_size = 0;
2389 	hca_attr->hca_ud_send_inline_sz = 0;
2390 	hca_attr->hca_conn_send_inline_sz = 0;
2391 	hca_attr->hca_conn_rdmaw_inline_overhead = 4;
2392 
2393 	hca_attr->hca_flags = caps;
2394 	hca_attr->hca_flags2 = caps2;
2395 
2396 	/*
2397 	 * Set hca_attr's IDs
2398 	 */
2399 	hca_attr->hca_vendor_id	 = state->hs_vendor_id;
2400 	hca_attr->hca_device_id	 = state->hs_device_id;
2401 	hca_attr->hca_version_id = state->hs_revision_id;
2402 
2403 	/*
2404 	 * Determine number of available QPs and max QP size.  Number of
2405 	 * available QPs is determined by subtracting the number of
2406 	 * "reserved QPs" (i.e. reserved for firmware use) from the
2407 	 * total number configured.
2408 	 */
2409 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2410 	hca_attr->hca_max_qp = val - ((uint64_t)1 <<
2411 	    state->hs_devlim.log_rsvd_qp);
2412 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
2413 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
2414 	if (val > maxval) {
2415 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2416 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2417 		    "soft_state_init_maxqpsz_toobig_fail");
2418 		return (DDI_FAILURE);
2419 	}
2420 	/* we need to reduce this by the max space needed for headroom */
2421 	hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
2422 	    HERMON_QP_WQE_LOG_MINIMUM) - 1;
2423 
2424 	/*
2425 	 * Determine max scatter-gather size in WQEs. The HCA has split
2426 	 * the max sgl into rec'v Q and send Q values. Use the least.
2427 	 *
2428 	 * This is mainly useful for legacy clients.  Smart clients
2429 	 * such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
2430 	 */
2431 	if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
2432 		maxval = state->hs_devlim.max_sg_rq;
2433 	} else {
2434 		maxval = state->hs_devlim.max_sg_sq;
2435 	}
2436 	val	= state->hs_cfg_profile->cp_wqe_max_sgl;
2437 	if (val > maxval) {
2438 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2439 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2440 		    "soft_state_init_toomanysgl_fail");
2441 		return (DDI_FAILURE);
2442 	}
2443 	/* If the rounded value for max SGL is too large, cap it */
2444 	if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
2445 		state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
2446 		val = maxval;
2447 	} else {
2448 		val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
2449 	}
2450 
2451 	hca_attr->hca_max_sgl	 = (uint_t)val;
2452 	hca_attr->hca_max_rd_sgl = 0;	/* zero because RD is unsupported */
2453 
2454 	/*
2455 	 * Determine number of available CQs and max CQ size. Number of
2456 	 * available CQs is determined by subtracting the number of
2457 	 * "reserved CQs" (i.e. reserved for firmware use) from the
2458 	 * total number configured.
2459 	 */
2460 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
2461 	hca_attr->hca_max_cq = val - ((uint64_t)1 <<
2462 	    state->hs_devlim.log_rsvd_cq);
2463 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
2464 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
2465 	if (val > maxval) {
2466 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2467 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2468 		    "soft_state_init_maxcqsz_toobig_fail");
2469 		return (DDI_FAILURE);
2470 	}
2471 	hca_attr->hca_max_cq_sz = (uint_t)val;
2472 
2473 	/*
2474 	 * Determine number of available SRQs and max SRQ size. Number of
2475 	 * available SRQs is determined by subtracting the number of
2476 	 * "reserved SRQs" (i.e. reserved for firmware use) from the
2477 	 * total number configured.
2478 	 */
2479 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
2480 	hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
2481 	    state->hs_devlim.log_rsvd_srq);
2482 	maxval  = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
2483 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);
2484 
2485 	if (val > maxval) {
2486 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2487 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2488 		    "soft_state_init_maxsrqsz_toobig_fail");
2489 		return (DDI_FAILURE);
2490 	}
2491 	hca_attr->hca_max_srqs_sz = (uint_t)val;
2492 
2493 	val	= hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
2494 	maxval	= state->hs_devlim.max_sg_rq - 1;
2495 	if (val > maxval) {
2496 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2497 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2498 		    "soft_state_init_toomanysrqsgl_fail");
2499 		return (DDI_FAILURE);
2500 	}
2501 	hca_attr->hca_max_srq_sgl = (uint_t)val;
2502 
2503 	/*
2504 	 * Determine supported HCA page sizes
2505 	 * XXX
2506 	 * For now we simply return the system pagesize as the only supported
2507 	 * pagesize
2508 	 */
2509 	hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
2510 	    IBT_PAGE_4K);
2511 
2512 	/*
2513 	 * Determine number of available MemReg, MemWin, and their max size.
2514 	 * Number of available MRs and MWs is determined by subtracting
2515 	 * the number of "reserved MPTs" (i.e. reserved for firmware use)
2516 	 * from the total number configured for each.
2517 	 */
2518 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
2519 	hca_attr->hca_max_memr	  = val - ((uint64_t)1 <<
2520 	    state->hs_devlim.log_rsvd_dmpt);
2521 	hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
2522 	    ((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
2523 	maxval	= state->hs_devlim.log_max_mrw_sz;
2524 	val	= state->hs_cfg_profile->cp_log_max_mrw_sz;
2525 	if (val > maxval) {
2526 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2527 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2528 		    "soft_state_init_maxmrwsz_toobig_fail");
2529 		return (DDI_FAILURE);
2530 	}
2531 	hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
2532 
2533 	/* Determine RDMA/Atomic properties */
2534 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
2535 	hca_attr->hca_max_rsc = (uint_t)val;
2536 	val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
2537 	hca_attr->hca_max_rdma_in_qp  = (uint8_t)val;
2538 	val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
2539 	hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
2540 	hca_attr->hca_max_rdma_in_ee  = 0;
2541 	hca_attr->hca_max_rdma_out_ee = 0;
2542 
2543 	/*
2544 	 * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
2545 	 * because neither type of raw QP is supported
2546 	 */
2547 	hca_attr->hca_max_ipv6_qp  = 0;
2548 	hca_attr->hca_max_ether_qp = 0;
2549 
2550 	/* Determine max number of MCGs and max QP-per-MCG */
2551 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2552 	hca_attr->hca_max_mcg_qps   = (uint_t)val;
2553 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
2554 	hca_attr->hca_max_mcg	    = (uint_t)val;
2555 	val = state->hs_cfg_profile->cp_num_qp_per_mcg;
2556 	hca_attr->hca_max_qp_per_mcg = (uint_t)val;
2557 
2558 	/* Determine max number partitions (i.e. PKeys) */
2559 	maxval	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2560 	    state->hs_queryport.log_max_pkey);
2561 	val	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2562 	    state->hs_cfg_profile->cp_log_max_pkeytbl);
2563 
2564 	if (val > maxval) {
2565 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2566 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2567 		    "soft_state_init_toomanypkey_fail");
2568 		return (DDI_FAILURE);
2569 	}
2570 	hca_attr->hca_max_partitions = (uint16_t)val;
2571 
2572 	/* Determine number of ports */
2573 	maxval = state->hs_devlim.num_ports;
2574 	val = state->hs_cfg_profile->cp_num_ports;
2575 	if ((val > maxval) || (val == 0)) {
2576 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2577 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2578 		    "soft_state_init_toomanyports_fail");
2579 		return (DDI_FAILURE);
2580 	}
2581 	hca_attr->hca_nports = (uint8_t)val;
2582 
2583 	/* Copy NodeGUID and SystemImageGUID from softstate */
2584 	hca_attr->hca_node_guid = state->hs_nodeguid;
2585 	hca_attr->hca_si_guid	= state->hs_sysimgguid;
2586 
2587 	/*
2588 	 * Determine local ACK delay.  Use the value suggested by the Hermon
2589 	 * hardware (from the QUERY_DEV_CAP command)
2590 	 */
2591 	hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;
2592 
2593 	/* Determine max SGID table and PKey table sizes */
2594 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
2595 	hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
2596 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
2597 	hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;
2598 
2599 	/* Determine max number of PDs */
2600 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_pd);
2601 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
2602 	if (val > maxval) {
2603 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2604 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2605 		    "soft_state_init_toomanypd_fail");
2606 		return (DDI_FAILURE);
2607 	}
2608 	hca_attr->hca_max_pd = (uint_t)val;
2609 
2610 	/* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
2611 	hca_attr->hca_max_ah = 0;
2612 
2613 	/* No RDDs or EECs (since Reliable Datagram is not supported) */
2614 	hca_attr->hca_max_rdd = 0;
2615 	hca_attr->hca_max_eec = 0;
2616 
2617 	/* Initialize lock for reserved UAR page access */
2618 	mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
2619 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2620 
2621 	/* Initialize the flash fields */
2622 	state->hs_fw_flashstarted = 0;
2623 	mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
2624 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2625 
2626 	/* Initialize the lock for the info ioctl */
2627 	mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
2628 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2629 
2630 	/* Initialize the AVL tree for QP number support */
2631 	hermon_qpn_avl_init(state);
2632 
2633 	/* Initialize the kstat info structure */
2634 	status = hermon_kstat_init(state);
2635 	if (status != DDI_SUCCESS) {
2636 		hermon_qpn_avl_fini(state);
2637 		mutex_destroy(&state->hs_info_lock);
2638 		mutex_destroy(&state->hs_fw_flashlock);
2639 		mutex_destroy(&state->hs_uar_lock);
2640 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2641 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2642 		    "soft_state_init_kstatinit_fail");
2643 		return (DDI_FAILURE);
2644 	}
2645 
2646 	return (DDI_SUCCESS);
2647 }
2648 
2649 
2650 /*
2651  * hermon_soft_state_fini()
2652  *    Context: Called only from detach() path context
2653  */
2654 static void
2655 hermon_soft_state_fini(hermon_state_t *state)
2656 {
2657 
2658 	/* Teardown the kstat info */
2659 	hermon_kstat_fini(state);
2660 
2661 	/* Teardown the AVL tree for QP number support */
2662 	hermon_qpn_avl_fini(state);
2663 
2664 	/* Free up info ioctl mutex */
2665 	mutex_destroy(&state->hs_info_lock);
2666 
2667 	/* Free up flash mutex */
2668 	mutex_destroy(&state->hs_fw_flashlock);
2669 
2670 	/* Free up the UAR page access mutex */
2671 	mutex_destroy(&state->hs_uar_lock);
2672 
2673 	/* Free up the hca_attr struct */
2674 	kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
2675 
2676 }
2677 
2678 /*
2679  * hermon_icm_config_setup()
2680  *    Context: Only called from attach() path context
2681  */
2682 static int
2683 hermon_icm_config_setup(hermon_state_t *state,
2684     hermon_hw_initqueryhca_t *inithca)
2685 {
2686 	hermon_hw_querydevlim_t	*devlim;
2687 	hermon_cfg_profile_t	*cfg;
2688 	hermon_icm_table_t	*icm_p[HERMON_NUM_ICM_RESOURCES];
2689 	hermon_icm_table_t	*icm;
2690 	hermon_icm_table_t	*tmp;
2691 	uint64_t		icm_addr;
2692 	uint64_t		icm_size;
2693 	int			status, i, j;
2694 
2695 
2696 	/* Bring in local devlims, cfg_profile and hs_icm table list */
2697 	devlim = &state->hs_devlim;
2698 	cfg = state->hs_cfg_profile;
2699 	icm = state->hs_icm;
2700 
2701 	/*
2702 	 * Assign each ICM table's entry size from data in the devlims,
2703 	 * except for RDB and MCG sizes, which are not returned in devlims
2704 	 * but do have a fixed size, and the UAR context entry size, which
2705 	 * we determine. For this, we use the "cp_num_pgs_per_uce" value
2706 	 * from our hs_cfg_profile.
2707 	 */
2708 	icm[HERMON_CMPT].object_size	= devlim->cmpt_entry_sz;
2709 	icm[HERMON_CMPT_QPC].object_size	= devlim->cmpt_entry_sz;
2710 	icm[HERMON_CMPT_SRQC].object_size	= devlim->cmpt_entry_sz;
2711 	icm[HERMON_CMPT_CQC].object_size	= devlim->cmpt_entry_sz;
2712 	icm[HERMON_CMPT_EQC].object_size	= devlim->cmpt_entry_sz;
2713 	icm[HERMON_MTT].object_size	= devlim->mtt_entry_sz;
2714 	icm[HERMON_DMPT].object_size	= devlim->dmpt_entry_sz;
2715 	icm[HERMON_QPC].object_size	= devlim->qpc_entry_sz;
2716 	icm[HERMON_CQC].object_size	= devlim->cqc_entry_sz;
2717 	icm[HERMON_SRQC].object_size	= devlim->srq_entry_sz;
2718 	icm[HERMON_EQC].object_size	= devlim->eqc_entry_sz;
2719 	icm[HERMON_RDB].object_size	= devlim->rdmardc_entry_sz *
2720 	    cfg->cp_hca_max_rdma_in_qp;
2721 	icm[HERMON_MCG].object_size	= HERMON_MCG_SIZE;
2722 	icm[HERMON_ALTC].object_size	= devlim->altc_entry_sz;
2723 	icm[HERMON_AUXC].object_size	= devlim->aux_entry_sz;
2724 
2725 	/* Assign each ICM table's log2 number of entries */
2726 	icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
2727 	icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
2728 	icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
2729 	icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
2730 	icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
2731 	icm[HERMON_MTT].log_num_entries	= cfg->cp_log_num_mtt;
2732 	icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
2733 	icm[HERMON_QPC].log_num_entries	= cfg->cp_log_num_qp;
2734 	icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
2735 	icm[HERMON_CQC].log_num_entries	= cfg->cp_log_num_cq;
2736 	icm[HERMON_EQC].log_num_entries	= HERMON_NUM_EQ_SHIFT;
2737 	icm[HERMON_RDB].log_num_entries	= cfg->cp_log_num_qp;
2738 	icm[HERMON_MCG].log_num_entries	= cfg->cp_log_num_mcg;
2739 	icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
2740 	icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;
2741 
2742 	/* Initialize the ICM tables */
2743 	hermon_icm_tables_init(state);
2744 
2745 	/*
2746 	 * ICM tables must be aligned on their size in the ICM address
2747 	 * space. So, here we order the tables from largest total table
2748 	 * size to the smallest. All tables are a power of 2 in size, so
2749 	 * this will ensure that all tables are aligned on their own size
2750 	 * without wasting space in the ICM.
2751 	 *
2752 	 * In order to easily set the ICM addresses without needing to
2753 	 * worry about the ordering of our table indices as relates to
2754 	 * the hermon_rsrc_type_t enum, we will use a list of pointers
2755 	 * representing the tables for the sort, then assign ICM addresses
2756 	 * below using it.
2757 	 */
2758 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2759 		icm_p[i] = &icm[i];
2760 	}
2761 	for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
2762 		switch (i) {
2763 		case HERMON_CMPT_QPC:
2764 		case HERMON_CMPT_SRQC:
2765 		case HERMON_CMPT_CQC:
2766 		case HERMON_CMPT_EQC:
2767 			continue;
2768 		}
2769 		for (j = 1; j < i; j++) {
2770 			if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
2771 				tmp		= icm_p[j];
2772 				icm_p[j]	= icm_p[j - 1];
2773 				icm_p[j - 1]	= tmp;
2774 			}
2775 		}
2776 	}
2777 
2778 	/* Initialize the ICM address and ICM size */
2779 	icm_addr = icm_size = 0;
2780 
2781 	/*
2782 	 * Set the ICM base address of each table, using our sorted
2783 	 * list of pointers from above.
2784 	 */
2785 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2786 		j = icm_p[i]->icm_type;
2787 		switch (j) {
2788 		case HERMON_CMPT_QPC:
2789 		case HERMON_CMPT_SRQC:
2790 		case HERMON_CMPT_CQC:
2791 		case HERMON_CMPT_EQC:
2792 			continue;
2793 		}
2794 		if (icm[j].table_size) {
2795 			/*
2796 			 * Set the ICM base address in the table, save the
2797 			 * ICM offset in the rsrc pool and increment the
2798 			 * total ICM allocation.
2799 			 */
2800 			icm[j].icm_baseaddr = icm_addr;
2801 			if (hermon_verbose) {
2802 				IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
2803 				    " size %llx", j, icm[j].icm_baseaddr,
2804 				    icm[j].table_size);
2805 			}
2806 			icm_size += icm[j].table_size;
2807 		}
2808 
2809 		/* Verify that we don't exceed maximum ICM size */
2810 		if (icm_size > devlim->max_icm_size) {
2811 			/* free the ICM table memory resources */
2812 			hermon_icm_tables_fini(state);
2813 			cmn_err(CE_WARN, "ICM configuration exceeds maximum "
2814 			    "configuration: max (0x%lx) requested (0x%lx)\n",
2815 			    (ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
2816 			HERMON_ATTACH_MSG(state->hs_attach_buf,
2817 			    "icm_config_toobig_fail");
2818 			return (DDI_FAILURE);
2819 		}
2820 
2821 		/* assign address to the 4 pieces of the CMPT */
2822 		if (j == HERMON_CMPT) {
2823 			uint64_t cmpt_size = icm[j].table_size >> 2;
2824 #define	init_cmpt_icm_baseaddr(rsrc, indx)				\
2825 	icm[rsrc].icm_baseaddr	= icm_addr + (indx * cmpt_size);
2826 			init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
2827 			init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
2828 			init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
2829 			init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
2830 		}
2831 
2832 		/* Increment the ICM address for the next table */
2833 		icm_addr += icm[j].table_size;
2834 	}
2835 
2836 	/* Populate the structure for the INIT_HCA command */
2837 	hermon_inithca_set(state, inithca);
2838 
2839 	/*
2840 	 * Prior to invoking INIT_HCA, we must have ICM memory in place
2841 	 * for the reserved objects in each table. We will allocate and map
2842 	 * this initial ICM memory here. Note that given the assignment
2843 	 * of span_size above, tables that are smaller or equal in total
2844 	 * size to the default span_size will be mapped in full.
2845 	 */
2846 	status = hermon_icm_dma_init(state);
2847 	if (status != DDI_SUCCESS) {
2848 		/* free the ICM table memory resources */
2849 		hermon_icm_tables_fini(state);
2850 		HERMON_WARNING(state, "Failed to allocate initial ICM");
2851 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2852 		    "icm_config_dma_init_fail");
2853 		return (DDI_FAILURE);
2854 	}
2855 
2856 	return (DDI_SUCCESS);
2857 }
2858 
2859 /*
2860  * hermon_inithca_set()
2861  *    Context: Only called from attach() path context
2862  */
2863 static void
2864 hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
2865 {
2866 	hermon_cfg_profile_t	*cfg;
2867 	hermon_icm_table_t	*icm;
2868 	int			i;
2869 
2870 
2871 	/* Populate the INIT_HCA structure */
2872 	icm = state->hs_icm;
2873 	cfg = state->hs_cfg_profile;
2874 
2875 	/* set version */
2876 	inithca->version = 0x02;	/* PRM 0.36 */
2877 	/* set cacheline - log2 in 16-byte chunks */
2878 	inithca->log2_cacheline = 0x2;	/* optimized for 64 byte cache */
2879 
2880 	/* we need to update the inithca info with thie UAR info too */
2881 	inithca->uar.log_max_uars = highbit(cfg->cp_log_num_uar);
2882 	inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;
2883 
2884 	/* Set endianess */
2885 #ifdef	_LITTLE_ENDIAN
2886 	inithca->big_endian	= 0;
2887 #else
2888 	inithca->big_endian	= 1;
2889 #endif
2890 
2891 	/* Port Checking is on by default */
2892 	inithca->udav_port_chk	= HERMON_UDAV_PORTCHK_ENABLED;
2893 
2894 	/* Enable IPoIB checksum */
2895 	if (state->hs_devlim.ipoib_cksm)
2896 		inithca->chsum_en = 1;
2897 
2898 	/* Set each ICM table's attributes */
2899 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2900 		switch (icm[i].icm_type) {
2901 		case HERMON_CMPT:
2902 			inithca->tpt.cmpt_baseaddr = icm[i].icm_baseaddr;
2903 			break;
2904 
2905 		case HERMON_MTT:
2906 			inithca->tpt.mtt_baseaddr = icm[i].icm_baseaddr;
2907 			break;
2908 
2909 		case HERMON_DMPT:
2910 			inithca->tpt.dmpt_baseaddr = icm[i].icm_baseaddr;
2911 			inithca->tpt.log_dmpt_sz   = icm[i].log_num_entries;
2912 			inithca->tpt.pgfault_rnr_to = 0; /* just in case */
2913 			break;
2914 
2915 		case HERMON_QPC:
2916 			inithca->context.log_num_qp = icm[i].log_num_entries;
2917 			inithca->context.qpc_baseaddr_h =
2918 			    icm[i].icm_baseaddr >> 32;
2919 			inithca->context.qpc_baseaddr_l =
2920 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2921 			break;
2922 
2923 		case HERMON_CQC:
2924 			inithca->context.log_num_cq = icm[i].log_num_entries;
2925 			inithca->context.cqc_baseaddr_h =
2926 			    icm[i].icm_baseaddr >> 32;
2927 			inithca->context.cqc_baseaddr_l =
2928 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2929 			break;
2930 
2931 		case HERMON_SRQC:
2932 			inithca->context.log_num_srq = icm[i].log_num_entries;
2933 			inithca->context.srqc_baseaddr_h =
2934 			    icm[i].icm_baseaddr >> 32;
2935 			inithca->context.srqc_baseaddr_l =
2936 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2937 			break;
2938 
2939 		case HERMON_EQC:
2940 			inithca->context.log_num_eq = icm[i].log_num_entries;
2941 			inithca->context.eqc_baseaddr_h =
2942 			    icm[i].icm_baseaddr >> 32;
2943 			inithca->context.eqc_baseaddr_l =
2944 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2945 			break;
2946 
2947 		case HERMON_RDB:
2948 			inithca->context.rdmardc_baseaddr_h =
2949 			    icm[i].icm_baseaddr >> 32;
2950 			inithca->context.rdmardc_baseaddr_l =
2951 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2952 			inithca->context.log_num_rdmardc =
2953 			    icm[i].log_num_entries;
2954 			break;
2955 
2956 		case HERMON_MCG:
2957 			inithca->multi.mc_baseaddr    = icm[i].icm_baseaddr;
2958 			inithca->multi.log_mc_tbl_sz  = icm[i].log_num_entries;
2959 			inithca->multi.log_mc_tbl_ent =
2960 			    highbit(HERMON_MCGMEM_SZ(state)) - 1;
2961 			inithca->multi.log_mc_tbl_hash_sz =
2962 			    cfg->cp_log_num_mcg_hash;
2963 			inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
2964 			break;
2965 
2966 		case HERMON_ALTC:
2967 			inithca->context.altc_baseaddr = icm[i].icm_baseaddr;
2968 			break;
2969 
2970 		case HERMON_AUXC:
2971 			inithca->context.auxc_baseaddr = icm[i].icm_baseaddr;
2972 			break;
2973 
2974 		default:
2975 			break;
2976 
2977 		}
2978 	}
2979 
2980 }
2981 
2982 /*
2983  * hermon_icm_tables_init()
2984  *    Context: Only called from attach() path context
2985  *
2986  * Dynamic ICM breaks the various ICM tables into "span_size" chunks
2987  * to enable allocation of backing memory on demand.  Arbel used a
2988  * fixed size ARBEL_ICM_SPAN_SIZE (initially was 512KB) as the
2989  * span_size for all ICM chunks.  Hermon has other considerations,
2990  * so the span_size used differs from Arbel.
2991  *
2992  * The basic considerations for why Hermon differs are:
2993  *
2994  *	1) ICM memory is in units of HERMON pages.
2995  *
2996  *	2) The AUXC table is approximately 1 byte per QP.
2997  *
2998  *	3) ICM memory for AUXC, ALTC, and RDB is allocated when
2999  *	the ICM memory for the corresponding QPC is allocated.
3000  *
3001  *	4) ICM memory for the CMPT corresponding to the various primary
3002  *	resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
3003  *	memory for the primary resource is allocated.
3004  *
3005  * One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
3006  * So, the minimum chunk for the various QPC related ICM memory should
3007  * all be allocated to support the 4K QPs.  Currently, this means the
3008  * amount of memory for the various QP chunks is:
3009  *
3010  *	QPC	256*4K bytes
3011  *	RDB	128*4K bytes
3012  *	CMPT	 64*4K bytes
3013  *	ALTC	 64*4K bytes
3014  *	AUXC	  1*4K bytes
3015  *
3016  * The span_size chosen for the QP resource is 4KB of AUXC entries,
3017  * or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
3018  *
3019  * Other ICM resources can have their span_size be more arbitrary.
3020  * This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
3021  */
3022 
3023 /* macro to make the code below cleaner */
3024 #define	init_dependent(rsrc, dep)				\
3025 	icm[dep].span		= icm[rsrc].span;		\
3026 	icm[dep].num_spans	= icm[rsrc].num_spans;		\
3027 	icm[dep].split_shift	= icm[rsrc].split_shift;	\
3028 	icm[dep].span_mask	= icm[rsrc].span_mask;		\
3029 	icm[dep].span_shift	= icm[rsrc].span_shift;		\
3030 	icm[dep].rsrc_mask	= icm[rsrc].rsrc_mask;		\
3031 	if (hermon_verbose) {					\
3032 		IBTF_DPRINTF_L2("hermon", "tables_init: "	\
3033 		    "rsrc (0x%x) size (0x%lx) span (0x%x) "	\
3034 		    "num_spans (0x%x)", dep, icm[dep].table_size, \
3035 		    icm[dep].span, icm[dep].num_spans);		\
3036 		IBTF_DPRINTF_L2("hermon", "tables_init: "	\
3037 		    "span_shift (0x%x) split_shift (0x%x)",	\
3038 		    icm[dep].span_shift, icm[dep].split_shift);	\
3039 		IBTF_DPRINTF_L2("hermon", "tables_init: "	\
3040 		    "span_mask (0x%x)  rsrc_mask   (0x%x)",	\
3041 		    icm[dep].span_mask, icm[dep].rsrc_mask);	\
3042 	}
3043 
3044 static void
3045 hermon_icm_tables_init(hermon_state_t *state)
3046 {
3047 	hermon_icm_table_t	*icm;
3048 	int			i, k;
3049 	uint32_t		per_split;
3050 
3051 
3052 	icm = state->hs_icm;
3053 
3054 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3055 		icm[i].icm_type		= i;
3056 		icm[i].num_entries	= 1 << icm[i].log_num_entries;
3057 		icm[i].log_object_size	= highbit(icm[i].object_size) - 1;
3058 		icm[i].table_size	= icm[i].num_entries <<
3059 		    icm[i].log_object_size;
3060 
3061 		/* deal with "dependent" resource types */
3062 		switch (i) {
3063 		case HERMON_AUXC:
3064 #ifdef HERMON_FW_WORKAROUND
3065 			icm[i].table_size = 0x80000000ull;
3066 			/* FALLTHROUGH */
3067 #endif
3068 		case HERMON_CMPT_QPC:
3069 		case HERMON_RDB:
3070 		case HERMON_ALTC:
3071 			init_dependent(HERMON_QPC, i);
3072 			continue;
3073 		case HERMON_CMPT_SRQC:
3074 			init_dependent(HERMON_SRQC, i);
3075 			continue;
3076 		case HERMON_CMPT_CQC:
3077 			init_dependent(HERMON_CQC, i);
3078 			continue;
3079 		case HERMON_CMPT_EQC:
3080 			init_dependent(HERMON_EQC, i);
3081 			continue;
3082 		}
3083 
3084 		icm[i].span = HERMON_ICM_SPAN;	/* default #rsrc's in 1 span */
3085 		if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
3086 			icm[i].span = HERMON_ICM_SPAN * 16;
3087 		icm[i].num_spans = icm[i].num_entries / icm[i].span;
3088 		if (icm[i].num_spans == 0) {
3089 			icm[i].span = icm[i].num_entries;
3090 			per_split = 1;
3091 			icm[i].num_spans = icm[i].num_entries / icm[i].span;
3092 		} else {
3093 			per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
3094 			if (per_split == 0) {
3095 				per_split = 1;
3096 			}
3097 		}
3098 		if (hermon_verbose)
3099 			IBTF_DPRINTF_L2("ICM", "rsrc %x  span %x  num_spans %x",
3100 			    i, icm[i].span, icm[i].num_spans);
3101 
3102 		/*
3103 		 * Ensure a minimum table size of an ICM page, and a
3104 		 * maximum span size of the ICM table size.  This ensures
3105 		 * that we don't have less than an ICM page to map, which is
3106 		 * impossible, and that we will map an entire table at
3107 		 * once if it's total size is less than the span size.
3108 		 */
3109 		icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);
3110 
3111 		icm[i].span_shift = 0;
3112 		for (k = icm[i].span; k != 1; k >>= 1)
3113 			icm[i].span_shift++;
3114 		icm[i].split_shift = icm[i].span_shift;
3115 		for (k = per_split; k != 1; k >>= 1)
3116 			icm[i].split_shift++;
3117 		icm[i].span_mask = (1 << icm[i].split_shift) -
3118 		    (1 << icm[i].span_shift);
3119 		icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;
3120 
3121 
3122 		/* Initialize the table lock */
3123 		mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
3124 		    DDI_INTR_PRI(state->hs_intrmsi_pri));
3125 		cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);
3126 
3127 		if (hermon_verbose) {
3128 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3129 			    "rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
3130 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3131 			    "span (0x%x) num_spans (0x%x)",
3132 			    icm[i].span, icm[i].num_spans);
3133 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3134 			    "span_shift (0x%x) split_shift (0x%x)",
3135 			    icm[i].span_shift, icm[i].split_shift);
3136 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3137 			    "span_mask (0x%x)  rsrc_mask   (0x%x)",
3138 			    icm[i].span_mask, icm[i].rsrc_mask);
3139 		}
3140 	}
3141 
3142 }
3143 
3144 /*
3145  * hermon_icm_tables_fini()
3146  *    Context: Only called from attach() path context
3147  *
3148  * Clean up all icm_tables.  Free the bitmap and dma_info arrays.
3149  */
3150 static void
3151 hermon_icm_tables_fini(hermon_state_t *state)
3152 {
3153 	hermon_icm_table_t	*icm;
3154 	int			nspans;
3155 	int			i, j;
3156 
3157 
3158 	icm = state->hs_icm;
3159 
3160 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3161 
3162 		mutex_enter(&icm[i].icm_table_lock);
3163 		nspans = icm[i].num_spans;
3164 
3165 		for (j = 0; j < HERMON_ICM_SPLIT; j++) {
3166 			if (icm[i].icm_dma[j])
3167 				/* Free the ICM DMA slots */
3168 				kmem_free(icm[i].icm_dma[j],
3169 				    nspans * sizeof (hermon_dma_info_t));
3170 
3171 			if (icm[i].icm_bitmap[j])
3172 				/* Free the table bitmap */
3173 				kmem_free(icm[i].icm_bitmap[j],
3174 				    (nspans + 7) / 8);
3175 		}
3176 		/* Destroy the table lock */
3177 		cv_destroy(&icm[i].icm_table_cv);
3178 		mutex_exit(&icm[i].icm_table_lock);
3179 		mutex_destroy(&icm[i].icm_table_lock);
3180 	}
3181 
3182 }
3183 
3184 /*
3185  * hermon_icm_dma_init()
3186  *    Context: Only called from attach() path context
3187  */
3188 static int
3189 hermon_icm_dma_init(hermon_state_t *state)
3190 {
3191 	hermon_icm_table_t	*icm;
3192 	hermon_rsrc_type_t	type;
3193 	int			status;
3194 
3195 
3196 	/*
3197 	 * This routine will allocate initial ICM DMA resources for ICM
3198 	 * tables that have reserved ICM objects. This is the only routine
3199 	 * where we should have to allocate ICM outside of hermon_rsrc_alloc().
3200 	 * We need to allocate ICM here explicitly, rather than in
3201 	 * hermon_rsrc_alloc(), because we've not yet completed the resource
3202 	 * pool initialization. When the resource pools are initialized
3203 	 * (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
3204 	 * information), resource preallocations will be invoked to match
3205 	 * the ICM allocations seen here. We will then be able to use the
3206 	 * normal allocation path.  Note we don't need to set a refcnt on
3207 	 * these initial allocations because that will be done in the calls
3208 	 * to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
3209 	 * "prealloc" objects (see hermon_rsrc.c for more information).
3210 	 */
3211 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3212 
3213 		/* ICM for these is allocated within hermon_icm_alloc() */
3214 		switch (type) {
3215 		case HERMON_CMPT:
3216 		case HERMON_CMPT_QPC:
3217 		case HERMON_CMPT_SRQC:
3218 		case HERMON_CMPT_CQC:
3219 		case HERMON_CMPT_EQC:
3220 		case HERMON_AUXC:
3221 		case HERMON_ALTC:
3222 		case HERMON_RDB:
3223 			continue;
3224 		}
3225 
3226 		icm = &state->hs_icm[type];
3227 
3228 		mutex_enter(&icm->icm_table_lock);
3229 		status = hermon_icm_alloc(state, type, 0, 0);
3230 		mutex_exit(&icm->icm_table_lock);
3231 		if (status != DDI_SUCCESS) {
3232 			while (type--) {
3233 				icm = &state->hs_icm[type];
3234 				mutex_enter(&icm->icm_table_lock);
3235 				hermon_icm_free(state, type, 0, 0);
3236 				mutex_exit(&icm->icm_table_lock);
3237 			}
3238 			return (DDI_FAILURE);
3239 		}
3240 
3241 		if (hermon_verbose) {
3242 			IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
3243 			    "table (0x%x) index (0x%x) allocated", type, 0);
3244 		}
3245 	}
3246 
3247 	return (DDI_SUCCESS);
3248 }
3249 
3250 /*
3251  * hermon_icm_dma_fini()
3252  *    Context: Only called from attach() path context
3253  *
3254  * ICM has been completely unmapped.  We just free the memory here.
3255  */
3256 static void
3257 hermon_icm_dma_fini(hermon_state_t *state)
3258 {
3259 	hermon_icm_table_t	*icm;
3260 	hermon_dma_info_t	*dma_info;
3261 	hermon_rsrc_type_t	type;
3262 	int			index1, index2;
3263 
3264 
3265 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3266 		icm = &state->hs_icm[type];
3267 		for (index1 = 0; index1 < HERMON_ICM_SPLIT; index1++) {
3268 			dma_info = icm->icm_dma[index1];
3269 			if (dma_info == NULL)
3270 				continue;
3271 			for (index2 = 0; index2 < icm->num_spans; index2++) {
3272 				if (dma_info[index2].dma_hdl)
3273 					hermon_dma_free(&dma_info[index2]);
3274 				dma_info[index2].dma_hdl = NULL;
3275 			}
3276 		}
3277 	}
3278 
3279 }
3280 
3281 /*
3282  * hermon_hca_port_init()
3283  *    Context: Only called from attach() path context
3284  */
3285 static int
3286 hermon_hca_port_init(hermon_state_t *state)
3287 {
3288 	hermon_hw_set_port_t	*portinits, *initport;
3289 	hermon_cfg_profile_t	*cfgprof;
3290 	uint_t			num_ports;
3291 	int			i = 0, status;
3292 	uint64_t		maxval, val;
3293 	uint64_t		sysimgguid, nodeguid, portguid;
3294 
3295 
3296 	cfgprof = state->hs_cfg_profile;
3297 
3298 	/* Get number of HCA ports */
3299 	num_ports = cfgprof->cp_num_ports;
3300 
3301 	/* Allocate space for Hermon set port  struct(s) */
3302 	portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
3303 	    sizeof (hermon_hw_set_port_t), KM_SLEEP);
3304 
3305 
3306 
3307 	/* Post commands to initialize each Hermon HCA port */
3308 	/*
3309 	 * In Hermon, the process is different than in previous HCAs.
3310 	 * Here, you have to:
3311 	 *	QUERY_PORT - to get basic information from the HCA
3312 	 *	set the fields accordingly
3313 	 *	SET_PORT - to change/set everything as desired
3314 	 *	INIT_PORT - to bring the port up
3315 	 *
3316 	 * Needs to be done for each port in turn
3317 	 */
3318 
3319 	for (i = 0; i < num_ports; i++) {
3320 		bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
3321 		status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
3322 		    (i + 1), &state->hs_queryport,
3323 		    sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
3324 		if (status != HERMON_CMD_SUCCESS) {
3325 			cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
3326 			    "command failed: %08x\n", i + 1, status);
3327 			goto init_ports_fail;
3328 		}
3329 		initport = &portinits[i];
3330 		state->hs_initport = &portinits[i];
3331 
3332 		bzero(initport, sizeof (hermon_hw_query_port_t));
3333 
3334 		/*
3335 		 * Determine whether we need to override the firmware's
3336 		 * default SystemImageGUID setting.
3337 		 */
3338 		sysimgguid = cfgprof->cp_sysimgguid;
3339 		if (sysimgguid != 0) {
3340 			initport->sig		= 1;
3341 			initport->sys_img_guid	= sysimgguid;
3342 		}
3343 
3344 		/*
3345 		 * Determine whether we need to override the firmware's
3346 		 * default NodeGUID setting.
3347 		 */
3348 		nodeguid = cfgprof->cp_nodeguid;
3349 		if (nodeguid != 0) {
3350 			initport->ng		= 1;
3351 			initport->node_guid	= nodeguid;
3352 		}
3353 
3354 		/*
3355 		 * Determine whether we need to override the firmware's
3356 		 * default PortGUID setting.
3357 		 */
3358 		portguid = cfgprof->cp_portguid[i];
3359 		if (portguid != 0) {
3360 			initport->g0		= 1;
3361 			initport->guid0		= portguid;
3362 		}
3363 
3364 		/* Validate max MTU size */
3365 		maxval  = state->hs_queryport.ib_mtu;
3366 		val	= cfgprof->cp_max_mtu;
3367 		if (val > maxval) {
3368 			goto init_ports_fail;
3369 		}
3370 
3371 		/* Validate the max port width */
3372 		maxval  = state->hs_queryport.ib_port_wid;
3373 		val	= cfgprof->cp_max_port_width;
3374 		if (val > maxval) {
3375 			goto init_ports_fail;
3376 		}
3377 
3378 		/* Validate max VL cap size */
3379 		maxval  = state->hs_queryport.max_vl;
3380 		val	= cfgprof->cp_max_vlcap;
3381 		if (val > maxval) {
3382 			goto init_ports_fail;
3383 		}
3384 
3385 		/* Validate max GID table size */
3386 		maxval  = ((uint64_t)1 << state->hs_queryport.log_max_gid);
3387 		val	= ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
3388 		if (val > maxval) {
3389 			goto init_ports_fail;
3390 		}
3391 		initport->max_guid = (uint16_t)val;
3392 		initport->mg = 1;
3393 
3394 		/* Validate max PKey table size */
3395 		maxval	= ((uint64_t)1 << state->hs_queryport.log_max_pkey);
3396 		val	= ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
3397 		if (val > maxval) {
3398 			goto init_ports_fail;
3399 		}
3400 		initport->max_pkey = (uint16_t)val;
3401 		initport->mp = 1;
3402 		/*
3403 		 * Post the SET_PORT cmd to Hermon firmware. This sets
3404 		 * the parameters of the port.
3405 		 */
3406 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3407 		    HERMON_CMD_NOSLEEP_SPIN);
3408 		if (status != HERMON_CMD_SUCCESS) {
3409 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3410 			    "failed: %08x\n", i + 1, status);
3411 			goto init_ports_fail;
3412 		}
3413 		/* issue another SET_PORT cmd - performance fix/workaround */
3414 		/* XXX - need to discuss with Mellanox */
3415 		bzero(initport, sizeof (hermon_hw_query_port_t));
3416 		initport->cap_mask = 0x02500868;
3417 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3418 		    HERMON_CMD_NOSLEEP_SPIN);
3419 		if (status != HERMON_CMD_SUCCESS) {
3420 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3421 			    "failed: %08x\n", i + 1, status);
3422 			goto init_ports_fail;
3423 		}
3424 	}
3425 
3426 	/*
3427 	 * Finally, do the INIT_PORT for each port in turn
3428 	 * When this command completes, the corresponding Hermon port
3429 	 * will be physically "Up" and initialized.
3430 	 */
3431 	for (i = 0; i < num_ports; i++) {
3432 		status = hermon_init_port_cmd_post(state, i + 1,
3433 		    HERMON_CMD_NOSLEEP_SPIN);
3434 		if (status != HERMON_CMD_SUCCESS) {
3435 			cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
3436 			    "comman failed: %08x\n", i + 1, status);
3437 			goto init_ports_fail;
3438 		}
3439 	}
3440 
3441 	/* Free up the memory for Hermon port init struct(s), return success */
3442 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3443 	return (DDI_SUCCESS);
3444 
3445 init_ports_fail:
3446 	/*
3447 	 * Free up the memory for Hermon port init struct(s), shutdown any
3448 	 * successfully initialized ports, and return failure
3449 	 */
3450 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3451 	(void) hermon_hca_ports_shutdown(state, i);
3452 
3453 	return (DDI_FAILURE);
3454 }
3455 
3456 
3457 /*
3458  * hermon_hca_ports_shutdown()
3459  *    Context: Only called from attach() and/or detach() path contexts
3460  */
3461 static int
3462 hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
3463 {
3464 	int	i, status;
3465 
3466 	/*
3467 	 * Post commands to shutdown all init'd Hermon HCA ports.  Note: if
3468 	 * any of these commands fail for any reason, it would be entirely
3469 	 * unexpected and probably indicative a serious problem (HW or SW).
3470 	 * Although we do return void from this function, this type of failure
3471 	 * should not go unreported.  That is why we have the warning message.
3472 	 */
3473 	for (i = 0; i < num_init; i++) {
3474 		status = hermon_close_port_cmd_post(state, i + 1,
3475 		    HERMON_CMD_NOSLEEP_SPIN);
3476 		if (status != HERMON_CMD_SUCCESS) {
3477 			HERMON_WARNING(state, "failed to shutdown HCA port");
3478 			return (status);
3479 		}
3480 	}
3481 	return (HERMON_CMD_SUCCESS);
3482 }
3483 
3484 
3485 /*
3486  * hermon_internal_uarpg_init
3487  *    Context: Only called from attach() path context
3488  */
3489 static int
3490 hermon_internal_uarpg_init(hermon_state_t *state)
3491 {
3492 	int	status;
3493 	hermon_dbr_info_t 	*info;
3494 
3495 
3496 	/*
3497 	 * Allocate the UAR page for kernel use. This UAR page is
3498 	 * the privileged UAR page through which all kernel generated
3499 	 * doorbells will be rung. There are a number of UAR pages
3500 	 * reserved by hardware at the front of the UAR BAR, indicated
3501 	 * by DEVCAP.num_rsvd_uar, which we have already allocated. So,
3502 	 * the kernel page, or UAR page index num_rsvd_uar, will be
3503 	 * allocated here for kernel use.
3504 	 */
3505 
3506 	status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
3507 	    &state->hs_uarkpg_rsrc);
3508 	if (status != DDI_SUCCESS) {
3509 		return (DDI_FAILURE);
3510 	}
3511 
3512 	/* Setup pointer to kernel UAR page */
3513 	state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
3514 
3515 	/* need to set up DBr tracking as well */
3516 	status = hermon_dbr_page_alloc(state, &info);
3517 	if (status != DDI_SUCCESS) {
3518 		return (DDI_FAILURE);
3519 	}
3520 
3521 	/* store the page pointer in the private area - the rest s/b done */
3522 	state->hs_kern_dbr = info->dbr_page;
3523 	return (DDI_SUCCESS);
3524 }
3525 
3526 
3527 /*
3528  * hermon_internal_uarpg_fini
3529  *    Context: Only called from attach() and/or detach() path contexts
3530  */
3531 static void
3532 hermon_internal_uarpg_fini(hermon_state_t *state)
3533 {
3534 
3535 	/* Free up Hermon UAR page #1 (kernel driver doorbells) */
3536 	hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
3537 
3538 }
3539 
3540 
3541 /*
3542  * hermon_special_qp_contexts_reserve()
3543  *    Context: Only called from attach() path context
3544  */
3545 static int
3546 hermon_special_qp_contexts_reserve(hermon_state_t *state)
3547 {
3548 	hermon_rsrc_t	*qp0_rsrc, *qp1_rsrc, *qp_resvd;
3549 	int		status;
3550 
3551 
3552 	/* Initialize the lock used for special QP rsrc management */
3553 	mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
3554 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3555 
3556 	/*
3557 	 * Reserve contexts for QP0.  These QP contexts will be setup to
3558 	 * act as aliases for the real QP0.  Note: We are required to grab
3559 	 * two QPs (one per port) even if we are operating in single-port
3560 	 * mode.
3561 	 */
3562 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3563 	    HERMON_SLEEP, &qp0_rsrc);
3564 	if (status != DDI_SUCCESS) {
3565 		mutex_destroy(&state->hs_spec_qplock);
3566 		return (DDI_FAILURE);
3567 	}
3568 	state->hs_spec_qp0 = qp0_rsrc;
3569 
3570 	/*
3571 	 * Reserve contexts for QP1.  These QP contexts will be setup to
3572 	 * act as aliases for the real QP1.  Note: We are required to grab
3573 	 * two QPs (one per port) even if we are operating in single-port
3574 	 * mode.
3575 	 */
3576 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3577 	    HERMON_SLEEP, &qp1_rsrc);
3578 	if (status != DDI_SUCCESS) {
3579 		hermon_rsrc_free(state, &qp0_rsrc);
3580 		mutex_destroy(&state->hs_spec_qplock);
3581 		return (DDI_FAILURE);
3582 	}
3583 	state->hs_spec_qp1 = qp1_rsrc;
3584 
3585 	status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
3586 	    HERMON_SLEEP, &qp_resvd);
3587 	if (status != DDI_SUCCESS) {
3588 		hermon_rsrc_free(state, &qp1_rsrc);
3589 		hermon_rsrc_free(state, &qp0_rsrc);
3590 		mutex_destroy(&state->hs_spec_qplock);
3591 		return (DDI_FAILURE);
3592 	}
3593 	state->hs_spec_qp_unused = qp_resvd;
3594 
3595 	return (DDI_SUCCESS);
3596 }
3597 
3598 
3599 /*
3600  * hermon_special_qp_contexts_unreserve()
3601  *    Context: Only called from attach() and/or detach() path contexts
3602  */
3603 static void
3604 hermon_special_qp_contexts_unreserve(hermon_state_t *state)
3605 {
3606 
3607 	/* Unreserve contexts for spec_qp_unused */
3608 	hermon_rsrc_free(state, &state->hs_spec_qp_unused);
3609 
3610 	/* Unreserve contexts for QP1 */
3611 	hermon_rsrc_free(state, &state->hs_spec_qp1);
3612 
3613 	/* Unreserve contexts for QP0 */
3614 	hermon_rsrc_free(state, &state->hs_spec_qp0);
3615 
3616 	/* Destroy the lock used for special QP rsrc management */
3617 	mutex_destroy(&state->hs_spec_qplock);
3618 
3619 }
3620 
3621 
3622 /*
3623  * hermon_sw_reset()
3624  *    Context: Currently called only from attach() path context
3625  */
3626 static int
3627 hermon_sw_reset(hermon_state_t *state)
3628 {
3629 	ddi_acc_handle_t	hdl = hermon_get_pcihdl(state);
3630 	ddi_acc_handle_t	cmdhdl = hermon_get_cmdhdl(state);
3631 	uint32_t		reset_delay;
3632 	int			status, i;
3633 	uint32_t		sem;
3634 	uint_t			offset;
3635 	uint32_t		data32;		/* for devctl & linkctl */
3636 	int			loopcnt;
3637 
3638 	/* initialize the FMA retry loop */
3639 	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
3640 	hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);
3641 
3642 	/*
3643 	 * If the configured software reset delay is set to zero, then we
3644 	 * will not attempt a software reset of the Hermon device.
3645 	 */
3646 	reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
3647 	if (reset_delay == 0) {
3648 		return (DDI_SUCCESS);
3649 	}
3650 
3651 	/* the FMA retry loop starts. */
3652 	hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3653 	    fm_test);
3654 	hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3655 	    fm_test2);
3656 
3657 	/* Query the PCI capabilities of the HCA device */
3658 	/* but don't process the VPD until after reset */
3659 	status = hermon_pci_capability_list(state, hdl);
3660 	if (status != DDI_SUCCESS) {
3661 		cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
3662 		    status);
3663 		return (DDI_FAILURE);
3664 	}
3665 
3666 	/*
3667 	 * Read all PCI config info (reg0...reg63).  Note: According to the
3668 	 * Hermon software reset application note, we should not read or
3669 	 * restore the values in reg22 and reg23.
3670 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
3671 	 * register LAST, and technically, you need to restore the
3672 	 * PCIE Capability "device control" and "link control" (word-sized,
3673 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
3674 	 * We hold off restoring the command register - offset 0x4 - till last
3675 	 */
3676 
3677 	/* 1st, wait for the semaphore assure accessibility - per PRM */
3678 	status = -1;
3679 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
3680 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
3681 		if (sem == 0) {
3682 			status = 0;
3683 			break;
3684 		}
3685 		drv_usecwait(1);
3686 	}
3687 
3688 	/* Check if timeout happens */
3689 	if (status == -1) {
3690 		/*
3691 		 * Remove this acc handle from Hermon, then log
3692 		 * the error.
3693 		 */
3694 		hermon_pci_config_teardown(state, &hdl);
3695 
3696 		cmn_err(CE_WARN, "hermon_sw_reset timeout: "
3697 		    "failed to get the semaphore(0x%p)\n",
3698 		    (void *)state->hs_cmd_regs.sw_semaphore);
3699 
3700 		hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
3701 		return (DDI_FAILURE);
3702 	}
3703 
3704 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3705 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3706 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3707 			state->hs_cfg_data[i]  = pci_config_get32(hdl, i << 2);
3708 		}
3709 	}
3710 
3711 	/*
3712 	 * Perform the software reset (by writing 1 at offset 0xF0010)
3713 	 */
3714 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
3715 
3716 	/*
3717 	 * This delay is required so as not to cause a panic here. If the
3718 	 * device is accessed too soon after reset it will not respond to
3719 	 * config cycles, causing a Master Abort and panic.
3720 	 */
3721 	drv_usecwait(reset_delay);
3722 
3723 	/*
3724 	 * Poll waiting for the device to finish resetting.
3725 	 */
3726 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
3727 	while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
3728 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
3729 		if (--loopcnt == 0)
3730 			break;	/* just in case, break and go on */
3731 	}
3732 	if (loopcnt == 0)
3733 		cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
3734 		    pci_config_get32(hdl, 0));
3735 
3736 	/*
3737 	 * Restore the config info
3738 	 */
3739 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3740 		if (i == 1) continue;	/* skip the status/ctrl reg */
3741 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3742 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3743 			pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
3744 		}
3745 	}
3746 
3747 	/*
3748 	 * PCI Express Capability - we saved during capability list, and
3749 	 * we'll restore them here.
3750 	 */
3751 	offset = state->hs_pci_cap_offset;
3752 	data32 = state->hs_pci_cap_devctl;
3753 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
3754 	data32 = state->hs_pci_cap_lnkctl;
3755 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
3756 
3757 	pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
3758 
3759 	/* the FMA retry loop ends. */
3760 	hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3761 	    fm_test2);
3762 	hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3763 	    fm_test);
3764 
3765 	return (DDI_SUCCESS);
3766 
3767 pio_error2:
3768 	/* fall through */
3769 pio_error:
3770 	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
3771 	return (DDI_FAILURE);
3772 }
3773 
3774 
3775 /*
3776  * hermon_mcg_init()
3777  *    Context: Only called from attach() path context
3778  */
3779 static int
3780 hermon_mcg_init(hermon_state_t *state)
3781 {
3782 	uint_t		mcg_tmp_sz;
3783 
3784 
3785 	/*
3786 	 * Allocate space for the MCG temporary copy buffer.  This is
3787 	 * used by the Attach/Detach Multicast Group code
3788 	 */
3789 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3790 	state->hs_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
3791 
3792 	/*
3793 	 * Initialize the multicast group mutex.  This ensures atomic
3794 	 * access to add, modify, and remove entries in the multicast
3795 	 * group hash lists.
3796 	 */
3797 	mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
3798 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3799 
3800 	return (DDI_SUCCESS);
3801 }
3802 
3803 
3804 /*
3805  * hermon_mcg_fini()
3806  *    Context: Only called from attach() and/or detach() path contexts
3807  */
3808 static void
3809 hermon_mcg_fini(hermon_state_t *state)
3810 {
3811 	uint_t		mcg_tmp_sz;
3812 
3813 
3814 	/* Free up the space used for the MCG temporary copy buffer */
3815 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3816 	kmem_free(state->hs_mcgtmp, mcg_tmp_sz);
3817 
3818 	/* Destroy the multicast group mutex */
3819 	mutex_destroy(&state->hs_mcglock);
3820 
3821 }
3822 
3823 
3824 /*
3825  * hermon_fw_version_check()
3826  *    Context: Only called from attach() path context
3827  */
3828 static int
3829 hermon_fw_version_check(hermon_state_t *state)
3830 {
3831 
3832 	uint_t	hermon_fw_ver_major;
3833 	uint_t	hermon_fw_ver_minor;
3834 	uint_t	hermon_fw_ver_subminor;
3835 
3836 #ifdef FMA_TEST
3837 	if (hermon_test_num == -1) {
3838 		return (DDI_FAILURE);
3839 	}
3840 #endif
3841 
3842 	/*
3843 	 * Depending on which version of driver we have attached, and which
3844 	 * HCA we've attached, the firmware version checks will be different.
3845 	 * We set up the comparison values for both Arbel and Sinai HCAs.
3846 	 */
3847 	switch (state->hs_operational_mode) {
3848 	case HERMON_HCA_MODE:
3849 		hermon_fw_ver_major = HERMON_FW_VER_MAJOR;
3850 		hermon_fw_ver_minor = HERMON_FW_VER_MINOR;
3851 		hermon_fw_ver_subminor = HERMON_FW_VER_SUBMINOR;
3852 		break;
3853 
3854 	default:
3855 		return (DDI_FAILURE);
3856 	}
3857 
3858 	/*
3859 	 * If FW revision major number is less than acceptable,
3860 	 * return failure, else if greater return success.  If
3861 	 * the major numbers are equal than check the minor number
3862 	 */
3863 	if (state->hs_fw.fw_rev_major < hermon_fw_ver_major) {
3864 		return (DDI_FAILURE);
3865 	} else if (state->hs_fw.fw_rev_major > hermon_fw_ver_major) {
3866 		return (DDI_SUCCESS);
3867 	}
3868 
3869 	/*
3870 	 * Do the same check as above, except for minor revision numbers
3871 	 * If the minor numbers are equal than check the subminor number
3872 	 */
3873 	if (state->hs_fw.fw_rev_minor < hermon_fw_ver_minor) {
3874 		return (DDI_FAILURE);
3875 	} else if (state->hs_fw.fw_rev_minor > hermon_fw_ver_minor) {
3876 		return (DDI_SUCCESS);
3877 	}
3878 
3879 	/*
3880 	 * Once again we do the same check as above, except for the subminor
3881 	 * revision number.  If the subminor numbers are equal here, then
3882 	 * these are the same firmware version, return success
3883 	 */
3884 	if (state->hs_fw.fw_rev_subminor < hermon_fw_ver_subminor) {
3885 		return (DDI_FAILURE);
3886 	} else if (state->hs_fw.fw_rev_subminor > hermon_fw_ver_subminor) {
3887 		return (DDI_SUCCESS);
3888 	}
3889 
3890 	return (DDI_SUCCESS);
3891 }
3892 
3893 
3894 /*
3895  * hermon_device_info_report()
3896  *    Context: Only called from attach() path context
3897  */
3898 static void
3899 hermon_device_info_report(hermon_state_t *state)
3900 {
3901 
3902 	cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
3903 	    "HW rev: %02d\n", state->hs_instance, state->hs_fw.fw_rev_major,
3904 	    state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
3905 	    state->hs_revision_id);
3906 	cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
3907 	    state->hs_instance, state->hs_nodedesc, state->hs_nodeguid);
3908 
3909 }
3910 
3911 
3912 /*
3913  * hermon_pci_capability_list()
3914  *    Context: Only called from attach() path context
3915  */
3916 static int
3917 hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
3918 {
3919 	uint_t		offset, data;
3920 	uint32_t	data32;
3921 
3922 	state->hs_pci_cap_offset = 0;		/* make sure it's cleared */
3923 
3924 	/*
3925 	 * Check for the "PCI Capabilities" bit in the "Status Register".
3926 	 * Bit 4 in this register indicates the presence of a "PCI
3927 	 * Capabilities" list.
3928 	 *
3929 	 * PCI-Express requires this bit to be set to 1.
3930 	 */
3931 	data = pci_config_get16(hdl, 0x06);
3932 	if ((data & 0x10) == 0) {
3933 		return (DDI_FAILURE);
3934 	}
3935 
3936 	/*
3937 	 * Starting from offset 0x34 in PCI config space, find the
3938 	 * head of "PCI capabilities" list, and walk the list.  If
3939 	 * capabilities of a known type are encountered (e.g.
3940 	 * "PCI-X Capability"), then call the appropriate handler
3941 	 * function.
3942 	 */
3943 	offset = pci_config_get8(hdl, 0x34);
3944 	while (offset != 0x0) {
3945 		data = pci_config_get8(hdl, offset);
3946 		/*
3947 		 * Check for known capability types.  Hermon has the
3948 		 * following:
3949 		 *    o Power Mgmt	 (0x02)
3950 		 *    o VPD Capability   (0x03)
3951 		 *    o PCI-E Capability (0x10)
3952 		 *    o MSIX Capability  (0x11)
3953 		 */
3954 		switch (data) {
3955 		case 0x01:
3956 			/* power mgmt handling */
3957 			break;
3958 		case 0x03:
3959 
3960 /*
3961  * Reading the PCIe VPD is inconsistent - that is, sometimes causes
3962  * problems on (mostly) X64, though we've also seen problems w/ Sparc
3963  * and Tavor --- so, for now until it's root caused, don't try and
3964  * read it
3965  */
3966 #ifdef HERMON_VPD_WORKS
3967 			hermon_pci_capability_vpd(state, hdl, offset);
3968 #else
3969 			delay(100);
3970 			hermon_pci_capability_vpd(state, hdl, offset);
3971 #endif
3972 			break;
3973 		case 0x10:
3974 			/*
3975 			 * PCI Express Capability - save offset & contents
3976 			 * for later in reset
3977 			 */
3978 			state->hs_pci_cap_offset = offset;
3979 			data32 = pci_config_get32(hdl,
3980 			    offset + HERMON_PCI_CAP_DEV_OFFS);
3981 			state->hs_pci_cap_devctl = data32;
3982 			data32 = pci_config_get32(hdl,
3983 			    offset + HERMON_PCI_CAP_LNK_OFFS);
3984 			state->hs_pci_cap_lnkctl = data32;
3985 			break;
3986 		case 0x11:
3987 			/*
3988 			 * MSIX support - nothing to do, taken care of in the
3989 			 * MSI/MSIX interrupt frameworkd
3990 			 */
3991 			break;
3992 		default:
3993 			/* just go on to the next */
3994 			break;
3995 		}
3996 
3997 		/* Get offset of next entry in list */
3998 		offset = pci_config_get8(hdl, offset + 1);
3999 	}
4000 
4001 	return (DDI_SUCCESS);
4002 }
4003 
4004 /*
4005  * hermon_pci_read_vpd()
4006  *    Context: Only called from attach() path context
4007  *    utility routine for hermon_pci_capability_vpd()
4008  */
4009 static int
4010 hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
4011     uint32_t *data)
4012 {
4013 	int		retry = 40;  /* retry counter for EEPROM poll */
4014 	uint32_t	val;
4015 	int		vpd_addr = offset + 2;
4016 	int		vpd_data = offset + 4;
4017 
4018 	/*
4019 	 * In order to read a 32-bit value from VPD, we are to write down
4020 	 * the address (offset in the VPD itself) to the address register.
4021 	 * To signal the read, we also clear bit 31.  We then poll on bit 31
4022 	 * and when it is set, we can then read our 4 bytes from the data
4023 	 * register.
4024 	 */
4025 	(void) pci_config_put32(hdl, offset, addr << 16);
4026 	do {
4027 		drv_usecwait(1000);
4028 		val = pci_config_get16(hdl, vpd_addr);
4029 		if (val & 0x8000) {		/* flag bit set */
4030 			*data = pci_config_get32(hdl, vpd_data);
4031 			return (DDI_SUCCESS);
4032 		}
4033 	} while (--retry);
4034 	/* read of flag failed write one message but count the failures */
4035 	if (debug_vpd == 0)
4036 		cmn_err(CE_NOTE,
4037 		    "!Failed to see flag bit after VPD addr write\n");
4038 	debug_vpd++;
4039 
4040 
4041 vpd_read_fail:
4042 	return (DDI_FAILURE);
4043 }
4044 
4045 
4046 
4047 /*
4048  *   hermon_pci_capability_vpd()
4049  *    Context: Only called from attach() path context
4050  */
4051 static void
4052 hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
4053     uint_t offset)
4054 {
4055 	uint8_t			name_length;
4056 	uint8_t			pn_length;
4057 	int			i, err = 0;
4058 	int			vpd_str_id = 0;
4059 	int			vpd_ro_desc;
4060 	int			vpd_ro_pn_desc;
4061 #ifdef _BIG_ENDIAN
4062 	uint32_t		data32;
4063 #endif /* _BIG_ENDIAN */
4064 	union {
4065 		uint32_t	vpd_int[HERMON_VPD_HDR_DWSIZE];
4066 		uchar_t		vpd_char[HERMON_VPD_HDR_BSIZE];
4067 	} vpd;
4068 
4069 
4070 	/*
4071 	 * Read in the Vital Product Data (VPD) to the extend needed
4072 	 * by the fwflash utility
4073 	 */
4074 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4075 		err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
4076 		if (err != DDI_SUCCESS) {
4077 			cmn_err(CE_NOTE, "!VPD read failed\n");
4078 			goto out;
4079 		}
4080 	}
4081 
4082 #ifdef _BIG_ENDIAN
4083 	/* Need to swap bytes for big endian. */
4084 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4085 		data32 = vpd.vpd_int[i];
4086 		vpd.vpd_char[(i << 2) + 3] =
4087 		    (uchar_t)((data32 & 0xFF000000) >> 24);
4088 		vpd.vpd_char[(i << 2) + 2] =
4089 		    (uchar_t)((data32 & 0x00FF0000) >> 16);
4090 		vpd.vpd_char[(i << 2) + 1] =
4091 		    (uchar_t)((data32 & 0x0000FF00) >> 8);
4092 		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
4093 	}
4094 #endif	/* _BIG_ENDIAN */
4095 
4096 	/* Check for VPD String ID Tag */
4097 	if (vpd.vpd_char[vpd_str_id] == 0x82) {
4098 		/* get the product name */
4099 		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
4100 		if (name_length > sizeof (state->hs_hca_name)) {
4101 			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
4102 			    name_length);
4103 			goto out;
4104 		}
4105 		(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
4106 		    name_length);
4107 		state->hs_hca_name[name_length] = 0;
4108 
4109 		/* get the part number */
4110 		vpd_ro_desc = name_length + 3; /* read-only tag location */
4111 		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
4112 
4113 		/* Verify read-only tag and Part Number keyword. */
4114 		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
4115 		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
4116 		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
4117 			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
4118 			goto out;
4119 		}
4120 
4121 		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
4122 		if (pn_length > sizeof (state->hs_hca_pn)) {
4123 			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
4124 			    name_length);
4125 			goto out;
4126 		}
4127 		(void) memcpy(state->hs_hca_pn,
4128 		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
4129 		    pn_length);
4130 		state->hs_hca_pn[pn_length] = 0;
4131 		state->hs_hca_pn_len = pn_length;
4132 		cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
4133 	} else {
4134 		/* Wrong VPD String ID Tag */
4135 		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
4136 		    vpd.vpd_char[0]);
4137 		goto out;
4138 	}
4139 	return;
4140 out:
4141 	state->hs_hca_pn_len = 0;
4142 }
4143 
4144 
4145 
4146 /*
4147  * hermon_intr_or_msi_init()
4148  *    Context: Only called from attach() path context
4149  */
4150 static int
4151 hermon_intr_or_msi_init(hermon_state_t *state)
4152 {
4153 	int	status;
4154 
4155 
4156 	/* Query for the list of supported interrupt event types */
4157 	status = ddi_intr_get_supported_types(state->hs_dip,
4158 	    &state->hs_intr_types_avail);
4159 	if (status != DDI_SUCCESS) {
4160 		return (DDI_FAILURE);
4161 	}
4162 
4163 	/*
4164 	 * If Hermon supports MSI-X in this system (and, if it
4165 	 * hasn't been overridden by a configuration variable), then
4166 	 * the default behavior is to use a single MSI-X.  Otherwise,
4167 	 * fallback to using legacy interrupts.  Also, if MSI-X is chosen,
4168 	 * but fails for whatever reasons, then next try MSI
4169 	 */
4170 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4171 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4172 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSIX);
4173 		if (status == DDI_SUCCESS) {
4174 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSIX;
4175 			return (DDI_SUCCESS);
4176 		}
4177 	}
4178 
4179 	/*
4180 	 * If Hermon supports MSI in this system (and, if it
4181 	 * hasn't been overridden by a configuration variable), then
4182 	 * the default behavior is to use a single MSIX.  Otherwise,
4183 	 * fallback to using legacy interrupts.  Also, if MSI is chosen,
4184 	 * but fails for whatever reasons, then fallback to using legacy
4185 	 * interrupts.
4186 	 */
4187 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4188 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSI)) {
4189 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSI);
4190 		if (status == DDI_SUCCESS) {
4191 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSI;
4192 			return (DDI_SUCCESS);
4193 		}
4194 	}
4195 
4196 	/*
4197 	 * MSI interrupt allocation failed, or was not available.  Fallback to
4198 	 * legacy interrupt support.
4199 	 */
4200 	if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
4201 		status = hermon_add_intrs(state, DDI_INTR_TYPE_FIXED);
4202 		if (status == DDI_SUCCESS) {
4203 			state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
4204 			return (DDI_SUCCESS);
4205 		}
4206 	}
4207 
4208 	/*
4209 	 * None of MSI, MSI-X, nor legacy interrupts were successful.
4210 	 * Return failure.
4211 	 */
4212 	return (DDI_FAILURE);
4213 }
4214 
4215 /*
4216  * hermon_add_intrs()
4217  *    Context: Only called from attach() patch context
4218  */
4219 static int
4220 hermon_add_intrs(hermon_state_t *state, int intr_type)
4221 {
4222 	int	status;
4223 
4224 
4225 	/* Get number of interrupts/MSI supported */
4226 	status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
4227 	    &state->hs_intrmsi_count);
4228 	if (status != DDI_SUCCESS) {
4229 		return (DDI_FAILURE);
4230 	}
4231 
4232 	/* Get number of available interrupts/MSI */
4233 	status = ddi_intr_get_navail(state->hs_dip, intr_type,
4234 	    &state->hs_intrmsi_avail);
4235 	if (status != DDI_SUCCESS) {
4236 		return (DDI_FAILURE);
4237 	}
4238 
4239 	/* Ensure that we have at least one (1) usable MSI or interrupt */
4240 	if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) {
4241 		return (DDI_FAILURE);
4242 	}
4243 
4244 	/* Attempt to allocate the maximum #interrupt/MSI handles */
4245 	status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
4246 	    intr_type, 0, min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
4247 	    &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
4248 	if (status != DDI_SUCCESS) {
4249 		return (DDI_FAILURE);
4250 	}
4251 
4252 	/* Ensure that we have allocated at least one (1) MSI or interrupt */
4253 	if (state->hs_intrmsi_allocd < 1) {
4254 		return (DDI_FAILURE);
4255 	}
4256 	state->hs_eq_dist = state->hs_intrmsi_allocd - 1; /* start at 0 */
4257 
4258 	/*
4259 	 * Extract the priority for the allocated interrupt/MSI.  This
4260 	 * will be used later when initializing certain mutexes.
4261 	 */
4262 	status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
4263 	    &state->hs_intrmsi_pri);
4264 	if (status != DDI_SUCCESS) {
4265 		/* Free the allocated interrupt/MSI handle */
4266 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4267 
4268 		return (DDI_FAILURE);
4269 	}
4270 
4271 	/* Make sure the interrupt/MSI priority is below 'high level' */
4272 	if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
4273 		/* Free the allocated interrupt/MSI handle */
4274 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4275 
4276 		return (DDI_FAILURE);
4277 	}
4278 
4279 	/* Get add'l capability information regarding interrupt/MSI */
4280 	status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
4281 	    &state->hs_intrmsi_cap);
4282 	if (status != DDI_SUCCESS) {
4283 		/* Free the allocated interrupt/MSI handle */
4284 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4285 
4286 		return (DDI_FAILURE);
4287 	}
4288 
4289 	return (DDI_SUCCESS);
4290 }
4291 
4292 
4293 /*
4294  * hermon_intr_or_msi_fini()
4295  *    Context: Only called from attach() and/or detach() path contexts
4296  */
4297 static int
4298 hermon_intr_or_msi_fini(hermon_state_t *state)
4299 {
4300 	int	status;
4301 	int	intr;
4302 
4303 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
4304 
4305 		/* Free the allocated interrupt/MSI handle */
4306 		status = ddi_intr_free(state->hs_intrmsi_hdl[intr]);
4307 		if (status != DDI_SUCCESS) {
4308 			return (DDI_FAILURE);
4309 		}
4310 	}
4311 	return (DDI_SUCCESS);
4312 }
4313 
4314 
4315 /*ARGSUSED*/
4316 void
4317 hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
4318     uint_t offset)
4319 {
4320 	uint32_t	msix_data;
4321 	uint16_t	msg_cntr;
4322 	uint32_t	t_offset;	/* table offset */
4323 	uint32_t	t_bir;
4324 	uint32_t	p_offset;	/* pba */
4325 	uint32_t	p_bir;
4326 	int		t_size;		/* size in entries - each is 4 dwords */
4327 
4328 	/* come in with offset pointing at the capability structure */
4329 
4330 	msix_data = pci_config_get32(hdl, offset);
4331 	cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
4332 	msg_cntr =  pci_config_get16(hdl, offset+2);
4333 	cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
4334 	offset += 4;
4335 	msix_data = pci_config_get32(hdl, offset);	/* table info */
4336 	t_offset = (msix_data & 0xFFF8) >> 3;
4337 	t_bir = msix_data & 0x07;
4338 	offset += 4;
4339 	cmn_err(CE_CONT, "  table %X --offset = %X, bir(bar) = %X\n",
4340 	    msix_data, t_offset, t_bir);
4341 	msix_data = pci_config_get32(hdl, offset);	/* PBA info */
4342 	p_offset = (msix_data & 0xFFF8) >> 3;
4343 	p_bir = msix_data & 0x07;
4344 
4345 	cmn_err(CE_CONT, "  PBA   %X --offset = %X, bir(bar) = %X\n",
4346 	    msix_data, p_offset, p_bir);
4347 	t_size = msg_cntr & 0x7FF;		/* low eleven bits */
4348 	cmn_err(CE_CONT, "    table size = %X entries\n", t_size);
4349 
4350 	offset = t_offset;		/* reuse this for offset from BAR */
4351 #ifdef HERMON_SUPPORTS_MSIX_BAR
4352 	cmn_err(CE_CONT, "First 2 table entries behind BAR2 \n");
4353 	for (i = 0; i < 2; i++) {
4354 		for (j = 0; j < 4; j++, offset += 4) {
4355 			msix_data = ddi_get32(state->hs_reg_msihdl,
4356 			    (uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
4357 			    + offset));
4358 			cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
4359 			    i, j, msix_data);
4360 		}
4361 	}
4362 #endif
4363 
4364 }
4365 
4366 /*
4367  * X86 fastreboot support functions.
4368  * These functions are used to save/restore MSI-X table/PBA and also
4369  * to disable MSI-X interrupts in hermon_quiesce().
4370  */
4371 
4372 /* Return the message control for MSI-X */
4373 static ushort_t
4374 get_msix_ctrl(dev_info_t *dip)
4375 {
4376 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4377 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4378 	    DEVI(dip)->devi_instance);
4379 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4380 	ASSERT(pci_cfg_hdl != NULL);
4381 
4382 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4383 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4384 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4385 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4386 			return (0);
4387 	}
4388 	ASSERT(msix_ctrl != 0);
4389 
4390 	return (msix_ctrl);
4391 }
4392 
4393 /* Return the MSI-X table size */
4394 static size_t
4395 get_msix_tbl_size(dev_info_t *dip)
4396 {
4397 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4398 	ASSERT(msix_ctrl != 0);
4399 
4400 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4401 	    PCI_MSIX_VECTOR_SIZE);
4402 }
4403 
4404 /* Return the MSI-X PBA size */
4405 static size_t
4406 get_msix_pba_size(dev_info_t *dip)
4407 {
4408 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4409 	ASSERT(msix_ctrl != 0);
4410 
4411 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8);
4412 }
4413 
4414 /* Set up the MSI-X table/PBA save area */
4415 static void
4416 hermon_set_msix_info(hermon_state_t *state)
4417 {
4418 	uint_t			rnumber, breg, nregs;
4419 	ushort_t		caps_ctrl, msix_ctrl;
4420 	pci_regspec_t		*rp;
4421 	int			reg_size, addr_space, offset, *regs_list, i;
4422 
4423 	/*
4424 	 * MSI-X BIR Index Table:
4425 	 * BAR indicator register (BIR) to Base Address register.
4426 	 */
4427 	uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
4428 	    0x20, 0x24, 0xff, 0xff};
4429 
4430 	/* Fastreboot data access  attribute */
4431 	ddi_device_acc_attr_t	dev_attr = {
4432 		0,				/* version */
4433 		DDI_STRUCTURE_LE_ACC,
4434 		DDI_STRICTORDER_ACC,		/* attr access */
4435 		0
4436 	};
4437 
4438 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4439 	ASSERT(pci_cfg_hdl != NULL);
4440 
4441 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4442 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4443 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4444 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4445 			return;
4446 	}
4447 	ASSERT(msix_ctrl != 0);
4448 
4449 	state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4450 	    PCI_MSIX_TBL_OFFSET);
4451 
4452 	/* Get the BIR for MSI-X table */
4453 	breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
4454 	    PCI_MSIX_TBL_BIR_MASK];
4455 	ASSERT(breg != 0xFF);
4456 
4457 	/* Set the MSI-X table offset */
4458 	state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
4459 	    ~PCI_MSIX_TBL_BIR_MASK;
4460 
4461 	/* Set the MSI-X table size */
4462 	state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4463 	    PCI_MSIX_VECTOR_SIZE;
4464 
4465 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
4466 	    DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
4467 	    DDI_PROP_SUCCESS) {
4468 		return;
4469 	}
4470 	reg_size = sizeof (pci_regspec_t) / sizeof (int);
4471 
4472 	/* Check the register number for MSI-X table */
4473 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4474 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4475 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4476 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4477 
4478 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4479 		    (addr_space == PCI_ADDR_MEM64))) {
4480 			rnumber = i;
4481 			break;
4482 		}
4483 	}
4484 	ASSERT(rnumber != 0);
4485 	state->hs_msix_tbl_rnumber = rnumber;
4486 
4487 	/* Set device attribute version and access according to Hermon FM */
4488 	dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
4489 	dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);
4490 
4491 	/* Map the entire MSI-X vector table */
4492 	if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
4493 	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
4494 	    state->hs_msix_tbl_size, &dev_attr,
4495 	    &state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
4496 		return;
4497 	}
4498 
4499 	state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4500 	    PCI_MSIX_PBA_OFFSET);
4501 
4502 	/* Get the BIR for MSI-X PBA */
4503 	breg = pci_msix_bir_index[state->hs_msix_pba_offset &
4504 	    PCI_MSIX_PBA_BIR_MASK];
4505 	ASSERT(breg != 0xFF);
4506 
4507 	/* Set the MSI-X PBA offset */
4508 	state->hs_msix_pba_offset = state->hs_msix_pba_offset &
4509 	    ~PCI_MSIX_PBA_BIR_MASK;
4510 
4511 	/* Set the MSI-X PBA size */
4512 	state->hs_msix_pba_size =
4513 	    ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;
4514 
4515 	/* Check the register number for MSI-X PBA */
4516 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4517 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4518 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4519 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4520 
4521 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4522 		    (addr_space == PCI_ADDR_MEM64))) {
4523 			rnumber = i;
4524 			break;
4525 		}
4526 	}
4527 	ASSERT(rnumber != 0);
4528 	state->hs_msix_pba_rnumber = rnumber;
4529 
4530 	/* Map in the MSI-X Pending Bit Array */
4531 	if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
4532 	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
4533 	    state->hs_msix_pba_size, &dev_attr,
4534 	    &state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
4535 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
4536 		state->hs_fm_msix_tblhdl = NULL;
4537 		return;
4538 	}
4539 
4540 	/* Set the MSI-X table save area */
4541 	state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
4542 	    KM_SLEEP);
4543 
4544 	/* Set the MSI-X PBA save area */
4545 	state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
4546 	    KM_SLEEP);
4547 }
4548 
4549 /* Disable Hermon interrupts */
4550 static int
4551 hermon_intr_disable(hermon_state_t *state)
4552 {
4553 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4554 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4555 	ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
4556 	int i, j;
4557 	ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
4558 	ASSERT(state->hs_intr_types_avail &
4559 	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));
4560 
4561 	/*
4562 	 * Check if MSI-X interrupts are used. If so, disable MSI-X interupts.
4563 	 * If not, since Hermon doesn't support MSI interrupts, assuming the
4564 	 * legacy interrupt is used instead, disable the legacy interrupt.
4565 	 */
4566 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4567 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4568 
4569 		if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4570 		    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4571 			if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL,
4572 			    caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4573 				return (DDI_FAILURE);
4574 		}
4575 		ASSERT(msix_ctrl != 0);
4576 
4577 		if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
4578 			return (DDI_SUCCESS);
4579 
4580 		/* Clear all inums in MSI-X table */
4581 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4582 		    i += PCI_MSIX_VECTOR_SIZE) {
4583 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4584 				char *addr = state->hs_msix_tbl_addr + i + j;
4585 				ddi_put32(msix_tblhdl,
4586 				    (uint32_t *)(uintptr_t)addr, 0x0);
4587 			}
4588 		}
4589 
4590 		/* Disable MSI-X interrupts */
4591 		msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
4592 		PCI_CAP_PUT16(pci_cfg_hdl, NULL, caps_ctrl, PCI_MSIX_CTRL,
4593 		    msix_ctrl);
4594 
4595 	} else {
4596 		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
4597 		ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);
4598 
4599 		/* Disable the legacy interrupts */
4600 		cmdreg |= PCI_COMM_INTX_DISABLE;
4601 		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
4602 	}
4603 
4604 	return (DDI_SUCCESS);
4605 }
4606 
4607 /* Hermon quiesce(9F) entry */
4608 static int
4609 hermon_quiesce(dev_info_t *dip)
4610 {
4611 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4612 	    DEVI(dip)->devi_instance);
4613 	ddi_acc_handle_t pcihdl = hermon_get_pcihdl(state);
4614 	ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
4615 	ddi_acc_handle_t msix_tbl_hdl = hermon_get_msix_tblhdl(state);
4616 	ddi_acc_handle_t msix_pba_hdl = hermon_get_msix_pbahdl(state);
4617 	uint32_t sem, reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
4618 	uint64_t data64;
4619 	uint32_t data32;
4620 	int status, i, j, loopcnt;
4621 	uint_t offset;
4622 
4623 	ASSERT(state != NULL);
4624 
4625 	/* start fastreboot */
4626 	state->hs_quiescing = B_TRUE;
4627 
4628 	/* suppress Hermon FM ereports */
4629 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
4630 		hermon_clr_state_nolock(state, HCA_EREPORT_FM);
4631 	}
4632 
4633 	/* Shutdown HCA ports */
4634 	if (hermon_hca_ports_shutdown(state,
4635 	    state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
4636 		state->hs_quiescing = B_FALSE;
4637 		return (DDI_FAILURE);
4638 	}
4639 
4640 	/* Close HCA */
4641 	if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
4642 	    HERMON_CMD_SUCCESS) {
4643 		state->hs_quiescing = B_FALSE;
4644 		return (DDI_FAILURE);
4645 	}
4646 
4647 	/* Disable interrupts */
4648 	if (hermon_intr_disable(state) != DDI_SUCCESS) {
4649 		state->hs_quiescing = B_FALSE;
4650 		return (DDI_FAILURE);
4651 	}
4652 
4653 	/*
4654 	 * Query the PCI capabilities of the HCA device, but don't process
4655 	 * the VPD until after reset.
4656 	 */
4657 	if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
4658 		state->hs_quiescing = B_FALSE;
4659 		return (DDI_FAILURE);
4660 	}
4661 
4662 	/*
4663 	 * Read all PCI config info (reg0...reg63).  Note: According to the
4664 	 * Hermon software reset application note, we should not read or
4665 	 * restore the values in reg22 and reg23.
4666 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
4667 	 * register LAST, and technically, you need to restore the
4668 	 * PCIE Capability "device control" and "link control" (word-sized,
4669 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
4670 	 * We hold off restoring the command register - offset 0x4 - till last
4671 	 */
4672 
4673 	/* 1st, wait for the semaphore assure accessibility - per PRM */
4674 	status = -1;
4675 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
4676 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
4677 		if (sem == 0) {
4678 			status = 0;
4679 			break;
4680 		}
4681 		drv_usecwait(1);
4682 	}
4683 
4684 	/* Check if timeout happens */
4685 	if (status == -1) {
4686 		state->hs_quiescing = B_FALSE;
4687 		return (DDI_FAILURE);
4688 	}
4689 
4690 	/* MSI-X interrupts are used, save the MSI-X table */
4691 	if (msix_tbl_hdl && msix_pba_hdl) {
4692 		/* save MSI-X table */
4693 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4694 		    i += PCI_MSIX_VECTOR_SIZE) {
4695 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4696 				char *addr = state->hs_msix_tbl_addr + i + j;
4697 				data32 = ddi_get32(msix_tbl_hdl,
4698 				    (uint32_t *)(uintptr_t)addr);
4699 				*(uint32_t *)(uintptr_t)(state->
4700 				    hs_msix_tbl_entries + i + j) = data32;
4701 			}
4702 		}
4703 		/* save MSI-X PBA */
4704 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4705 			char *addr = state->hs_msix_pba_addr + i;
4706 			data64 = ddi_get64(msix_pba_hdl,
4707 			    (uint64_t *)(uintptr_t)addr);
4708 			*(uint64_t *)(uintptr_t)(state->
4709 			    hs_msix_pba_entries + i) = data64;
4710 		}
4711 	}
4712 
4713 	/* save PCI config space */
4714 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4715 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4716 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4717 			state->hs_cfg_data[i]  =
4718 			    pci_config_get32(pcihdl, i << 2);
4719 		}
4720 	}
4721 
4722 	/* SW-reset HCA */
4723 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
4724 
4725 	/*
4726 	 * This delay is required so as not to cause a panic here. If the
4727 	 * device is accessed too soon after reset it will not respond to
4728 	 * config cycles, causing a Master Abort and panic.
4729 	 */
4730 	drv_usecwait(reset_delay);
4731 
4732 	/* Poll waiting for the device to finish resetting */
4733 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
4734 	while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
4735 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
4736 		if (--loopcnt == 0)
4737 			break;	/* just in case, break and go on */
4738 	}
4739 	if (loopcnt == 0) {
4740 		state->hs_quiescing = B_FALSE;
4741 		return (DDI_FAILURE);
4742 	}
4743 
4744 	/* Restore the config info */
4745 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4746 		if (i == 1) continue;	/* skip the status/ctrl reg */
4747 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4748 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4749 			pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
4750 		}
4751 	}
4752 
4753 	/* If MSI-X interrupts are used, restore the MSI-X table */
4754 	if (msix_tbl_hdl && msix_pba_hdl) {
4755 		/* restore MSI-X PBA */
4756 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4757 			char *addr = state->hs_msix_pba_addr + i;
4758 			data64 = *(uint64_t *)(uintptr_t)
4759 			    (state->hs_msix_pba_entries + i);
4760 			ddi_put64(msix_pba_hdl,
4761 			    (uint64_t *)(uintptr_t)addr, data64);
4762 		}
4763 		/* restore MSI-X table */
4764 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4765 		    i += PCI_MSIX_VECTOR_SIZE) {
4766 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4767 				char *addr = state->hs_msix_tbl_addr + i + j;
4768 				data32 = *(uint32_t *)(uintptr_t)
4769 				    (state->hs_msix_tbl_entries + i + j);
4770 				ddi_put32(msix_tbl_hdl,
4771 				    (uint32_t *)(uintptr_t)addr, data32);
4772 			}
4773 		}
4774 	}
4775 
4776 	/*
4777 	 * PCI Express Capability - we saved during capability list, and
4778 	 * we'll restore them here.
4779 	 */
4780 	offset = state->hs_pci_cap_offset;
4781 	data32 = state->hs_pci_cap_devctl;
4782 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
4783 	data32 = state->hs_pci_cap_lnkctl;
4784 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
4785 
4786 	/* restore the command register */
4787 	pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
4788 
4789 	return (DDI_SUCCESS);
4790 }
4791