xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon.c (revision c050a449559fc53eab7574bb8f3d5f4bd85211f3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * hermon.c
28  *    Hermon (InfiniBand) HCA Driver attach/detach Routines
29  *
30  *    Implements all the routines necessary for the attach, setup,
31  *    initialization (and subsequent possible teardown and detach) of the
32  *    Hermon InfiniBand HCA driver.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h>
43 #include <sys/pci.h>
44 #include <sys/pci_cap.h>
45 #include <sys/bitmap.h>
46 #include <sys/policy.h>
47 
48 #include <sys/ib/adapters/hermon/hermon.h>
49 
/* The following works around a problem in pre-2_7_000 firmware. */
#define	HERMON_FW_WORKAROUND

/*
 * Verbose-debug switch.  When non-zero, extra trace output is emitted
 * (for example the IBTF_DPRINTF_L2() call at the top of hermon_icm_alloc()).
 */
int hermon_verbose = 0;

/*
 * Hermon HCA State Pointer
 *    Opaque anchor for the per-instance soft state.  It is set up by
 *    ddi_soft_state_init() in _init() and torn down by ddi_soft_state_fini()
 *    in _fini(); every entry point looks its hermon_state_t up through it.
 */
void *hermon_statep;

/*
 * NOTE(review): debug_vpd is not referenced in this part of the file;
 * presumably it enables VPD debug output elsewhere -- confirm at the use site.
 */
int debug_vpd = 0;

/* Disable the internal error-check polling thread */
int hermon_no_inter_err_chk = 0;

/*
 * The Hermon "userland resource database" is common to instances of the
 * Hermon HCA driver.  This structure "hermon_userland_rsrc_db" contains all
 * the necessary information to maintain it.  Its "hdl_umapdb_lock" mutex is
 * taken explicitly by hermon_open() and hermon_close() so that their
 * multi-step lookups/updates are atomic.
 */
hermon_umap_db_t hermon_userland_rsrc_db;
69 
/*
 * Forward declarations.
 *
 * DDI/DKI entry points; these are wired into hermon_cb_ops and hermon_ops.
 */
static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
static int hermon_open(dev_t *, int, int, cred_t *);
static int hermon_close(dev_t, int, int, cred_t *);
static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

/* Driver-level init/fini, called from the attach and detach paths */
static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
    int instance);
static void hermon_drv_fini(hermon_state_t *state);
static void hermon_drv_fini2(hermon_state_t *state);
static int hermon_isr_init(hermon_state_t *state);
static void hermon_isr_fini(hermon_state_t *state);

/* Hardware bring-up */
static int hermon_hw_init(hermon_state_t *state);

/* Hardware teardown and the individual setup/teardown helpers */
static void hermon_hw_fini(hermon_state_t *state,
    hermon_drv_cleanup_level_t cleanup);
static int hermon_soft_state_init(hermon_state_t *state);
static void hermon_soft_state_fini(hermon_state_t *state);
static int hermon_icm_config_setup(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static void hermon_icm_tables_init(hermon_state_t *state);
static void hermon_icm_tables_fini(hermon_state_t *state);
static int hermon_icm_dma_init(hermon_state_t *state);
static void hermon_icm_dma_fini(hermon_state_t *state);
static void hermon_inithca_set(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static int hermon_hca_port_init(hermon_state_t *state);
static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
static int hermon_internal_uarpg_init(hermon_state_t *state);
static void hermon_internal_uarpg_fini(hermon_state_t *state);
static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
static int hermon_sw_reset(hermon_state_t *state);
static int hermon_mcg_init(hermon_state_t *state);
static void hermon_mcg_fini(hermon_state_t *state);
static int hermon_fw_version_check(hermon_state_t *state);
static void hermon_device_info_report(hermon_state_t *state);
/* PCI capability / VPD helpers and interrupt (fixed or MSI) setup */
static int hermon_pci_capability_list(hermon_state_t *state,
    ddi_acc_handle_t hdl);
static void hermon_pci_capability_vpd(hermon_state_t *state,
    ddi_acc_handle_t hdl, uint_t offset);
static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
    uint32_t addr, uint32_t *data);
static int hermon_intr_or_msi_init(hermon_state_t *state);
static int hermon_add_intrs(hermon_state_t *state, int intr_type);
static int hermon_intr_or_msi_fini(hermon_state_t *state);
/* Note: unlike its neighbours, this one is declared with external linkage */
void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
    uint_t offset);

static uint64_t hermon_size_icm(hermon_state_t *state);

/* X86 fastreboot support */
static ushort_t get_msix_ctrl(dev_info_t *);
static size_t get_msix_tbl_size(dev_info_t *);
static size_t get_msix_pba_size(dev_info_t *);
static void hermon_set_msix_info(hermon_state_t *);
static int hermon_intr_disable(hermon_state_t *);
static int hermon_quiesce(dev_info_t *);
129 
130 
/*
 * Character/Block Operations
 *    Positional initializer: the trailing comment on each line names the
 *    cb_ops slot being filled.  Only open, close, ioctl and devmap are
 *    backed by driver routines (hermon_ioctl and hermon_devmap are not
 *    defined in this part of the file); the remaining slots use the
 *    nodev/nochpoll/ddi_prop_op/NULL placeholders.
 */
static struct cb_ops hermon_cb_ops = {
	hermon_open,		/* open */
	hermon_close,		/* close */
	nodev,			/* strategy (block) */
	nodev,			/* print (block) */
	nodev,			/* dump (block) */
	nodev,			/* read */
	nodev,			/* write */
	hermon_ioctl,		/* ioctl */
	hermon_devmap,		/* devmap */
	NULL,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streams */
	D_NEW | D_MP |
	D_64BIT | D_HOTPLUG |
	D_DEVMAP,		/* flags */
	CB_REV			/* rev */
};
152 
/*
 * Driver Operations
 *    Positional initializer for the dev_ops vector.  It supplies the
 *    getinfo/attach/detach/quiesce entry points defined in this file and
 *    points at hermon_cb_ops for the character-device operations.
 */
static struct dev_ops hermon_ops = {
	DEVO_REV,		/* struct rev */
	0,			/* refcnt */
	hermon_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	hermon_attach,		/* attach */
	hermon_detach,		/* detach */
	nodev,			/* reset */
	&hermon_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	nodev,			/* power */
	hermon_quiesce,		/* devo_quiesce */
};
168 
/*
 * Module Driver Info
 *    Ties the generic driver module ops (mod_driverops) and the
 *    description string to this driver's dev_ops vector (hermon_ops).
 */
static struct modldrv hermon_modldrv = {
	&mod_driverops,
	"ConnectX IB Driver",
	&hermon_ops
};
175 
/*
 * Module Linkage
 *    NULL-terminated linkage list containing the single hermon_modldrv
 *    element.  This is the object handed to ibc_init()/ibc_fini() and to
 *    mod_install()/mod_info()/mod_remove() by _init(), _info() and _fini().
 */
static struct modlinkage hermon_modlinkage = {
	MODREV_1,
	&hermon_modldrv,
	NULL
};
182 
/*
 * This extern refers to the ibc_operations_t function vector that is defined
 * in the hermon_ci.c file.  It is not referenced in the portion of this file
 * shown above hermon_icm_alloc().
 */
extern ibc_operations_t	hermon_ibc_ops;
188 
189 /*
190  * _init()
191  */
192 int
193 _init()
194 {
195 	int	status;
196 
197 	status = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
198 	    (size_t)HERMON_INITIAL_STATES);
199 	if (status != 0) {
200 		return (status);
201 	}
202 
203 	status = ibc_init(&hermon_modlinkage);
204 	if (status != 0) {
205 		ddi_soft_state_fini(&hermon_statep);
206 		return (status);
207 	}
208 
209 	status = mod_install(&hermon_modlinkage);
210 	if (status != 0) {
211 		ibc_fini(&hermon_modlinkage);
212 		ddi_soft_state_fini(&hermon_statep);
213 		return (status);
214 	}
215 
216 	/* Initialize the Hermon "userland resources database" */
217 	hermon_umap_db_init();
218 
219 	return (status);
220 }
221 
222 
223 /*
224  * _info()
225  */
226 int
227 _info(struct modinfo *modinfop)
228 {
229 	int	status;
230 
231 	status = mod_info(&hermon_modlinkage, modinfop);
232 	return (status);
233 }
234 
235 
236 /*
237  * _fini()
238  */
239 int
240 _fini()
241 {
242 	int	status;
243 
244 	status = mod_remove(&hermon_modlinkage);
245 	if (status != 0) {
246 		return (status);
247 	}
248 
249 	/* Destroy the Hermon "userland resources database" */
250 	hermon_umap_db_fini();
251 
252 	ibc_fini(&hermon_modlinkage);
253 	ddi_soft_state_fini(&hermon_statep);
254 
255 	return (status);
256 }
257 
258 
259 /*
260  * hermon_getinfo()
261  */
262 /* ARGSUSED */
263 static int
264 hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
265 {
266 	dev_t		dev;
267 	hermon_state_t 	*state;
268 	minor_t		instance;
269 
270 	switch (cmd) {
271 	case DDI_INFO_DEVT2DEVINFO:
272 		dev = (dev_t)arg;
273 		instance = HERMON_DEV_INSTANCE(dev);
274 		state = ddi_get_soft_state(hermon_statep, instance);
275 		if (state == NULL) {
276 			return (DDI_FAILURE);
277 		}
278 		*result = (void *)state->hs_dip;
279 		return (DDI_SUCCESS);
280 
281 	case DDI_INFO_DEVT2INSTANCE:
282 		dev = (dev_t)arg;
283 		instance = HERMON_DEV_INSTANCE(dev);
284 		*result = (void *)(uintptr_t)instance;
285 		return (DDI_SUCCESS);
286 
287 	default:
288 		break;
289 	}
290 
291 	return (DDI_FAILURE);
292 }
293 
294 
295 /*
296  * hermon_open()
297  */
298 /* ARGSUSED */
299 static int
300 hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
301 {
302 	hermon_state_t		*state;
303 	hermon_rsrc_t 		*rsrcp;
304 	hermon_umap_db_entry_t	*umapdb, *umapdb2;
305 	minor_t			instance;
306 	uint64_t		key, value;
307 	uint_t			hr_indx;
308 	dev_t			dev;
309 	int			status;
310 
311 	instance = HERMON_DEV_INSTANCE(*devp);
312 	state = ddi_get_soft_state(hermon_statep, instance);
313 	if (state == NULL) {
314 		return (ENXIO);
315 	}
316 
317 	/*
318 	 * Only allow driver to be opened for character access, and verify
319 	 * whether exclusive access is allowed.
320 	 */
321 	if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
322 	    secpolicy_excl_open(credp) != 0)) {
323 		return (EINVAL);
324 	}
325 
326 	/*
327 	 * Search for the current process PID in the "userland resources
328 	 * database".  If it is not found, then attempt to allocate a UAR
329 	 * page and add the ("key", "value") pair to the database.
330 	 * Note:  As a last step we always return a devp appropriate for
331 	 * the open.  Either we return a new minor number (based on the
332 	 * instance and the UAR page index) or we return the current minor
333 	 * number for the given client process.
334 	 *
335 	 * We also add an entry to the database to allow for lookup from
336 	 * "dev_t" to the current process PID.  This is necessary because,
337 	 * under certain circumstance, the process PID that calls the Hermon
338 	 * close() entry point may not be the same as the one who called
339 	 * open().  Specifically, this can happen if a child process calls
340 	 * the Hermon's open() entry point, gets a UAR page, maps it out (using
341 	 * mmap()), and then exits without calling munmap().  Because mmap()
342 	 * adds a reference to the file descriptor, at the exit of the child
343 	 * process the file descriptor is "inherited" by the parent (and will
344 	 * be close()'d by the parent's PID only when it exits).
345 	 *
346 	 * Note: We use the hermon_umap_db_find_nolock() and
347 	 * hermon_umap_db_add_nolock() database access routines below (with
348 	 * an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
349 	 * to ensure that the multiple accesses (in this case searching for,
350 	 * and then adding _two_ database entries) can be done atomically.
351 	 */
352 	key = ddi_get_pid();
353 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
354 	status = hermon_umap_db_find_nolock(instance, key,
355 	    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
356 	if (status != DDI_SUCCESS) {
357 		/*
358 		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
359 		 * But we still need some rsrcp value, and a mostly unique
360 		 * hr_indx value.  So we set rsrcp to NULL for maintenance
361 		 * mode, and use a rolling count for hr_indx.  The field
362 		 * 'hs_open_hr_indx' is used only in this maintenance mode
363 		 * condition.
364 		 *
365 		 * Otherwise, if we are in operational mode then we allocate
366 		 * the UAR page as normal, and use the rsrcp value and tr_indx
367 		 * value from that allocation.
368 		 */
369 		if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
370 			rsrcp = NULL;
371 			hr_indx = state->hs_open_ar_indx++;
372 		} else {
373 			/* Allocate a new UAR page for this process */
374 			status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
375 			    HERMON_NOSLEEP, &rsrcp);
376 			if (status != DDI_SUCCESS) {
377 				mutex_exit(
378 				    &hermon_userland_rsrc_db.hdl_umapdb_lock);
379 				return (EAGAIN);
380 			}
381 
382 			hr_indx = rsrcp->hr_indx;
383 		}
384 
385 		/*
386 		 * Allocate an entry to track the UAR page resource in the
387 		 * "userland resources database".
388 		 */
389 		umapdb = hermon_umap_db_alloc(instance, key,
390 		    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
391 		if (umapdb == NULL) {
392 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
393 			/* If in "maintenance mode", don't free the rsrc */
394 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
395 				hermon_rsrc_free(state, &rsrcp);
396 			}
397 			return (EAGAIN);
398 		}
399 
400 		/*
401 		 * Create a new device number.  Minor number is a function of
402 		 * the UAR page index (15 bits) and the device instance number
403 		 * (3 bits).
404 		 */
405 		dev = makedevice(getmajor(*devp), (hr_indx <<
406 		    HERMON_MINORNUM_SHIFT) | instance);
407 
408 		/*
409 		 * Allocate another entry in the "userland resources database"
410 		 * to track the association of the device number (above) to
411 		 * the current process ID (in "key").
412 		 */
413 		umapdb2 = hermon_umap_db_alloc(instance, dev,
414 		    MLNX_UMAP_PID_RSRC, (uint64_t)key);
415 		if (umapdb2 == NULL) {
416 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
417 			hermon_umap_db_free(umapdb);
418 			/* If in "maintenance mode", don't free the rsrc */
419 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
420 				hermon_rsrc_free(state, &rsrcp);
421 			}
422 			return (EAGAIN);
423 		}
424 
425 		/* Add the entries to the database */
426 		hermon_umap_db_add_nolock(umapdb);
427 		hermon_umap_db_add_nolock(umapdb2);
428 
429 	} else {
430 		/*
431 		 * Return the same device number as on the original open()
432 		 * call.  This was calculated as a function of the UAR page
433 		 * index (top 16 bits) and the device instance number
434 		 */
435 		rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
436 		dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
437 		    HERMON_MINORNUM_SHIFT) | instance);
438 	}
439 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
440 
441 	*devp = dev;
442 
443 	return (0);
444 }
445 
446 
447 /*
448  * hermon_close()
449  */
450 /* ARGSUSED */
451 static int
452 hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
453 {
454 	hermon_state_t		*state;
455 	hermon_rsrc_t		*rsrcp;
456 	hermon_umap_db_entry_t	*umapdb;
457 	hermon_umap_db_priv_t	*priv;
458 	minor_t			instance;
459 	uint64_t		key, value;
460 	int			status, reset_status = 0;
461 
462 	instance = HERMON_DEV_INSTANCE(dev);
463 	state = ddi_get_soft_state(hermon_statep, instance);
464 	if (state == NULL) {
465 		return (ENXIO);
466 	}
467 
468 	/*
469 	 * Search for "dev_t" in the "userland resources database".  As
470 	 * explained above in hermon_open(), we can't depend on using the
471 	 * current process ID here to do the lookup because the process
472 	 * that ultimately closes may not be the same one who opened
473 	 * (because of inheritance).
474 	 * So we lookup the "dev_t" (which points to the PID of the process
475 	 * that opened), and we remove the entry from the database (and free
476 	 * it up).  Then we do another query based on the PID value.  And when
477 	 * we find that database entry, we free it up too and then free the
478 	 * Hermon UAR page resource.
479 	 *
480 	 * Note: We use the hermon_umap_db_find_nolock() database access
481 	 * routine below (with an explicit mutex_enter of the database lock)
482 	 * to ensure that the multiple accesses (which attempt to remove the
483 	 * two database entries) can be done atomically.
484 	 *
485 	 * This works the same in both maintenance mode and HCA mode, except
486 	 * for the call to hermon_rsrc_free().  In the case of maintenance mode,
487 	 * this call is not needed, as it was not allocated in hermon_open()
488 	 * above.
489 	 */
490 	key = dev;
491 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
492 	status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
493 	    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
494 	if (status == DDI_SUCCESS) {
495 		/*
496 		 * If the "hdb_priv" field is non-NULL, it indicates that
497 		 * some "on close" handling is still necessary.  Call
498 		 * hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
499 		 * to invoke all the registered callbacks).  Then free up
500 		 * the resources associated with "hdb_priv" and continue
501 		 * closing.
502 		 */
503 		priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
504 		if (priv != NULL) {
505 			reset_status = hermon_umap_db_handle_onclose_cb(priv);
506 			kmem_free(priv, sizeof (hermon_umap_db_priv_t));
507 			umapdb->hdbe_common.hdb_priv = (void *)NULL;
508 		}
509 
510 		hermon_umap_db_free(umapdb);
511 
512 		/*
513 		 * Now do another lookup using PID as the key (copy it from
514 		 * "value").  When this lookup is complete, the "value" field
515 		 * will contain the hermon_rsrc_t pointer for the UAR page
516 		 * resource.
517 		 */
518 		key = value;
519 		status = hermon_umap_db_find_nolock(instance, key,
520 		    MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
521 		    &umapdb);
522 		if (status == DDI_SUCCESS) {
523 			hermon_umap_db_free(umapdb);
524 			/* If in "maintenance mode", don't free the rsrc */
525 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
526 				rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
527 				hermon_rsrc_free(state, &rsrcp);
528 			}
529 		}
530 	}
531 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
532 	return (reset_status);
533 }
534 
535 
536 /*
537  * hermon_attach()
538  *    Context: Only called from attach() path context
539  */
540 static int
541 hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
542 {
543 	hermon_state_t	*state;
544 	ibc_clnt_hdl_t	tmp_ibtfpriv;
545 	ibc_status_t	ibc_status;
546 	int		instance;
547 	int		status;
548 
549 #ifdef __lock_lint
550 	(void) hermon_quiesce(dip);
551 #endif
552 
553 	switch (cmd) {
554 	case DDI_ATTACH:
555 		instance = ddi_get_instance(dip);
556 		status = ddi_soft_state_zalloc(hermon_statep, instance);
557 		if (status != DDI_SUCCESS) {
558 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
559 			    "attach_ssz_fail", instance);
560 			goto fail_attach_nomsg;
561 
562 		}
563 		state = ddi_get_soft_state(hermon_statep, instance);
564 		if (state == NULL) {
565 			ddi_soft_state_free(hermon_statep, instance);
566 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
567 			    "attach_gss_fail", instance);
568 			goto fail_attach_nomsg;
569 		}
570 
571 		/* clear the attach error buffer */
572 		HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);
573 
574 		/* Save away devinfo and instance before hermon_fm_init() */
575 		state->hs_dip = dip;
576 		state->hs_instance = instance;
577 
578 		hermon_fm_init(state);
579 
580 		/*
581 		 * Initialize Hermon driver and hardware.
582 		 *
583 		 * Note: If this initialization fails we may still wish to
584 		 * create a device node and remain operational so that Hermon
585 		 * firmware can be updated/flashed (i.e. "maintenance mode").
586 		 * If this is the case, then "hs_operational_mode" will be
587 		 * equal to HERMON_MAINTENANCE_MODE.  We will not attempt to
588 		 * attach to the IBTF or register with the IBMF (i.e. no
589 		 * InfiniBand interfaces will be enabled).
590 		 */
591 		status = hermon_drv_init(state, dip, instance);
592 		if ((status != DDI_SUCCESS) &&
593 		    (HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
594 			goto fail_attach;
595 		}
596 
597 		/*
598 		 * Change the Hermon FM mode
599 		 */
600 		if ((hermon_get_state(state) & HCA_PIO_FM) &&
601 		    HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
602 			/*
603 			 * Now we wait for 50ms to give an opportunity
604 			 * to Solaris FMA so that HW errors can be notified.
605 			 * Then check if there are HW errors or not. If
606 			 * a HW error is detected, the Hermon attachment
607 			 * must be failed.
608 			 */
609 			delay(drv_usectohz(50000));
610 			if (hermon_init_failure(state)) {
611 				hermon_drv_fini(state);
612 				HERMON_WARNING(state, "unable to "
613 				    "attach Hermon due to a HW error");
614 				HERMON_ATTACH_MSG(state->hs_attach_buf,
615 				    "hermon_attach_failure");
616 				goto fail_attach;
617 			}
618 
619 			/*
620 			 * There seems no HW errors during the attachment,
621 			 * so let's change the Hermon FM state to the
622 			 * ereport only mode.
623 			 */
624 			if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
625 				/* unwind the resources */
626 				hermon_drv_fini(state);
627 				HERMON_ATTACH_MSG(state->hs_attach_buf,
628 				    "hermon_attach_failure");
629 				goto fail_attach;
630 			}
631 		}
632 
633 		/* Create the minor node for device */
634 		status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
635 		    DDI_PSEUDO, 0);
636 		if (status != DDI_SUCCESS) {
637 			hermon_drv_fini(state);
638 			HERMON_ATTACH_MSG(state->hs_attach_buf,
639 			    "attach_create_mn_fail");
640 			goto fail_attach;
641 		}
642 
643 		/*
644 		 * If we are in "maintenance mode", then we don't want to
645 		 * register with the IBTF.  All InfiniBand interfaces are
646 		 * uninitialized, and the device is only capable of handling
647 		 * requests to update/flash firmware (or test/debug requests).
648 		 */
649 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
650 			cmn_err(CE_NOTE, "!Hermon is operational\n");
651 
652 			/* Attach to InfiniBand Transport Framework (IBTF) */
653 			ibc_status = ibc_attach(&tmp_ibtfpriv,
654 			    &state->hs_ibtfinfo);
655 			if (ibc_status != IBC_SUCCESS) {
656 				cmn_err(CE_CONT, "hermon_attach: ibc_attach "
657 				    "failed\n");
658 				ddi_remove_minor_node(dip, "devctl");
659 				hermon_drv_fini(state);
660 				HERMON_ATTACH_MSG(state->hs_attach_buf,
661 				    "attach_ibcattach_fail");
662 				goto fail_attach;
663 			}
664 
665 			/*
666 			 * Now that we've successfully attached to the IBTF,
667 			 * we enable all appropriate asynch and CQ events to
668 			 * be forwarded to the IBTF.
669 			 */
670 			HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
671 
672 			ibc_post_attach(state->hs_ibtfpriv);
673 
674 			/* Register agents with IB Mgmt Framework (IBMF) */
675 			status = hermon_agent_handlers_init(state);
676 			if (status != DDI_SUCCESS) {
677 				(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
678 				HERMON_QUIESCE_IBTF_CALLB(state);
679 				if (state->hs_in_evcallb != 0) {
680 					HERMON_WARNING(state, "unable to "
681 					    "quiesce Hermon IBTF callbacks");
682 				}
683 				ibc_detach(tmp_ibtfpriv);
684 				ddi_remove_minor_node(dip, "devctl");
685 				hermon_drv_fini(state);
686 				HERMON_ATTACH_MSG(state->hs_attach_buf,
687 				    "attach_agentinit_fail");
688 				goto fail_attach;
689 			}
690 		}
691 
692 		/* Report attach in maintenance mode, if appropriate */
693 		if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
694 			cmn_err(CE_NOTE, "hermon%d: driver attached "
695 			    "(for maintenance mode only)", state->hs_instance);
696 			hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
697 		}
698 
699 		/* Report that driver was loaded */
700 		ddi_report_dev(dip);
701 
702 		/* Send device information to log file */
703 		hermon_device_info_report(state);
704 
705 		/* DEBUG PRINT */
706 		cmn_err(CE_CONT, "!Hermon attach complete\n");
707 		return (DDI_SUCCESS);
708 
709 	case DDI_RESUME:
710 		/* Add code here for DDI_RESUME XXX */
711 		return (DDI_FAILURE);
712 
713 	default:
714 		cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
715 		break;
716 	}
717 
718 fail_attach:
719 	cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
720 	    state->hs_attach_buf);
721 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
722 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
723 	}
724 	hermon_drv_fini2(state);
725 	hermon_fm_fini(state);
726 	ddi_soft_state_free(hermon_statep, instance);
727 
728 fail_attach_nomsg:
729 	return (DDI_FAILURE);
730 }
731 
732 
733 /*
734  * hermon_detach()
735  *    Context: Only called from detach() path context
736  */
737 static int
738 hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
739 {
740 	hermon_state_t	*state;
741 	ibc_clnt_hdl_t	tmp_ibtfpriv;
742 	ibc_status_t	ibc_status;
743 	int		instance, status;
744 
745 	instance = ddi_get_instance(dip);
746 	state = ddi_get_soft_state(hermon_statep, instance);
747 	if (state == NULL) {
748 		return (DDI_FAILURE);
749 	}
750 
751 	switch (cmd) {
752 	case DDI_DETACH:
753 		/*
754 		 * If we are in "maintenance mode", then we do not want to
755 		 * do teardown for any of the InfiniBand interfaces.
756 		 * Specifically, this means not detaching from IBTF (we never
757 		 * attached to begin with) and not deregistering from IBMF.
758 		 */
759 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
760 			/* Unregister agents from IB Mgmt Framework (IBMF) */
761 			status = hermon_agent_handlers_fini(state);
762 			if (status != DDI_SUCCESS) {
763 				return (DDI_FAILURE);
764 			}
765 
766 			/*
767 			 * Attempt the "pre-detach" from InfiniBand Transport
768 			 * Framework (IBTF).  At this point the IBTF is still
769 			 * capable of handling incoming asynch and completion
770 			 * events.  This "pre-detach" is primarily a mechanism
771 			 * to notify the appropriate IBTF clients that the
772 			 * HCA is being removed/offlined.
773 			 */
774 			ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
775 			if (ibc_status != IBC_SUCCESS) {
776 				status = hermon_agent_handlers_init(state);
777 				if (status != DDI_SUCCESS) {
778 					HERMON_WARNING(state, "failed to "
779 					    "restart Hermon agents");
780 				}
781 				return (DDI_FAILURE);
782 			}
783 
784 			/*
785 			 * Before we can fully detach from the IBTF we need to
786 			 * ensure that we have handled all outstanding event
787 			 * callbacks.  This is accomplished by quiescing the
788 			 * event callback mechanism.  Note: if we are unable
789 			 * to successfully quiesce the callbacks, then this is
790 			 * an indication that something has probably gone
791 			 * seriously wrong.  We print out a warning, but
792 			 * continue.
793 			 */
794 			tmp_ibtfpriv = state->hs_ibtfpriv;
795 			HERMON_QUIESCE_IBTF_CALLB(state);
796 			if (state->hs_in_evcallb != 0) {
797 				HERMON_WARNING(state, "unable to quiesce "
798 				    "Hermon IBTF callbacks");
799 			}
800 
801 			/* Complete the detach from the IBTF */
802 			ibc_detach(tmp_ibtfpriv);
803 		}
804 
805 		/* Remove the minor node for device */
806 		ddi_remove_minor_node(dip, "devctl");
807 
808 		/*
809 		 * Only call hermon_drv_fini() if we are in Hermon HCA mode.
810 		 * (Because if we are in "maintenance mode", then we never
811 		 * successfully finished init.)  Only report successful
812 		 * detach for normal HCA mode.
813 		 */
814 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
815 			/* Cleanup driver resources and shutdown hardware */
816 			hermon_drv_fini(state);
817 			cmn_err(CE_CONT, "!Hermon driver successfully "
818 			    "detached\n");
819 		}
820 
821 		hermon_drv_fini2(state);
822 		hermon_fm_fini(state);
823 		ddi_soft_state_free(hermon_statep, instance);
824 
825 		return (DDI_SUCCESS);
826 
827 	case DDI_SUSPEND:
828 		/* Add code here for DDI_SUSPEND XXX */
829 		return (DDI_FAILURE);
830 
831 	default:
832 		cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
833 		break;
834 	}
835 
836 	return (DDI_FAILURE);
837 }
838 
839 /*
840  * hermon_dma_attr_init()
841  *    Context: Can be called from interrupt or base context.
842  */
843 
844 /* ARGSUSED */
845 void
846 hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
847 {
848 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
849 
850 	dma_attr->dma_attr_version	= DMA_ATTR_V0;
851 	dma_attr->dma_attr_addr_lo	= 0;
852 	dma_attr->dma_attr_addr_hi	= 0xFFFFFFFFFFFFFFFFull;
853 	dma_attr->dma_attr_count_max	= 0xFFFFFFFFFFFFFFFFull;
854 	dma_attr->dma_attr_align	= HERMON_PAGESIZE;  /* default 4K */
855 	dma_attr->dma_attr_burstsizes	= 0x3FF;
856 	dma_attr->dma_attr_minxfer	= 1;
857 	dma_attr->dma_attr_maxxfer	= 0xFFFFFFFFFFFFFFFFull;
858 	dma_attr->dma_attr_seg		= 0xFFFFFFFFFFFFFFFFull;
859 	dma_attr->dma_attr_sgllen	= 0x7FFFFFFF;
860 	dma_attr->dma_attr_granular	= 1;
861 	dma_attr->dma_attr_flags	= 0;
862 }
863 
864 /*
865  * hermon_dma_alloc()
866  *    Context: Can be called from base context.
867  */
868 int
869 hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
870     uint16_t opcode)
871 {
872 	ddi_dma_handle_t	dma_hdl;
873 	ddi_dma_attr_t		dma_attr;
874 	ddi_acc_handle_t	acc_hdl;
875 	ddi_dma_cookie_t	cookie;
876 	uint64_t		kaddr;
877 	uint64_t		real_len;
878 	uint_t			ccount;
879 	int			status;
880 
881 	hermon_dma_attr_init(state, &dma_attr);
882 #ifdef	__sparc
883 	if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
884 		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
885 #endif
886 
887 	/* Allocate a DMA handle */
888 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
889 	    NULL, &dma_hdl);
890 	if (status != DDI_SUCCESS) {
891 		IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
892 		cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
893 		return (DDI_FAILURE);
894 	}
895 
896 	/* Allocate DMA memory */
897 	status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
898 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
899 	    (caddr_t *)&kaddr, (size_t *)&real_len, &acc_hdl);
900 	if (status != DDI_SUCCESS) {
901 		ddi_dma_free_handle(&dma_hdl);
902 		IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
903 		cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
904 		return (DDI_FAILURE);
905 	}
906 	bzero((caddr_t)(uintptr_t)kaddr, real_len);
907 
908 	/* Bind the memory to the handle */
909 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
910 	    (caddr_t)(uintptr_t)kaddr, (size_t)real_len, DDI_DMA_RDWR |
911 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
912 	if (status != DDI_SUCCESS) {
913 		ddi_dma_mem_free(&acc_hdl);
914 		ddi_dma_free_handle(&dma_hdl);
915 		IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
916 		cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
917 		return (DDI_FAILURE);
918 	}
919 
920 	/* Package the hermon_dma_info contents and return */
921 	dma_info->vaddr   = kaddr;
922 	dma_info->dma_hdl = dma_hdl;
923 	dma_info->acc_hdl = acc_hdl;
924 
925 	/* Pass the mapping information to the firmware */
926 	status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
927 	if (status != DDI_SUCCESS) {
928 		char *s;
929 		hermon_dma_free(dma_info);
930 		switch (opcode) {
931 		case MAP_ICM:
932 			s = "MAP_ICM";
933 			break;
934 		case MAP_FA:
935 			s = "MAP_FA";
936 			break;
937 		case MAP_ICM_AUX:
938 			s = "MAP_ICM_AUX";
939 			break;
940 		default:
941 			s = "UNKNOWN";
942 		}
943 		cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
944 		    s, status);
945 		return (DDI_FAILURE);
946 	}
947 
948 	return (DDI_SUCCESS);
949 }
950 
951 /*
952  * hermon_dma_free()
953  *    Context: Can be called from base context.
954  */
955 void
956 hermon_dma_free(hermon_dma_info_t *info)
957 {
958 	/* Unbind the handles and free the memory */
959 	(void) ddi_dma_unbind_handle(info->dma_hdl);
960 	ddi_dma_mem_free(&info->acc_hdl);
961 	ddi_dma_free_handle(&info->dma_hdl);
962 }
963 
/*
 * Shorthands valid for use only inside hermon_icm_alloc()/hermon_icm_free().
 * Each expands to a call on another resource type that reuses the caller's
 * own "state", "index1" and "index2", so those names must be in scope at
 * the point of use.
 */
#define	HERMON_ICM_ALLOC(rsrc)	\
	hermon_icm_alloc(state, (rsrc), index1, index2)
#define	HERMON_ICM_FREE(rsrc)	\
	hermon_icm_free(state, (rsrc), index1, index2)
969 
970 /*
971  * hermon_icm_alloc()
972  *    Context: Can be called from base context.
973  *
974  * Only one thread can be here for a given hermon_rsrc_type_t "type".
975  */
976 int
977 hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
978     uint32_t index1, uint32_t index2)
979 {
980 	hermon_icm_table_t	*icm;
981 	hermon_dma_info_t	*dma_info;
982 	uint8_t			*bitmap;
983 	int			status;
984 
985 	if (hermon_verbose) {
986 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
987 		    "index1/2 (0x%x/0x%x)", type, index1, index2);
988 	}
989 
990 	icm = &state->hs_icm[type];
991 
992 	switch (type) {
993 	case HERMON_QPC:
994 		status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
995 		if (status != DDI_SUCCESS) {
996 			return (status);
997 		}
998 		status = HERMON_ICM_ALLOC(HERMON_RDB);
999 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1000 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1001 			return (status);
1002 		}
1003 		status = HERMON_ICM_ALLOC(HERMON_ALTC);
1004 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1005 			HERMON_ICM_FREE(HERMON_RDB);
1006 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1007 			return (status);
1008 		}
1009 		status = HERMON_ICM_ALLOC(HERMON_AUXC);
1010 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1011 			HERMON_ICM_FREE(HERMON_ALTC);
1012 			HERMON_ICM_FREE(HERMON_RDB);
1013 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1014 			return (status);
1015 		}
1016 		break;
1017 	case HERMON_SRQC:
1018 		status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
1019 		if (status != DDI_SUCCESS) {
1020 			return (status);
1021 		}
1022 		break;
1023 	case HERMON_CQC:
1024 		status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
1025 		if (status != DDI_SUCCESS) {
1026 			return (status);
1027 		}
1028 		break;
1029 	case HERMON_EQC:
1030 		status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
1031 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1032 			return (status);
1033 		}
1034 		break;
1035 	}
1036 
1037 	/* ensure existence of bitmap and dmainfo, sets "dma_info" */
1038 	hermon_bitmap(bitmap, dma_info, icm, index1);
1039 
1040 	/* Set up the DMA handle for allocation and mapping */
1041 	dma_info = icm->icm_dma[index1] + index2;
1042 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
1043 	dma_info->length  = icm->span << icm->log_object_size;
1044 	dma_info->icmaddr = icm->icm_baseaddr +
1045 	    (((index1 << icm->split_shift) +
1046 	    (index2 << icm->span_shift)) << icm->log_object_size);
1047 
1048 	if (hermon_verbose) {
1049 		IBTF_DPRINTF_L2("hermon", "alloc DMA: "
1050 		    "rsrc (0x%x) index (%x, %x) "
1051 		    "icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
1052 		    (longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
1053 	}
1054 
1055 	/* Allocate and map memory for this span */
1056 	status = hermon_dma_alloc(state, dma_info, MAP_ICM);
1057 	if (status != DDI_SUCCESS) {
1058 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
1059 		    "allocation failed, status 0x%x", status);
1060 		switch (type) {
1061 		case HERMON_QPC:
1062 			HERMON_ICM_FREE(HERMON_AUXC);
1063 			HERMON_ICM_FREE(HERMON_ALTC);
1064 			HERMON_ICM_FREE(HERMON_RDB);
1065 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1066 			break;
1067 		case HERMON_SRQC:
1068 			HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1069 			break;
1070 		case HERMON_CQC:
1071 			HERMON_ICM_FREE(HERMON_CMPT_CQC);
1072 			break;
1073 		case HERMON_EQC:
1074 			HERMON_ICM_FREE(HERMON_CMPT_EQC);
1075 			break;
1076 		}
1077 
1078 		return (DDI_FAILURE);
1079 	}
1080 	if (hermon_verbose) {
1081 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
1082 		    "rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
1083 		    "icm_addr (0x%lx)", type, index1, index2, dma_info->length,
1084 		    dma_info->icmaddr);
1085 	}
1086 
1087 	/* Set the bit for this slot in the table bitmap */
1088 	HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);
1089 
1090 	return (DDI_SUCCESS);
1091 }
1092 
1093 /*
1094  * hermon_icm_free()
1095  *    Context: Can be called from base context.
1096  *
1097  * ICM resources have been successfully returned from hermon_icm_alloc().
1098  * Associated dma_info is no longer in use.  Free the ICM backing memory.
1099  */
1100 void
1101 hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
1102     uint32_t index1, uint32_t index2)
1103 {
1104 	hermon_icm_table_t	*icm;
1105 	hermon_dma_info_t	*dma_info;
1106 	int			status;
1107 
1108 	icm = &state->hs_icm[type];
1109 	ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
1110 
1111 	if (hermon_verbose) {
1112 		IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
1113 		    "index (0x%x, 0x%x)", type, index1, index2);
1114 	}
1115 
1116 	dma_info = icm->icm_dma[index1] + index2;
1117 
1118 	/* The following only happens if attach() is failing. */
1119 	if (dma_info == NULL)
1120 		return;
1121 
1122 	/* Unmap the ICM allocation, then free the backing DMA memory */
1123 	status = hermon_unmap_icm_cmd_post(state, dma_info);
1124 	if (status != DDI_SUCCESS) {
1125 		HERMON_WARNING(state, "UNMAP_ICM failure");
1126 	}
1127 	hermon_dma_free(dma_info);
1128 
1129 	/* Clear the bit in the ICM table bitmap */
1130 	HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);
1131 
1132 	switch (type) {
1133 	case HERMON_QPC:
1134 		HERMON_ICM_FREE(HERMON_AUXC);
1135 		HERMON_ICM_FREE(HERMON_ALTC);
1136 		HERMON_ICM_FREE(HERMON_RDB);
1137 		HERMON_ICM_FREE(HERMON_CMPT_QPC);
1138 		break;
1139 	case HERMON_SRQC:
1140 		HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1141 		break;
1142 	case HERMON_CQC:
1143 		HERMON_ICM_FREE(HERMON_CMPT_CQC);
1144 		break;
1145 	case HERMON_EQC:
1146 		HERMON_ICM_FREE(HERMON_CMPT_EQC);
1147 		break;
1148 
1149 	}
1150 }
1151 
1152 
1153 /*
1154  * hermon_device_mode()
1155  *    Context: Can be called from base or interrupt context.
1156  *
1157  * Return HERMON_HCA_MODE for operational mode
1158  * Return HERMON_MAINTENANCE_MODE for maintenance mode
1159  * Return 0 otherwise
1160  *
1161  * A non-zero return for either operational or maintenance mode simplifies
1162  * one of the 2 uses of this function.
1163  */
1164 int
1165 hermon_device_mode(hermon_state_t *state)
1166 {
1167 	if (state->hs_vendor_id != PCI_VENID_MLX)
1168 		return (0);
1169 
1170 	switch (state->hs_device_id) {
1171 	case PCI_DEVID_HERMON_SDR:
1172 	case PCI_DEVID_HERMON_DDR:
1173 	case PCI_DEVID_HERMON_DDRG2:
1174 	case PCI_DEVID_HERMON_QDRG2:
1175 	case PCI_DEVID_HERMON_QDRG2V:
1176 		return (HERMON_HCA_MODE);
1177 	case PCI_DEVID_HERMON_MAINT:
1178 		return (HERMON_MAINTENANCE_MODE);
1179 	default:
1180 		return (0);
1181 	}
1182 }
1183 
1184 /*
1185  * hermon_drv_init()
1186  *    Context: Only called from attach() path context
1187  */
1188 /* ARGSUSED */
1189 static int
1190 hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
1191 {
1192 	int	status;
1193 
1194 	/* Retrieve PCI device, vendor and rev IDs */
1195 	state->hs_vendor_id	 = HERMON_GET_VENDOR_ID(state->hs_dip);
1196 	state->hs_device_id	 = HERMON_GET_DEVICE_ID(state->hs_dip);
1197 	state->hs_revision_id	 = HERMON_GET_REVISION_ID(state->hs_dip);
1198 
1199 	/*
1200 	 * Check and set the operational mode of the device. If the driver is
1201 	 * bound to the Hermon device in "maintenance mode", then this generally
1202 	 * means that either the device has been specifically jumpered to
1203 	 * start in this mode or the firmware boot process has failed to
1204 	 * successfully load either the primary or the secondary firmware
1205 	 * image.
1206 	 */
1207 	state->hs_operational_mode = hermon_device_mode(state);
1208 	switch (state->hs_operational_mode) {
1209 	case HERMON_HCA_MODE:
1210 		state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
1211 		break;
1212 	case HERMON_MAINTENANCE_MODE:
1213 		HERMON_FMANOTE(state, HERMON_FMA_MAINT);
1214 		state->hs_fm_degraded_reason = HCA_FW_MISC; /* not fw reason */
1215 		return (DDI_FAILURE);
1216 	default:
1217 		HERMON_FMANOTE(state, HERMON_FMA_PCIID);
1218 		HERMON_WARNING(state, "unexpected device type detected");
1219 		return (DDI_FAILURE);
1220 	}
1221 
1222 	/*
1223 	 * Initialize the Hermon hardware.
1224 	 *
1225 	 * Note:  If this routine returns an error, it is often a reasonably
1226 	 * good indication that something Hermon firmware-related has caused
1227 	 * the failure or some HW related errors have caused the failure.
1228 	 * (also there are few possibilities that SW (e.g. SW resource
1229 	 * shortage) can cause the failure, but the majority case is due to
1230 	 * either a firmware related error or a HW related one) In order to
1231 	 * give the user an opportunity (if desired) to update or reflash
1232 	 * the Hermon firmware image, we set "hs_operational_mode" flag
1233 	 * (described above) to indicate that we wish to enter maintenance
1234 	 * mode in case of the firmware-related issue.
1235 	 */
1236 	status = hermon_hw_init(state);
1237 	if (status != DDI_SUCCESS) {
1238 		cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
1239 		    state->hs_attach_buf);
1240 		return (DDI_FAILURE);
1241 	}
1242 
1243 	/*
1244 	 * Now that the ISR has been setup, arm all the EQs for event
1245 	 * generation.
1246 	 */
1247 
1248 	status = hermon_eq_arm_all(state);
1249 	if (status != DDI_SUCCESS) {
1250 		cmn_err(CE_NOTE, "EQ Arm All failed\n");
1251 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1252 		return (DDI_FAILURE);
1253 	}
1254 
1255 	/* test interrupts and event queues */
1256 	status = hermon_nop_post(state, 0x0, 0x0);
1257 	if (status != DDI_SUCCESS) {
1258 		cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
1259 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1260 		return (DDI_FAILURE);
1261 	}
1262 
1263 	/* Initialize Hermon softstate */
1264 	status = hermon_soft_state_init(state);
1265 	if (status != DDI_SUCCESS) {
1266 		cmn_err(CE_NOTE, "Failed to init soft state\n");
1267 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1268 		return (DDI_FAILURE);
1269 	}
1270 
1271 	return (DDI_SUCCESS);
1272 }
1273 
1274 
1275 /*
1276  * hermon_drv_fini()
1277  *    Context: Only called from attach() and/or detach() path contexts
1278  */
1279 static void
1280 hermon_drv_fini(hermon_state_t *state)
1281 {
1282 	/* Cleanup Hermon softstate */
1283 	hermon_soft_state_fini(state);
1284 
1285 	/* Cleanup Hermon resources and shutdown hardware */
1286 	hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1287 }
1288 
1289 
1290 /*
1291  * hermon_drv_fini2()
1292  *    Context: Only called from attach() and/or detach() path contexts
1293  */
1294 static void
1295 hermon_drv_fini2(hermon_state_t *state)
1296 {
1297 	if (state->hs_fm_poll_thread) {
1298 		ddi_periodic_delete(state->hs_fm_poll_thread);
1299 		state->hs_fm_poll_thread = NULL;
1300 	}
1301 
1302 	/* HERMON_DRV_CLEANUP_LEVEL1 */
1303 	if (state->hs_fm_cmdhdl) {
1304 		hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
1305 		state->hs_fm_cmdhdl = NULL;
1306 	}
1307 
1308 	if (state->hs_reg_cmdhdl) {
1309 		ddi_regs_map_free(&state->hs_reg_cmdhdl);
1310 		state->hs_reg_cmdhdl = NULL;
1311 	}
1312 
1313 	/* HERMON_DRV_CLEANUP_LEVEL0 */
1314 	if (state->hs_msix_tbl_entries) {
1315 		kmem_free(state->hs_msix_tbl_entries,
1316 		    state->hs_msix_tbl_size);
1317 		state->hs_msix_tbl_entries = NULL;
1318 	}
1319 
1320 	if (state->hs_msix_pba_entries) {
1321 		kmem_free(state->hs_msix_pba_entries,
1322 		    state->hs_msix_pba_size);
1323 		state->hs_msix_pba_entries = NULL;
1324 	}
1325 
1326 	if (state->hs_fm_msix_tblhdl) {
1327 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
1328 		state->hs_fm_msix_tblhdl = NULL;
1329 	}
1330 
1331 	if (state->hs_reg_msix_tblhdl) {
1332 		ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
1333 		state->hs_reg_msix_tblhdl = NULL;
1334 	}
1335 
1336 	if (state->hs_fm_msix_pbahdl) {
1337 		hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
1338 		state->hs_fm_msix_pbahdl = NULL;
1339 	}
1340 
1341 	if (state->hs_reg_msix_pbahdl) {
1342 		ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
1343 		state->hs_reg_msix_pbahdl = NULL;
1344 	}
1345 
1346 	if (state->hs_fm_pcihdl) {
1347 		hermon_pci_config_teardown(state, &state->hs_fm_pcihdl);
1348 		state->hs_fm_pcihdl = NULL;
1349 	}
1350 
1351 	if (state->hs_reg_pcihdl) {
1352 		pci_config_teardown(&state->hs_reg_pcihdl);
1353 		state->hs_reg_pcihdl = NULL;
1354 	}
1355 }
1356 
1357 
1358 /*
1359  * hermon_isr_init()
1360  *    Context: Only called from attach() path context
1361  */
1362 static int
1363 hermon_isr_init(hermon_state_t *state)
1364 {
1365 	int	status;
1366 	int	intr;
1367 
1368 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1369 
1370 		/*
1371 		 * Add a handler for the interrupt or MSI
1372 		 */
1373 		status = ddi_intr_add_handler(state->hs_intrmsi_hdl[intr],
1374 		    hermon_isr, (caddr_t)state, (void *)(uintptr_t)intr);
1375 		if (status  != DDI_SUCCESS) {
1376 			return (DDI_FAILURE);
1377 		}
1378 
1379 		/*
1380 		 * Enable the software interrupt.  Note: depending on the value
1381 		 * returned in the capability flag, we have to call either
1382 		 * ddi_intr_block_enable() or ddi_intr_enable().
1383 		 */
1384 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1385 			status = ddi_intr_block_enable(
1386 			    &state->hs_intrmsi_hdl[intr], 1);
1387 			if (status != DDI_SUCCESS) {
1388 				return (DDI_FAILURE);
1389 			}
1390 		} else {
1391 			status = ddi_intr_enable(state->hs_intrmsi_hdl[intr]);
1392 			if (status != DDI_SUCCESS) {
1393 				return (DDI_FAILURE);
1394 			}
1395 		}
1396 	}
1397 
1398 	/*
1399 	 * Now that the ISR has been enabled, defer arm_all  EQs for event
1400 	 * generation until later, in case MSIX is enabled
1401 	 */
1402 	return (DDI_SUCCESS);
1403 }
1404 
1405 
1406 /*
1407  * hermon_isr_fini()
1408  *    Context: Only called from attach() and/or detach() path contexts
1409  */
1410 static void
1411 hermon_isr_fini(hermon_state_t *state)
1412 {
1413 	int	intr;
1414 
1415 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1416 		/* Disable the software interrupt */
1417 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1418 			(void) ddi_intr_block_disable(
1419 			    &state->hs_intrmsi_hdl[intr], 1);
1420 		} else {
1421 			(void) ddi_intr_disable(state->hs_intrmsi_hdl[intr]);
1422 		}
1423 
1424 		/*
1425 		 * Remove the software handler for the interrupt or MSI
1426 		 */
1427 		(void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[intr]);
1428 	}
1429 }
1430 
1431 
1432 /*
1433  * Sum of ICM configured values:
1434  *     cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
1435  *
1436  */
1437 static uint64_t
1438 hermon_size_icm(hermon_state_t *state)
1439 {
1440 	hermon_hw_querydevlim_t	*devlim;
1441 	hermon_cfg_profile_t	*cfg;
1442 	uint64_t		num_cmpts, num_dmpts, num_mtts;
1443 	uint64_t		num_qpcs, num_srqc, num_rdbs;
1444 #ifndef HERMON_FW_WORKAROUND
1445 	uint64_t		num_auxc;
1446 #endif
1447 	uint64_t		num_cqcs, num_altc;
1448 	uint64_t		num_eqcs, num_mcgs;
1449 	uint64_t		size;
1450 
1451 	devlim = &state->hs_devlim;
1452 	cfg = state->hs_cfg_profile;
1453 	/* number of respective entries */
1454 	num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
1455 	num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
1456 	num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
1457 	num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
1458 	num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
1459 	num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
1460 	num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
1461 	num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1462 #ifndef HERMON_FW_WORKAROUND
1463 	num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1464 #endif
1465 	num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
1466 	num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;
1467 
1468 	size =
1469 	    num_cmpts 	* devlim->cmpt_entry_sz +
1470 	    num_dmpts	* devlim->dmpt_entry_sz +
1471 	    num_mtts	* devlim->mtt_entry_sz +
1472 	    num_qpcs	* devlim->qpc_entry_sz +
1473 	    num_srqc	* devlim->srq_entry_sz +
1474 	    num_rdbs	* devlim->rdmardc_entry_sz +
1475 	    num_cqcs	* devlim->cqc_entry_sz +
1476 	    num_altc	* devlim->altc_entry_sz +
1477 #ifdef HERMON_FW_WORKAROUND
1478 	    0x80000000ull +
1479 #else
1480 	    num_auxc	* devlim->aux_entry_sz	+
1481 #endif
1482 	    num_eqcs	* devlim->eqc_entry_sz +
1483 	    num_mcgs	* HERMON_MCGMEM_SZ(state);
1484 	return (size);
1485 }
1486 
1487 
1488 /*
1489  * hermon_hw_init()
1490  *    Context: Only called from attach() path context
1491  */
1492 static int
1493 hermon_hw_init(hermon_state_t *state)
1494 {
1495 	hermon_drv_cleanup_level_t	cleanup;
1496 	sm_nodeinfo_t			nodeinfo;
1497 	uint64_t			clr_intr_offset;
1498 	int				status;
1499 	uint32_t			fw_size;	/* in page */
1500 	uint64_t			offset;
1501 
1502 	/* This is where driver initialization begins */
1503 	cleanup = HERMON_DRV_CLEANUP_LEVEL0;
1504 
1505 	/* Setup device access attributes */
1506 	state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V1;
1507 	state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1508 	state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1509 	state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;
1510 
1511 	/* Setup fma-protected access attributes */
1512 	state->hs_fm_accattr.devacc_attr_version =
1513 	    hermon_devacc_attr_version(state);
1514 	state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1515 	state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1516 	/* set acc err protection type */
1517 	state->hs_fm_accattr.devacc_attr_access =
1518 	    hermon_devacc_attr_access(state);
1519 
1520 	/* Setup for PCI config read/write of HCA device */
1521 	status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
1522 	if (status != DDI_SUCCESS) {
1523 		hermon_hw_fini(state, cleanup);
1524 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1525 		    "hw_init_PCI_config_space_regmap_fail");
1526 		/* This case is not the degraded one */
1527 		return (DDI_FAILURE);
1528 	}
1529 
1530 	/* Map PCI config space and MSI-X tables/pba */
1531 	hermon_set_msix_info(state);
1532 
1533 	/* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
1534 	status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
1535 	    &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
1536 	    &state->hs_fm_cmdhdl);
1537 	if (status != DDI_SUCCESS) {
1538 		hermon_hw_fini(state, cleanup);
1539 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1540 		    "hw_init_CMD_BAR_regmap_fail");
1541 		/* This case is not the degraded one */
1542 		return (DDI_FAILURE);
1543 	}
1544 
1545 	cleanup = HERMON_DRV_CLEANUP_LEVEL1;
1546 	/*
1547 	 * We defer UAR-BAR mapping until later.  Need to know if
1548 	 * blueflame mapping is to be done, and don't know that until after
1549 	 * we get the dev_caps, so do it right after that
1550 	 */
1551 
1552 	/*
1553 	 * There is a third BAR defined for Hermon - it is for MSIX
1554 	 *
1555 	 * Will need to explore it's possible need/use w/ Mellanox
1556 	 * [es] Temporary mapping maybe
1557 	 */
1558 
1559 #ifdef HERMON_SUPPORTS_MSIX_BAR
1560 	status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
1561 	    &state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
1562 	    &state->hs_reg_msihdl);
1563 	if (status != DDI_SUCCESS) {
1564 		hermon_hw_fini(state, cleanup);
1565 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1566 		    "hw_init_MSIX_BAR_regmap_fail");
1567 		/* This case is not the degraded one */
1568 		return (DDI_FAILURE);
1569 	}
1570 #endif
1571 
1572 	cleanup = HERMON_DRV_CLEANUP_LEVEL2;
1573 
1574 	/*
1575 	 * Save interesting registers away. The offsets of the first two
1576 	 * here (HCR and sw_reset) are detailed in the PRM, the others are
1577 	 * derived from values in the QUERY_FW output, so we'll save them
1578 	 * off later.
1579 	 */
1580 	/* Host Command Register (HCR) */
1581 	state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
1582 	    ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
1583 	state->hs_cmd_toggle = 0;	/* initialize it for use */
1584 
1585 	/* Software Reset register (sw_reset) and semaphore */
1586 	state->hs_cmd_regs.sw_reset = (uint32_t *)
1587 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1588 	    HERMON_CMD_SW_RESET_OFFSET);
1589 	state->hs_cmd_regs.sw_semaphore = (uint32_t *)
1590 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1591 	    HERMON_CMD_SW_SEMAPHORE_OFFSET);
1592 
1593 	/* make sure init'd before we start filling things in */
1594 	bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));
1595 
1596 	/* Initialize the Phase1 configuration profile */
1597 	status = hermon_cfg_profile_init_phase1(state);
1598 	if (status != DDI_SUCCESS) {
1599 		hermon_hw_fini(state, cleanup);
1600 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1601 		    "hw_init_cfginit1_fail");
1602 		/* This case is not the degraded one */
1603 		return (DDI_FAILURE);
1604 	}
1605 	cleanup = HERMON_DRV_CLEANUP_LEVEL3;
1606 
1607 	/* Do a software reset of the adapter to ensure proper state */
1608 	status = hermon_sw_reset(state);
1609 	if (status != HERMON_CMD_SUCCESS) {
1610 		hermon_hw_fini(state, cleanup);
1611 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1612 		    "hw_init_sw_reset_fail");
1613 		/* This case is not the degraded one */
1614 		return (DDI_FAILURE);
1615 	}
1616 
1617 	/* Initialize mailboxes */
1618 	status = hermon_rsrc_init_phase1(state);
1619 	if (status != DDI_SUCCESS) {
1620 		hermon_hw_fini(state, cleanup);
1621 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1622 		    "hw_init_rsrcinit1_fail");
1623 		/* This case is not the degraded one */
1624 		return (DDI_FAILURE);
1625 	}
1626 	cleanup = HERMON_DRV_CLEANUP_LEVEL4;
1627 
1628 	/* Post QUERY_FW */
1629 	status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
1630 	    sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
1631 	if (status != HERMON_CMD_SUCCESS) {
1632 		cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
1633 		hermon_hw_fini(state, cleanup);
1634 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1635 		    "hw_init_query_fw_cmd_fail");
1636 		/* This case is not the degraded one */
1637 		return (DDI_FAILURE);
1638 	}
1639 
1640 	/* Validate what/that HERMON FW version is appropriate */
1641 
1642 	status = hermon_fw_version_check(state);
1643 	if (status != DDI_SUCCESS) {
1644 		HERMON_FMANOTE(state, HERMON_FMA_FWVER);
1645 		if (state->hs_operational_mode == HERMON_HCA_MODE) {
1646 			cmn_err(CE_CONT, "Unsupported Hermon FW version: "
1647 			    "expected: %04d.%04d.%04d, "
1648 			    "actual: %04d.%04d.%04d\n",
1649 			    HERMON_FW_VER_MAJOR,
1650 			    HERMON_FW_VER_MINOR,
1651 			    HERMON_FW_VER_SUBMINOR,
1652 			    state->hs_fw.fw_rev_major,
1653 			    state->hs_fw.fw_rev_minor,
1654 			    state->hs_fw.fw_rev_subminor);
1655 		} else {
1656 			cmn_err(CE_CONT, "Unsupported FW version: "
1657 			    "%04d.%04d.%04d\n",
1658 			    state->hs_fw.fw_rev_major,
1659 			    state->hs_fw.fw_rev_minor,
1660 			    state->hs_fw.fw_rev_subminor);
1661 		}
1662 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1663 		state->hs_fm_degraded_reason = HCA_FW_MISMATCH;
1664 		hermon_hw_fini(state, cleanup);
1665 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1666 		    "hw_init_checkfwver_fail");
1667 		/* This case is the degraded one */
1668 		return (HERMON_CMD_BAD_NVMEM);
1669 	}
1670 
1671 	/*
1672 	 * Save off the rest of the interesting registers that we'll be using.
1673 	 * Setup the offsets for the other registers.
1674 	 */
1675 
1676 	/*
1677 	 * Hermon does the intr_offset from the BAR - technically should get the
1678 	 * BAR info from the response, but PRM says it's from BAR0-1, which is
1679 	 * for us the CMD BAR
1680 	 */
1681 
1682 	clr_intr_offset	 = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
1683 
1684 	/* Save Clear Interrupt address */
1685 	state->hs_cmd_regs.clr_intr = (uint64_t *)
1686 	    (uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);
1687 
1688 	/*
1689 	 * Set the error buffer also into the structure - used in hermon_event.c
1690 	 * to check for internal error on the HCA, not reported in eqe or
1691 	 * (necessarily) by interrupt
1692 	 */
1693 	state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
1694 	    (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
1695 
1696 	/*
1697 	 * Invoke a polling thread to check the error buffer periodically.
1698 	 */
1699 	if (!hermon_no_inter_err_chk) {
1700 		state->hs_fm_poll_thread = ddi_periodic_add(
1701 		    hermon_inter_err_chk, (void *)state, FM_POLL_INTERVAL,
1702 		    DDI_IPL_0);
1703 	}
1704 
1705 	cleanup = HERMON_DRV_CLEANUP_LEVEL5;
1706 
1707 	/*
1708 	 * Allocate, map, and run the HCA Firmware.
1709 	 */
1710 
1711 	/* Allocate memory for the firmware to load into and map it */
1712 
1713 	/* get next higher power of 2 */
1714 	fw_size = 1 << highbit(state->hs_fw.fw_pages);
1715 	state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
1716 	status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
1717 	if (status != DDI_SUCCESS) {
1718 		cmn_err(CE_NOTE, "FW alloc failed\n");
1719 		hermon_hw_fini(state, cleanup);
1720 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1721 		    "hw_init_dma_alloc_fw_fail");
1722 		/* This case is not the degraded one */
1723 		return (DDI_FAILURE);
1724 	}
1725 
1726 	cleanup = HERMON_DRV_CLEANUP_LEVEL6;
1727 
1728 	/* Invoke the RUN_FW cmd to run the firmware */
1729 	status = hermon_run_fw_cmd_post(state);
1730 	if (status != DDI_SUCCESS) {
1731 		cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
1732 		if (status == HERMON_CMD_BAD_NVMEM) {
1733 			state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1734 			state->hs_fm_degraded_reason = HCA_FW_CORRUPT;
1735 		}
1736 		hermon_hw_fini(state, cleanup);
1737 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
1738 		/*
1739 		 * If the status is HERMON_CMD_BAD_NVMEM, it's likely the
1740 		 * firmware is corrupted, so the mode falls into the
1741 		 * maintenance mode.
1742 		 */
1743 		return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
1744 		    DDI_FAILURE);
1745 	}
1746 
1747 
1748 	/*
1749 	 * QUERY DEVICE LIMITS/CAPABILITIES
1750 	 * NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
1751 	 * but for familiarity we have kept the structure name the
1752 	 * same as Tavor/Arbel
1753 	 */
1754 
1755 	status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
1756 	    &state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
1757 	    HERMON_CMD_NOSLEEP_SPIN);
1758 	if (status != HERMON_CMD_SUCCESS) {
1759 		cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
1760 		    status);
1761 		hermon_hw_fini(state, cleanup);
1762 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
1763 		/* This case is not the degraded one */
1764 		return (DDI_FAILURE);
1765 	}
1766 
1767 	state->hs_devlim.num_rsvd_eq = max(state->hs_devlim.num_rsvd_eq,
1768 	    (4 * state->hs_devlim.num_rsvd_uar));	/* lesser of resvd's */
1769 
1770 	/* now we have enough info to map in the UAR BAR */
1771 	/*
1772 	 * First, we figure out how to map the BAR for UAR - use only half if
1773 	 * BlueFlame is enabled - in that case the mapped length is 1/2 the
1774 	 * log_max_uar_sz (max__uar - 1) * 1MB ( +20).
1775 	 */
1776 
1777 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1778 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1779 	} else {
1780 		offset = 0;	/* a zero length means map the whole thing */
1781 	}
1782 	status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
1783 	    &state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
1784 	    &state->hs_fm_uarhdl);
1785 	if (status != DDI_SUCCESS) {
1786 		HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
1787 		/* This case is not the degraded one */
1788 		return (DDI_FAILURE);
1789 	}
1790 
1791 	/* and if BlueFlame is enabled, map the other half there */
1792 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1793 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1794 		status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
1795 		    &state->hs_reg_bf_baseaddr, offset, offset,
1796 		    &state->hs_reg_accattr, &state->hs_reg_bfhdl);
1797 		if (status != DDI_SUCCESS) {
1798 			HERMON_ATTACH_MSG(state->hs_attach_buf,
1799 			    "BlueFlame BAR mapping");
1800 			/* This case is not the degraded one */
1801 			return (DDI_FAILURE);
1802 		}
1803 		/* This will be used in hw_fini if we fail to init. */
1804 		state->hs_bf_offset = offset;
1805 	}
1806 	cleanup = HERMON_DRV_CLEANUP_LEVEL7;
1807 
1808 	/* Hermon has a couple of things needed for phase 2 in query port */
1809 
1810 	status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
1811 	    &state->hs_queryport, sizeof (hermon_hw_query_port_t),
1812 	    HERMON_CMD_NOSLEEP_SPIN);
1813 	if (status != HERMON_CMD_SUCCESS) {
1814 		cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
1815 		    status);
1816 		hermon_hw_fini(state, cleanup);
1817 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1818 		    "hw_init_queryport_fail");
1819 		/* This case is not the degraded one */
1820 		return (DDI_FAILURE);
1821 	}
1822 
1823 	/* Initialize the Phase2 Hermon configuration profile */
1824 	status = hermon_cfg_profile_init_phase2(state);
1825 	if (status != DDI_SUCCESS) {
1826 		cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
1827 		hermon_hw_fini(state, cleanup);
1828 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1829 		    "hw_init_cfginit2_fail");
1830 		/* This case is not the degraded one */
1831 		return (DDI_FAILURE);
1832 	}
1833 
1834 	/* Determine and set the ICM size */
1835 	state->hs_icm_sz = hermon_size_icm(state);
1836 	status		 = hermon_set_icm_size_cmd_post(state);
1837 	if (status != DDI_SUCCESS) {
1838 		cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
1839 		    status);
1840 		hermon_hw_fini(state, cleanup);
1841 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1842 		    "hw_init_seticmsz_fail");
1843 		/* This case is not the degraded one */
1844 		return (DDI_FAILURE);
1845 	}
1846 	/* alloc icm aux physical memory and map it */
1847 
1848 	state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);
1849 
1850 	status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
1851 	if (status != DDI_SUCCESS) {
1852 		cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
1853 		    (longlong_t)state->hs_icma_dma.length);
1854 		hermon_hw_fini(state, cleanup);
1855 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1856 		    "hw_init_dma_alloc_icm_aux_fail");
1857 		/* This case is not the degraded one */
1858 		return (DDI_FAILURE);
1859 	}
1860 	cleanup = HERMON_DRV_CLEANUP_LEVEL8;
1861 
1862 	cleanup = HERMON_DRV_CLEANUP_LEVEL9;
1863 
1864 	/* Allocate an array of structures to house the ICM tables */
1865 	state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
1866 	    sizeof (hermon_icm_table_t), KM_SLEEP);
1867 
1868 	/* Set up the ICM address space and the INIT_HCA command input */
1869 	status = hermon_icm_config_setup(state, &state->hs_hcaparams);
1870 	if (status != HERMON_CMD_SUCCESS) {
1871 		cmn_err(CE_NOTE, "ICM configuration failed\n");
1872 		hermon_hw_fini(state, cleanup);
1873 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1874 		    "hw_init_icm_config_setup_fail");
1875 		/* This case is not the degraded one */
1876 		return (DDI_FAILURE);
1877 	}
1878 	cleanup = HERMON_DRV_CLEANUP_LEVEL10;
1879 
1880 	/* Initialize the adapter with the INIT_HCA cmd */
1881 	status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
1882 	    HERMON_CMD_NOSLEEP_SPIN);
1883 	if (status != HERMON_CMD_SUCCESS) {
1884 		cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
1885 		hermon_hw_fini(state, cleanup);
1886 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
1887 		/* This case is not the degraded one */
1888 		return (DDI_FAILURE);
1889 	}
1890 	cleanup = HERMON_DRV_CLEANUP_LEVEL11;
1891 
1892 	/* Enter the second phase of init for Hermon configuration/resources */
1893 	status = hermon_rsrc_init_phase2(state);
1894 	if (status != DDI_SUCCESS) {
1895 		hermon_hw_fini(state, cleanup);
1896 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1897 		    "hw_init_rsrcinit2_fail");
1898 		/* This case is not the degraded one */
1899 		return (DDI_FAILURE);
1900 	}
1901 	cleanup = HERMON_DRV_CLEANUP_LEVEL12;
1902 
1903 	/* Query the adapter via QUERY_ADAPTER */
1904 	status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
1905 	    &state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
1906 	    HERMON_CMD_NOSLEEP_SPIN);
1907 	if (status != HERMON_CMD_SUCCESS) {
1908 		cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
1909 		    status);
1910 		hermon_hw_fini(state, cleanup);
1911 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1912 		    "hw_init_query_adapter_fail");
1913 		/* This case is not the degraded one */
1914 		return (DDI_FAILURE);
1915 	}
1916 
1917 	/* Allocate protection domain (PD) for Hermon internal use */
1918 	status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
1919 	    HERMON_SLEEP);
1920 	if (status != DDI_SUCCESS) {
1921 		cmn_err(CE_NOTE, "failed to alloc internal PD\n");
1922 		hermon_hw_fini(state, cleanup);
1923 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1924 		    "hw_init_internal_pd_alloc_fail");
1925 		/* This case is not the degraded one */
1926 		return (DDI_FAILURE);
1927 	}
1928 	cleanup = HERMON_DRV_CLEANUP_LEVEL13;
1929 
1930 	/* Setup UAR page for kernel use */
1931 	status = hermon_internal_uarpg_init(state);
1932 	if (status != DDI_SUCCESS) {
1933 		cmn_err(CE_NOTE, "failed to setup internal UAR\n");
1934 		hermon_hw_fini(state, cleanup);
1935 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1936 		    "hw_init_internal_uarpg_alloc_fail");
1937 		/* This case is not the degraded one */
1938 		return (DDI_FAILURE);
1939 	}
1940 	cleanup = HERMON_DRV_CLEANUP_LEVEL14;
1941 
1942 	/* Query and initialize the Hermon interrupt/MSI information */
1943 	status = hermon_intr_or_msi_init(state);
1944 	if (status != DDI_SUCCESS) {
1945 		cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
1946 		hermon_hw_fini(state, cleanup);
1947 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1948 		    "hw_init_intr_or_msi_init_fail");
1949 		/* This case is not the degraded one */
1950 		return (DDI_FAILURE);
1951 	}
1952 	cleanup = HERMON_DRV_CLEANUP_LEVEL15;
1953 
1954 	status = hermon_isr_init(state);	/* set up the isr */
1955 	if (status != DDI_SUCCESS) {
1956 		cmn_err(CE_NOTE, "failed to init isr\n");
1957 		hermon_hw_fini(state, cleanup);
1958 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1959 		    "hw_init_isrinit_fail");
1960 		/* This case is not the degraded one */
1961 		return (DDI_FAILURE);
1962 	}
1963 	cleanup = HERMON_DRV_CLEANUP_LEVEL16;
1964 
1965 	/* Setup the event queues */
1966 	status = hermon_eq_init_all(state);
1967 	if (status != DDI_SUCCESS) {
1968 		cmn_err(CE_NOTE, "failed to init EQs\n");
1969 		hermon_hw_fini(state, cleanup);
1970 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1971 		    "hw_init_eqinitall_fail");
1972 		/* This case is not the degraded one */
1973 		return (DDI_FAILURE);
1974 	}
1975 	cleanup = HERMON_DRV_CLEANUP_LEVEL17;
1976 
1977 
1978 
1979 	/* Reserve contexts for QP0 and QP1 */
1980 	status = hermon_special_qp_contexts_reserve(state);
1981 	if (status != DDI_SUCCESS) {
1982 		cmn_err(CE_NOTE, "failed to init special QPs\n");
1983 		hermon_hw_fini(state, cleanup);
1984 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1985 		    "hw_init_rsrv_sqp_fail");
1986 		/* This case is not the degraded one */
1987 		return (DDI_FAILURE);
1988 	}
1989 	cleanup = HERMON_DRV_CLEANUP_LEVEL18;
1990 
1991 	/* Initialize for multicast group handling */
1992 	status = hermon_mcg_init(state);
1993 	if (status != DDI_SUCCESS) {
1994 		cmn_err(CE_NOTE, "failed to init multicast\n");
1995 		hermon_hw_fini(state, cleanup);
1996 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1997 		    "hw_init_mcg_init_fail");
1998 		/* This case is not the degraded one */
1999 		return (DDI_FAILURE);
2000 	}
2001 	cleanup = HERMON_DRV_CLEANUP_LEVEL19;
2002 
2003 	/* Initialize the Hermon IB port(s) */
2004 	status = hermon_hca_port_init(state);
2005 	if (status != DDI_SUCCESS) {
2006 		cmn_err(CE_NOTE, "failed to init HCA Port\n");
2007 		hermon_hw_fini(state, cleanup);
2008 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2009 		    "hw_init_hca_port_init_fail");
2010 		/* This case is not the degraded one */
2011 		return (DDI_FAILURE);
2012 	}
2013 
2014 	cleanup = HERMON_DRV_CLEANUP_ALL;
2015 
2016 	/* Determine NodeGUID and SystemImageGUID */
2017 	status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
2018 	    &nodeinfo);
2019 	if (status != HERMON_CMD_SUCCESS) {
2020 		cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
2021 		hermon_hw_fini(state, cleanup);
2022 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2023 		    "hw_init_getnodeinfo_cmd_fail");
2024 		/* This case is not the degraded one */
2025 		return (DDI_FAILURE);
2026 	}
2027 
2028 	/*
2029 	 * If the NodeGUID value was set in OBP properties, then we use that
2030 	 * value.  But we still print a message if the value we queried from
2031 	 * firmware does not match this value.
2032 	 *
2033 	 * Otherwise if OBP value is not set then we use the value from
2034 	 * firmware unconditionally.
2035 	 */
2036 	if (state->hs_cfg_profile->cp_nodeguid) {
2037 		state->hs_nodeguid   = state->hs_cfg_profile->cp_nodeguid;
2038 	} else {
2039 		state->hs_nodeguid = nodeinfo.NodeGUID;
2040 	}
2041 
2042 	if (state->hs_nodeguid != nodeinfo.NodeGUID) {
2043 		cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
2044 		    "does not match value set by device property");
2045 	}
2046 
2047 	/*
2048 	 * If the SystemImageGUID value was set in OBP properties, then we use
2049 	 * that value.  But we still print a message if the value we queried
2050 	 * from firmware does not match this value.
2051 	 *
2052 	 * Otherwise if OBP value is not set then we use the value from
2053 	 * firmware unconditionally.
2054 	 */
2055 	if (state->hs_cfg_profile->cp_sysimgguid) {
2056 		state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
2057 	} else {
2058 		state->hs_sysimgguid = nodeinfo.SystemImageGUID;
2059 	}
2060 
2061 	if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
2062 		cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
2063 		    "does not match value set by device property");
2064 	}
2065 
2066 	/* Get NodeDescription */
2067 	status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
2068 	    (sm_nodedesc_t *)&state->hs_nodedesc);
2069 	if (status != HERMON_CMD_SUCCESS) {
2070 		cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
2071 		hermon_hw_fini(state, cleanup);
2072 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2073 		    "hw_init_getnodedesc_cmd_fail");
2074 		/* This case is not the degraded one */
2075 		return (DDI_FAILURE);
2076 	}
2077 
2078 	return (DDI_SUCCESS);
2079 }
2080 
2081 
2082 /*
2083  * hermon_hw_fini()
2084  *    Context: Only called from attach() and/or detach() path contexts
2085  */
2086 static void
2087 hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
2088 {
2089 	uint_t		num_ports;
2090 	int		i, status;
2091 
2092 
2093 	/*
2094 	 * JBDB - We might not want to run these returns in all cases of
2095 	 * Bad News. We should still attempt to free all of the DMA memory
2096 	 * resources...  This needs to be worked last, after all allocations
2097 	 * are implemented. For now, and possibly for later, this works.
2098 	 */
2099 
2100 	switch (cleanup) {
2101 	/*
2102 	 * If we add more driver initialization steps that should be cleaned
2103 	 * up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
2104 	 * first entry (i.e. corresponds to the last init step).
2105 	 */
2106 	case HERMON_DRV_CLEANUP_ALL:
2107 		/* Shutdown the Hermon IB port(s) */
2108 		num_ports = state->hs_cfg_profile->cp_num_ports;
2109 		(void) hermon_hca_ports_shutdown(state, num_ports);
2110 		/* FALLTHROUGH */
2111 
2112 	case HERMON_DRV_CLEANUP_LEVEL19:
2113 		/* Teardown resources used for multicast group handling */
2114 		hermon_mcg_fini(state);
2115 		/* FALLTHROUGH */
2116 
2117 	case HERMON_DRV_CLEANUP_LEVEL18:
2118 		/* Unreserve the special QP contexts */
2119 		hermon_special_qp_contexts_unreserve(state);
2120 		/* FALLTHROUGH */
2121 
2122 	case HERMON_DRV_CLEANUP_LEVEL17:
2123 		/*
2124 		 * Attempt to teardown all event queues (EQ).  If we fail
2125 		 * here then print a warning message and return.  Something
2126 		 * (either in HW or SW) has gone seriously wrong.
2127 		 */
2128 		status = hermon_eq_fini_all(state);
2129 		if (status != DDI_SUCCESS) {
2130 			HERMON_WARNING(state, "failed to teardown EQs");
2131 			return;
2132 		}
2133 		/* FALLTHROUGH */
2134 	case HERMON_DRV_CLEANUP_LEVEL16:
2135 		/* Teardown Hermon interrupts */
2136 		hermon_isr_fini(state);
2137 		/* FALLTHROUGH */
2138 
2139 	case HERMON_DRV_CLEANUP_LEVEL15:
2140 		status = hermon_intr_or_msi_fini(state);
2141 		if (status != DDI_SUCCESS) {
2142 			HERMON_WARNING(state, "failed to free intr/MSI");
2143 			return;
2144 		}
2145 		/* FALLTHROUGH */
2146 
2147 	case HERMON_DRV_CLEANUP_LEVEL14:
2148 		/* Free the resources for the Hermon internal UAR pages */
2149 		hermon_internal_uarpg_fini(state);
2150 		/* FALLTHROUGH */
2151 
2152 	case HERMON_DRV_CLEANUP_LEVEL13:
2153 		/*
2154 		 * Free the PD that was used internally by Hermon software.  If
2155 		 * we fail here then print a warning and return.  Something
2156 		 * (probably software-related, but perhaps HW) has gone wrong.
2157 		 */
2158 		status = hermon_pd_free(state, &state->hs_pdhdl_internal);
2159 		if (status != DDI_SUCCESS) {
2160 			HERMON_WARNING(state, "failed to free internal PD");
2161 			return;
2162 		}
2163 		/* FALLTHROUGH */
2164 
2165 	case HERMON_DRV_CLEANUP_LEVEL12:
2166 		/* Cleanup all the phase2 resources first */
2167 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
2168 		/* FALLTHROUGH */
2169 
2170 	case HERMON_DRV_CLEANUP_LEVEL11:
2171 		/* LEVEL11 is after INIT_HCA */
2172 		/* FALLTHROUGH */
2173 
2174 
2175 	case HERMON_DRV_CLEANUP_LEVEL10:
2176 		/*
2177 		 * Unmap the ICM memory area with UNMAP_ICM command.
2178 		 */
2179 		status = hermon_unmap_icm_cmd_post(state, NULL);
2180 		if (status != DDI_SUCCESS) {
2181 			cmn_err(CE_WARN,
2182 			    "hermon_hw_fini: failed to unmap ICM\n");
2183 		}
2184 
2185 		/* Free the initial ICM DMA handles */
2186 		hermon_icm_dma_fini(state);
2187 
2188 		/* Free the ICM table structures */
2189 		hermon_icm_tables_fini(state);
2190 
2191 		/* Free the ICM table handles */
2192 		kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
2193 		    sizeof (hermon_icm_table_t));
2194 
2195 		/* FALLTHROUGH */
2196 
2197 	case HERMON_DRV_CLEANUP_LEVEL9:
2198 		/*
2199 		 * Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
2200 		 */
2201 		status = hermon_unmap_icm_aux_cmd_post(state);
2202 		if (status != HERMON_CMD_SUCCESS) {
2203 			cmn_err(CE_NOTE,
2204 			    "hermon_hw_fini: failed to unmap ICMA\n");
2205 		}
2206 		/* FALLTHROUGH */
2207 
2208 	case HERMON_DRV_CLEANUP_LEVEL8:
2209 		/*
2210 		 * Deallocate ICM Aux DMA memory.
2211 		 */
2212 		hermon_dma_free(&state->hs_icma_dma);
2213 		/* FALLTHROUGH */
2214 
2215 	case HERMON_DRV_CLEANUP_LEVEL7:
2216 		if (state->hs_fm_uarhdl) {
2217 			hermon_regs_map_free(state, &state->hs_fm_uarhdl);
2218 			state->hs_fm_uarhdl = NULL;
2219 		}
2220 
2221 		if (state->hs_reg_uarhdl) {
2222 			ddi_regs_map_free(&state->hs_reg_uarhdl);
2223 			state->hs_reg_uarhdl = NULL;
2224 		}
2225 
2226 		if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
2227 			ddi_regs_map_free(&state->hs_reg_bfhdl);
2228 			state->hs_reg_bfhdl = NULL;
2229 		}
2230 
2231 		for (i = 0; i < HERMON_MAX_PORTS; i++) {
2232 			if (state->hs_pkey[i]) {
2233 				kmem_free(state->hs_pkey[i], (1 <<
2234 				    state->hs_cfg_profile->cp_log_max_pkeytbl) *
2235 				    sizeof (ib_pkey_t));
2236 				state->hs_pkey[i] = NULL;
2237 			}
2238 			if (state->hs_guid[i]) {
2239 				kmem_free(state->hs_guid[i], (1 <<
2240 				    state->hs_cfg_profile->cp_log_max_gidtbl) *
2241 				    sizeof (ib_guid_t));
2242 				state->hs_guid[i] = NULL;
2243 			}
2244 		}
2245 		/* FALLTHROUGH */
2246 
2247 	case HERMON_DRV_CLEANUP_LEVEL6:
2248 		/*
2249 		 * Unmap the firmware memory area with UNMAP_FA command.
2250 		 */
2251 		status = hermon_unmap_fa_cmd_post(state);
2252 
2253 		if (status != HERMON_CMD_SUCCESS) {
2254 			cmn_err(CE_NOTE,
2255 			    "hermon_hw_fini: failed to unmap FW\n");
2256 		}
2257 
2258 		/*
2259 		 * Deallocate firmware DMA memory.
2260 		 */
2261 		hermon_dma_free(&state->hs_fw_dma);
2262 		/* FALLTHROUGH */
2263 
2264 	case HERMON_DRV_CLEANUP_LEVEL5:
2265 		/* stop the poll thread */
2266 		if (state->hs_fm_poll_thread) {
2267 			ddi_periodic_delete(state->hs_fm_poll_thread);
2268 			state->hs_fm_poll_thread = NULL;
2269 		}
2270 		/* FALLTHROUGH */
2271 
2272 	case HERMON_DRV_CLEANUP_LEVEL4:
2273 		/* Then cleanup the phase1 resources */
2274 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
2275 		/* FALLTHROUGH */
2276 
2277 	case HERMON_DRV_CLEANUP_LEVEL3:
2278 		/* Teardown any resources allocated for the config profile */
2279 		hermon_cfg_profile_fini(state);
2280 		/* FALLTHROUGH */
2281 
2282 	case HERMON_DRV_CLEANUP_LEVEL2:
2283 #ifdef HERMON_SUPPORTS_MSIX_BAR
2284 		/*
2285 		 * unmap 3rd BAR, MSIX BAR
2286 		 */
2287 		if (state->hs_reg_msihdl) {
2288 			ddi_regs_map_free(&state->hs_reg_msihdl);
2289 			state->hs_reg_msihdl = NULL;
2290 		}
2291 		/* FALLTHROUGH */
2292 #endif
2293 	case HERMON_DRV_CLEANUP_LEVEL1:
2294 	case HERMON_DRV_CLEANUP_LEVEL0:
2295 		/*
2296 		 * LEVEL1 and LEVEL0 resources are freed in
2297 		 * hermon_drv_fini2().
2298 		 */
2299 		break;
2300 
2301 	default:
2302 		HERMON_WARNING(state, "unexpected driver cleanup level");
2303 		return;
2304 	}
2305 }
2306 
2307 
2308 /*
2309  * hermon_soft_state_init()
2310  *    Context: Only called from attach() path context
2311  */
2312 static int
2313 hermon_soft_state_init(hermon_state_t *state)
2314 {
2315 	ibt_hca_attr_t		*hca_attr;
2316 	uint64_t		maxval, val;
2317 	ibt_hca_flags_t		caps = IBT_HCA_NO_FLAGS;
2318 	ibt_hca_flags2_t	caps2 = IBT_HCA2_NO_FLAGS;
2319 	int			status;
2320 	int			max_send_wqe_bytes;
2321 	int			max_recv_wqe_bytes;
2322 
2323 	/*
2324 	 * The ibc_hca_info_t struct is passed to the IBTF.  This is the
2325 	 * routine where we initialize it.  Many of the init values come from
2326 	 * either configuration variables or successful queries of the Hermon
2327 	 * hardware abilities
2328 	 */
2329 	state->hs_ibtfinfo.hca_ci_vers	= IBCI_V3;
2330 	state->hs_ibtfinfo.hca_dip	= state->hs_dip;
2331 	state->hs_ibtfinfo.hca_handle	= (ibc_hca_hdl_t)state;
2332 	state->hs_ibtfinfo.hca_ops	= &hermon_ibc_ops;
2333 
2334 	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
2335 	state->hs_ibtfinfo.hca_attr = hca_attr;
2336 
2337 	hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
2338 	hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
2339 	hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;
2340 
2341 	/* CQ interrupt moderation maximums - each limited to 16 bits */
2342 	hca_attr->hca_max_cq_mod_count = 0xFFFF;
2343 	hca_attr->hca_max_cq_mod_usec = 0xFFFF;
2344 
2345 	/* CQ relocation to other EQs - change when multiple MSI-Xs are used */
2346 	hca_attr->hca_max_cq_handlers = 1;
2347 
2348 	/*
2349 	 * Determine HCA capabilities:
2350 	 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
2351 	 *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
2352 	 *    or IBT_HCA_SHUTDOWN_PORT
2353 	 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_RTS_PORT, IBT_HCA_SI_GUID,
2354 	 *    IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
2355 	 *    IBT_HCA_SRQ, IBT_HCA_RESIZE_SRQ and IBT_HCA_FMR are always
2356 	 *    supported
2357 	 * All other features are conditionally supported, depending on the
2358 	 *    status return by the Hermon HCA in QUERY_DEV_LIM.
2359 	 */
2360 	if (state->hs_devlim.ud_multi) {
2361 		caps |= IBT_HCA_UD_MULTICAST;
2362 	}
2363 	if (state->hs_devlim.atomic) {
2364 		caps |= IBT_HCA_ATOMICS_HCA;
2365 	}
2366 	if (state->hs_devlim.apm) {
2367 		caps |= IBT_HCA_AUTO_PATH_MIG;
2368 	}
2369 	if (state->hs_devlim.pkey_v) {
2370 		caps |= IBT_HCA_PKEY_CNTR;
2371 	}
2372 	if (state->hs_devlim.qkey_v) {
2373 		caps |= IBT_HCA_QKEY_CNTR;
2374 	}
2375 	if (state->hs_devlim.ipoib_cksm) {
2376 		caps |= IBT_HCA_CKSUM_FULL;
2377 		caps2 |= IBT_HCA2_IP_CLASS;
2378 	}
2379 	if (state->hs_devlim.mod_wr_srq) {
2380 		caps |= IBT_HCA_RESIZE_SRQ;
2381 	}
2382 	if (state->hs_devlim.lif) {
2383 		caps |= IBT_HCA_LOCAL_INVAL_FENCE;
2384 	}
2385 	if (state->hs_devlim.reserved_lkey) {
2386 		caps2 |= IBT_HCA2_RES_LKEY;
2387 		hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
2388 	}
2389 	if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
2390 	    state->hs_devlim.fast_reg_wr) {	/* fw needs to be >= 2.6.636 */
2391 		if (state->hs_fw.fw_rev_major > 2)
2392 			caps2 |= IBT_HCA2_MEM_MGT_EXT;
2393 		else if (state->hs_fw.fw_rev_major == 2)
2394 			if (state->hs_fw.fw_rev_minor > 6)
2395 				caps2 |= IBT_HCA2_MEM_MGT_EXT;
2396 			else if (state->hs_fw.fw_rev_minor == 6)
2397 				if (state->hs_fw.fw_rev_subminor >= 636)
2398 					caps2 |= IBT_HCA2_MEM_MGT_EXT;
2399 	}
2400 	if (state->hs_devlim.mps) {
2401 		caps |= IBT_HCA_ZERO_BASED_VA;
2402 	}
2403 	if (state->hs_devlim.zb) {
2404 		caps |= IBT_HCA_MULT_PAGE_SZ_MR;
2405 	}
2406 	caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
2407 	    IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
2408 	    IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
2409 
2410 	if (state->hs_devlim.log_max_gso_sz) {
2411 		hca_attr->hca_max_lso_size =
2412 		    (1 << state->hs_devlim.log_max_gso_sz);
2413 		/* 64 = ctrl & datagram seg, 4 = LSO seg, 16 = 1 SGL */
2414 		hca_attr->hca_max_lso_hdr_size =
2415 		    state->hs_devlim.max_desc_sz_sq - (64 + 4 + 16);
2416 	}
2417 
2418 	caps |= IBT_HCA_WQE_SIZE_INFO;
2419 	max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
2420 	max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
2421 	hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
2422 	hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
2423 	hca_attr->hca_conn_rdma_sgl_overhead = 1;
2424 	hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;
2425 
2426 	/* We choose not to support "inline" unless it improves performance */
2427 	hca_attr->hca_max_inline_size = 0;
2428 	hca_attr->hca_ud_send_inline_sz = 0;
2429 	hca_attr->hca_conn_send_inline_sz = 0;
2430 	hca_attr->hca_conn_rdmaw_inline_overhead = 4;
2431 
2432 	hca_attr->hca_flags = caps;
2433 	hca_attr->hca_flags2 = caps2;
2434 
2435 	/*
2436 	 * Set hca_attr's IDs
2437 	 */
2438 	hca_attr->hca_vendor_id	 = state->hs_vendor_id;
2439 	hca_attr->hca_device_id	 = state->hs_device_id;
2440 	hca_attr->hca_version_id = state->hs_revision_id;
2441 
2442 	/*
2443 	 * Determine number of available QPs and max QP size.  Number of
2444 	 * available QPs is determined by subtracting the number of
2445 	 * "reserved QPs" (i.e. reserved for firmware use) from the
2446 	 * total number configured.
2447 	 */
2448 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2449 	hca_attr->hca_max_qp = val - ((uint64_t)1 <<
2450 	    state->hs_devlim.log_rsvd_qp);
2451 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
2452 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
2453 	if (val > maxval) {
2454 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2455 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2456 		    "soft_state_init_maxqpsz_toobig_fail");
2457 		return (DDI_FAILURE);
2458 	}
2459 	/* we need to reduce this by the max space needed for headroom */
2460 	hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
2461 	    HERMON_QP_WQE_LOG_MINIMUM) - 1;
2462 
2463 	/*
2464 	 * Determine max scatter-gather size in WQEs. The HCA has split
2465 	 * the max sgl into rec'v Q and send Q values. Use the least.
2466 	 *
2467 	 * This is mainly useful for legacy clients.  Smart clients
2468 	 * such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
2469 	 */
2470 	if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
2471 		maxval = state->hs_devlim.max_sg_rq;
2472 	} else {
2473 		maxval = state->hs_devlim.max_sg_sq;
2474 	}
2475 	val	= state->hs_cfg_profile->cp_wqe_max_sgl;
2476 	if (val > maxval) {
2477 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2478 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2479 		    "soft_state_init_toomanysgl_fail");
2480 		return (DDI_FAILURE);
2481 	}
2482 	/* If the rounded value for max SGL is too large, cap it */
2483 	if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
2484 		state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
2485 		val = maxval;
2486 	} else {
2487 		val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
2488 	}
2489 
2490 	hca_attr->hca_max_sgl	 = (uint_t)val;
2491 	hca_attr->hca_max_rd_sgl = 0;	/* zero because RD is unsupported */
2492 
2493 	/*
2494 	 * Determine number of available CQs and max CQ size. Number of
2495 	 * available CQs is determined by subtracting the number of
2496 	 * "reserved CQs" (i.e. reserved for firmware use) from the
2497 	 * total number configured.
2498 	 */
2499 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
2500 	hca_attr->hca_max_cq = val - ((uint64_t)1 <<
2501 	    state->hs_devlim.log_rsvd_cq);
2502 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
2503 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
2504 	if (val > maxval) {
2505 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2506 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2507 		    "soft_state_init_maxcqsz_toobig_fail");
2508 		return (DDI_FAILURE);
2509 	}
2510 	hca_attr->hca_max_cq_sz = (uint_t)val;
2511 
2512 	/*
2513 	 * Determine number of available SRQs and max SRQ size. Number of
2514 	 * available SRQs is determined by subtracting the number of
2515 	 * "reserved SRQs" (i.e. reserved for firmware use) from the
2516 	 * total number configured.
2517 	 */
2518 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
2519 	hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
2520 	    state->hs_devlim.log_rsvd_srq);
2521 	maxval  = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
2522 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);
2523 
2524 	if (val > maxval) {
2525 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2526 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2527 		    "soft_state_init_maxsrqsz_toobig_fail");
2528 		return (DDI_FAILURE);
2529 	}
2530 	hca_attr->hca_max_srqs_sz = (uint_t)val;
2531 
2532 	val	= hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
2533 	maxval	= state->hs_devlim.max_sg_rq - 1;
2534 	if (val > maxval) {
2535 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2536 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2537 		    "soft_state_init_toomanysrqsgl_fail");
2538 		return (DDI_FAILURE);
2539 	}
2540 	hca_attr->hca_max_srq_sgl = (uint_t)val;
2541 
2542 	/*
2543 	 * Determine supported HCA page sizes
2544 	 * XXX
2545 	 * For now we simply return the system pagesize as the only supported
2546 	 * pagesize
2547 	 */
2548 	hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
2549 	    IBT_PAGE_4K);
2550 
2551 	/*
2552 	 * Determine number of available MemReg, MemWin, and their max size.
2553 	 * Number of available MRs and MWs is determined by subtracting
2554 	 * the number of "reserved MPTs" (i.e. reserved for firmware use)
2555 	 * from the total number configured for each.
2556 	 */
2557 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
2558 	hca_attr->hca_max_memr	  = val - ((uint64_t)1 <<
2559 	    state->hs_devlim.log_rsvd_dmpt);
2560 	hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
2561 	    ((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
2562 	maxval	= state->hs_devlim.log_max_mrw_sz;
2563 	val	= state->hs_cfg_profile->cp_log_max_mrw_sz;
2564 	if (val > maxval) {
2565 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2566 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2567 		    "soft_state_init_maxmrwsz_toobig_fail");
2568 		return (DDI_FAILURE);
2569 	}
2570 	hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
2571 
2572 	/* Determine RDMA/Atomic properties */
2573 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
2574 	hca_attr->hca_max_rsc = (uint_t)val;
2575 	val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
2576 	hca_attr->hca_max_rdma_in_qp  = (uint8_t)val;
2577 	val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
2578 	hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
2579 	hca_attr->hca_max_rdma_in_ee  = 0;
2580 	hca_attr->hca_max_rdma_out_ee = 0;
2581 
2582 	/*
2583 	 * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
2584 	 * because neither type of raw QP is supported
2585 	 */
2586 	hca_attr->hca_max_ipv6_qp  = 0;
2587 	hca_attr->hca_max_ether_qp = 0;
2588 
2589 	/* Determine max number of MCGs and max QP-per-MCG */
2590 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2591 	hca_attr->hca_max_mcg_qps   = (uint_t)val;
2592 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
2593 	hca_attr->hca_max_mcg	    = (uint_t)val;
2594 	val = state->hs_cfg_profile->cp_num_qp_per_mcg;
2595 	hca_attr->hca_max_qp_per_mcg = (uint_t)val;
2596 
2597 	/* Determine max number partitions (i.e. PKeys) */
2598 	maxval	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2599 	    state->hs_queryport.log_max_pkey);
2600 	val	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2601 	    state->hs_cfg_profile->cp_log_max_pkeytbl);
2602 
2603 	if (val > maxval) {
2604 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2605 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2606 		    "soft_state_init_toomanypkey_fail");
2607 		return (DDI_FAILURE);
2608 	}
2609 	hca_attr->hca_max_partitions = (uint16_t)val;
2610 
2611 	/* Determine number of ports */
2612 	maxval = state->hs_devlim.num_ports;
2613 	val = state->hs_cfg_profile->cp_num_ports;
2614 	if ((val > maxval) || (val == 0)) {
2615 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2616 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2617 		    "soft_state_init_toomanyports_fail");
2618 		return (DDI_FAILURE);
2619 	}
2620 	hca_attr->hca_nports = (uint8_t)val;
2621 
2622 	/* Copy NodeGUID and SystemImageGUID from softstate */
2623 	hca_attr->hca_node_guid = state->hs_nodeguid;
2624 	hca_attr->hca_si_guid	= state->hs_sysimgguid;
2625 
2626 	/*
2627 	 * Determine local ACK delay.  Use the value suggested by the Hermon
2628 	 * hardware (from the QUERY_DEV_CAP command)
2629 	 */
2630 	hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;
2631 
2632 	/* Determine max SGID table and PKey table sizes */
2633 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
2634 	hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
2635 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
2636 	hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;
2637 
2638 	/* Determine max number of PDs */
2639 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_pd);
2640 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
2641 	if (val > maxval) {
2642 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2643 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2644 		    "soft_state_init_toomanypd_fail");
2645 		return (DDI_FAILURE);
2646 	}
2647 	hca_attr->hca_max_pd = (uint_t)val;
2648 
2649 	/* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
2650 	hca_attr->hca_max_ah = 0;
2651 
2652 	/* No RDDs or EECs (since Reliable Datagram is not supported) */
2653 	hca_attr->hca_max_rdd = 0;
2654 	hca_attr->hca_max_eec = 0;
2655 
2656 	/* Initialize lock for reserved UAR page access */
2657 	mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
2658 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2659 
2660 	/* Initialize the flash fields */
2661 	state->hs_fw_flashstarted = 0;
2662 	mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
2663 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2664 
2665 	/* Initialize the lock for the info ioctl */
2666 	mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
2667 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2668 
2669 	/* Initialize the AVL tree for QP number support */
2670 	hermon_qpn_avl_init(state);
2671 
2672 	/* Initialize the kstat info structure */
2673 	status = hermon_kstat_init(state);
2674 	if (status != DDI_SUCCESS) {
2675 		hermon_qpn_avl_fini(state);
2676 		mutex_destroy(&state->hs_info_lock);
2677 		mutex_destroy(&state->hs_fw_flashlock);
2678 		mutex_destroy(&state->hs_uar_lock);
2679 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2680 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2681 		    "soft_state_init_kstatinit_fail");
2682 		return (DDI_FAILURE);
2683 	}
2684 
2685 	return (DDI_SUCCESS);
2686 }
2687 
2688 
2689 /*
2690  * hermon_soft_state_fini()
2691  *    Context: Called only from detach() path context
2692  */
2693 static void
2694 hermon_soft_state_fini(hermon_state_t *state)
2695 {
2696 
2697 	/* Teardown the kstat info */
2698 	hermon_kstat_fini(state);
2699 
2700 	/* Teardown the AVL tree for QP number support */
2701 	hermon_qpn_avl_fini(state);
2702 
2703 	/* Free up info ioctl mutex */
2704 	mutex_destroy(&state->hs_info_lock);
2705 
2706 	/* Free up flash mutex */
2707 	mutex_destroy(&state->hs_fw_flashlock);
2708 
2709 	/* Free up the UAR page access mutex */
2710 	mutex_destroy(&state->hs_uar_lock);
2711 
2712 	/* Free up the hca_attr struct */
2713 	kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
2714 
2715 }
2716 
2717 /*
2718  * hermon_icm_config_setup()
2719  *    Context: Only called from attach() path context
2720  */
2721 static int
2722 hermon_icm_config_setup(hermon_state_t *state,
2723     hermon_hw_initqueryhca_t *inithca)
2724 {
2725 	hermon_hw_querydevlim_t	*devlim;
2726 	hermon_cfg_profile_t	*cfg;
2727 	hermon_icm_table_t	*icm_p[HERMON_NUM_ICM_RESOURCES];
2728 	hermon_icm_table_t	*icm;
2729 	hermon_icm_table_t	*tmp;
2730 	uint64_t		icm_addr;
2731 	uint64_t		icm_size;
2732 	int			status, i, j;
2733 
2734 
2735 	/* Bring in local devlims, cfg_profile and hs_icm table list */
2736 	devlim = &state->hs_devlim;
2737 	cfg = state->hs_cfg_profile;
2738 	icm = state->hs_icm;
2739 
2740 	/*
2741 	 * Assign each ICM table's entry size from data in the devlims,
2742 	 * except for RDB and MCG sizes, which are not returned in devlims
2743 	 * but do have a fixed size, and the UAR context entry size, which
2744 	 * we determine. For this, we use the "cp_num_pgs_per_uce" value
2745 	 * from our hs_cfg_profile.
2746 	 */
2747 	icm[HERMON_CMPT].object_size	= devlim->cmpt_entry_sz;
2748 	icm[HERMON_CMPT_QPC].object_size	= devlim->cmpt_entry_sz;
2749 	icm[HERMON_CMPT_SRQC].object_size	= devlim->cmpt_entry_sz;
2750 	icm[HERMON_CMPT_CQC].object_size	= devlim->cmpt_entry_sz;
2751 	icm[HERMON_CMPT_EQC].object_size	= devlim->cmpt_entry_sz;
2752 	icm[HERMON_MTT].object_size	= devlim->mtt_entry_sz;
2753 	icm[HERMON_DMPT].object_size	= devlim->dmpt_entry_sz;
2754 	icm[HERMON_QPC].object_size	= devlim->qpc_entry_sz;
2755 	icm[HERMON_CQC].object_size	= devlim->cqc_entry_sz;
2756 	icm[HERMON_SRQC].object_size	= devlim->srq_entry_sz;
2757 	icm[HERMON_EQC].object_size	= devlim->eqc_entry_sz;
2758 	icm[HERMON_RDB].object_size	= devlim->rdmardc_entry_sz *
2759 	    cfg->cp_hca_max_rdma_in_qp;
2760 	icm[HERMON_MCG].object_size	= HERMON_MCGMEM_SZ(state);
2761 	icm[HERMON_ALTC].object_size	= devlim->altc_entry_sz;
2762 	icm[HERMON_AUXC].object_size	= devlim->aux_entry_sz;
2763 
2764 	/* Assign each ICM table's log2 number of entries */
2765 	icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
2766 	icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
2767 	icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
2768 	icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
2769 	icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
2770 	icm[HERMON_MTT].log_num_entries	= cfg->cp_log_num_mtt;
2771 	icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
2772 	icm[HERMON_QPC].log_num_entries	= cfg->cp_log_num_qp;
2773 	icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
2774 	icm[HERMON_CQC].log_num_entries	= cfg->cp_log_num_cq;
2775 	icm[HERMON_EQC].log_num_entries	= HERMON_NUM_EQ_SHIFT;
2776 	icm[HERMON_RDB].log_num_entries	= cfg->cp_log_num_qp;
2777 	icm[HERMON_MCG].log_num_entries	= cfg->cp_log_num_mcg;
2778 	icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
2779 	icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;
2780 
2781 	/* Initialize the ICM tables */
2782 	hermon_icm_tables_init(state);
2783 
2784 	/*
2785 	 * ICM tables must be aligned on their size in the ICM address
2786 	 * space. So, here we order the tables from largest total table
2787 	 * size to the smallest. All tables are a power of 2 in size, so
2788 	 * this will ensure that all tables are aligned on their own size
2789 	 * without wasting space in the ICM.
2790 	 *
2791 	 * In order to easily set the ICM addresses without needing to
2792 	 * worry about the ordering of our table indices as relates to
2793 	 * the hermon_rsrc_type_t enum, we will use a list of pointers
2794 	 * representing the tables for the sort, then assign ICM addresses
2795 	 * below using it.
2796 	 */
2797 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2798 		icm_p[i] = &icm[i];
2799 	}
2800 	for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
2801 		switch (i) {
2802 		case HERMON_CMPT_QPC:
2803 		case HERMON_CMPT_SRQC:
2804 		case HERMON_CMPT_CQC:
2805 		case HERMON_CMPT_EQC:
2806 			continue;
2807 		}
2808 		for (j = 1; j < i; j++) {
2809 			if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
2810 				tmp		= icm_p[j];
2811 				icm_p[j]	= icm_p[j - 1];
2812 				icm_p[j - 1]	= tmp;
2813 			}
2814 		}
2815 	}
2816 
2817 	/* Initialize the ICM address and ICM size */
2818 	icm_addr = icm_size = 0;
2819 
2820 	/*
2821 	 * Set the ICM base address of each table, using our sorted
2822 	 * list of pointers from above.
2823 	 */
2824 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2825 		j = icm_p[i]->icm_type;
2826 		switch (j) {
2827 		case HERMON_CMPT_QPC:
2828 		case HERMON_CMPT_SRQC:
2829 		case HERMON_CMPT_CQC:
2830 		case HERMON_CMPT_EQC:
2831 			continue;
2832 		}
2833 		if (icm[j].table_size) {
2834 			/*
2835 			 * Set the ICM base address in the table, save the
2836 			 * ICM offset in the rsrc pool and increment the
2837 			 * total ICM allocation.
2838 			 */
2839 			icm[j].icm_baseaddr = icm_addr;
2840 			if (hermon_verbose) {
2841 				IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
2842 				    " size %llx", j, icm[j].icm_baseaddr,
2843 				    icm[j].table_size);
2844 			}
2845 			icm_size += icm[j].table_size;
2846 		}
2847 
2848 		/* Verify that we don't exceed maximum ICM size */
2849 		if (icm_size > devlim->max_icm_size) {
2850 			/* free the ICM table memory resources */
2851 			hermon_icm_tables_fini(state);
2852 			cmn_err(CE_WARN, "ICM configuration exceeds maximum "
2853 			    "configuration: max (0x%lx) requested (0x%lx)\n",
2854 			    (ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
2855 			HERMON_ATTACH_MSG(state->hs_attach_buf,
2856 			    "icm_config_toobig_fail");
2857 			return (DDI_FAILURE);
2858 		}
2859 
2860 		/* assign address to the 4 pieces of the CMPT */
2861 		if (j == HERMON_CMPT) {
2862 			uint64_t cmpt_size = icm[j].table_size >> 2;
2863 #define	init_cmpt_icm_baseaddr(rsrc, indx)				\
2864 	icm[rsrc].icm_baseaddr	= icm_addr + (indx * cmpt_size);
2865 			init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
2866 			init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
2867 			init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
2868 			init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
2869 		}
2870 
2871 		/* Increment the ICM address for the next table */
2872 		icm_addr += icm[j].table_size;
2873 	}
2874 
2875 	/* Populate the structure for the INIT_HCA command */
2876 	hermon_inithca_set(state, inithca);
2877 
2878 	/*
2879 	 * Prior to invoking INIT_HCA, we must have ICM memory in place
2880 	 * for the reserved objects in each table. We will allocate and map
2881 	 * this initial ICM memory here. Note that given the assignment
2882 	 * of span_size above, tables that are smaller or equal in total
2883 	 * size to the default span_size will be mapped in full.
2884 	 */
2885 	status = hermon_icm_dma_init(state);
2886 	if (status != DDI_SUCCESS) {
2887 		/* free the ICM table memory resources */
2888 		hermon_icm_tables_fini(state);
2889 		HERMON_WARNING(state, "Failed to allocate initial ICM");
2890 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2891 		    "icm_config_dma_init_fail");
2892 		return (DDI_FAILURE);
2893 	}
2894 
2895 	return (DDI_SUCCESS);
2896 }
2897 
2898 /*
2899  * hermon_inithca_set()
2900  *    Context: Only called from attach() path context
2901  */
2902 static void
2903 hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
2904 {
2905 	hermon_cfg_profile_t	*cfg;
2906 	hermon_icm_table_t	*icm;
2907 	int			i;
2908 
2909 
2910 	/* Populate the INIT_HCA structure */
2911 	icm = state->hs_icm;
2912 	cfg = state->hs_cfg_profile;
2913 
2914 	/* set version */
2915 	inithca->version = 0x02;	/* PRM 0.36 */
2916 	/* set cacheline - log2 in 16-byte chunks */
2917 	inithca->log2_cacheline = 0x2;	/* optimized for 64 byte cache */
2918 
2919 	/* we need to update the inithca info with thie UAR info too */
2920 	inithca->uar.log_max_uars = highbit(cfg->cp_log_num_uar);
2921 	inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;
2922 
2923 	/* Set endianess */
2924 #ifdef	_LITTLE_ENDIAN
2925 	inithca->big_endian	= 0;
2926 #else
2927 	inithca->big_endian	= 1;
2928 #endif
2929 
2930 	/* Port Checking is on by default */
2931 	inithca->udav_port_chk	= HERMON_UDAV_PORTCHK_ENABLED;
2932 
2933 	/* Enable IPoIB checksum */
2934 	if (state->hs_devlim.ipoib_cksm)
2935 		inithca->chsum_en = 1;
2936 
2937 	/* Set each ICM table's attributes */
2938 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2939 		switch (icm[i].icm_type) {
2940 		case HERMON_CMPT:
2941 			inithca->tpt.cmpt_baseaddr = icm[i].icm_baseaddr;
2942 			break;
2943 
2944 		case HERMON_MTT:
2945 			inithca->tpt.mtt_baseaddr = icm[i].icm_baseaddr;
2946 			break;
2947 
2948 		case HERMON_DMPT:
2949 			inithca->tpt.dmpt_baseaddr = icm[i].icm_baseaddr;
2950 			inithca->tpt.log_dmpt_sz   = icm[i].log_num_entries;
2951 			inithca->tpt.pgfault_rnr_to = 0; /* just in case */
2952 			break;
2953 
2954 		case HERMON_QPC:
2955 			inithca->context.log_num_qp = icm[i].log_num_entries;
2956 			inithca->context.qpc_baseaddr_h =
2957 			    icm[i].icm_baseaddr >> 32;
2958 			inithca->context.qpc_baseaddr_l =
2959 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2960 			break;
2961 
2962 		case HERMON_CQC:
2963 			inithca->context.log_num_cq = icm[i].log_num_entries;
2964 			inithca->context.cqc_baseaddr_h =
2965 			    icm[i].icm_baseaddr >> 32;
2966 			inithca->context.cqc_baseaddr_l =
2967 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2968 			break;
2969 
2970 		case HERMON_SRQC:
2971 			inithca->context.log_num_srq = icm[i].log_num_entries;
2972 			inithca->context.srqc_baseaddr_h =
2973 			    icm[i].icm_baseaddr >> 32;
2974 			inithca->context.srqc_baseaddr_l =
2975 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2976 			break;
2977 
2978 		case HERMON_EQC:
2979 			inithca->context.log_num_eq = icm[i].log_num_entries;
2980 			inithca->context.eqc_baseaddr_h =
2981 			    icm[i].icm_baseaddr >> 32;
2982 			inithca->context.eqc_baseaddr_l =
2983 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2984 			break;
2985 
2986 		case HERMON_RDB:
2987 			inithca->context.rdmardc_baseaddr_h =
2988 			    icm[i].icm_baseaddr >> 32;
2989 			inithca->context.rdmardc_baseaddr_l =
2990 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
2991 			inithca->context.log_num_rdmardc =
2992 			    cfg->cp_log_num_rdb - cfg->cp_log_num_qp;
2993 			break;
2994 
2995 		case HERMON_MCG:
2996 			inithca->multi.mc_baseaddr    = icm[i].icm_baseaddr;
2997 			inithca->multi.log_mc_tbl_sz  = icm[i].log_num_entries;
2998 			inithca->multi.log_mc_tbl_ent =
2999 			    highbit(HERMON_MCGMEM_SZ(state)) - 1;
3000 			inithca->multi.log_mc_tbl_hash_sz =
3001 			    cfg->cp_log_num_mcg_hash;
3002 			inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
3003 			break;
3004 
3005 		case HERMON_ALTC:
3006 			inithca->context.altc_baseaddr = icm[i].icm_baseaddr;
3007 			break;
3008 
3009 		case HERMON_AUXC:
3010 			inithca->context.auxc_baseaddr = icm[i].icm_baseaddr;
3011 			break;
3012 
3013 		default:
3014 			break;
3015 
3016 		}
3017 	}
3018 
3019 }
3020 
3021 /*
3022  * hermon_icm_tables_init()
3023  *    Context: Only called from attach() path context
3024  *
3025  * Dynamic ICM breaks the various ICM tables into "span_size" chunks
3026  * to enable allocation of backing memory on demand.  Arbel used a
3027  * fixed size ARBEL_ICM_SPAN_SIZE (initially was 512KB) as the
3028  * span_size for all ICM chunks.  Hermon has other considerations,
3029  * so the span_size used differs from Arbel.
3030  *
3031  * The basic considerations for why Hermon differs are:
3032  *
3033  *	1) ICM memory is in units of HERMON pages.
3034  *
3035  *	2) The AUXC table is approximately 1 byte per QP.
3036  *
3037  *	3) ICM memory for AUXC, ALTC, and RDB is allocated when
3038  *	the ICM memory for the corresponding QPC is allocated.
3039  *
3040  *	4) ICM memory for the CMPT corresponding to the various primary
3041  *	resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
3042  *	memory for the primary resource is allocated.
3043  *
3044  * One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
3045  * So, the minimum chunk for the various QPC related ICM memory should
3046  * all be allocated to support the 4K QPs.  Currently, this means the
3047  * amount of memory for the various QP chunks is:
3048  *
3049  *	QPC	256*4K bytes
3050  *	RDB	128*4K bytes
3051  *	CMPT	 64*4K bytes
3052  *	ALTC	 64*4K bytes
3053  *	AUXC	  1*4K bytes
3054  *
3055  * The span_size chosen for the QP resource is 4KB of AUXC entries,
3056  * or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
3057  *
3058  * Other ICM resources can have their span_size be more arbitrary.
3059  * This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
3060  */
3061 
/*
 * init_dependent(rsrc, dep)
 *
 * Copy the span geometry (span, num_spans, split_shift, span_mask,
 * span_shift and rsrc_mask) of ICM table "rsrc" into the dependent table
 * "dep", and trace the result when hermon_verbose is set.
 *
 * The macro expects a local "icm" table array to be in scope at the point
 * of use.  It is wrapped in do { } while (0) so that it behaves as one
 * statement wherever it is expanded.
 */
#define	init_dependent(rsrc, dep)					\
	do {								\
		icm[(dep)].span		= icm[(rsrc)].span;		\
		icm[(dep)].num_spans	= icm[(rsrc)].num_spans;	\
		icm[(dep)].split_shift	= icm[(rsrc)].split_shift;	\
		icm[(dep)].span_mask	= icm[(rsrc)].span_mask;	\
		icm[(dep)].span_shift	= icm[(rsrc)].span_shift;	\
		icm[(dep)].rsrc_mask	= icm[(rsrc)].rsrc_mask;	\
		if (hermon_verbose) {					\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "rsrc (0x%x) size (0x%lx) span (0x%x) "	\
			    "num_spans (0x%x)", (dep),			\
			    icm[(dep)].table_size,			\
			    icm[(dep)].span, icm[(dep)].num_spans);	\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "span_shift (0x%x) split_shift (0x%x)",	\
			    icm[(dep)].span_shift,			\
			    icm[(dep)].split_shift);			\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "span_mask (0x%x)  rsrc_mask   (0x%x)",	\
			    icm[(dep)].span_mask,			\
			    icm[(dep)].rsrc_mask);			\
		}							\
	} while (0)
3082 
3083 static void
3084 hermon_icm_tables_init(hermon_state_t *state)
3085 {
3086 	hermon_icm_table_t	*icm;
3087 	int			i, k;
3088 	uint32_t		per_split;
3089 
3090 
3091 	icm = state->hs_icm;
3092 
3093 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3094 		icm[i].icm_type		= i;
3095 		icm[i].num_entries	= 1 << icm[i].log_num_entries;
3096 		icm[i].log_object_size	= highbit(icm[i].object_size) - 1;
3097 		icm[i].table_size	= icm[i].num_entries <<
3098 		    icm[i].log_object_size;
3099 
3100 		/* deal with "dependent" resource types */
3101 		switch (i) {
3102 		case HERMON_AUXC:
3103 #ifdef HERMON_FW_WORKAROUND
3104 			icm[i].table_size = 0x80000000ull;
3105 			/* FALLTHROUGH */
3106 #endif
3107 		case HERMON_CMPT_QPC:
3108 		case HERMON_RDB:
3109 		case HERMON_ALTC:
3110 			init_dependent(HERMON_QPC, i);
3111 			continue;
3112 		case HERMON_CMPT_SRQC:
3113 			init_dependent(HERMON_SRQC, i);
3114 			continue;
3115 		case HERMON_CMPT_CQC:
3116 			init_dependent(HERMON_CQC, i);
3117 			continue;
3118 		case HERMON_CMPT_EQC:
3119 			init_dependent(HERMON_EQC, i);
3120 			continue;
3121 		}
3122 
3123 		icm[i].span = HERMON_ICM_SPAN;	/* default #rsrc's in 1 span */
3124 		if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
3125 			icm[i].span = HERMON_ICM_SPAN * 16;
3126 		icm[i].num_spans = icm[i].num_entries / icm[i].span;
3127 		if (icm[i].num_spans == 0) {
3128 			icm[i].span = icm[i].num_entries;
3129 			per_split = 1;
3130 			icm[i].num_spans = icm[i].num_entries / icm[i].span;
3131 		} else {
3132 			per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
3133 			if (per_split == 0) {
3134 				per_split = 1;
3135 			}
3136 		}
3137 		if (hermon_verbose)
3138 			IBTF_DPRINTF_L2("ICM", "rsrc %x  span %x  num_spans %x",
3139 			    i, icm[i].span, icm[i].num_spans);
3140 
3141 		/*
3142 		 * Ensure a minimum table size of an ICM page, and a
3143 		 * maximum span size of the ICM table size.  This ensures
3144 		 * that we don't have less than an ICM page to map, which is
3145 		 * impossible, and that we will map an entire table at
3146 		 * once if it's total size is less than the span size.
3147 		 */
3148 		icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);
3149 
3150 		icm[i].span_shift = 0;
3151 		for (k = icm[i].span; k != 1; k >>= 1)
3152 			icm[i].span_shift++;
3153 		icm[i].split_shift = icm[i].span_shift;
3154 		for (k = per_split; k != 1; k >>= 1)
3155 			icm[i].split_shift++;
3156 		icm[i].span_mask = (1 << icm[i].split_shift) -
3157 		    (1 << icm[i].span_shift);
3158 		icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;
3159 
3160 
3161 		/* Initialize the table lock */
3162 		mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
3163 		    DDI_INTR_PRI(state->hs_intrmsi_pri));
3164 		cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);
3165 
3166 		if (hermon_verbose) {
3167 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3168 			    "rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
3169 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3170 			    "span (0x%x) num_spans (0x%x)",
3171 			    icm[i].span, icm[i].num_spans);
3172 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3173 			    "span_shift (0x%x) split_shift (0x%x)",
3174 			    icm[i].span_shift, icm[i].split_shift);
3175 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3176 			    "span_mask (0x%x)  rsrc_mask   (0x%x)",
3177 			    icm[i].span_mask, icm[i].rsrc_mask);
3178 		}
3179 	}
3180 
3181 }
3182 
3183 /*
3184  * hermon_icm_tables_fini()
3185  *    Context: Only called from attach() path context
3186  *
3187  * Clean up all icm_tables.  Free the bitmap and dma_info arrays.
3188  */
3189 static void
3190 hermon_icm_tables_fini(hermon_state_t *state)
3191 {
3192 	hermon_icm_table_t	*icm;
3193 	int			nspans;
3194 	int			i, j;
3195 
3196 
3197 	icm = state->hs_icm;
3198 
3199 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3200 
3201 		mutex_enter(&icm[i].icm_table_lock);
3202 		nspans = icm[i].num_spans;
3203 
3204 		for (j = 0; j < HERMON_ICM_SPLIT; j++) {
3205 			if (icm[i].icm_dma[j])
3206 				/* Free the ICM DMA slots */
3207 				kmem_free(icm[i].icm_dma[j],
3208 				    nspans * sizeof (hermon_dma_info_t));
3209 
3210 			if (icm[i].icm_bitmap[j])
3211 				/* Free the table bitmap */
3212 				kmem_free(icm[i].icm_bitmap[j],
3213 				    (nspans + 7) / 8);
3214 		}
3215 		/* Destroy the table lock */
3216 		cv_destroy(&icm[i].icm_table_cv);
3217 		mutex_exit(&icm[i].icm_table_lock);
3218 		mutex_destroy(&icm[i].icm_table_lock);
3219 	}
3220 
3221 }
3222 
3223 /*
3224  * hermon_icm_dma_init()
3225  *    Context: Only called from attach() path context
3226  */
3227 static int
3228 hermon_icm_dma_init(hermon_state_t *state)
3229 {
3230 	hermon_icm_table_t	*icm;
3231 	hermon_rsrc_type_t	type;
3232 	int			status;
3233 
3234 
3235 	/*
3236 	 * This routine will allocate initial ICM DMA resources for ICM
3237 	 * tables that have reserved ICM objects. This is the only routine
3238 	 * where we should have to allocate ICM outside of hermon_rsrc_alloc().
3239 	 * We need to allocate ICM here explicitly, rather than in
3240 	 * hermon_rsrc_alloc(), because we've not yet completed the resource
3241 	 * pool initialization. When the resource pools are initialized
3242 	 * (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
3243 	 * information), resource preallocations will be invoked to match
3244 	 * the ICM allocations seen here. We will then be able to use the
3245 	 * normal allocation path.  Note we don't need to set a refcnt on
3246 	 * these initial allocations because that will be done in the calls
3247 	 * to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
3248 	 * "prealloc" objects (see hermon_rsrc.c for more information).
3249 	 */
3250 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3251 
3252 		/* ICM for these is allocated within hermon_icm_alloc() */
3253 		switch (type) {
3254 		case HERMON_CMPT:
3255 		case HERMON_CMPT_QPC:
3256 		case HERMON_CMPT_SRQC:
3257 		case HERMON_CMPT_CQC:
3258 		case HERMON_CMPT_EQC:
3259 		case HERMON_AUXC:
3260 		case HERMON_ALTC:
3261 		case HERMON_RDB:
3262 			continue;
3263 		}
3264 
3265 		icm = &state->hs_icm[type];
3266 
3267 		mutex_enter(&icm->icm_table_lock);
3268 		status = hermon_icm_alloc(state, type, 0, 0);
3269 		mutex_exit(&icm->icm_table_lock);
3270 		if (status != DDI_SUCCESS) {
3271 			while (type--) {
3272 				icm = &state->hs_icm[type];
3273 				mutex_enter(&icm->icm_table_lock);
3274 				hermon_icm_free(state, type, 0, 0);
3275 				mutex_exit(&icm->icm_table_lock);
3276 			}
3277 			return (DDI_FAILURE);
3278 		}
3279 
3280 		if (hermon_verbose) {
3281 			IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
3282 			    "table (0x%x) index (0x%x) allocated", type, 0);
3283 		}
3284 	}
3285 
3286 	return (DDI_SUCCESS);
3287 }
3288 
3289 /*
3290  * hermon_icm_dma_fini()
3291  *    Context: Only called from attach() path context
3292  *
3293  * ICM has been completely unmapped.  We just free the memory here.
3294  */
3295 static void
3296 hermon_icm_dma_fini(hermon_state_t *state)
3297 {
3298 	hermon_icm_table_t	*icm;
3299 	hermon_dma_info_t	*dma_info;
3300 	hermon_rsrc_type_t	type;
3301 	int			index1, index2;
3302 
3303 
3304 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3305 		icm = &state->hs_icm[type];
3306 		for (index1 = 0; index1 < HERMON_ICM_SPLIT; index1++) {
3307 			dma_info = icm->icm_dma[index1];
3308 			if (dma_info == NULL)
3309 				continue;
3310 			for (index2 = 0; index2 < icm->num_spans; index2++) {
3311 				if (dma_info[index2].dma_hdl)
3312 					hermon_dma_free(&dma_info[index2]);
3313 				dma_info[index2].dma_hdl = NULL;
3314 			}
3315 		}
3316 	}
3317 
3318 }
3319 
3320 /*
3321  * hermon_hca_port_init()
3322  *    Context: Only called from attach() path context
3323  */
3324 static int
3325 hermon_hca_port_init(hermon_state_t *state)
3326 {
3327 	hermon_hw_set_port_t	*portinits, *initport;
3328 	hermon_cfg_profile_t	*cfgprof;
3329 	uint_t			num_ports;
3330 	int			i = 0, status;
3331 	uint64_t		maxval, val;
3332 	uint64_t		sysimgguid, nodeguid, portguid;
3333 
3334 
3335 	cfgprof = state->hs_cfg_profile;
3336 
3337 	/* Get number of HCA ports */
3338 	num_ports = cfgprof->cp_num_ports;
3339 
3340 	/* Allocate space for Hermon set port  struct(s) */
3341 	portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
3342 	    sizeof (hermon_hw_set_port_t), KM_SLEEP);
3343 
3344 
3345 
3346 	/* Post commands to initialize each Hermon HCA port */
3347 	/*
3348 	 * In Hermon, the process is different than in previous HCAs.
3349 	 * Here, you have to:
3350 	 *	QUERY_PORT - to get basic information from the HCA
3351 	 *	set the fields accordingly
3352 	 *	SET_PORT - to change/set everything as desired
3353 	 *	INIT_PORT - to bring the port up
3354 	 *
3355 	 * Needs to be done for each port in turn
3356 	 */
3357 
3358 	for (i = 0; i < num_ports; i++) {
3359 		bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
3360 		status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
3361 		    (i + 1), &state->hs_queryport,
3362 		    sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
3363 		if (status != HERMON_CMD_SUCCESS) {
3364 			cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
3365 			    "command failed: %08x\n", i + 1, status);
3366 			goto init_ports_fail;
3367 		}
3368 		initport = &portinits[i];
3369 		state->hs_initport = &portinits[i];
3370 
3371 		bzero(initport, sizeof (hermon_hw_query_port_t));
3372 
3373 		/*
3374 		 * Determine whether we need to override the firmware's
3375 		 * default SystemImageGUID setting.
3376 		 */
3377 		sysimgguid = cfgprof->cp_sysimgguid;
3378 		if (sysimgguid != 0) {
3379 			initport->sig		= 1;
3380 			initport->sys_img_guid	= sysimgguid;
3381 		}
3382 
3383 		/*
3384 		 * Determine whether we need to override the firmware's
3385 		 * default NodeGUID setting.
3386 		 */
3387 		nodeguid = cfgprof->cp_nodeguid;
3388 		if (nodeguid != 0) {
3389 			initport->ng		= 1;
3390 			initport->node_guid	= nodeguid;
3391 		}
3392 
3393 		/*
3394 		 * Determine whether we need to override the firmware's
3395 		 * default PortGUID setting.
3396 		 */
3397 		portguid = cfgprof->cp_portguid[i];
3398 		if (portguid != 0) {
3399 			initport->g0		= 1;
3400 			initport->guid0		= portguid;
3401 		}
3402 
3403 		/* Validate max MTU size */
3404 		maxval  = state->hs_queryport.ib_mtu;
3405 		val	= cfgprof->cp_max_mtu;
3406 		if (val > maxval) {
3407 			goto init_ports_fail;
3408 		}
3409 
3410 		/* Set mtu_cap to 4096 bytes */
3411 		initport->mmc = 1;	/* set the change bit */
3412 		initport->mtu_cap = 5;	/* for 4096 bytes */
3413 
3414 		/* Validate the max port width */
3415 		maxval  = state->hs_queryport.ib_port_wid;
3416 		val	= cfgprof->cp_max_port_width;
3417 		if (val > maxval) {
3418 			goto init_ports_fail;
3419 		}
3420 
3421 		/* Validate max VL cap size */
3422 		maxval  = state->hs_queryport.max_vl;
3423 		val	= cfgprof->cp_max_vlcap;
3424 		if (val > maxval) {
3425 			goto init_ports_fail;
3426 		}
3427 
3428 		/* Since we're doing mtu_cap, cut vl_cap down */
3429 		initport->mvc = 1;	/* set this change bit */
3430 		initport->vl_cap = 3;	/* 3 means vl0-vl3, 4 total */
3431 
3432 		/* Validate max GID table size */
3433 		maxval  = ((uint64_t)1 << state->hs_queryport.log_max_gid);
3434 		val	= ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
3435 		if (val > maxval) {
3436 			goto init_ports_fail;
3437 		}
3438 		initport->max_guid = (uint16_t)val;
3439 		initport->mg = 1;
3440 
3441 		/* Validate max PKey table size */
3442 		maxval	= ((uint64_t)1 << state->hs_queryport.log_max_pkey);
3443 		val	= ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
3444 		if (val > maxval) {
3445 			goto init_ports_fail;
3446 		}
3447 		initport->max_pkey = (uint16_t)val;
3448 		initport->mp = 1;
3449 		/*
3450 		 * Post the SET_PORT cmd to Hermon firmware. This sets
3451 		 * the parameters of the port.
3452 		 */
3453 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3454 		    HERMON_CMD_NOSLEEP_SPIN);
3455 		if (status != HERMON_CMD_SUCCESS) {
3456 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3457 			    "failed: %08x\n", i + 1, status);
3458 			goto init_ports_fail;
3459 		}
3460 		/* issue another SET_PORT cmd - performance fix/workaround */
3461 		/* XXX - need to discuss with Mellanox */
3462 		bzero(initport, sizeof (hermon_hw_query_port_t));
3463 		initport->cap_mask = 0x02500868;
3464 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3465 		    HERMON_CMD_NOSLEEP_SPIN);
3466 		if (status != HERMON_CMD_SUCCESS) {
3467 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3468 			    "failed: %08x\n", i + 1, status);
3469 			goto init_ports_fail;
3470 		}
3471 	}
3472 
3473 	/*
3474 	 * Finally, do the INIT_PORT for each port in turn
3475 	 * When this command completes, the corresponding Hermon port
3476 	 * will be physically "Up" and initialized.
3477 	 */
3478 	for (i = 0; i < num_ports; i++) {
3479 		status = hermon_init_port_cmd_post(state, i + 1,
3480 		    HERMON_CMD_NOSLEEP_SPIN);
3481 		if (status != HERMON_CMD_SUCCESS) {
3482 			cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
3483 			    "comman failed: %08x\n", i + 1, status);
3484 			goto init_ports_fail;
3485 		}
3486 	}
3487 
3488 	/* Free up the memory for Hermon port init struct(s), return success */
3489 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3490 	return (DDI_SUCCESS);
3491 
3492 init_ports_fail:
3493 	/*
3494 	 * Free up the memory for Hermon port init struct(s), shutdown any
3495 	 * successfully initialized ports, and return failure
3496 	 */
3497 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3498 	(void) hermon_hca_ports_shutdown(state, i);
3499 
3500 	return (DDI_FAILURE);
3501 }
3502 
3503 
3504 /*
3505  * hermon_hca_ports_shutdown()
3506  *    Context: Only called from attach() and/or detach() path contexts
3507  */
3508 static int
3509 hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
3510 {
3511 	int	i, status;
3512 
3513 	/*
3514 	 * Post commands to shutdown all init'd Hermon HCA ports.  Note: if
3515 	 * any of these commands fail for any reason, it would be entirely
3516 	 * unexpected and probably indicative a serious problem (HW or SW).
3517 	 * Although we do return void from this function, this type of failure
3518 	 * should not go unreported.  That is why we have the warning message.
3519 	 */
3520 	for (i = 0; i < num_init; i++) {
3521 		status = hermon_close_port_cmd_post(state, i + 1,
3522 		    HERMON_CMD_NOSLEEP_SPIN);
3523 		if (status != HERMON_CMD_SUCCESS) {
3524 			HERMON_WARNING(state, "failed to shutdown HCA port");
3525 			return (status);
3526 		}
3527 	}
3528 	return (HERMON_CMD_SUCCESS);
3529 }
3530 
3531 
3532 /*
3533  * hermon_internal_uarpg_init
3534  *    Context: Only called from attach() path context
3535  */
3536 static int
3537 hermon_internal_uarpg_init(hermon_state_t *state)
3538 {
3539 	int	status;
3540 	hermon_dbr_info_t 	*info;
3541 
3542 	/*
3543 	 * Allocate the UAR page for kernel use. This UAR page is
3544 	 * the privileged UAR page through which all kernel generated
3545 	 * doorbells will be rung. There are a number of UAR pages
3546 	 * reserved by hardware at the front of the UAR BAR, indicated
3547 	 * by DEVCAP.num_rsvd_uar, which we have already allocated. So,
3548 	 * the kernel page, or UAR page index num_rsvd_uar, will be
3549 	 * allocated here for kernel use.
3550 	 */
3551 
3552 	status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
3553 	    &state->hs_uarkpg_rsrc);
3554 	if (status != DDI_SUCCESS) {
3555 		return (DDI_FAILURE);
3556 	}
3557 
3558 	/* Setup pointer to kernel UAR page */
3559 	state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
3560 
3561 	/* need to set up DBr tracking as well */
3562 	status = hermon_dbr_page_alloc(state, &info);
3563 	if (status != DDI_SUCCESS) {
3564 		return (DDI_FAILURE);
3565 	}
3566 	state->hs_kern_dbr = info;
3567 	return (DDI_SUCCESS);
3568 }
3569 
3570 
3571 /*
3572  * hermon_internal_uarpg_fini
3573  *    Context: Only called from attach() and/or detach() path contexts
3574  */
3575 static void
3576 hermon_internal_uarpg_fini(hermon_state_t *state)
3577 {
3578 	/* Free up Hermon UAR page #1 (kernel driver doorbells) */
3579 	hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
3580 }
3581 
3582 
3583 /*
3584  * hermon_special_qp_contexts_reserve()
3585  *    Context: Only called from attach() path context
3586  */
3587 static int
3588 hermon_special_qp_contexts_reserve(hermon_state_t *state)
3589 {
3590 	hermon_rsrc_t	*qp0_rsrc, *qp1_rsrc, *qp_resvd;
3591 	int		status;
3592 
3593 	/* Initialize the lock used for special QP rsrc management */
3594 	mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
3595 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3596 
3597 	/*
3598 	 * Reserve contexts for QP0.  These QP contexts will be setup to
3599 	 * act as aliases for the real QP0.  Note: We are required to grab
3600 	 * two QPs (one per port) even if we are operating in single-port
3601 	 * mode.
3602 	 */
3603 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3604 	    HERMON_SLEEP, &qp0_rsrc);
3605 	if (status != DDI_SUCCESS) {
3606 		mutex_destroy(&state->hs_spec_qplock);
3607 		return (DDI_FAILURE);
3608 	}
3609 	state->hs_spec_qp0 = qp0_rsrc;
3610 
3611 	/*
3612 	 * Reserve contexts for QP1.  These QP contexts will be setup to
3613 	 * act as aliases for the real QP1.  Note: We are required to grab
3614 	 * two QPs (one per port) even if we are operating in single-port
3615 	 * mode.
3616 	 */
3617 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3618 	    HERMON_SLEEP, &qp1_rsrc);
3619 	if (status != DDI_SUCCESS) {
3620 		hermon_rsrc_free(state, &qp0_rsrc);
3621 		mutex_destroy(&state->hs_spec_qplock);
3622 		return (DDI_FAILURE);
3623 	}
3624 	state->hs_spec_qp1 = qp1_rsrc;
3625 
3626 	status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
3627 	    HERMON_SLEEP, &qp_resvd);
3628 	if (status != DDI_SUCCESS) {
3629 		hermon_rsrc_free(state, &qp1_rsrc);
3630 		hermon_rsrc_free(state, &qp0_rsrc);
3631 		mutex_destroy(&state->hs_spec_qplock);
3632 		return (DDI_FAILURE);
3633 	}
3634 	state->hs_spec_qp_unused = qp_resvd;
3635 
3636 	return (DDI_SUCCESS);
3637 }
3638 
3639 
3640 /*
3641  * hermon_special_qp_contexts_unreserve()
3642  *    Context: Only called from attach() and/or detach() path contexts
3643  */
3644 static void
3645 hermon_special_qp_contexts_unreserve(hermon_state_t *state)
3646 {
3647 
3648 	/* Unreserve contexts for spec_qp_unused */
3649 	hermon_rsrc_free(state, &state->hs_spec_qp_unused);
3650 
3651 	/* Unreserve contexts for QP1 */
3652 	hermon_rsrc_free(state, &state->hs_spec_qp1);
3653 
3654 	/* Unreserve contexts for QP0 */
3655 	hermon_rsrc_free(state, &state->hs_spec_qp0);
3656 
3657 	/* Destroy the lock used for special QP rsrc management */
3658 	mutex_destroy(&state->hs_spec_qplock);
3659 
3660 }
3661 
3662 
3663 /*
3664  * hermon_sw_reset()
3665  *    Context: Currently called only from attach() path context
3666  */
3667 static int
3668 hermon_sw_reset(hermon_state_t *state)
3669 {
3670 	ddi_acc_handle_t	hdl = hermon_get_pcihdl(state);
3671 	ddi_acc_handle_t	cmdhdl = hermon_get_cmdhdl(state);
3672 	uint32_t		reset_delay;
3673 	int			status, i;
3674 	uint32_t		sem;
3675 	uint_t			offset;
3676 	uint32_t		data32;		/* for devctl & linkctl */
3677 	int			loopcnt;
3678 
3679 	/* initialize the FMA retry loop */
3680 	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
3681 	hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);
3682 
3683 	/*
3684 	 * If the configured software reset delay is set to zero, then we
3685 	 * will not attempt a software reset of the Hermon device.
3686 	 */
3687 	reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
3688 	if (reset_delay == 0) {
3689 		return (DDI_SUCCESS);
3690 	}
3691 
3692 	/* the FMA retry loop starts. */
3693 	hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3694 	    fm_test);
3695 	hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3696 	    fm_test2);
3697 
3698 	/* Query the PCI capabilities of the HCA device */
3699 	/* but don't process the VPD until after reset */
3700 	status = hermon_pci_capability_list(state, hdl);
3701 	if (status != DDI_SUCCESS) {
3702 		cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
3703 		    status);
3704 		return (DDI_FAILURE);
3705 	}
3706 
3707 	/*
3708 	 * Read all PCI config info (reg0...reg63).  Note: According to the
3709 	 * Hermon software reset application note, we should not read or
3710 	 * restore the values in reg22 and reg23.
3711 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
3712 	 * register LAST, and technically, you need to restore the
3713 	 * PCIE Capability "device control" and "link control" (word-sized,
3714 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
3715 	 * We hold off restoring the command register - offset 0x4 - till last
3716 	 */
3717 
3718 	/* 1st, wait for the semaphore assure accessibility - per PRM */
3719 	status = -1;
3720 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
3721 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
3722 		if (sem == 0) {
3723 			status = 0;
3724 			break;
3725 		}
3726 		drv_usecwait(1);
3727 	}
3728 
3729 	/* Check if timeout happens */
3730 	if (status == -1) {
3731 		/*
3732 		 * Remove this acc handle from Hermon, then log
3733 		 * the error.
3734 		 */
3735 		hermon_pci_config_teardown(state, &hdl);
3736 
3737 		cmn_err(CE_WARN, "hermon_sw_reset timeout: "
3738 		    "failed to get the semaphore(0x%p)\n",
3739 		    (void *)state->hs_cmd_regs.sw_semaphore);
3740 
3741 		hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
3742 		return (DDI_FAILURE);
3743 	}
3744 
3745 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3746 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3747 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3748 			state->hs_cfg_data[i]  = pci_config_get32(hdl, i << 2);
3749 		}
3750 	}
3751 
3752 	/*
3753 	 * Perform the software reset (by writing 1 at offset 0xF0010)
3754 	 */
3755 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
3756 
3757 	/*
3758 	 * This delay is required so as not to cause a panic here. If the
3759 	 * device is accessed too soon after reset it will not respond to
3760 	 * config cycles, causing a Master Abort and panic.
3761 	 */
3762 	drv_usecwait(reset_delay);
3763 
3764 	/*
3765 	 * Poll waiting for the device to finish resetting.
3766 	 */
3767 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
3768 	while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
3769 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
3770 		if (--loopcnt == 0)
3771 			break;	/* just in case, break and go on */
3772 	}
3773 	if (loopcnt == 0)
3774 		cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
3775 		    pci_config_get32(hdl, 0));
3776 
3777 	/*
3778 	 * Restore the config info
3779 	 */
3780 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
3781 		if (i == 1) continue;	/* skip the status/ctrl reg */
3782 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
3783 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
3784 			pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
3785 		}
3786 	}
3787 
3788 	/*
3789 	 * PCI Express Capability - we saved during capability list, and
3790 	 * we'll restore them here.
3791 	 */
3792 	offset = state->hs_pci_cap_offset;
3793 	data32 = state->hs_pci_cap_devctl;
3794 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
3795 	data32 = state->hs_pci_cap_lnkctl;
3796 	pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
3797 
3798 	pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
3799 
3800 	/* the FMA retry loop ends. */
3801 	hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
3802 	    fm_test2);
3803 	hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
3804 	    fm_test);
3805 
3806 	return (DDI_SUCCESS);
3807 
3808 pio_error2:
3809 	/* fall through */
3810 pio_error:
3811 	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
3812 	return (DDI_FAILURE);
3813 }
3814 
3815 
3816 /*
3817  * hermon_mcg_init()
3818  *    Context: Only called from attach() path context
3819  */
3820 static int
3821 hermon_mcg_init(hermon_state_t *state)
3822 {
3823 	uint_t		mcg_tmp_sz;
3824 
3825 
3826 	/*
3827 	 * Allocate space for the MCG temporary copy buffer.  This is
3828 	 * used by the Attach/Detach Multicast Group code
3829 	 */
3830 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3831 	state->hs_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
3832 
3833 	/*
3834 	 * Initialize the multicast group mutex.  This ensures atomic
3835 	 * access to add, modify, and remove entries in the multicast
3836 	 * group hash lists.
3837 	 */
3838 	mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
3839 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3840 
3841 	return (DDI_SUCCESS);
3842 }
3843 
3844 
3845 /*
3846  * hermon_mcg_fini()
3847  *    Context: Only called from attach() and/or detach() path contexts
3848  */
3849 static void
3850 hermon_mcg_fini(hermon_state_t *state)
3851 {
3852 	uint_t		mcg_tmp_sz;
3853 
3854 
3855 	/* Free up the space used for the MCG temporary copy buffer */
3856 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3857 	kmem_free(state->hs_mcgtmp, mcg_tmp_sz);
3858 
3859 	/* Destroy the multicast group mutex */
3860 	mutex_destroy(&state->hs_mcglock);
3861 
3862 }
3863 
3864 
3865 /*
3866  * hermon_fw_version_check()
3867  *    Context: Only called from attach() path context
3868  */
3869 static int
3870 hermon_fw_version_check(hermon_state_t *state)
3871 {
3872 
3873 	uint_t	hermon_fw_ver_major;
3874 	uint_t	hermon_fw_ver_minor;
3875 	uint_t	hermon_fw_ver_subminor;
3876 
3877 #ifdef FMA_TEST
3878 	if (hermon_test_num == -1) {
3879 		return (DDI_FAILURE);
3880 	}
3881 #endif
3882 
3883 	/*
3884 	 * Depending on which version of driver we have attached, and which
3885 	 * HCA we've attached, the firmware version checks will be different.
3886 	 * We set up the comparison values for both Arbel and Sinai HCAs.
3887 	 */
3888 	switch (state->hs_operational_mode) {
3889 	case HERMON_HCA_MODE:
3890 		hermon_fw_ver_major = HERMON_FW_VER_MAJOR;
3891 		hermon_fw_ver_minor = HERMON_FW_VER_MINOR;
3892 		hermon_fw_ver_subminor = HERMON_FW_VER_SUBMINOR;
3893 		break;
3894 
3895 	default:
3896 		return (DDI_FAILURE);
3897 	}
3898 
3899 	/*
3900 	 * If FW revision major number is less than acceptable,
3901 	 * return failure, else if greater return success.  If
3902 	 * the major numbers are equal than check the minor number
3903 	 */
3904 	if (state->hs_fw.fw_rev_major < hermon_fw_ver_major) {
3905 		return (DDI_FAILURE);
3906 	} else if (state->hs_fw.fw_rev_major > hermon_fw_ver_major) {
3907 		return (DDI_SUCCESS);
3908 	}
3909 
3910 	/*
3911 	 * Do the same check as above, except for minor revision numbers
3912 	 * If the minor numbers are equal than check the subminor number
3913 	 */
3914 	if (state->hs_fw.fw_rev_minor < hermon_fw_ver_minor) {
3915 		return (DDI_FAILURE);
3916 	} else if (state->hs_fw.fw_rev_minor > hermon_fw_ver_minor) {
3917 		return (DDI_SUCCESS);
3918 	}
3919 
3920 	/*
3921 	 * Once again we do the same check as above, except for the subminor
3922 	 * revision number.  If the subminor numbers are equal here, then
3923 	 * these are the same firmware version, return success
3924 	 */
3925 	if (state->hs_fw.fw_rev_subminor < hermon_fw_ver_subminor) {
3926 		return (DDI_FAILURE);
3927 	} else if (state->hs_fw.fw_rev_subminor > hermon_fw_ver_subminor) {
3928 		return (DDI_SUCCESS);
3929 	}
3930 
3931 	return (DDI_SUCCESS);
3932 }
3933 
3934 
3935 /*
3936  * hermon_device_info_report()
3937  *    Context: Only called from attach() path context
3938  */
3939 static void
3940 hermon_device_info_report(hermon_state_t *state)
3941 {
3942 
3943 	cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
3944 	    "HW rev: %02d\n", state->hs_instance, state->hs_fw.fw_rev_major,
3945 	    state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
3946 	    state->hs_revision_id);
3947 	cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
3948 	    state->hs_instance, state->hs_nodedesc, state->hs_nodeguid);
3949 
3950 }
3951 
3952 
3953 /*
3954  * hermon_pci_capability_list()
3955  *    Context: Only called from attach() path context
3956  */
3957 static int
3958 hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
3959 {
3960 	uint_t		offset, data;
3961 	uint32_t	data32;
3962 
3963 	state->hs_pci_cap_offset = 0;		/* make sure it's cleared */
3964 
3965 	/*
3966 	 * Check for the "PCI Capabilities" bit in the "Status Register".
3967 	 * Bit 4 in this register indicates the presence of a "PCI
3968 	 * Capabilities" list.
3969 	 *
3970 	 * PCI-Express requires this bit to be set to 1.
3971 	 */
3972 	data = pci_config_get16(hdl, 0x06);
3973 	if ((data & 0x10) == 0) {
3974 		return (DDI_FAILURE);
3975 	}
3976 
3977 	/*
3978 	 * Starting from offset 0x34 in PCI config space, find the
3979 	 * head of "PCI capabilities" list, and walk the list.  If
3980 	 * capabilities of a known type are encountered (e.g.
3981 	 * "PCI-X Capability"), then call the appropriate handler
3982 	 * function.
3983 	 */
3984 	offset = pci_config_get8(hdl, 0x34);
3985 	while (offset != 0x0) {
3986 		data = pci_config_get8(hdl, offset);
3987 		/*
3988 		 * Check for known capability types.  Hermon has the
3989 		 * following:
3990 		 *    o Power Mgmt	 (0x02)
3991 		 *    o VPD Capability   (0x03)
3992 		 *    o PCI-E Capability (0x10)
3993 		 *    o MSIX Capability  (0x11)
3994 		 */
3995 		switch (data) {
3996 		case 0x01:
3997 			/* power mgmt handling */
3998 			break;
3999 		case 0x03:
4000 
4001 /*
4002  * Reading the PCIe VPD is inconsistent - that is, sometimes causes
4003  * problems on (mostly) X64, though we've also seen problems w/ Sparc
4004  * and Tavor --- so, for now until it's root caused, don't try and
4005  * read it
4006  */
4007 #ifdef HERMON_VPD_WORKS
4008 			hermon_pci_capability_vpd(state, hdl, offset);
4009 #else
4010 			delay(100);
4011 			hermon_pci_capability_vpd(state, hdl, offset);
4012 #endif
4013 			break;
4014 		case 0x10:
4015 			/*
4016 			 * PCI Express Capability - save offset & contents
4017 			 * for later in reset
4018 			 */
4019 			state->hs_pci_cap_offset = offset;
4020 			data32 = pci_config_get32(hdl,
4021 			    offset + HERMON_PCI_CAP_DEV_OFFS);
4022 			state->hs_pci_cap_devctl = data32;
4023 			data32 = pci_config_get32(hdl,
4024 			    offset + HERMON_PCI_CAP_LNK_OFFS);
4025 			state->hs_pci_cap_lnkctl = data32;
4026 			break;
4027 		case 0x11:
4028 			/*
4029 			 * MSIX support - nothing to do, taken care of in the
4030 			 * MSI/MSIX interrupt frameworkd
4031 			 */
4032 			break;
4033 		default:
4034 			/* just go on to the next */
4035 			break;
4036 		}
4037 
4038 		/* Get offset of next entry in list */
4039 		offset = pci_config_get8(hdl, offset + 1);
4040 	}
4041 
4042 	return (DDI_SUCCESS);
4043 }
4044 
4045 /*
4046  * hermon_pci_read_vpd()
4047  *    Context: Only called from attach() path context
4048  *    utility routine for hermon_pci_capability_vpd()
4049  */
4050 static int
4051 hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
4052     uint32_t *data)
4053 {
4054 	int		retry = 40;  /* retry counter for EEPROM poll */
4055 	uint32_t	val;
4056 	int		vpd_addr = offset + 2;
4057 	int		vpd_data = offset + 4;
4058 
4059 	/*
4060 	 * In order to read a 32-bit value from VPD, we are to write down
4061 	 * the address (offset in the VPD itself) to the address register.
4062 	 * To signal the read, we also clear bit 31.  We then poll on bit 31
4063 	 * and when it is set, we can then read our 4 bytes from the data
4064 	 * register.
4065 	 */
4066 	(void) pci_config_put32(hdl, offset, addr << 16);
4067 	do {
4068 		drv_usecwait(1000);
4069 		val = pci_config_get16(hdl, vpd_addr);
4070 		if (val & 0x8000) {		/* flag bit set */
4071 			*data = pci_config_get32(hdl, vpd_data);
4072 			return (DDI_SUCCESS);
4073 		}
4074 	} while (--retry);
4075 	/* read of flag failed write one message but count the failures */
4076 	if (debug_vpd == 0)
4077 		cmn_err(CE_NOTE,
4078 		    "!Failed to see flag bit after VPD addr write\n");
4079 	debug_vpd++;
4080 
4081 
4082 vpd_read_fail:
4083 	return (DDI_FAILURE);
4084 }
4085 
4086 
4087 
4088 /*
4089  *   hermon_pci_capability_vpd()
4090  *    Context: Only called from attach() path context
4091  */
4092 static void
4093 hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
4094     uint_t offset)
4095 {
4096 	uint8_t			name_length;
4097 	uint8_t			pn_length;
4098 	int			i, err = 0;
4099 	int			vpd_str_id = 0;
4100 	int			vpd_ro_desc;
4101 	int			vpd_ro_pn_desc;
4102 #ifdef _BIG_ENDIAN
4103 	uint32_t		data32;
4104 #endif /* _BIG_ENDIAN */
4105 	union {
4106 		uint32_t	vpd_int[HERMON_VPD_HDR_DWSIZE];
4107 		uchar_t		vpd_char[HERMON_VPD_HDR_BSIZE];
4108 	} vpd;
4109 
4110 
4111 	/*
4112 	 * Read in the Vital Product Data (VPD) to the extend needed
4113 	 * by the fwflash utility
4114 	 */
4115 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4116 		err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
4117 		if (err != DDI_SUCCESS) {
4118 			cmn_err(CE_NOTE, "!VPD read failed\n");
4119 			goto out;
4120 		}
4121 	}
4122 
4123 #ifdef _BIG_ENDIAN
4124 	/* Need to swap bytes for big endian. */
4125 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4126 		data32 = vpd.vpd_int[i];
4127 		vpd.vpd_char[(i << 2) + 3] =
4128 		    (uchar_t)((data32 & 0xFF000000) >> 24);
4129 		vpd.vpd_char[(i << 2) + 2] =
4130 		    (uchar_t)((data32 & 0x00FF0000) >> 16);
4131 		vpd.vpd_char[(i << 2) + 1] =
4132 		    (uchar_t)((data32 & 0x0000FF00) >> 8);
4133 		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
4134 	}
4135 #endif	/* _BIG_ENDIAN */
4136 
4137 	/* Check for VPD String ID Tag */
4138 	if (vpd.vpd_char[vpd_str_id] == 0x82) {
4139 		/* get the product name */
4140 		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
4141 		if (name_length > sizeof (state->hs_hca_name)) {
4142 			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
4143 			    name_length);
4144 			goto out;
4145 		}
4146 		(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
4147 		    name_length);
4148 		state->hs_hca_name[name_length] = 0;
4149 
4150 		/* get the part number */
4151 		vpd_ro_desc = name_length + 3; /* read-only tag location */
4152 		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
4153 
4154 		/* Verify read-only tag and Part Number keyword. */
4155 		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
4156 		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
4157 		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
4158 			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
4159 			goto out;
4160 		}
4161 
4162 		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
4163 		if (pn_length > sizeof (state->hs_hca_pn)) {
4164 			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
4165 			    name_length);
4166 			goto out;
4167 		}
4168 		(void) memcpy(state->hs_hca_pn,
4169 		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
4170 		    pn_length);
4171 		state->hs_hca_pn[pn_length] = 0;
4172 		state->hs_hca_pn_len = pn_length;
4173 		cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
4174 	} else {
4175 		/* Wrong VPD String ID Tag */
4176 		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
4177 		    vpd.vpd_char[0]);
4178 		goto out;
4179 	}
4180 	return;
4181 out:
4182 	state->hs_hca_pn_len = 0;
4183 }
4184 
4185 
4186 
4187 /*
4188  * hermon_intr_or_msi_init()
4189  *    Context: Only called from attach() path context
4190  */
4191 static int
4192 hermon_intr_or_msi_init(hermon_state_t *state)
4193 {
4194 	int	status;
4195 
4196 
4197 	/* Query for the list of supported interrupt event types */
4198 	status = ddi_intr_get_supported_types(state->hs_dip,
4199 	    &state->hs_intr_types_avail);
4200 	if (status != DDI_SUCCESS) {
4201 		return (DDI_FAILURE);
4202 	}
4203 
4204 	/*
4205 	 * If Hermon supports MSI-X in this system (and, if it
4206 	 * hasn't been overridden by a configuration variable), then
4207 	 * the default behavior is to use a single MSI-X.  Otherwise,
4208 	 * fallback to using legacy interrupts.  Also, if MSI-X is chosen,
4209 	 * but fails for whatever reasons, then next try MSI
4210 	 */
4211 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4212 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4213 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSIX);
4214 		if (status == DDI_SUCCESS) {
4215 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSIX;
4216 			return (DDI_SUCCESS);
4217 		}
4218 	}
4219 
4220 	/*
4221 	 * If Hermon supports MSI in this system (and, if it
4222 	 * hasn't been overridden by a configuration variable), then
4223 	 * the default behavior is to use a single MSIX.  Otherwise,
4224 	 * fallback to using legacy interrupts.  Also, if MSI is chosen,
4225 	 * but fails for whatever reasons, then fallback to using legacy
4226 	 * interrupts.
4227 	 */
4228 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4229 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSI)) {
4230 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSI);
4231 		if (status == DDI_SUCCESS) {
4232 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSI;
4233 			return (DDI_SUCCESS);
4234 		}
4235 	}
4236 
4237 	/*
4238 	 * MSI interrupt allocation failed, or was not available.  Fallback to
4239 	 * legacy interrupt support.
4240 	 */
4241 	if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
4242 		status = hermon_add_intrs(state, DDI_INTR_TYPE_FIXED);
4243 		if (status == DDI_SUCCESS) {
4244 			state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
4245 			return (DDI_SUCCESS);
4246 		}
4247 	}
4248 
4249 	/*
4250 	 * None of MSI, MSI-X, nor legacy interrupts were successful.
4251 	 * Return failure.
4252 	 */
4253 	return (DDI_FAILURE);
4254 }
4255 
4256 /*
4257  * hermon_add_intrs()
4258  *    Context: Only called from attach() patch context
4259  */
4260 static int
4261 hermon_add_intrs(hermon_state_t *state, int intr_type)
4262 {
4263 	int	status;
4264 
4265 
4266 	/* Get number of interrupts/MSI supported */
4267 	status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
4268 	    &state->hs_intrmsi_count);
4269 	if (status != DDI_SUCCESS) {
4270 		return (DDI_FAILURE);
4271 	}
4272 
4273 	/* Get number of available interrupts/MSI */
4274 	status = ddi_intr_get_navail(state->hs_dip, intr_type,
4275 	    &state->hs_intrmsi_avail);
4276 	if (status != DDI_SUCCESS) {
4277 		return (DDI_FAILURE);
4278 	}
4279 
4280 	/* Ensure that we have at least one (1) usable MSI or interrupt */
4281 	if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) {
4282 		return (DDI_FAILURE);
4283 	}
4284 
4285 	/* Attempt to allocate the maximum #interrupt/MSI handles */
4286 	status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
4287 	    intr_type, 0, min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
4288 	    &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
4289 	if (status != DDI_SUCCESS) {
4290 		return (DDI_FAILURE);
4291 	}
4292 
4293 	/* Ensure that we have allocated at least one (1) MSI or interrupt */
4294 	if (state->hs_intrmsi_allocd < 1) {
4295 		return (DDI_FAILURE);
4296 	}
4297 	state->hs_eq_dist = state->hs_intrmsi_allocd - 1; /* start at 0 */
4298 
4299 	/*
4300 	 * Extract the priority for the allocated interrupt/MSI.  This
4301 	 * will be used later when initializing certain mutexes.
4302 	 */
4303 	status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
4304 	    &state->hs_intrmsi_pri);
4305 	if (status != DDI_SUCCESS) {
4306 		/* Free the allocated interrupt/MSI handle */
4307 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4308 
4309 		return (DDI_FAILURE);
4310 	}
4311 
4312 	/* Make sure the interrupt/MSI priority is below 'high level' */
4313 	if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
4314 		/* Free the allocated interrupt/MSI handle */
4315 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4316 
4317 		return (DDI_FAILURE);
4318 	}
4319 
4320 	/* Get add'l capability information regarding interrupt/MSI */
4321 	status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
4322 	    &state->hs_intrmsi_cap);
4323 	if (status != DDI_SUCCESS) {
4324 		/* Free the allocated interrupt/MSI handle */
4325 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4326 
4327 		return (DDI_FAILURE);
4328 	}
4329 
4330 	return (DDI_SUCCESS);
4331 }
4332 
4333 
4334 /*
4335  * hermon_intr_or_msi_fini()
4336  *    Context: Only called from attach() and/or detach() path contexts
4337  */
4338 static int
4339 hermon_intr_or_msi_fini(hermon_state_t *state)
4340 {
4341 	int	status;
4342 	int	intr;
4343 
4344 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
4345 
4346 		/* Free the allocated interrupt/MSI handle */
4347 		status = ddi_intr_free(state->hs_intrmsi_hdl[intr]);
4348 		if (status != DDI_SUCCESS) {
4349 			return (DDI_FAILURE);
4350 		}
4351 	}
4352 	return (DDI_SUCCESS);
4353 }
4354 
4355 
4356 /*ARGSUSED*/
4357 void
4358 hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
4359     uint_t offset)
4360 {
4361 	uint32_t	msix_data;
4362 	uint16_t	msg_cntr;
4363 	uint32_t	t_offset;	/* table offset */
4364 	uint32_t	t_bir;
4365 	uint32_t	p_offset;	/* pba */
4366 	uint32_t	p_bir;
4367 	int		t_size;		/* size in entries - each is 4 dwords */
4368 
4369 	/* come in with offset pointing at the capability structure */
4370 
4371 	msix_data = pci_config_get32(hdl, offset);
4372 	cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
4373 	msg_cntr =  pci_config_get16(hdl, offset+2);
4374 	cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
4375 	offset += 4;
4376 	msix_data = pci_config_get32(hdl, offset);	/* table info */
4377 	t_offset = (msix_data & 0xFFF8) >> 3;
4378 	t_bir = msix_data & 0x07;
4379 	offset += 4;
4380 	cmn_err(CE_CONT, "  table %X --offset = %X, bir(bar) = %X\n",
4381 	    msix_data, t_offset, t_bir);
4382 	msix_data = pci_config_get32(hdl, offset);	/* PBA info */
4383 	p_offset = (msix_data & 0xFFF8) >> 3;
4384 	p_bir = msix_data & 0x07;
4385 
4386 	cmn_err(CE_CONT, "  PBA   %X --offset = %X, bir(bar) = %X\n",
4387 	    msix_data, p_offset, p_bir);
4388 	t_size = msg_cntr & 0x7FF;		/* low eleven bits */
4389 	cmn_err(CE_CONT, "    table size = %X entries\n", t_size);
4390 
4391 	offset = t_offset;		/* reuse this for offset from BAR */
4392 #ifdef HERMON_SUPPORTS_MSIX_BAR
4393 	cmn_err(CE_CONT, "First 2 table entries behind BAR2 \n");
4394 	for (i = 0; i < 2; i++) {
4395 		for (j = 0; j < 4; j++, offset += 4) {
4396 			msix_data = ddi_get32(state->hs_reg_msihdl,
4397 			    (uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
4398 			    + offset));
4399 			cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
4400 			    i, j, msix_data);
4401 		}
4402 	}
4403 #endif
4404 
4405 }
4406 
4407 /*
4408  * X86 fastreboot support functions.
4409  * These functions are used to save/restore MSI-X table/PBA and also
4410  * to disable MSI-X interrupts in hermon_quiesce().
4411  */
4412 
4413 /* Return the message control for MSI-X */
4414 static ushort_t
4415 get_msix_ctrl(dev_info_t *dip)
4416 {
4417 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4418 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4419 	    DEVI(dip)->devi_instance);
4420 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4421 	ASSERT(pci_cfg_hdl != NULL);
4422 
4423 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4424 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4425 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4426 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4427 			return (0);
4428 	}
4429 	ASSERT(msix_ctrl != 0);
4430 
4431 	return (msix_ctrl);
4432 }
4433 
4434 /* Return the MSI-X table size */
4435 static size_t
4436 get_msix_tbl_size(dev_info_t *dip)
4437 {
4438 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4439 	ASSERT(msix_ctrl != 0);
4440 
4441 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4442 	    PCI_MSIX_VECTOR_SIZE);
4443 }
4444 
4445 /* Return the MSI-X PBA size */
4446 static size_t
4447 get_msix_pba_size(dev_info_t *dip)
4448 {
4449 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4450 	ASSERT(msix_ctrl != 0);
4451 
4452 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8);
4453 }
4454 
4455 /* Set up the MSI-X table/PBA save area */
4456 static void
4457 hermon_set_msix_info(hermon_state_t *state)
4458 {
4459 	uint_t			rnumber, breg, nregs;
4460 	ushort_t		caps_ctrl, msix_ctrl;
4461 	pci_regspec_t		*rp;
4462 	int			reg_size, addr_space, offset, *regs_list, i;
4463 
4464 	/*
4465 	 * MSI-X BIR Index Table:
4466 	 * BAR indicator register (BIR) to Base Address register.
4467 	 */
4468 	uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
4469 	    0x20, 0x24, 0xff, 0xff};
4470 
4471 	/* Fastreboot data access  attribute */
4472 	ddi_device_acc_attr_t	dev_attr = {
4473 		0,				/* version */
4474 		DDI_STRUCTURE_LE_ACC,
4475 		DDI_STRICTORDER_ACC,		/* attr access */
4476 		0
4477 	};
4478 
4479 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4480 	ASSERT(pci_cfg_hdl != NULL);
4481 
4482 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4483 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4484 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4485 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4486 			return;
4487 	}
4488 	ASSERT(msix_ctrl != 0);
4489 
4490 	state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4491 	    PCI_MSIX_TBL_OFFSET);
4492 
4493 	/* Get the BIR for MSI-X table */
4494 	breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
4495 	    PCI_MSIX_TBL_BIR_MASK];
4496 	ASSERT(breg != 0xFF);
4497 
4498 	/* Set the MSI-X table offset */
4499 	state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
4500 	    ~PCI_MSIX_TBL_BIR_MASK;
4501 
4502 	/* Set the MSI-X table size */
4503 	state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4504 	    PCI_MSIX_VECTOR_SIZE;
4505 
4506 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
4507 	    DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
4508 	    DDI_PROP_SUCCESS) {
4509 		return;
4510 	}
4511 	reg_size = sizeof (pci_regspec_t) / sizeof (int);
4512 
4513 	/* Check the register number for MSI-X table */
4514 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4515 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4516 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4517 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4518 
4519 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4520 		    (addr_space == PCI_ADDR_MEM64))) {
4521 			rnumber = i;
4522 			break;
4523 		}
4524 	}
4525 	ASSERT(rnumber != 0);
4526 	state->hs_msix_tbl_rnumber = rnumber;
4527 
4528 	/* Set device attribute version and access according to Hermon FM */
4529 	dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
4530 	dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);
4531 
4532 	/* Map the entire MSI-X vector table */
4533 	if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
4534 	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
4535 	    state->hs_msix_tbl_size, &dev_attr,
4536 	    &state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
4537 		return;
4538 	}
4539 
4540 	state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4541 	    PCI_MSIX_PBA_OFFSET);
4542 
4543 	/* Get the BIR for MSI-X PBA */
4544 	breg = pci_msix_bir_index[state->hs_msix_pba_offset &
4545 	    PCI_MSIX_PBA_BIR_MASK];
4546 	ASSERT(breg != 0xFF);
4547 
4548 	/* Set the MSI-X PBA offset */
4549 	state->hs_msix_pba_offset = state->hs_msix_pba_offset &
4550 	    ~PCI_MSIX_PBA_BIR_MASK;
4551 
4552 	/* Set the MSI-X PBA size */
4553 	state->hs_msix_pba_size =
4554 	    ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;
4555 
4556 	/* Check the register number for MSI-X PBA */
4557 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4558 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4559 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4560 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4561 
4562 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4563 		    (addr_space == PCI_ADDR_MEM64))) {
4564 			rnumber = i;
4565 			break;
4566 		}
4567 	}
4568 	ASSERT(rnumber != 0);
4569 	state->hs_msix_pba_rnumber = rnumber;
4570 	ddi_prop_free(regs_list);
4571 
4572 	/* Map in the MSI-X Pending Bit Array */
4573 	if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
4574 	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
4575 	    state->hs_msix_pba_size, &dev_attr,
4576 	    &state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
4577 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
4578 		state->hs_fm_msix_tblhdl = NULL;
4579 		return;
4580 	}
4581 
4582 	/* Set the MSI-X table save area */
4583 	state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
4584 	    KM_SLEEP);
4585 
4586 	/* Set the MSI-X PBA save area */
4587 	state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
4588 	    KM_SLEEP);
4589 }
4590 
4591 /* Disable Hermon interrupts */
4592 static int
4593 hermon_intr_disable(hermon_state_t *state)
4594 {
4595 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4596 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4597 	ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
4598 	int i, j;
4599 	ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
4600 	ASSERT(state->hs_intr_types_avail &
4601 	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));
4602 
4603 	/*
4604 	 * Check if MSI-X interrupts are used. If so, disable MSI-X interupts.
4605 	 * If not, since Hermon doesn't support MSI interrupts, assuming the
4606 	 * legacy interrupt is used instead, disable the legacy interrupt.
4607 	 */
4608 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4609 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4610 
4611 		if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4612 		    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4613 			if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL,
4614 			    caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4615 				return (DDI_FAILURE);
4616 		}
4617 		ASSERT(msix_ctrl != 0);
4618 
4619 		if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
4620 			return (DDI_SUCCESS);
4621 
4622 		/* Clear all inums in MSI-X table */
4623 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4624 		    i += PCI_MSIX_VECTOR_SIZE) {
4625 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4626 				char *addr = state->hs_msix_tbl_addr + i + j;
4627 				ddi_put32(msix_tblhdl,
4628 				    (uint32_t *)(uintptr_t)addr, 0x0);
4629 			}
4630 		}
4631 
4632 		/* Disable MSI-X interrupts */
4633 		msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
4634 		PCI_CAP_PUT16(pci_cfg_hdl, NULL, caps_ctrl, PCI_MSIX_CTRL,
4635 		    msix_ctrl);
4636 
4637 	} else {
4638 		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
4639 		ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);
4640 
4641 		/* Disable the legacy interrupts */
4642 		cmdreg |= PCI_COMM_INTX_DISABLE;
4643 		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
4644 	}
4645 
4646 	return (DDI_SUCCESS);
4647 }
4648 
4649 /* Hermon quiesce(9F) entry */
4650 static int
4651 hermon_quiesce(dev_info_t *dip)
4652 {
4653 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4654 	    DEVI(dip)->devi_instance);
4655 	ddi_acc_handle_t pcihdl = hermon_get_pcihdl(state);
4656 	ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
4657 	ddi_acc_handle_t msix_tbl_hdl = hermon_get_msix_tblhdl(state);
4658 	ddi_acc_handle_t msix_pba_hdl = hermon_get_msix_pbahdl(state);
4659 	uint32_t sem, reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
4660 	uint64_t data64;
4661 	uint32_t data32;
4662 	int status, i, j, loopcnt;
4663 	uint_t offset;
4664 
4665 	ASSERT(state != NULL);
4666 
4667 	/* start fastreboot */
4668 	state->hs_quiescing = B_TRUE;
4669 
4670 	/* If it's in maintenance mode, do nothing but return with SUCCESS */
4671 	if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
4672 		return (DDI_SUCCESS);
4673 	}
4674 
4675 	/* suppress Hermon FM ereports */
4676 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
4677 		hermon_clr_state_nolock(state, HCA_EREPORT_FM);
4678 	}
4679 
4680 	/* Shutdown HCA ports */
4681 	if (hermon_hca_ports_shutdown(state,
4682 	    state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
4683 		state->hs_quiescing = B_FALSE;
4684 		return (DDI_FAILURE);
4685 	}
4686 
4687 	/* Close HCA */
4688 	if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
4689 	    HERMON_CMD_SUCCESS) {
4690 		state->hs_quiescing = B_FALSE;
4691 		return (DDI_FAILURE);
4692 	}
4693 
4694 	/* Disable interrupts */
4695 	if (hermon_intr_disable(state) != DDI_SUCCESS) {
4696 		state->hs_quiescing = B_FALSE;
4697 		return (DDI_FAILURE);
4698 	}
4699 
4700 	/*
4701 	 * Query the PCI capabilities of the HCA device, but don't process
4702 	 * the VPD until after reset.
4703 	 */
4704 	if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
4705 		state->hs_quiescing = B_FALSE;
4706 		return (DDI_FAILURE);
4707 	}
4708 
4709 	/*
4710 	 * Read all PCI config info (reg0...reg63).  Note: According to the
4711 	 * Hermon software reset application note, we should not read or
4712 	 * restore the values in reg22 and reg23.
4713 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
4714 	 * register LAST, and technically, you need to restore the
4715 	 * PCIE Capability "device control" and "link control" (word-sized,
4716 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
4717 	 * We hold off restoring the command register - offset 0x4 - till last
4718 	 */
4719 
4720 	/* 1st, wait for the semaphore assure accessibility - per PRM */
4721 	status = -1;
4722 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
4723 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
4724 		if (sem == 0) {
4725 			status = 0;
4726 			break;
4727 		}
4728 		drv_usecwait(1);
4729 	}
4730 
4731 	/* Check if timeout happens */
4732 	if (status == -1) {
4733 		state->hs_quiescing = B_FALSE;
4734 		return (DDI_FAILURE);
4735 	}
4736 
4737 	/* MSI-X interrupts are used, save the MSI-X table */
4738 	if (msix_tbl_hdl && msix_pba_hdl) {
4739 		/* save MSI-X table */
4740 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4741 		    i += PCI_MSIX_VECTOR_SIZE) {
4742 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4743 				char *addr = state->hs_msix_tbl_addr + i + j;
4744 				data32 = ddi_get32(msix_tbl_hdl,
4745 				    (uint32_t *)(uintptr_t)addr);
4746 				*(uint32_t *)(uintptr_t)(state->
4747 				    hs_msix_tbl_entries + i + j) = data32;
4748 			}
4749 		}
4750 		/* save MSI-X PBA */
4751 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4752 			char *addr = state->hs_msix_pba_addr + i;
4753 			data64 = ddi_get64(msix_pba_hdl,
4754 			    (uint64_t *)(uintptr_t)addr);
4755 			*(uint64_t *)(uintptr_t)(state->
4756 			    hs_msix_pba_entries + i) = data64;
4757 		}
4758 	}
4759 
4760 	/* save PCI config space */
4761 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4762 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4763 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4764 			state->hs_cfg_data[i]  =
4765 			    pci_config_get32(pcihdl, i << 2);
4766 		}
4767 	}
4768 
4769 	/* SW-reset HCA */
4770 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
4771 
4772 	/*
4773 	 * This delay is required so as not to cause a panic here. If the
4774 	 * device is accessed too soon after reset it will not respond to
4775 	 * config cycles, causing a Master Abort and panic.
4776 	 */
4777 	drv_usecwait(reset_delay);
4778 
4779 	/* Poll waiting for the device to finish resetting */
4780 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
4781 	while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
4782 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
4783 		if (--loopcnt == 0)
4784 			break;	/* just in case, break and go on */
4785 	}
4786 	if (loopcnt == 0) {
4787 		state->hs_quiescing = B_FALSE;
4788 		return (DDI_FAILURE);
4789 	}
4790 
4791 	/* Restore the config info */
4792 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4793 		if (i == 1) continue;	/* skip the status/ctrl reg */
4794 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4795 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4796 			pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
4797 		}
4798 	}
4799 
4800 	/* If MSI-X interrupts are used, restore the MSI-X table */
4801 	if (msix_tbl_hdl && msix_pba_hdl) {
4802 		/* restore MSI-X PBA */
4803 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4804 			char *addr = state->hs_msix_pba_addr + i;
4805 			data64 = *(uint64_t *)(uintptr_t)
4806 			    (state->hs_msix_pba_entries + i);
4807 			ddi_put64(msix_pba_hdl,
4808 			    (uint64_t *)(uintptr_t)addr, data64);
4809 		}
4810 		/* restore MSI-X table */
4811 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4812 		    i += PCI_MSIX_VECTOR_SIZE) {
4813 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4814 				char *addr = state->hs_msix_tbl_addr + i + j;
4815 				data32 = *(uint32_t *)(uintptr_t)
4816 				    (state->hs_msix_tbl_entries + i + j);
4817 				ddi_put32(msix_tbl_hdl,
4818 				    (uint32_t *)(uintptr_t)addr, data32);
4819 			}
4820 		}
4821 	}
4822 
4823 	/*
4824 	 * PCI Express Capability - we saved during capability list, and
4825 	 * we'll restore them here.
4826 	 */
4827 	offset = state->hs_pci_cap_offset;
4828 	data32 = state->hs_pci_cap_devctl;
4829 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
4830 	data32 = state->hs_pci_cap_lnkctl;
4831 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
4832 
4833 	/* restore the command register */
4834 	pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
4835 
4836 	return (DDI_SUCCESS);
4837 }
4838