xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon.c (revision f18d8787c0ba765f61b003e2aae78db90b48f833)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * hermon.c
28  *    Hermon (InfiniBand) HCA Driver attach/detach Routines
29  *
30  *    Implements all the routines necessary for the attach, setup,
31  *    initialization (and subsequent possible teardown and detach) of the
32  *    Hermon InfiniBand HCA driver.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/file.h>
37 #include <sys/open.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h>
43 #include <sys/pci.h>
44 #include <sys/pci_cap.h>
45 #include <sys/bitmap.h>
46 #include <sys/policy.h>
47 
48 #include <sys/ib/adapters/hermon/hermon.h>
49 
/*
 * /etc/system can tune this down, if that is desirable.
 * NOTE(review): presumably the cap on MSI-X vectors the driver will use;
 * the consumer is not in this part of the file -- confirm.
 */
int hermon_msix_max = HERMON_MSIX_MAX;

/* The following works around a problem in pre-2_7_000 firmware. */
#define	HERMON_FW_WORKAROUND

/*
 * When non-zero, extra diagnostic tracing is emitted (for example the
 * IBTF_DPRINTF_L2 call at the top of hermon_icm_alloc()).
 */
int hermon_verbose = 0;

/*
 * Hermon HCA State Pointer
 * Soft-state anchor: set up by ddi_soft_state_init() in _init() and used
 * with ddi_get_soft_state() to find the per-instance hermon_state_t.
 */
void *hermon_statep;

/*
 * NOTE(review): presumably enables VPD debug output; it is not referenced
 * in this part of the file -- confirm against the VPD parsing code.
 */
int debug_vpd = 0;

/* Disable the internal error-check polling thread */
int hermon_no_inter_err_chk = 0;

/*
 * The Hermon "userland resource database" is common to instances of the
 * Hermon HCA driver.  This structure "hermon_userland_rsrc_db" contains all
 * the necessary information to maintain it.  Its "hdl_umapdb_lock" mutex is
 * taken directly by hermon_open() and hermon_close() so that they can do
 * several lookups/insertions atomically.
 */
hermon_umap_db_t hermon_userland_rsrc_db;
72 
/* DDI/DKI entry points (wired into hermon_cb_ops and hermon_ops below) */
static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
static int hermon_open(dev_t *, int, int, cred_t *);
static int hermon_close(dev_t, int, int, cred_t *);
static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

/* Driver-level init/teardown helpers used by attach and detach */
static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
    int instance);
static void hermon_drv_fini(hermon_state_t *state);
static void hermon_drv_fini2(hermon_state_t *state);
static int hermon_isr_init(hermon_state_t *state);
static void hermon_isr_fini(hermon_state_t *state);

static int hermon_hw_init(hermon_state_t *state);

static void hermon_hw_fini(hermon_state_t *state,
    hermon_drv_cleanup_level_t cleanup);
static int hermon_soft_state_init(hermon_state_t *state);
static void hermon_soft_state_fini(hermon_state_t *state);
static int hermon_icm_config_setup(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static void hermon_icm_tables_init(hermon_state_t *state);
static void hermon_icm_tables_fini(hermon_state_t *state);
static int hermon_icm_dma_init(hermon_state_t *state);
static void hermon_icm_dma_fini(hermon_state_t *state);
static void hermon_inithca_set(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static int hermon_hca_port_init(hermon_state_t *state);
static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
static int hermon_internal_uarpg_init(hermon_state_t *state);
static void hermon_internal_uarpg_fini(hermon_state_t *state);
static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
static int hermon_sw_reset(hermon_state_t *state);
static int hermon_mcg_init(hermon_state_t *state);
static void hermon_mcg_fini(hermon_state_t *state);
static int hermon_fw_version_check(hermon_state_t *state);
static void hermon_device_info_report(hermon_state_t *state);
static int hermon_pci_capability_list(hermon_state_t *state,
    ddi_acc_handle_t hdl);
static void hermon_pci_capability_vpd(hermon_state_t *state,
    ddi_acc_handle_t hdl, uint_t offset);
static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
    uint32_t addr, uint32_t *data);
static int hermon_intr_or_msi_init(hermon_state_t *state);
static int hermon_add_intrs(hermon_state_t *state, int intr_type);
static int hermon_intr_or_msi_fini(hermon_state_t *state);
/*
 * NOTE(review): unlike its neighbours this prototype is not "static" --
 * confirm whether it is really referenced from outside this file.
 */
void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
    uint_t offset);

static uint64_t hermon_size_icm(hermon_state_t *state);

/* X86 fastreboot support */
static ushort_t get_msix_ctrl(dev_info_t *);
static size_t get_msix_tbl_size(dev_info_t *);
static size_t get_msix_pba_size(dev_info_t *);
static void hermon_set_msix_info(hermon_state_t *);
static int hermon_intr_disable(hermon_state_t *);
static int hermon_quiesce(dev_info_t *);
132 
133 
/*
 * Character/Block Operations
 *
 * Only open, close, ioctl and devmap are implemented by the driver; the
 * remaining slots are filled with the nodev/nochpoll/NULL placeholders.
 * The initializer is positional, so the order of the entries must not be
 * changed.
 */
static struct cb_ops hermon_cb_ops = {
	hermon_open,		/* open */
	hermon_close,		/* close */
	nodev,			/* strategy (block) */
	nodev,			/* print (block) */
	nodev,			/* dump (block) */
	nodev,			/* read */
	nodev,			/* write */
	hermon_ioctl,		/* ioctl */
	hermon_devmap,		/* devmap */
	NULL,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streams */
	D_NEW | D_MP |
	D_64BIT | D_HOTPLUG |
	D_DEVMAP,		/* flags */
	CB_REV			/* rev */
};
155 
/*
 * Driver Operations
 *
 * Ties the getinfo/attach/detach/quiesce routines of this file, and the
 * character-device table above, to the module.  The initializer is
 * positional, so the order of the entries must not be changed.
 */
static struct dev_ops hermon_ops = {
	DEVO_REV,		/* struct rev */
	0,			/* refcnt */
	hermon_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	hermon_attach,		/* attach */
	hermon_detach,		/* detach */
	nodev,			/* reset */
	&hermon_cb_ops,		/* cb_ops */
	NULL,			/* bus_ops */
	nodev,			/* power */
	hermon_quiesce,		/* devo_quiesce */
};
171 
/*
 * Module Driver Info
 * Describes this module as a device driver: the driver-type linkage ops,
 * a human-readable description string, and the dev_ops table above.
 */
static struct modldrv hermon_modldrv = {
	&mod_driverops,
	"ConnectX IB Driver",
	&hermon_ops
};
178 
/*
 * Module Linkage
 * NULL-terminated list of linkage structures for this module; it is the
 * handle passed to ibc_init()/mod_install()/mod_info()/mod_remove()/
 * ibc_fini() by _init(), _info() and _fini().
 */
static struct modlinkage hermon_modlinkage = {
	MODREV_1,
	&hermon_modldrv,
	NULL
};
185 
186 /*
187  * This extern refers to the ibc_operations_t function vector that is defined
188  * in the hermon_ci.c file.
189  */
190 extern ibc_operations_t	hermon_ibc_ops;
191 
192 /*
193  * _init()
194  */
195 int
196 _init()
197 {
198 	int	status;
199 
200 	status = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
201 	    (size_t)HERMON_INITIAL_STATES);
202 	if (status != 0) {
203 		return (status);
204 	}
205 
206 	status = ibc_init(&hermon_modlinkage);
207 	if (status != 0) {
208 		ddi_soft_state_fini(&hermon_statep);
209 		return (status);
210 	}
211 
212 	status = mod_install(&hermon_modlinkage);
213 	if (status != 0) {
214 		ibc_fini(&hermon_modlinkage);
215 		ddi_soft_state_fini(&hermon_statep);
216 		return (status);
217 	}
218 
219 	/* Initialize the Hermon "userland resources database" */
220 	hermon_umap_db_init();
221 
222 	return (status);
223 }
224 
225 
226 /*
227  * _info()
228  */
229 int
230 _info(struct modinfo *modinfop)
231 {
232 	int	status;
233 
234 	status = mod_info(&hermon_modlinkage, modinfop);
235 	return (status);
236 }
237 
238 
239 /*
240  * _fini()
241  */
242 int
243 _fini()
244 {
245 	int	status;
246 
247 	status = mod_remove(&hermon_modlinkage);
248 	if (status != 0) {
249 		return (status);
250 	}
251 
252 	/* Destroy the Hermon "userland resources database" */
253 	hermon_umap_db_fini();
254 
255 	ibc_fini(&hermon_modlinkage);
256 	ddi_soft_state_fini(&hermon_statep);
257 
258 	return (status);
259 }
260 
261 
262 /*
263  * hermon_getinfo()
264  */
265 /* ARGSUSED */
266 static int
267 hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
268 {
269 	dev_t		dev;
270 	hermon_state_t 	*state;
271 	minor_t		instance;
272 
273 	switch (cmd) {
274 	case DDI_INFO_DEVT2DEVINFO:
275 		dev = (dev_t)arg;
276 		instance = HERMON_DEV_INSTANCE(dev);
277 		state = ddi_get_soft_state(hermon_statep, instance);
278 		if (state == NULL) {
279 			return (DDI_FAILURE);
280 		}
281 		*result = (void *)state->hs_dip;
282 		return (DDI_SUCCESS);
283 
284 	case DDI_INFO_DEVT2INSTANCE:
285 		dev = (dev_t)arg;
286 		instance = HERMON_DEV_INSTANCE(dev);
287 		*result = (void *)(uintptr_t)instance;
288 		return (DDI_SUCCESS);
289 
290 	default:
291 		break;
292 	}
293 
294 	return (DDI_FAILURE);
295 }
296 
297 
298 /*
299  * hermon_open()
300  */
301 /* ARGSUSED */
302 static int
303 hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
304 {
305 	hermon_state_t		*state;
306 	hermon_rsrc_t 		*rsrcp;
307 	hermon_umap_db_entry_t	*umapdb, *umapdb2;
308 	minor_t			instance;
309 	uint64_t		key, value;
310 	uint_t			hr_indx;
311 	dev_t			dev;
312 	int			status;
313 
314 	instance = HERMON_DEV_INSTANCE(*devp);
315 	state = ddi_get_soft_state(hermon_statep, instance);
316 	if (state == NULL) {
317 		return (ENXIO);
318 	}
319 
320 	/*
321 	 * Only allow driver to be opened for character access, and verify
322 	 * whether exclusive access is allowed.
323 	 */
324 	if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
325 	    secpolicy_excl_open(credp) != 0)) {
326 		return (EINVAL);
327 	}
328 
329 	/*
330 	 * Search for the current process PID in the "userland resources
331 	 * database".  If it is not found, then attempt to allocate a UAR
332 	 * page and add the ("key", "value") pair to the database.
333 	 * Note:  As a last step we always return a devp appropriate for
334 	 * the open.  Either we return a new minor number (based on the
335 	 * instance and the UAR page index) or we return the current minor
336 	 * number for the given client process.
337 	 *
338 	 * We also add an entry to the database to allow for lookup from
339 	 * "dev_t" to the current process PID.  This is necessary because,
340 	 * under certain circumstance, the process PID that calls the Hermon
341 	 * close() entry point may not be the same as the one who called
342 	 * open().  Specifically, this can happen if a child process calls
343 	 * the Hermon's open() entry point, gets a UAR page, maps it out (using
344 	 * mmap()), and then exits without calling munmap().  Because mmap()
345 	 * adds a reference to the file descriptor, at the exit of the child
346 	 * process the file descriptor is "inherited" by the parent (and will
347 	 * be close()'d by the parent's PID only when it exits).
348 	 *
349 	 * Note: We use the hermon_umap_db_find_nolock() and
350 	 * hermon_umap_db_add_nolock() database access routines below (with
351 	 * an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
352 	 * to ensure that the multiple accesses (in this case searching for,
353 	 * and then adding _two_ database entries) can be done atomically.
354 	 */
355 	key = ddi_get_pid();
356 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
357 	status = hermon_umap_db_find_nolock(instance, key,
358 	    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
359 	if (status != DDI_SUCCESS) {
360 		/*
361 		 * If we are in 'maintenance mode', we cannot alloc a UAR page.
362 		 * But we still need some rsrcp value, and a mostly unique
363 		 * hr_indx value.  So we set rsrcp to NULL for maintenance
364 		 * mode, and use a rolling count for hr_indx.  The field
365 		 * 'hs_open_hr_indx' is used only in this maintenance mode
366 		 * condition.
367 		 *
368 		 * Otherwise, if we are in operational mode then we allocate
369 		 * the UAR page as normal, and use the rsrcp value and tr_indx
370 		 * value from that allocation.
371 		 */
372 		if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
373 			rsrcp = NULL;
374 			hr_indx = state->hs_open_ar_indx++;
375 		} else {
376 			/* Allocate a new UAR page for this process */
377 			status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
378 			    HERMON_NOSLEEP, &rsrcp);
379 			if (status != DDI_SUCCESS) {
380 				mutex_exit(
381 				    &hermon_userland_rsrc_db.hdl_umapdb_lock);
382 				return (EAGAIN);
383 			}
384 
385 			hr_indx = rsrcp->hr_indx;
386 		}
387 
388 		/*
389 		 * Allocate an entry to track the UAR page resource in the
390 		 * "userland resources database".
391 		 */
392 		umapdb = hermon_umap_db_alloc(instance, key,
393 		    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
394 		if (umapdb == NULL) {
395 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
396 			/* If in "maintenance mode", don't free the rsrc */
397 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
398 				hermon_rsrc_free(state, &rsrcp);
399 			}
400 			return (EAGAIN);
401 		}
402 
403 		/*
404 		 * Create a new device number.  Minor number is a function of
405 		 * the UAR page index (15 bits) and the device instance number
406 		 * (3 bits).
407 		 */
408 		dev = makedevice(getmajor(*devp), (hr_indx <<
409 		    HERMON_MINORNUM_SHIFT) | instance);
410 
411 		/*
412 		 * Allocate another entry in the "userland resources database"
413 		 * to track the association of the device number (above) to
414 		 * the current process ID (in "key").
415 		 */
416 		umapdb2 = hermon_umap_db_alloc(instance, dev,
417 		    MLNX_UMAP_PID_RSRC, (uint64_t)key);
418 		if (umapdb2 == NULL) {
419 			mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
420 			hermon_umap_db_free(umapdb);
421 			/* If in "maintenance mode", don't free the rsrc */
422 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
423 				hermon_rsrc_free(state, &rsrcp);
424 			}
425 			return (EAGAIN);
426 		}
427 
428 		/* Add the entries to the database */
429 		hermon_umap_db_add_nolock(umapdb);
430 		hermon_umap_db_add_nolock(umapdb2);
431 
432 	} else {
433 		/*
434 		 * Return the same device number as on the original open()
435 		 * call.  This was calculated as a function of the UAR page
436 		 * index (top 16 bits) and the device instance number
437 		 */
438 		rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
439 		dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
440 		    HERMON_MINORNUM_SHIFT) | instance);
441 	}
442 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
443 
444 	*devp = dev;
445 
446 	return (0);
447 }
448 
449 
450 /*
451  * hermon_close()
452  */
453 /* ARGSUSED */
454 static int
455 hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
456 {
457 	hermon_state_t		*state;
458 	hermon_rsrc_t		*rsrcp;
459 	hermon_umap_db_entry_t	*umapdb;
460 	hermon_umap_db_priv_t	*priv;
461 	minor_t			instance;
462 	uint64_t		key, value;
463 	int			status, reset_status = 0;
464 
465 	instance = HERMON_DEV_INSTANCE(dev);
466 	state = ddi_get_soft_state(hermon_statep, instance);
467 	if (state == NULL) {
468 		return (ENXIO);
469 	}
470 
471 	/*
472 	 * Search for "dev_t" in the "userland resources database".  As
473 	 * explained above in hermon_open(), we can't depend on using the
474 	 * current process ID here to do the lookup because the process
475 	 * that ultimately closes may not be the same one who opened
476 	 * (because of inheritance).
477 	 * So we lookup the "dev_t" (which points to the PID of the process
478 	 * that opened), and we remove the entry from the database (and free
479 	 * it up).  Then we do another query based on the PID value.  And when
480 	 * we find that database entry, we free it up too and then free the
481 	 * Hermon UAR page resource.
482 	 *
483 	 * Note: We use the hermon_umap_db_find_nolock() database access
484 	 * routine below (with an explicit mutex_enter of the database lock)
485 	 * to ensure that the multiple accesses (which attempt to remove the
486 	 * two database entries) can be done atomically.
487 	 *
488 	 * This works the same in both maintenance mode and HCA mode, except
489 	 * for the call to hermon_rsrc_free().  In the case of maintenance mode,
490 	 * this call is not needed, as it was not allocated in hermon_open()
491 	 * above.
492 	 */
493 	key = dev;
494 	mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
495 	status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
496 	    &value, HERMON_UMAP_DB_REMOVE, &umapdb);
497 	if (status == DDI_SUCCESS) {
498 		/*
499 		 * If the "hdb_priv" field is non-NULL, it indicates that
500 		 * some "on close" handling is still necessary.  Call
501 		 * hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
502 		 * to invoke all the registered callbacks).  Then free up
503 		 * the resources associated with "hdb_priv" and continue
504 		 * closing.
505 		 */
506 		priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
507 		if (priv != NULL) {
508 			reset_status = hermon_umap_db_handle_onclose_cb(priv);
509 			kmem_free(priv, sizeof (hermon_umap_db_priv_t));
510 			umapdb->hdbe_common.hdb_priv = (void *)NULL;
511 		}
512 
513 		hermon_umap_db_free(umapdb);
514 
515 		/*
516 		 * Now do another lookup using PID as the key (copy it from
517 		 * "value").  When this lookup is complete, the "value" field
518 		 * will contain the hermon_rsrc_t pointer for the UAR page
519 		 * resource.
520 		 */
521 		key = value;
522 		status = hermon_umap_db_find_nolock(instance, key,
523 		    MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
524 		    &umapdb);
525 		if (status == DDI_SUCCESS) {
526 			hermon_umap_db_free(umapdb);
527 			/* If in "maintenance mode", don't free the rsrc */
528 			if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
529 				rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
530 				hermon_rsrc_free(state, &rsrcp);
531 			}
532 		}
533 	}
534 	mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
535 	return (reset_status);
536 }
537 
538 
539 /*
540  * hermon_attach()
541  *    Context: Only called from attach() path context
542  */
543 static int
544 hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
545 {
546 	hermon_state_t	*state;
547 	ibc_clnt_hdl_t	tmp_ibtfpriv;
548 	ibc_status_t	ibc_status;
549 	int		instance;
550 	int		status;
551 
552 #ifdef __lock_lint
553 	(void) hermon_quiesce(dip);
554 #endif
555 
556 	switch (cmd) {
557 	case DDI_ATTACH:
558 		instance = ddi_get_instance(dip);
559 		status = ddi_soft_state_zalloc(hermon_statep, instance);
560 		if (status != DDI_SUCCESS) {
561 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
562 			    "attach_ssz_fail", instance);
563 			goto fail_attach_nomsg;
564 
565 		}
566 		state = ddi_get_soft_state(hermon_statep, instance);
567 		if (state == NULL) {
568 			ddi_soft_state_free(hermon_statep, instance);
569 			cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
570 			    "attach_gss_fail", instance);
571 			goto fail_attach_nomsg;
572 		}
573 
574 		/* clear the attach error buffer */
575 		HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);
576 
577 		/* Save away devinfo and instance before hermon_fm_init() */
578 		state->hs_dip = dip;
579 		state->hs_instance = instance;
580 
581 		hermon_fm_init(state);
582 
583 		/*
584 		 * Initialize Hermon driver and hardware.
585 		 *
586 		 * Note: If this initialization fails we may still wish to
587 		 * create a device node and remain operational so that Hermon
588 		 * firmware can be updated/flashed (i.e. "maintenance mode").
589 		 * If this is the case, then "hs_operational_mode" will be
590 		 * equal to HERMON_MAINTENANCE_MODE.  We will not attempt to
591 		 * attach to the IBTF or register with the IBMF (i.e. no
592 		 * InfiniBand interfaces will be enabled).
593 		 */
594 		status = hermon_drv_init(state, dip, instance);
595 		if ((status != DDI_SUCCESS) &&
596 		    (HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
597 			goto fail_attach;
598 		}
599 
600 		/*
601 		 * Change the Hermon FM mode
602 		 */
603 		if ((hermon_get_state(state) & HCA_PIO_FM) &&
604 		    HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
605 			/*
606 			 * Now we wait for 50ms to give an opportunity
607 			 * to Solaris FMA so that HW errors can be notified.
608 			 * Then check if there are HW errors or not. If
609 			 * a HW error is detected, the Hermon attachment
610 			 * must be failed.
611 			 */
612 			delay(drv_usectohz(50000));
613 			if (hermon_init_failure(state)) {
614 				hermon_drv_fini(state);
615 				HERMON_WARNING(state, "unable to "
616 				    "attach Hermon due to a HW error");
617 				HERMON_ATTACH_MSG(state->hs_attach_buf,
618 				    "hermon_attach_failure");
619 				goto fail_attach;
620 			}
621 
622 			/*
623 			 * There seems no HW errors during the attachment,
624 			 * so let's change the Hermon FM state to the
625 			 * ereport only mode.
626 			 */
627 			if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
628 				/* unwind the resources */
629 				hermon_drv_fini(state);
630 				HERMON_ATTACH_MSG(state->hs_attach_buf,
631 				    "hermon_attach_failure");
632 				goto fail_attach;
633 			}
634 		}
635 
636 		/* Create the minor node for device */
637 		status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
638 		    DDI_PSEUDO, 0);
639 		if (status != DDI_SUCCESS) {
640 			hermon_drv_fini(state);
641 			HERMON_ATTACH_MSG(state->hs_attach_buf,
642 			    "attach_create_mn_fail");
643 			goto fail_attach;
644 		}
645 
646 		/*
647 		 * If we are in "maintenance mode", then we don't want to
648 		 * register with the IBTF.  All InfiniBand interfaces are
649 		 * uninitialized, and the device is only capable of handling
650 		 * requests to update/flash firmware (or test/debug requests).
651 		 */
652 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
653 			cmn_err(CE_NOTE, "!Hermon is operational\n");
654 
655 			/* Attach to InfiniBand Transport Framework (IBTF) */
656 			ibc_status = ibc_attach(&tmp_ibtfpriv,
657 			    &state->hs_ibtfinfo);
658 			if (ibc_status != IBC_SUCCESS) {
659 				cmn_err(CE_CONT, "hermon_attach: ibc_attach "
660 				    "failed\n");
661 				ddi_remove_minor_node(dip, "devctl");
662 				hermon_drv_fini(state);
663 				HERMON_ATTACH_MSG(state->hs_attach_buf,
664 				    "attach_ibcattach_fail");
665 				goto fail_attach;
666 			}
667 
668 			/*
669 			 * Now that we've successfully attached to the IBTF,
670 			 * we enable all appropriate asynch and CQ events to
671 			 * be forwarded to the IBTF.
672 			 */
673 			HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);
674 
675 			ibc_post_attach(state->hs_ibtfpriv);
676 
677 			/* Register agents with IB Mgmt Framework (IBMF) */
678 			status = hermon_agent_handlers_init(state);
679 			if (status != DDI_SUCCESS) {
680 				(void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
681 				HERMON_QUIESCE_IBTF_CALLB(state);
682 				if (state->hs_in_evcallb != 0) {
683 					HERMON_WARNING(state, "unable to "
684 					    "quiesce Hermon IBTF callbacks");
685 				}
686 				ibc_detach(tmp_ibtfpriv);
687 				ddi_remove_minor_node(dip, "devctl");
688 				hermon_drv_fini(state);
689 				HERMON_ATTACH_MSG(state->hs_attach_buf,
690 				    "attach_agentinit_fail");
691 				goto fail_attach;
692 			}
693 		}
694 
695 		/* Report attach in maintenance mode, if appropriate */
696 		if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
697 			cmn_err(CE_NOTE, "hermon%d: driver attached "
698 			    "(for maintenance mode only)", state->hs_instance);
699 			hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
700 		}
701 
702 		/* Report that driver was loaded */
703 		ddi_report_dev(dip);
704 
705 		/* Send device information to log file */
706 		hermon_device_info_report(state);
707 
708 		/* DEBUG PRINT */
709 		cmn_err(CE_CONT, "!Hermon attach complete\n");
710 		return (DDI_SUCCESS);
711 
712 	case DDI_RESUME:
713 		/* Add code here for DDI_RESUME XXX */
714 		return (DDI_FAILURE);
715 
716 	default:
717 		cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
718 		break;
719 	}
720 
721 fail_attach:
722 	cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
723 	    state->hs_attach_buf);
724 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
725 		hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
726 	}
727 	hermon_drv_fini2(state);
728 	hermon_fm_fini(state);
729 	ddi_soft_state_free(hermon_statep, instance);
730 
731 fail_attach_nomsg:
732 	return (DDI_FAILURE);
733 }
734 
735 
736 /*
737  * hermon_detach()
738  *    Context: Only called from detach() path context
739  */
740 static int
741 hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
742 {
743 	hermon_state_t	*state;
744 	ibc_clnt_hdl_t	tmp_ibtfpriv;
745 	ibc_status_t	ibc_status;
746 	int		instance, status;
747 
748 	instance = ddi_get_instance(dip);
749 	state = ddi_get_soft_state(hermon_statep, instance);
750 	if (state == NULL) {
751 		return (DDI_FAILURE);
752 	}
753 
754 	switch (cmd) {
755 	case DDI_DETACH:
756 		/*
757 		 * If we are in "maintenance mode", then we do not want to
758 		 * do teardown for any of the InfiniBand interfaces.
759 		 * Specifically, this means not detaching from IBTF (we never
760 		 * attached to begin with) and not deregistering from IBMF.
761 		 */
762 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
763 			/* Unregister agents from IB Mgmt Framework (IBMF) */
764 			status = hermon_agent_handlers_fini(state);
765 			if (status != DDI_SUCCESS) {
766 				return (DDI_FAILURE);
767 			}
768 
769 			/*
770 			 * Attempt the "pre-detach" from InfiniBand Transport
771 			 * Framework (IBTF).  At this point the IBTF is still
772 			 * capable of handling incoming asynch and completion
773 			 * events.  This "pre-detach" is primarily a mechanism
774 			 * to notify the appropriate IBTF clients that the
775 			 * HCA is being removed/offlined.
776 			 */
777 			ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
778 			if (ibc_status != IBC_SUCCESS) {
779 				status = hermon_agent_handlers_init(state);
780 				if (status != DDI_SUCCESS) {
781 					HERMON_WARNING(state, "failed to "
782 					    "restart Hermon agents");
783 				}
784 				return (DDI_FAILURE);
785 			}
786 
787 			/*
788 			 * Before we can fully detach from the IBTF we need to
789 			 * ensure that we have handled all outstanding event
790 			 * callbacks.  This is accomplished by quiescing the
791 			 * event callback mechanism.  Note: if we are unable
792 			 * to successfully quiesce the callbacks, then this is
793 			 * an indication that something has probably gone
794 			 * seriously wrong.  We print out a warning, but
795 			 * continue.
796 			 */
797 			tmp_ibtfpriv = state->hs_ibtfpriv;
798 			HERMON_QUIESCE_IBTF_CALLB(state);
799 			if (state->hs_in_evcallb != 0) {
800 				HERMON_WARNING(state, "unable to quiesce "
801 				    "Hermon IBTF callbacks");
802 			}
803 
804 			/* Complete the detach from the IBTF */
805 			ibc_detach(tmp_ibtfpriv);
806 		}
807 
808 		/* Remove the minor node for device */
809 		ddi_remove_minor_node(dip, "devctl");
810 
811 		/*
812 		 * Only call hermon_drv_fini() if we are in Hermon HCA mode.
813 		 * (Because if we are in "maintenance mode", then we never
814 		 * successfully finished init.)  Only report successful
815 		 * detach for normal HCA mode.
816 		 */
817 		if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
818 			/* Cleanup driver resources and shutdown hardware */
819 			hermon_drv_fini(state);
820 			cmn_err(CE_CONT, "!Hermon driver successfully "
821 			    "detached\n");
822 		}
823 
824 		hermon_drv_fini2(state);
825 		hermon_fm_fini(state);
826 		ddi_soft_state_free(hermon_statep, instance);
827 
828 		return (DDI_SUCCESS);
829 
830 	case DDI_SUSPEND:
831 		/* Add code here for DDI_SUSPEND XXX */
832 		return (DDI_FAILURE);
833 
834 	default:
835 		cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
836 		break;
837 	}
838 
839 	return (DDI_FAILURE);
840 }
841 
842 /*
843  * hermon_dma_attr_init()
844  *    Context: Can be called from interrupt or base context.
845  */
846 
847 /* ARGSUSED */
848 void
849 hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
850 {
851 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
852 
853 	dma_attr->dma_attr_version	= DMA_ATTR_V0;
854 	dma_attr->dma_attr_addr_lo	= 0;
855 	dma_attr->dma_attr_addr_hi	= 0xFFFFFFFFFFFFFFFFull;
856 	dma_attr->dma_attr_count_max	= 0xFFFFFFFFFFFFFFFFull;
857 	dma_attr->dma_attr_align	= HERMON_PAGESIZE;  /* default 4K */
858 	dma_attr->dma_attr_burstsizes	= 0x3FF;
859 	dma_attr->dma_attr_minxfer	= 1;
860 	dma_attr->dma_attr_maxxfer	= 0xFFFFFFFFFFFFFFFFull;
861 	dma_attr->dma_attr_seg		= 0xFFFFFFFFFFFFFFFFull;
862 	dma_attr->dma_attr_sgllen	= 0x7FFFFFFF;
863 	dma_attr->dma_attr_granular	= 1;
864 	dma_attr->dma_attr_flags	= 0;
865 }
866 
867 /*
868  * hermon_dma_alloc()
869  *    Context: Can be called from base context.
870  */
871 int
872 hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
873     uint16_t opcode)
874 {
875 	ddi_dma_handle_t	dma_hdl;
876 	ddi_dma_attr_t		dma_attr;
877 	ddi_acc_handle_t	acc_hdl;
878 	ddi_dma_cookie_t	cookie;
879 	uint64_t		kaddr;
880 	uint64_t		real_len;
881 	uint_t			ccount;
882 	int			status;
883 
884 	hermon_dma_attr_init(state, &dma_attr);
885 #ifdef	__sparc
886 	if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
887 		dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
888 #endif
889 
890 	/* Allocate a DMA handle */
891 	status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
892 	    NULL, &dma_hdl);
893 	if (status != DDI_SUCCESS) {
894 		IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
895 		cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
896 		return (DDI_FAILURE);
897 	}
898 
899 	/* Allocate DMA memory */
900 	status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
901 	    &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
902 	    (caddr_t *)&kaddr, (size_t *)&real_len, &acc_hdl);
903 	if (status != DDI_SUCCESS) {
904 		ddi_dma_free_handle(&dma_hdl);
905 		IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
906 		cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
907 		return (DDI_FAILURE);
908 	}
909 	bzero((caddr_t)(uintptr_t)kaddr, real_len);
910 
911 	/* Bind the memory to the handle */
912 	status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
913 	    (caddr_t)(uintptr_t)kaddr, (size_t)real_len, DDI_DMA_RDWR |
914 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
915 	if (status != DDI_SUCCESS) {
916 		ddi_dma_mem_free(&acc_hdl);
917 		ddi_dma_free_handle(&dma_hdl);
918 		IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
919 		cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
920 		return (DDI_FAILURE);
921 	}
922 
923 	/* Package the hermon_dma_info contents and return */
924 	dma_info->vaddr   = kaddr;
925 	dma_info->dma_hdl = dma_hdl;
926 	dma_info->acc_hdl = acc_hdl;
927 
928 	/* Pass the mapping information to the firmware */
929 	status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
930 	if (status != DDI_SUCCESS) {
931 		char *s;
932 		hermon_dma_free(dma_info);
933 		switch (opcode) {
934 		case MAP_ICM:
935 			s = "MAP_ICM";
936 			break;
937 		case MAP_FA:
938 			s = "MAP_FA";
939 			break;
940 		case MAP_ICM_AUX:
941 			s = "MAP_ICM_AUX";
942 			break;
943 		default:
944 			s = "UNKNOWN";
945 		}
946 		cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
947 		    s, status);
948 		return (DDI_FAILURE);
949 	}
950 
951 	return (DDI_SUCCESS);
952 }
953 
/*
 * hermon_dma_free()
 *    Context: Can be called from base context.
 *
 *    Releases a DMA allocation described by "info": the handle is
 *    unbound, the memory is freed, and the handle itself is freed -- the
 *    reverse of the order in which hermon_dma_alloc() sets them up.
 *    The result of the unbind is deliberately ignored.
 */
void
hermon_dma_free(hermon_dma_info_t *info)
{
	/* Unbind the handles and free the memory */
	(void) ddi_dma_unbind_handle(info->dma_hdl);
	ddi_dma_mem_free(&info->acc_hdl);
	ddi_dma_free_handle(&info->dma_hdl);
}
966 
/*
 * Shorthand for the calls hermon_icm_alloc() and hermon_icm_free() make on
 * other resource types.  The expansions pick up "state", "index1" and
 * "index2" from the enclosing scope, so these macros are valid for use only
 * inside those two functions.
 */
#define	HERMON_ICM_ALLOC(rsrc)	\
	hermon_icm_alloc(state, (rsrc), index1, index2)
#define	HERMON_ICM_FREE(rsrc)	\
	hermon_icm_free(state, (rsrc), index1, index2)
972 
973 /*
974  * hermon_icm_alloc()
975  *    Context: Can be called from base context.
976  *
977  * Only one thread can be here for a given hermon_rsrc_type_t "type".
978  *
979  * "num_to_hdl" is set if there is a need for lookups from resource
980  * number/index to resource handle.  This is needed for QPs/CQs/SRQs
981  * for the various affiliated events/errors.
982  */
983 int
984 hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
985     uint32_t index1, uint32_t index2)
986 {
987 	hermon_icm_table_t	*icm;
988 	hermon_dma_info_t	*dma_info;
989 	uint8_t			*bitmap;
990 	int			status;
991 	int			num_to_hdl = 0;
992 
993 	if (hermon_verbose) {
994 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
995 		    "index1/2 (0x%x/0x%x)", type, index1, index2);
996 	}
997 
998 	icm = &state->hs_icm[type];
999 
1000 	switch (type) {
1001 	case HERMON_QPC:
1002 		status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
1003 		if (status != DDI_SUCCESS) {
1004 			return (status);
1005 		}
1006 		status = HERMON_ICM_ALLOC(HERMON_RDB);
1007 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1008 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1009 			return (status);
1010 		}
1011 		status = HERMON_ICM_ALLOC(HERMON_ALTC);
1012 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1013 			HERMON_ICM_FREE(HERMON_RDB);
1014 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1015 			return (status);
1016 		}
1017 		status = HERMON_ICM_ALLOC(HERMON_AUXC);
1018 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1019 			HERMON_ICM_FREE(HERMON_ALTC);
1020 			HERMON_ICM_FREE(HERMON_RDB);
1021 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1022 			return (status);
1023 		}
1024 		num_to_hdl = 1;
1025 		break;
1026 	case HERMON_SRQC:
1027 		status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
1028 		if (status != DDI_SUCCESS) {
1029 			return (status);
1030 		}
1031 		num_to_hdl = 1;
1032 		break;
1033 	case HERMON_CQC:
1034 		status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
1035 		if (status != DDI_SUCCESS) {
1036 			return (status);
1037 		}
1038 		num_to_hdl = 1;
1039 		break;
1040 	case HERMON_EQC:
1041 		status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
1042 		if (status != DDI_SUCCESS) {	/* undo icm_alloc's */
1043 			return (status);
1044 		}
1045 		break;
1046 	}
1047 
1048 	/* ensure existence of bitmap and dmainfo, sets "dma_info" */
1049 	hermon_bitmap(bitmap, dma_info, icm, index1, num_to_hdl);
1050 
1051 	/* Set up the DMA handle for allocation and mapping */
1052 	dma_info += index2;
1053 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
1054 	dma_info->length  = icm->span << icm->log_object_size;
1055 	dma_info->icmaddr = icm->icm_baseaddr +
1056 	    (((index1 << icm->split_shift) +
1057 	    (index2 << icm->span_shift)) << icm->log_object_size);
1058 
1059 	/* Allocate memory for the num_to_qp/cq/srq pointers */
1060 	if (num_to_hdl)
1061 		icm->num_to_hdl[index1][index2] =
1062 		    kmem_zalloc(HERMON_ICM_SPAN * sizeof (void *), KM_SLEEP);
1063 
1064 	if (hermon_verbose) {
1065 		IBTF_DPRINTF_L2("hermon", "alloc DMA: "
1066 		    "rsrc (0x%x) index (%x, %x) "
1067 		    "icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
1068 		    (longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
1069 	}
1070 
1071 	/* Allocate and map memory for this span */
1072 	status = hermon_dma_alloc(state, dma_info, MAP_ICM);
1073 	if (status != DDI_SUCCESS) {
1074 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
1075 		    "allocation failed, status 0x%x", status);
1076 		switch (type) {
1077 		case HERMON_QPC:
1078 			HERMON_ICM_FREE(HERMON_AUXC);
1079 			HERMON_ICM_FREE(HERMON_ALTC);
1080 			HERMON_ICM_FREE(HERMON_RDB);
1081 			HERMON_ICM_FREE(HERMON_CMPT_QPC);
1082 			break;
1083 		case HERMON_SRQC:
1084 			HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1085 			break;
1086 		case HERMON_CQC:
1087 			HERMON_ICM_FREE(HERMON_CMPT_CQC);
1088 			break;
1089 		case HERMON_EQC:
1090 			HERMON_ICM_FREE(HERMON_CMPT_EQC);
1091 			break;
1092 		}
1093 
1094 		return (DDI_FAILURE);
1095 	}
1096 	if (hermon_verbose) {
1097 		IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
1098 		    "rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
1099 		    "icm_addr (0x%lx)", type, index1, index2, dma_info->length,
1100 		    dma_info->icmaddr);
1101 	}
1102 
1103 	/* Set the bit for this slot in the table bitmap */
1104 	HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);
1105 
1106 	return (DDI_SUCCESS);
1107 }
1108 
1109 /*
1110  * hermon_icm_free()
1111  *    Context: Can be called from base context.
1112  *
1113  * ICM resources have been successfully returned from hermon_icm_alloc().
1114  * Associated dma_info is no longer in use.  Free the ICM backing memory.
1115  */
1116 void
1117 hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
1118     uint32_t index1, uint32_t index2)
1119 {
1120 	hermon_icm_table_t	*icm;
1121 	hermon_dma_info_t	*dma_info;
1122 	int			status;
1123 
1124 	icm = &state->hs_icm[type];
1125 	ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
1126 
1127 	if (hermon_verbose) {
1128 		IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
1129 		    "index (0x%x, 0x%x)", type, index1, index2);
1130 	}
1131 
1132 	dma_info = icm->icm_dma[index1] + index2;
1133 
1134 	/* The following only happens if attach() is failing. */
1135 	if (dma_info == NULL)
1136 		return;
1137 
1138 	/* Unmap the ICM allocation, then free the backing DMA memory */
1139 	status = hermon_unmap_icm_cmd_post(state, dma_info);
1140 	if (status != DDI_SUCCESS) {
1141 		HERMON_WARNING(state, "UNMAP_ICM failure");
1142 	}
1143 	hermon_dma_free(dma_info);
1144 
1145 	/* Clear the bit in the ICM table bitmap */
1146 	HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);
1147 
1148 	switch (type) {
1149 	case HERMON_QPC:
1150 		HERMON_ICM_FREE(HERMON_AUXC);
1151 		HERMON_ICM_FREE(HERMON_ALTC);
1152 		HERMON_ICM_FREE(HERMON_RDB);
1153 		HERMON_ICM_FREE(HERMON_CMPT_QPC);
1154 		break;
1155 	case HERMON_SRQC:
1156 		HERMON_ICM_FREE(HERMON_CMPT_SRQC);
1157 		break;
1158 	case HERMON_CQC:
1159 		HERMON_ICM_FREE(HERMON_CMPT_CQC);
1160 		break;
1161 	case HERMON_EQC:
1162 		HERMON_ICM_FREE(HERMON_CMPT_EQC);
1163 		break;
1164 
1165 	}
1166 }
1167 
1168 
1169 /*
1170  * hermon_icm_num_to_hdl()
1171  *    Context: Can be called from base or interrupt context.
1172  *
1173  * Given an index of a resource, index through the sparsely allocated
1174  * arrays to find the pointer to its software handle.  Return NULL if
1175  * any of the arrays of pointers has been freed (should never happen).
1176  */
1177 void *
1178 hermon_icm_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type,
1179     uint32_t idx)
1180 {
1181 	hermon_icm_table_t	*icm;
1182 	uint32_t		span_offset;
1183 	uint32_t		index1, index2;
1184 	void			***p1, **p2;
1185 
1186 	icm = &state->hs_icm[type];
1187 	hermon_index(index1, index2, idx, icm, span_offset);
1188 	p1 = icm->num_to_hdl[index1];
1189 	if (p1 == NULL) {
1190 		IBTF_DPRINTF_L2("hermon", "icm_num_to_hdl failed at level 1"
1191 		    ": rsrc_type %d, index 0x%x", type, idx);
1192 		return (NULL);
1193 	}
1194 	p2 = p1[index2];
1195 	if (p2 == NULL) {
1196 		IBTF_DPRINTF_L2("hermon", "icm_num_to_hdl failed at level 2"
1197 		    ": rsrc_type %d, index 0x%x", type, idx);
1198 		return (NULL);
1199 	}
1200 	return (p2[span_offset]);
1201 }
1202 
1203 /*
1204  * hermon_icm_set_num_to_hdl()
1205  *    Context: Can be called from base or interrupt context.
1206  *
1207  * Given an index of a resource, we index through the sparsely allocated
1208  * arrays to store the software handle, used by hermon_icm_num_to_hdl().
1209  * This function is used to both set and reset (set to NULL) the handle.
1210  * This table is allocated during ICM allocation for the given resource,
1211  * so its existence is a given, and the store location does not conflict
1212  * with any other stores to the table (no locking needed).
1213  */
1214 void
1215 hermon_icm_set_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type,
1216     uint32_t idx, void *hdl)
1217 {
1218 	hermon_icm_table_t	*icm;
1219 	uint32_t		span_offset;
1220 	uint32_t		index1, index2;
1221 
1222 	icm = &state->hs_icm[type];
1223 	hermon_index(index1, index2, idx, icm, span_offset);
1224 	ASSERT((hdl == NULL) ^
1225 	    (icm->num_to_hdl[index1][index2][span_offset] == NULL));
1226 	icm->num_to_hdl[index1][index2][span_offset] = hdl;
1227 }
1228 
1229 /*
1230  * hermon_device_mode()
1231  *    Context: Can be called from base or interrupt context.
1232  *
1233  * Return HERMON_HCA_MODE for operational mode
1234  * Return HERMON_MAINTENANCE_MODE for maintenance mode
1235  * Return 0 otherwise
1236  *
1237  * A non-zero return for either operational or maintenance mode simplifies
1238  * one of the 2 uses of this function.
1239  */
1240 int
1241 hermon_device_mode(hermon_state_t *state)
1242 {
1243 	if (state->hs_vendor_id != PCI_VENID_MLX)
1244 		return (0);
1245 
1246 	switch (state->hs_device_id) {
1247 	case PCI_DEVID_HERMON_SDR:
1248 	case PCI_DEVID_HERMON_DDR:
1249 	case PCI_DEVID_HERMON_DDRG2:
1250 	case PCI_DEVID_HERMON_QDRG2:
1251 	case PCI_DEVID_HERMON_QDRG2V:
1252 		return (HERMON_HCA_MODE);
1253 	case PCI_DEVID_HERMON_MAINT:
1254 		return (HERMON_MAINTENANCE_MODE);
1255 	default:
1256 		return (0);
1257 	}
1258 }
1259 
1260 /*
1261  * hermon_drv_init()
1262  *    Context: Only called from attach() path context
1263  */
1264 /* ARGSUSED */
1265 static int
1266 hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
1267 {
1268 	int	status;
1269 
1270 	/* Retrieve PCI device, vendor and rev IDs */
1271 	state->hs_vendor_id	 = HERMON_GET_VENDOR_ID(state->hs_dip);
1272 	state->hs_device_id	 = HERMON_GET_DEVICE_ID(state->hs_dip);
1273 	state->hs_revision_id	 = HERMON_GET_REVISION_ID(state->hs_dip);
1274 
1275 	/*
1276 	 * Check and set the operational mode of the device. If the driver is
1277 	 * bound to the Hermon device in "maintenance mode", then this generally
1278 	 * means that either the device has been specifically jumpered to
1279 	 * start in this mode or the firmware boot process has failed to
1280 	 * successfully load either the primary or the secondary firmware
1281 	 * image.
1282 	 */
1283 	state->hs_operational_mode = hermon_device_mode(state);
1284 	switch (state->hs_operational_mode) {
1285 	case HERMON_HCA_MODE:
1286 		state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
1287 		break;
1288 	case HERMON_MAINTENANCE_MODE:
1289 		HERMON_FMANOTE(state, HERMON_FMA_MAINT);
1290 		state->hs_fm_degraded_reason = HCA_FW_MISC; /* not fw reason */
1291 		return (DDI_FAILURE);
1292 	default:
1293 		HERMON_FMANOTE(state, HERMON_FMA_PCIID);
1294 		HERMON_WARNING(state, "unexpected device type detected");
1295 		return (DDI_FAILURE);
1296 	}
1297 
1298 	/*
1299 	 * Initialize the Hermon hardware.
1300 	 *
1301 	 * Note:  If this routine returns an error, it is often a reasonably
1302 	 * good indication that something Hermon firmware-related has caused
1303 	 * the failure or some HW related errors have caused the failure.
1304 	 * (also there are few possibilities that SW (e.g. SW resource
1305 	 * shortage) can cause the failure, but the majority case is due to
1306 	 * either a firmware related error or a HW related one) In order to
1307 	 * give the user an opportunity (if desired) to update or reflash
1308 	 * the Hermon firmware image, we set "hs_operational_mode" flag
1309 	 * (described above) to indicate that we wish to enter maintenance
1310 	 * mode in case of the firmware-related issue.
1311 	 */
1312 	status = hermon_hw_init(state);
1313 	if (status != DDI_SUCCESS) {
1314 		cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
1315 		    state->hs_attach_buf);
1316 		return (DDI_FAILURE);
1317 	}
1318 
1319 	/*
1320 	 * Now that the ISR has been setup, arm all the EQs for event
1321 	 * generation.
1322 	 */
1323 
1324 	status = hermon_eq_arm_all(state);
1325 	if (status != DDI_SUCCESS) {
1326 		cmn_err(CE_NOTE, "EQ Arm All failed\n");
1327 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1328 		return (DDI_FAILURE);
1329 	}
1330 
1331 	/* test interrupts and event queues */
1332 	status = hermon_nop_post(state, 0x0, 0x0);
1333 	if (status != DDI_SUCCESS) {
1334 		cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
1335 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1336 		return (DDI_FAILURE);
1337 	}
1338 
1339 	/* Initialize Hermon softstate */
1340 	status = hermon_soft_state_init(state);
1341 	if (status != DDI_SUCCESS) {
1342 		cmn_err(CE_NOTE, "Failed to init soft state\n");
1343 		hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1344 		return (DDI_FAILURE);
1345 	}
1346 
1347 	return (DDI_SUCCESS);
1348 }
1349 
1350 
1351 /*
1352  * hermon_drv_fini()
1353  *    Context: Only called from attach() and/or detach() path contexts
1354  */
1355 static void
1356 hermon_drv_fini(hermon_state_t *state)
1357 {
1358 	/* Cleanup Hermon softstate */
1359 	hermon_soft_state_fini(state);
1360 
1361 	/* Cleanup Hermon resources and shutdown hardware */
1362 	hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
1363 }
1364 
1365 
1366 /*
1367  * hermon_drv_fini2()
1368  *    Context: Only called from attach() and/or detach() path contexts
1369  */
1370 static void
1371 hermon_drv_fini2(hermon_state_t *state)
1372 {
1373 	if (state->hs_fm_poll_thread) {
1374 		ddi_periodic_delete(state->hs_fm_poll_thread);
1375 		state->hs_fm_poll_thread = NULL;
1376 	}
1377 
1378 	/* HERMON_DRV_CLEANUP_LEVEL1 */
1379 	if (state->hs_fm_cmdhdl) {
1380 		hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
1381 		state->hs_fm_cmdhdl = NULL;
1382 	}
1383 
1384 	if (state->hs_reg_cmdhdl) {
1385 		ddi_regs_map_free(&state->hs_reg_cmdhdl);
1386 		state->hs_reg_cmdhdl = NULL;
1387 	}
1388 
1389 	/* HERMON_DRV_CLEANUP_LEVEL0 */
1390 	if (state->hs_msix_tbl_entries) {
1391 		kmem_free(state->hs_msix_tbl_entries,
1392 		    state->hs_msix_tbl_size);
1393 		state->hs_msix_tbl_entries = NULL;
1394 	}
1395 
1396 	if (state->hs_msix_pba_entries) {
1397 		kmem_free(state->hs_msix_pba_entries,
1398 		    state->hs_msix_pba_size);
1399 		state->hs_msix_pba_entries = NULL;
1400 	}
1401 
1402 	if (state->hs_fm_msix_tblhdl) {
1403 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
1404 		state->hs_fm_msix_tblhdl = NULL;
1405 	}
1406 
1407 	if (state->hs_reg_msix_tblhdl) {
1408 		ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
1409 		state->hs_reg_msix_tblhdl = NULL;
1410 	}
1411 
1412 	if (state->hs_fm_msix_pbahdl) {
1413 		hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
1414 		state->hs_fm_msix_pbahdl = NULL;
1415 	}
1416 
1417 	if (state->hs_reg_msix_pbahdl) {
1418 		ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
1419 		state->hs_reg_msix_pbahdl = NULL;
1420 	}
1421 
1422 	if (state->hs_fm_pcihdl) {
1423 		hermon_pci_config_teardown(state, &state->hs_fm_pcihdl);
1424 		state->hs_fm_pcihdl = NULL;
1425 	}
1426 
1427 	if (state->hs_reg_pcihdl) {
1428 		pci_config_teardown(&state->hs_reg_pcihdl);
1429 		state->hs_reg_pcihdl = NULL;
1430 	}
1431 }
1432 
1433 
1434 /*
1435  * hermon_isr_init()
1436  *    Context: Only called from attach() path context
1437  */
1438 static int
1439 hermon_isr_init(hermon_state_t *state)
1440 {
1441 	int	status;
1442 	int	intr;
1443 
1444 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1445 
1446 		/*
1447 		 * Add a handler for the interrupt or MSI
1448 		 */
1449 		status = ddi_intr_add_handler(state->hs_intrmsi_hdl[intr],
1450 		    hermon_isr, (caddr_t)state, (void *)(uintptr_t)intr);
1451 		if (status  != DDI_SUCCESS) {
1452 			return (DDI_FAILURE);
1453 		}
1454 
1455 		/*
1456 		 * Enable the software interrupt.  Note: depending on the value
1457 		 * returned in the capability flag, we have to call either
1458 		 * ddi_intr_block_enable() or ddi_intr_enable().
1459 		 */
1460 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1461 			status = ddi_intr_block_enable(
1462 			    &state->hs_intrmsi_hdl[intr], 1);
1463 			if (status != DDI_SUCCESS) {
1464 				return (DDI_FAILURE);
1465 			}
1466 		} else {
1467 			status = ddi_intr_enable(state->hs_intrmsi_hdl[intr]);
1468 			if (status != DDI_SUCCESS) {
1469 				return (DDI_FAILURE);
1470 			}
1471 		}
1472 	}
1473 
1474 	/*
1475 	 * Now that the ISR has been enabled, defer arm_all  EQs for event
1476 	 * generation until later, in case MSIX is enabled
1477 	 */
1478 	return (DDI_SUCCESS);
1479 }
1480 
1481 
1482 /*
1483  * hermon_isr_fini()
1484  *    Context: Only called from attach() and/or detach() path contexts
1485  */
1486 static void
1487 hermon_isr_fini(hermon_state_t *state)
1488 {
1489 	int	intr;
1490 
1491 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
1492 		/* Disable the software interrupt */
1493 		if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
1494 			(void) ddi_intr_block_disable(
1495 			    &state->hs_intrmsi_hdl[intr], 1);
1496 		} else {
1497 			(void) ddi_intr_disable(state->hs_intrmsi_hdl[intr]);
1498 		}
1499 
1500 		/*
1501 		 * Remove the software handler for the interrupt or MSI
1502 		 */
1503 		(void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[intr]);
1504 	}
1505 }
1506 
1507 
1508 /*
1509  * Sum of ICM configured values:
1510  *     cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
1511  *
1512  */
1513 static uint64_t
1514 hermon_size_icm(hermon_state_t *state)
1515 {
1516 	hermon_hw_querydevlim_t	*devlim;
1517 	hermon_cfg_profile_t	*cfg;
1518 	uint64_t		num_cmpts, num_dmpts, num_mtts;
1519 	uint64_t		num_qpcs, num_srqc, num_rdbs;
1520 #ifndef HERMON_FW_WORKAROUND
1521 	uint64_t		num_auxc;
1522 #endif
1523 	uint64_t		num_cqcs, num_altc;
1524 	uint64_t		num_eqcs, num_mcgs;
1525 	uint64_t		size;
1526 
1527 	devlim = &state->hs_devlim;
1528 	cfg = state->hs_cfg_profile;
1529 	/* number of respective entries */
1530 	num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
1531 	num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
1532 	num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
1533 	num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
1534 	num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
1535 	num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
1536 	num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
1537 	num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1538 #ifndef HERMON_FW_WORKAROUND
1539 	num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
1540 #endif
1541 	num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
1542 	num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;
1543 
1544 	size =
1545 	    num_cmpts 	* devlim->cmpt_entry_sz +
1546 	    num_dmpts	* devlim->dmpt_entry_sz +
1547 	    num_mtts	* devlim->mtt_entry_sz +
1548 	    num_qpcs	* devlim->qpc_entry_sz +
1549 	    num_srqc	* devlim->srq_entry_sz +
1550 	    num_rdbs	* devlim->rdmardc_entry_sz +
1551 	    num_cqcs	* devlim->cqc_entry_sz +
1552 	    num_altc	* devlim->altc_entry_sz +
1553 #ifdef HERMON_FW_WORKAROUND
1554 	    0x80000000ull +
1555 #else
1556 	    num_auxc	* devlim->aux_entry_sz	+
1557 #endif
1558 	    num_eqcs	* devlim->eqc_entry_sz +
1559 	    num_mcgs	* HERMON_MCGMEM_SZ(state);
1560 	return (size);
1561 }
1562 
1563 
1564 /*
1565  * hermon_hw_init()
1566  *    Context: Only called from attach() path context
1567  */
1568 static int
1569 hermon_hw_init(hermon_state_t *state)
1570 {
1571 	hermon_drv_cleanup_level_t	cleanup;
1572 	sm_nodeinfo_t			nodeinfo;
1573 	uint64_t			clr_intr_offset;
1574 	int				status;
1575 	uint32_t			fw_size;	/* in page */
1576 	uint64_t			offset;
1577 
1578 	/* This is where driver initialization begins */
1579 	cleanup = HERMON_DRV_CLEANUP_LEVEL0;
1580 
1581 	/* Setup device access attributes */
1582 	state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V1;
1583 	state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1584 	state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1585 	state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;
1586 
1587 	/* Setup fma-protected access attributes */
1588 	state->hs_fm_accattr.devacc_attr_version =
1589 	    hermon_devacc_attr_version(state);
1590 	state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1591 	state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1592 	/* set acc err protection type */
1593 	state->hs_fm_accattr.devacc_attr_access =
1594 	    hermon_devacc_attr_access(state);
1595 
1596 	/* Setup for PCI config read/write of HCA device */
1597 	status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
1598 	if (status != DDI_SUCCESS) {
1599 		hermon_hw_fini(state, cleanup);
1600 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1601 		    "hw_init_PCI_config_space_regmap_fail");
1602 		/* This case is not the degraded one */
1603 		return (DDI_FAILURE);
1604 	}
1605 
1606 	/* Map PCI config space and MSI-X tables/pba */
1607 	hermon_set_msix_info(state);
1608 
1609 	/* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
1610 	status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
1611 	    &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
1612 	    &state->hs_fm_cmdhdl);
1613 	if (status != DDI_SUCCESS) {
1614 		hermon_hw_fini(state, cleanup);
1615 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1616 		    "hw_init_CMD_BAR_regmap_fail");
1617 		/* This case is not the degraded one */
1618 		return (DDI_FAILURE);
1619 	}
1620 
1621 	cleanup = HERMON_DRV_CLEANUP_LEVEL1;
1622 	/*
1623 	 * We defer UAR-BAR mapping until later.  Need to know if
1624 	 * blueflame mapping is to be done, and don't know that until after
1625 	 * we get the dev_caps, so do it right after that
1626 	 */
1627 
1628 	/*
1629 	 * There is a third BAR defined for Hermon - it is for MSIX
1630 	 *
1631 	 * Will need to explore it's possible need/use w/ Mellanox
1632 	 * [es] Temporary mapping maybe
1633 	 */
1634 
1635 #ifdef HERMON_SUPPORTS_MSIX_BAR
1636 	status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
1637 	    &state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
1638 	    &state->hs_reg_msihdl);
1639 	if (status != DDI_SUCCESS) {
1640 		hermon_hw_fini(state, cleanup);
1641 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1642 		    "hw_init_MSIX_BAR_regmap_fail");
1643 		/* This case is not the degraded one */
1644 		return (DDI_FAILURE);
1645 	}
1646 #endif
1647 
1648 	cleanup = HERMON_DRV_CLEANUP_LEVEL2;
1649 
1650 	/*
1651 	 * Save interesting registers away. The offsets of the first two
1652 	 * here (HCR and sw_reset) are detailed in the PRM, the others are
1653 	 * derived from values in the QUERY_FW output, so we'll save them
1654 	 * off later.
1655 	 */
1656 	/* Host Command Register (HCR) */
1657 	state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
1658 	    ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
1659 	state->hs_cmd_toggle = 0;	/* initialize it for use */
1660 
1661 	/* Software Reset register (sw_reset) and semaphore */
1662 	state->hs_cmd_regs.sw_reset = (uint32_t *)
1663 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1664 	    HERMON_CMD_SW_RESET_OFFSET);
1665 	state->hs_cmd_regs.sw_semaphore = (uint32_t *)
1666 	    ((uintptr_t)state->hs_reg_cmd_baseaddr +
1667 	    HERMON_CMD_SW_SEMAPHORE_OFFSET);
1668 
1669 	/* make sure init'd before we start filling things in */
1670 	bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));
1671 
1672 	/* Initialize the Phase1 configuration profile */
1673 	status = hermon_cfg_profile_init_phase1(state);
1674 	if (status != DDI_SUCCESS) {
1675 		hermon_hw_fini(state, cleanup);
1676 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1677 		    "hw_init_cfginit1_fail");
1678 		/* This case is not the degraded one */
1679 		return (DDI_FAILURE);
1680 	}
1681 	cleanup = HERMON_DRV_CLEANUP_LEVEL3;
1682 
1683 	/* Do a software reset of the adapter to ensure proper state */
1684 	status = hermon_sw_reset(state);
1685 	if (status != HERMON_CMD_SUCCESS) {
1686 		hermon_hw_fini(state, cleanup);
1687 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1688 		    "hw_init_sw_reset_fail");
1689 		/* This case is not the degraded one */
1690 		return (DDI_FAILURE);
1691 	}
1692 
1693 	/* Initialize mailboxes */
1694 	status = hermon_rsrc_init_phase1(state);
1695 	if (status != DDI_SUCCESS) {
1696 		hermon_hw_fini(state, cleanup);
1697 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1698 		    "hw_init_rsrcinit1_fail");
1699 		/* This case is not the degraded one */
1700 		return (DDI_FAILURE);
1701 	}
1702 	cleanup = HERMON_DRV_CLEANUP_LEVEL4;
1703 
1704 	/* Post QUERY_FW */
1705 	status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
1706 	    sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
1707 	if (status != HERMON_CMD_SUCCESS) {
1708 		cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
1709 		hermon_hw_fini(state, cleanup);
1710 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1711 		    "hw_init_query_fw_cmd_fail");
1712 		/* This case is not the degraded one */
1713 		return (DDI_FAILURE);
1714 	}
1715 
1716 	/* Validate what/that HERMON FW version is appropriate */
1717 
1718 	status = hermon_fw_version_check(state);
1719 	if (status != DDI_SUCCESS) {
1720 		HERMON_FMANOTE(state, HERMON_FMA_FWVER);
1721 		if (state->hs_operational_mode == HERMON_HCA_MODE) {
1722 			cmn_err(CE_CONT, "Unsupported Hermon FW version: "
1723 			    "expected: %04d.%04d.%04d, "
1724 			    "actual: %04d.%04d.%04d\n",
1725 			    HERMON_FW_VER_MAJOR,
1726 			    HERMON_FW_VER_MINOR,
1727 			    HERMON_FW_VER_SUBMINOR,
1728 			    state->hs_fw.fw_rev_major,
1729 			    state->hs_fw.fw_rev_minor,
1730 			    state->hs_fw.fw_rev_subminor);
1731 		} else {
1732 			cmn_err(CE_CONT, "Unsupported FW version: "
1733 			    "%04d.%04d.%04d\n",
1734 			    state->hs_fw.fw_rev_major,
1735 			    state->hs_fw.fw_rev_minor,
1736 			    state->hs_fw.fw_rev_subminor);
1737 		}
1738 		state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1739 		state->hs_fm_degraded_reason = HCA_FW_MISMATCH;
1740 		hermon_hw_fini(state, cleanup);
1741 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1742 		    "hw_init_checkfwver_fail");
1743 		/* This case is the degraded one */
1744 		return (HERMON_CMD_BAD_NVMEM);
1745 	}
1746 
1747 	/*
1748 	 * Save off the rest of the interesting registers that we'll be using.
1749 	 * Setup the offsets for the other registers.
1750 	 */
1751 
1752 	/*
1753 	 * Hermon does the intr_offset from the BAR - technically should get the
1754 	 * BAR info from the response, but PRM says it's from BAR0-1, which is
1755 	 * for us the CMD BAR
1756 	 */
1757 
1758 	clr_intr_offset	 = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;
1759 
1760 	/* Save Clear Interrupt address */
1761 	state->hs_cmd_regs.clr_intr = (uint64_t *)
1762 	    (uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);
1763 
1764 	/*
1765 	 * Set the error buffer also into the structure - used in hermon_event.c
1766 	 * to check for internal error on the HCA, not reported in eqe or
1767 	 * (necessarily) by interrupt
1768 	 */
1769 	state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
1770 	    (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);
1771 
1772 	/*
1773 	 * Invoke a polling thread to check the error buffer periodically.
1774 	 */
1775 	if (!hermon_no_inter_err_chk) {
1776 		state->hs_fm_poll_thread = ddi_periodic_add(
1777 		    hermon_inter_err_chk, (void *)state, FM_POLL_INTERVAL,
1778 		    DDI_IPL_0);
1779 	}
1780 
1781 	cleanup = HERMON_DRV_CLEANUP_LEVEL5;
1782 
1783 	/*
1784 	 * Allocate, map, and run the HCA Firmware.
1785 	 */
1786 
1787 	/* Allocate memory for the firmware to load into and map it */
1788 
1789 	/* get next higher power of 2 */
1790 	fw_size = 1 << highbit(state->hs_fw.fw_pages);
1791 	state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
1792 	status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
1793 	if (status != DDI_SUCCESS) {
1794 		cmn_err(CE_NOTE, "FW alloc failed\n");
1795 		hermon_hw_fini(state, cleanup);
1796 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1797 		    "hw_init_dma_alloc_fw_fail");
1798 		/* This case is not the degraded one */
1799 		return (DDI_FAILURE);
1800 	}
1801 
1802 	cleanup = HERMON_DRV_CLEANUP_LEVEL6;
1803 
1804 	/* Invoke the RUN_FW cmd to run the firmware */
1805 	status = hermon_run_fw_cmd_post(state);
1806 	if (status != DDI_SUCCESS) {
1807 		cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
1808 		if (status == HERMON_CMD_BAD_NVMEM) {
1809 			state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
1810 			state->hs_fm_degraded_reason = HCA_FW_CORRUPT;
1811 		}
1812 		hermon_hw_fini(state, cleanup);
1813 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
1814 		/*
1815 		 * If the status is HERMON_CMD_BAD_NVMEM, it's likely the
1816 		 * firmware is corrupted, so the mode falls into the
1817 		 * maintenance mode.
1818 		 */
1819 		return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
1820 		    DDI_FAILURE);
1821 	}
1822 
1823 
1824 	/*
1825 	 * QUERY DEVICE LIMITS/CAPABILITIES
1826 	 * NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
1827 	 * but for familiarity we have kept the structure name the
1828 	 * same as Tavor/Arbel
1829 	 */
1830 
1831 	status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
1832 	    &state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
1833 	    HERMON_CMD_NOSLEEP_SPIN);
1834 	if (status != HERMON_CMD_SUCCESS) {
1835 		cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
1836 		    status);
1837 		hermon_hw_fini(state, cleanup);
1838 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
1839 		/* This case is not the degraded one */
1840 		return (DDI_FAILURE);
1841 	}
1842 
1843 	state->hs_rsvd_eqs = max(state->hs_devlim.num_rsvd_eq,
1844 	    (4 * state->hs_devlim.num_rsvd_uar));
1845 
1846 	/* now we have enough info to map in the UAR BAR */
1847 	/*
1848 	 * First, we figure out how to map the BAR for UAR - use only half if
1849 	 * BlueFlame is enabled - in that case the mapped length is 1/2 the
1850 	 * log_max_uar_sz (max__uar - 1) * 1MB ( +20).
1851 	 */
1852 
1853 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1854 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1855 	} else {
1856 		offset = 0;	/* a zero length means map the whole thing */
1857 	}
1858 	status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
1859 	    &state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
1860 	    &state->hs_fm_uarhdl);
1861 	if (status != DDI_SUCCESS) {
1862 		HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
1863 		/* This case is not the degraded one */
1864 		return (DDI_FAILURE);
1865 	}
1866 
1867 	/* and if BlueFlame is enabled, map the other half there */
1868 	if (state->hs_devlim.blu_flm) {		/* Blue Flame Enabled */
1869 		offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
1870 		status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
1871 		    &state->hs_reg_bf_baseaddr, offset, offset,
1872 		    &state->hs_reg_accattr, &state->hs_reg_bfhdl);
1873 		if (status != DDI_SUCCESS) {
1874 			HERMON_ATTACH_MSG(state->hs_attach_buf,
1875 			    "BlueFlame BAR mapping");
1876 			/* This case is not the degraded one */
1877 			return (DDI_FAILURE);
1878 		}
1879 		/* This will be used in hw_fini if we fail to init. */
1880 		state->hs_bf_offset = offset;
1881 	}
1882 	cleanup = HERMON_DRV_CLEANUP_LEVEL7;
1883 
1884 	/* Hermon has a couple of things needed for phase 2 in query port */
1885 
1886 	status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
1887 	    &state->hs_queryport, sizeof (hermon_hw_query_port_t),
1888 	    HERMON_CMD_NOSLEEP_SPIN);
1889 	if (status != HERMON_CMD_SUCCESS) {
1890 		cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
1891 		    status);
1892 		hermon_hw_fini(state, cleanup);
1893 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1894 		    "hw_init_queryport_fail");
1895 		/* This case is not the degraded one */
1896 		return (DDI_FAILURE);
1897 	}
1898 
1899 	/* Initialize the Phase2 Hermon configuration profile */
1900 	status = hermon_cfg_profile_init_phase2(state);
1901 	if (status != DDI_SUCCESS) {
1902 		cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
1903 		hermon_hw_fini(state, cleanup);
1904 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1905 		    "hw_init_cfginit2_fail");
1906 		/* This case is not the degraded one */
1907 		return (DDI_FAILURE);
1908 	}
1909 
1910 	/* Determine and set the ICM size */
1911 	state->hs_icm_sz = hermon_size_icm(state);
1912 	status		 = hermon_set_icm_size_cmd_post(state);
1913 	if (status != DDI_SUCCESS) {
1914 		cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
1915 		    status);
1916 		hermon_hw_fini(state, cleanup);
1917 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1918 		    "hw_init_seticmsz_fail");
1919 		/* This case is not the degraded one */
1920 		return (DDI_FAILURE);
1921 	}
1922 	/* alloc icm aux physical memory and map it */
1923 
1924 	state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);
1925 
1926 	status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
1927 	if (status != DDI_SUCCESS) {
1928 		cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
1929 		    (longlong_t)state->hs_icma_dma.length);
1930 		hermon_hw_fini(state, cleanup);
1931 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1932 		    "hw_init_dma_alloc_icm_aux_fail");
1933 		/* This case is not the degraded one */
1934 		return (DDI_FAILURE);
1935 	}
1936 	cleanup = HERMON_DRV_CLEANUP_LEVEL8;
1937 
1938 	cleanup = HERMON_DRV_CLEANUP_LEVEL9;
1939 
1940 	/* Allocate an array of structures to house the ICM tables */
1941 	state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
1942 	    sizeof (hermon_icm_table_t), KM_SLEEP);
1943 
1944 	/* Set up the ICM address space and the INIT_HCA command input */
1945 	status = hermon_icm_config_setup(state, &state->hs_hcaparams);
1946 	if (status != HERMON_CMD_SUCCESS) {
1947 		cmn_err(CE_NOTE, "ICM configuration failed\n");
1948 		hermon_hw_fini(state, cleanup);
1949 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1950 		    "hw_init_icm_config_setup_fail");
1951 		/* This case is not the degraded one */
1952 		return (DDI_FAILURE);
1953 	}
1954 	cleanup = HERMON_DRV_CLEANUP_LEVEL10;
1955 
1956 	/* Initialize the adapter with the INIT_HCA cmd */
1957 	status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
1958 	    HERMON_CMD_NOSLEEP_SPIN);
1959 	if (status != HERMON_CMD_SUCCESS) {
1960 		cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
1961 		hermon_hw_fini(state, cleanup);
1962 		HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
1963 		/* This case is not the degraded one */
1964 		return (DDI_FAILURE);
1965 	}
1966 	cleanup = HERMON_DRV_CLEANUP_LEVEL11;
1967 
1968 	/* Enter the second phase of init for Hermon configuration/resources */
1969 	status = hermon_rsrc_init_phase2(state);
1970 	if (status != DDI_SUCCESS) {
1971 		hermon_hw_fini(state, cleanup);
1972 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1973 		    "hw_init_rsrcinit2_fail");
1974 		/* This case is not the degraded one */
1975 		return (DDI_FAILURE);
1976 	}
1977 	cleanup = HERMON_DRV_CLEANUP_LEVEL12;
1978 
1979 	/* Query the adapter via QUERY_ADAPTER */
1980 	status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
1981 	    &state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
1982 	    HERMON_CMD_NOSLEEP_SPIN);
1983 	if (status != HERMON_CMD_SUCCESS) {
1984 		cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
1985 		    status);
1986 		hermon_hw_fini(state, cleanup);
1987 		HERMON_ATTACH_MSG(state->hs_attach_buf,
1988 		    "hw_init_query_adapter_fail");
1989 		/* This case is not the degraded one */
1990 		return (DDI_FAILURE);
1991 	}
1992 
1993 	/* Allocate protection domain (PD) for Hermon internal use */
1994 	status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
1995 	    HERMON_SLEEP);
1996 	if (status != DDI_SUCCESS) {
1997 		cmn_err(CE_NOTE, "failed to alloc internal PD\n");
1998 		hermon_hw_fini(state, cleanup);
1999 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2000 		    "hw_init_internal_pd_alloc_fail");
2001 		/* This case is not the degraded one */
2002 		return (DDI_FAILURE);
2003 	}
2004 	cleanup = HERMON_DRV_CLEANUP_LEVEL13;
2005 
2006 	/* Setup UAR page for kernel use */
2007 	status = hermon_internal_uarpg_init(state);
2008 	if (status != DDI_SUCCESS) {
2009 		cmn_err(CE_NOTE, "failed to setup internal UAR\n");
2010 		hermon_hw_fini(state, cleanup);
2011 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2012 		    "hw_init_internal_uarpg_alloc_fail");
2013 		/* This case is not the degraded one */
2014 		return (DDI_FAILURE);
2015 	}
2016 	cleanup = HERMON_DRV_CLEANUP_LEVEL14;
2017 
2018 	/* Query and initialize the Hermon interrupt/MSI information */
2019 	status = hermon_intr_or_msi_init(state);
2020 	if (status != DDI_SUCCESS) {
2021 		cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
2022 		hermon_hw_fini(state, cleanup);
2023 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2024 		    "hw_init_intr_or_msi_init_fail");
2025 		/* This case is not the degraded one */
2026 		return (DDI_FAILURE);
2027 	}
2028 	cleanup = HERMON_DRV_CLEANUP_LEVEL15;
2029 
2030 	status = hermon_isr_init(state);	/* set up the isr */
2031 	if (status != DDI_SUCCESS) {
2032 		cmn_err(CE_NOTE, "failed to init isr\n");
2033 		hermon_hw_fini(state, cleanup);
2034 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2035 		    "hw_init_isrinit_fail");
2036 		/* This case is not the degraded one */
2037 		return (DDI_FAILURE);
2038 	}
2039 	cleanup = HERMON_DRV_CLEANUP_LEVEL16;
2040 
2041 	/* Setup the event queues */
2042 	status = hermon_eq_init_all(state);
2043 	if (status != DDI_SUCCESS) {
2044 		cmn_err(CE_NOTE, "failed to init EQs\n");
2045 		hermon_hw_fini(state, cleanup);
2046 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2047 		    "hw_init_eqinitall_fail");
2048 		/* This case is not the degraded one */
2049 		return (DDI_FAILURE);
2050 	}
2051 	cleanup = HERMON_DRV_CLEANUP_LEVEL17;
2052 
2053 
2054 
2055 	/* Reserve contexts for QP0 and QP1 */
2056 	status = hermon_special_qp_contexts_reserve(state);
2057 	if (status != DDI_SUCCESS) {
2058 		cmn_err(CE_NOTE, "failed to init special QPs\n");
2059 		hermon_hw_fini(state, cleanup);
2060 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2061 		    "hw_init_rsrv_sqp_fail");
2062 		/* This case is not the degraded one */
2063 		return (DDI_FAILURE);
2064 	}
2065 	cleanup = HERMON_DRV_CLEANUP_LEVEL18;
2066 
2067 	/* Initialize for multicast group handling */
2068 	status = hermon_mcg_init(state);
2069 	if (status != DDI_SUCCESS) {
2070 		cmn_err(CE_NOTE, "failed to init multicast\n");
2071 		hermon_hw_fini(state, cleanup);
2072 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2073 		    "hw_init_mcg_init_fail");
2074 		/* This case is not the degraded one */
2075 		return (DDI_FAILURE);
2076 	}
2077 	cleanup = HERMON_DRV_CLEANUP_LEVEL19;
2078 
2079 	/* Initialize the Hermon IB port(s) */
2080 	status = hermon_hca_port_init(state);
2081 	if (status != DDI_SUCCESS) {
2082 		cmn_err(CE_NOTE, "failed to init HCA Port\n");
2083 		hermon_hw_fini(state, cleanup);
2084 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2085 		    "hw_init_hca_port_init_fail");
2086 		/* This case is not the degraded one */
2087 		return (DDI_FAILURE);
2088 	}
2089 
2090 	cleanup = HERMON_DRV_CLEANUP_ALL;
2091 
2092 	/* Determine NodeGUID and SystemImageGUID */
2093 	status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
2094 	    &nodeinfo);
2095 	if (status != HERMON_CMD_SUCCESS) {
2096 		cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
2097 		hermon_hw_fini(state, cleanup);
2098 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2099 		    "hw_init_getnodeinfo_cmd_fail");
2100 		/* This case is not the degraded one */
2101 		return (DDI_FAILURE);
2102 	}
2103 
2104 	/*
2105 	 * If the NodeGUID value was set in OBP properties, then we use that
2106 	 * value.  But we still print a message if the value we queried from
2107 	 * firmware does not match this value.
2108 	 *
2109 	 * Otherwise if OBP value is not set then we use the value from
2110 	 * firmware unconditionally.
2111 	 */
2112 	if (state->hs_cfg_profile->cp_nodeguid) {
2113 		state->hs_nodeguid   = state->hs_cfg_profile->cp_nodeguid;
2114 	} else {
2115 		state->hs_nodeguid = nodeinfo.NodeGUID;
2116 	}
2117 
2118 	if (state->hs_nodeguid != nodeinfo.NodeGUID) {
2119 		cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
2120 		    "does not match value set by device property");
2121 	}
2122 
2123 	/*
2124 	 * If the SystemImageGUID value was set in OBP properties, then we use
2125 	 * that value.  But we still print a message if the value we queried
2126 	 * from firmware does not match this value.
2127 	 *
2128 	 * Otherwise if OBP value is not set then we use the value from
2129 	 * firmware unconditionally.
2130 	 */
2131 	if (state->hs_cfg_profile->cp_sysimgguid) {
2132 		state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
2133 	} else {
2134 		state->hs_sysimgguid = nodeinfo.SystemImageGUID;
2135 	}
2136 
2137 	if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
2138 		cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
2139 		    "does not match value set by device property");
2140 	}
2141 
2142 	/* Get NodeDescription */
2143 	status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
2144 	    (sm_nodedesc_t *)&state->hs_nodedesc);
2145 	if (status != HERMON_CMD_SUCCESS) {
2146 		cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
2147 		hermon_hw_fini(state, cleanup);
2148 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2149 		    "hw_init_getnodedesc_cmd_fail");
2150 		/* This case is not the degraded one */
2151 		return (DDI_FAILURE);
2152 	}
2153 
2154 	return (DDI_SUCCESS);
2155 }
2156 
2157 
2158 /*
2159  * hermon_hw_fini()
2160  *    Context: Only called from attach() and/or detach() path contexts
 *
 *    Tears down the hardware-related state built up by hermon_hw_init().
 *    "cleanup" is the level that hermon_hw_init() had reached; the switch
 *    below is entered at that level and falls through every lower level,
 *    so the teardown runs in the reverse order of initialization.
 *
 *    Failures of the UNMAP_ICM, UNMAP_ICM_AUX and UNMAP_FA commands are
 *    only logged and the teardown continues.  A failure to tear down the
 *    EQs, to free the interrupt/MSI resources, or to free the internal PD
 *    makes this routine return immediately, leaving every lower level
 *    untouched.  An unrecognized level only produces a warning.
2161  */
2162 static void
2163 hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
2164 {
2165 	uint_t		num_ports;
2166 	int		i, status;
2167 
2168 
2169 	/*
2170 	 * JBDB - We might not want to run these returns in all cases of
2171 	 * Bad News. We should still attempt to free all of the DMA memory
2172 	 * resources...  This needs to be worked last, after all allocations
2173 	 * are implemented. For now, and possibly for later, this works.
2174 	 */
2175 
2176 	switch (cleanup) {
2177 	/*
2178 	 * If we add more driver initialization steps that should be cleaned
2179 	 * up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
2180 	 * first entry (i.e. corresponds to the last init step).
2181 	 */
2182 	case HERMON_DRV_CLEANUP_ALL:
2183 		/* Shutdown the Hermon IB port(s) */
2184 		num_ports = state->hs_cfg_profile->cp_num_ports;
2185 		(void) hermon_hca_ports_shutdown(state, num_ports);
2186 		/* FALLTHROUGH */
2187 
2188 	case HERMON_DRV_CLEANUP_LEVEL19:
2189 		/* Teardown resources used for multicast group handling */
2190 		hermon_mcg_fini(state);
2191 		/* FALLTHROUGH */
2192 
2193 	case HERMON_DRV_CLEANUP_LEVEL18:
2194 		/* Unreserve the special QP contexts */
2195 		hermon_special_qp_contexts_unreserve(state);
2196 		/* FALLTHROUGH */
2197 
2198 	case HERMON_DRV_CLEANUP_LEVEL17:
2199 		/*
2200 		 * Attempt to teardown all event queues (EQ).  If we fail
2201 		 * here then print a warning message and return.  Something
2202 		 * (either in HW or SW) has gone seriously wrong.
		 * Returning here skips all of the remaining (lower level)
		 * cleanup steps below.
2203 		 */
2204 		status = hermon_eq_fini_all(state);
2205 		if (status != DDI_SUCCESS) {
2206 			HERMON_WARNING(state, "failed to teardown EQs");
2207 			return;
2208 		}
2209 		/* FALLTHROUGH */
2210 	case HERMON_DRV_CLEANUP_LEVEL16:
2211 		/* Teardown Hermon interrupts */
2212 		hermon_isr_fini(state);
2213 		/* FALLTHROUGH */
2214 
2215 	case HERMON_DRV_CLEANUP_LEVEL15:
		/*
		 * Release the interrupt/MSI resources set up by
		 * hermon_intr_or_msi_init().  As with the EQs above, a
		 * failure here abandons the rest of the teardown.
		 */
2216 		status = hermon_intr_or_msi_fini(state);
2217 		if (status != DDI_SUCCESS) {
2218 			HERMON_WARNING(state, "failed to free intr/MSI");
2219 			return;
2220 		}
2221 		/* FALLTHROUGH */
2222 
2223 	case HERMON_DRV_CLEANUP_LEVEL14:
2224 		/* Free the resources for the Hermon internal UAR pages */
2225 		hermon_internal_uarpg_fini(state);
2226 		/* FALLTHROUGH */
2227 
2228 	case HERMON_DRV_CLEANUP_LEVEL13:
2229 		/*
2230 		 * Free the PD that was used internally by Hermon software.  If
2231 		 * we fail here then print a warning and return.  Something
2232 		 * (probably software-related, but perhaps HW) has gone wrong.
2233 		 */
2234 		status = hermon_pd_free(state, &state->hs_pdhdl_internal);
2235 		if (status != DDI_SUCCESS) {
2236 			HERMON_WARNING(state, "failed to free internal PD");
2237 			return;
2238 		}
2239 		/* FALLTHROUGH */
2240 
2241 	case HERMON_DRV_CLEANUP_LEVEL12:
2242 		/* Cleanup all the phase2 resources first */
2243 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
2244 		/* FALLTHROUGH */
2245 
2246 	case HERMON_DRV_CLEANUP_LEVEL11:
2247 		/* LEVEL11 is after INIT_HCA */
		/*
		 * Nothing is undone specifically for this level in this
		 * routine; it simply shares the LEVEL10 teardown below.
		 */
2248 		/* FALLTHROUGH */
2249 
2250 
2251 	case HERMON_DRV_CLEANUP_LEVEL10:
2252 		/*
2253 		 * Unmap the ICM memory area with UNMAP_ICM command.
		 * A failure is logged but does not stop the teardown.
		 *
		 * NOTE(review): the result is compared against DDI_SUCCESS
		 * here, while the UNMAP_ICM_AUX and UNMAP_FA results below
		 * are compared against HERMON_CMD_SUCCESS - confirm which
		 * constant is intended for this command.
2254 		 */
2255 		status = hermon_unmap_icm_cmd_post(state, NULL);
2256 		if (status != DDI_SUCCESS) {
2257 			cmn_err(CE_WARN,
2258 			    "hermon_hw_fini: failed to unmap ICM\n");
2259 		}
2260 
2261 		/* Free the initial ICM DMA handles */
2262 		hermon_icm_dma_fini(state);
2263 
2264 		/* Free the ICM table structures */
2265 		hermon_icm_tables_fini(state);
2266 
2267 		/* Free the ICM table handles */
		/*
		 * NOTE(review): hermon_hw_init() allocates hs_icm while the
		 * cleanup level is still LEVEL9, so a failure of
		 * hermon_icm_config_setup() enters this routine at LEVEL9
		 * and never reaches this kmem_free() - confirm that path
		 * does not leak hs_icm.
		 */
2268 		kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
2269 		    sizeof (hermon_icm_table_t));
2270 
2271 		/* FALLTHROUGH */
2272 
2273 	case HERMON_DRV_CLEANUP_LEVEL9:
2274 		/*
2275 		 * Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
		 * A failure is logged but does not stop the teardown.
2276 		 */
2277 		status = hermon_unmap_icm_aux_cmd_post(state);
2278 		if (status != HERMON_CMD_SUCCESS) {
2279 			cmn_err(CE_NOTE,
2280 			    "hermon_hw_fini: failed to unmap ICMA\n");
2281 		}
2282 		/* FALLTHROUGH */
2283 
2284 	case HERMON_DRV_CLEANUP_LEVEL8:
2285 		/*
2286 		 * Deallocate ICM Aux DMA memory.
		 *
		 * NOTE(review): hermon_hw_init() sets LEVEL8 and then
		 * immediately LEVEL9, so as far as that routine shows this
		 * case is only ever reached by falling through from above.
2287 		 */
2288 		hermon_dma_free(&state->hs_icma_dma);
2289 		/* FALLTHROUGH */
2290 
2291 	case HERMON_DRV_CLEANUP_LEVEL7:
		/*
		 * Release the UAR register mappings, each only if its handle
		 * is set, and clear the handles afterwards.  The BlueFlame
		 * mapping is additionally gated on hs_bf_offset, which
		 * hermon_hw_init() records only after that mapping succeeds.
		 */
2292 		if (state->hs_fm_uarhdl) {
2293 			hermon_regs_map_free(state, &state->hs_fm_uarhdl);
2294 			state->hs_fm_uarhdl = NULL;
2295 		}
2296 
2297 		if (state->hs_reg_uarhdl) {
2298 			ddi_regs_map_free(&state->hs_reg_uarhdl);
2299 			state->hs_reg_uarhdl = NULL;
2300 		}
2301 
2302 		if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
2303 			ddi_regs_map_free(&state->hs_reg_bfhdl);
2304 			state->hs_reg_bfhdl = NULL;
2305 		}
2306 
		/*
		 * Free whichever hs_pkey[]/hs_guid[] tables are still
		 * allocated.  The sizes come from hs_cfg_profile, which is
		 * not torn down until LEVEL3 below.
		 */
2307 		for (i = 0; i < HERMON_MAX_PORTS; i++) {
2308 			if (state->hs_pkey[i]) {
2309 				kmem_free(state->hs_pkey[i], (1 <<
2310 				    state->hs_cfg_profile->cp_log_max_pkeytbl) *
2311 				    sizeof (ib_pkey_t));
2312 				state->hs_pkey[i] = NULL;
2313 			}
2314 			if (state->hs_guid[i]) {
2315 				kmem_free(state->hs_guid[i], (1 <<
2316 				    state->hs_cfg_profile->cp_log_max_gidtbl) *
2317 				    sizeof (ib_guid_t));
2318 				state->hs_guid[i] = NULL;
2319 			}
2320 		}
2321 		/* FALLTHROUGH */
2322 
2323 	case HERMON_DRV_CLEANUP_LEVEL6:
2324 		/*
2325 		 * Unmap the firmware memory area with UNMAP_FA command.
		 * A failure is logged but does not stop the teardown.
2326 		 */
2327 		status = hermon_unmap_fa_cmd_post(state);
2328 
2329 		if (status != HERMON_CMD_SUCCESS) {
2330 			cmn_err(CE_NOTE,
2331 			    "hermon_hw_fini: failed to unmap FW\n");
2332 		}
2333 
2334 		/*
2335 		 * Deallocate firmware DMA memory.
2336 		 */
2337 		hermon_dma_free(&state->hs_fw_dma);
2338 		/* FALLTHROUGH */
2339 
2340 	case HERMON_DRV_CLEANUP_LEVEL5:
2341 		/* stop the poll thread */
2342 		if (state->hs_fm_poll_thread) {
2343 			ddi_periodic_delete(state->hs_fm_poll_thread);
2344 			state->hs_fm_poll_thread = NULL;
2345 		}
2346 		/* FALLTHROUGH */
2347 
2348 	case HERMON_DRV_CLEANUP_LEVEL4:
2349 		/* Then cleanup the phase1 resources */
2350 		hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
2351 		/* FALLTHROUGH */
2352 
2353 	case HERMON_DRV_CLEANUP_LEVEL3:
2354 		/* Teardown any resources allocated for the config profile */
2355 		hermon_cfg_profile_fini(state);
2356 		/* FALLTHROUGH */
2357 
2358 	case HERMON_DRV_CLEANUP_LEVEL2:
		/*
		 * Without HERMON_SUPPORTS_MSIX_BAR this case has no body of
		 * its own and just joins LEVEL1/LEVEL0 below.
		 */
2359 #ifdef HERMON_SUPPORTS_MSIX_BAR
2360 		/*
2361 		 * unmap 3rd BAR, MSIX BAR
2362 		 */
2363 		if (state->hs_reg_msihdl) {
2364 			ddi_regs_map_free(&state->hs_reg_msihdl);
2365 			state->hs_reg_msihdl = NULL;
2366 		}
2367 		/* FALLTHROUGH */
2368 #endif
2369 	case HERMON_DRV_CLEANUP_LEVEL1:
2370 	case HERMON_DRV_CLEANUP_LEVEL0:
2371 		/*
2372 		 * LEVEL1 and LEVEL0 resources are freed in
2373 		 * hermon_drv_fini2().
2374 		 */
2375 		break;
2376 
2377 	default:
		/* Unknown level: warn and free nothing */
2378 		HERMON_WARNING(state, "unexpected driver cleanup level");
2379 		return;
2380 	}
2381 }
2382 
2383 
2384 /*
2385  * hermon_soft_state_init()
2386  *    Context: Only called from attach() path context
2387  */
2388 static int
2389 hermon_soft_state_init(hermon_state_t *state)
2390 {
2391 	ibt_hca_attr_t		*hca_attr;
2392 	uint64_t		maxval, val;
2393 	ibt_hca_flags_t		caps = IBT_HCA_NO_FLAGS;
2394 	ibt_hca_flags2_t	caps2 = IBT_HCA2_NO_FLAGS;
2395 	int			status;
2396 	int			max_send_wqe_bytes;
2397 	int			max_recv_wqe_bytes;
2398 
2399 	/*
2400 	 * The ibc_hca_info_t struct is passed to the IBTF.  This is the
2401 	 * routine where we initialize it.  Many of the init values come from
2402 	 * either configuration variables or successful queries of the Hermon
2403 	 * hardware abilities
2404 	 */
2405 	state->hs_ibtfinfo.hca_ci_vers	= IBCI_V4;
2406 	state->hs_ibtfinfo.hca_handle	= (ibc_hca_hdl_t)state;
2407 	state->hs_ibtfinfo.hca_ops	= &hermon_ibc_ops;
2408 
2409 	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
2410 	state->hs_ibtfinfo.hca_attr = hca_attr;
2411 
2412 	hca_attr->hca_dip = state->hs_dip;
2413 	hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
2414 	hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
2415 	hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;
2416 
2417 	/* CQ interrupt moderation maximums - each limited to 16 bits */
2418 	hca_attr->hca_max_cq_mod_count = 0xFFFF;
2419 	hca_attr->hca_max_cq_mod_usec = 0xFFFF;
2420 	hca_attr->hca_max_cq_handlers = state->hs_intrmsi_allocd;
2421 
2422 
2423 	/*
2424 	 * Determine HCA capabilities:
2425 	 * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
2426 	 *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
2427 	 *    or IBT_HCA_SHUTDOWN_PORT
2428 	 * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_SQD_PORT, IBT_HCA_SI_GUID,
2429 	 *    IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
2430 	 *    IBT_HCA_RC_SRQ, IBT_HCA_UD_SRQ and IBT_HCA_FMR are always
2431 	 *    supported
2432 	 * All other features are conditionally supported, depending on the
2433 	 *    status returned by the Hermon HCA in QUERY_DEV_LIM.
2434 	 */
2435 	if (state->hs_devlim.ud_multi) {
2436 		caps |= IBT_HCA_UD_MULTICAST;
2437 	}
2438 	if (state->hs_devlim.atomic) {
2439 		caps |= IBT_HCA_ATOMICS_HCA;
2440 	}
2441 	if (state->hs_devlim.apm) {
2442 		caps |= IBT_HCA_AUTO_PATH_MIG;
2443 	}
2444 	if (state->hs_devlim.pkey_v) {
2445 		caps |= IBT_HCA_PKEY_CNTR;
2446 	}
2447 	if (state->hs_devlim.qkey_v) {
2448 		caps |= IBT_HCA_QKEY_CNTR;
2449 	}
2450 	if (state->hs_devlim.ipoib_cksm) {
2451 		caps |= IBT_HCA_CKSUM_FULL;
2452 		caps2 |= IBT_HCA2_IP_CLASS;
2453 	}
2454 	if (state->hs_devlim.mod_wr_srq) {
2455 		caps |= IBT_HCA_RESIZE_SRQ;
2456 	}
2457 	if (state->hs_devlim.lif) {
2458 		caps |= IBT_HCA_LOCAL_INVAL_FENCE;
2459 	}
2460 	if (state->hs_devlim.reserved_lkey) {
2461 		caps2 |= IBT_HCA2_RES_LKEY;
2462 		hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
2463 	}
2464 	if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
2465 	    state->hs_devlim.fast_reg_wr) {	/* fw needs to be >= 2.7.000 */
2466 		if ((state->hs_fw.fw_rev_major > 2) ||
2467 		    ((state->hs_fw.fw_rev_major == 2) &&
2468 		    (state->hs_fw.fw_rev_minor >= 7)))
2469 			caps2 |= IBT_HCA2_MEM_MGT_EXT;
2470 	}
2471 	if (state->hs_devlim.log_max_rss_tbl_sz) {
2472 		hca_attr->hca_rss_max_log2_table =
2473 		    state->hs_devlim.log_max_rss_tbl_sz;
2474 		if (state->hs_devlim.rss_xor)
2475 			caps2 |= IBT_HCA2_RSS_XOR_ALG;
2476 		if (state->hs_devlim.rss_toep)
2477 			caps2 |= IBT_HCA2_RSS_TPL_ALG;
2478 	}
2479 	if (state->hs_devlim.mps) {
2480 		caps |= IBT_HCA_ZERO_BASED_VA;
2481 	}
2482 	if (state->hs_devlim.zb) {
2483 		caps |= IBT_HCA_MULT_PAGE_SZ_MR;
2484 	}
2485 	caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
2486 	    IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
2487 	    IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
2488 	caps2 |= IBT_HCA2_DMA_MR;
2489 
2490 	if (state->hs_devlim.log_max_gso_sz) {
2491 		hca_attr->hca_max_lso_size =
2492 		    (1 << state->hs_devlim.log_max_gso_sz);
2493 		/* 64 = ctrl & datagram seg, 4 = LSO seg, 16 = 1 SGL */
2494 		hca_attr->hca_max_lso_hdr_size =
2495 		    state->hs_devlim.max_desc_sz_sq - (64 + 4 + 16);
2496 	}
2497 
2498 	caps |= IBT_HCA_WQE_SIZE_INFO;
2499 	max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
2500 	max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
2501 	hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
2502 	hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
2503 	hca_attr->hca_conn_rdma_sgl_overhead = 1;
2504 	hca_attr->hca_conn_rdma_write_sgl_sz = (max_send_wqe_bytes / 16) - 2;
2505 	hca_attr->hca_conn_rdma_read_sgl_sz = (512 / 16) - 2; /* see PRM */
2506 	hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;
2507 
2508 	/* We choose not to support "inline" unless it improves performance */
2509 	hca_attr->hca_max_inline_size = 0;
2510 	hca_attr->hca_ud_send_inline_sz = 0;
2511 	hca_attr->hca_conn_send_inline_sz = 0;
2512 	hca_attr->hca_conn_rdmaw_inline_overhead = 4;
2513 
2514 #if defined(_ELF64)
2515 	/* 32-bit kernels are too small for Fibre Channel over IB */
2516 	if (state->hs_devlim.fcoib && (caps2 & IBT_HCA2_MEM_MGT_EXT)) {
2517 		caps2 |= IBT_HCA2_FC;
2518 		hca_attr->hca_rfci_max_log2_qp = 7;	/* 128 per port */
2519 		hca_attr->hca_fexch_max_log2_qp = 16;	/* 64K per port */
2520 		hca_attr->hca_fexch_max_log2_mem = 20;	/* 1MB per MPT */
2521 	}
2522 #endif
2523 
2524 	hca_attr->hca_flags = caps;
2525 	hca_attr->hca_flags2 = caps2;
2526 
2527 	/*
2528 	 * Set hca_attr's IDs
2529 	 */
2530 	hca_attr->hca_vendor_id	 = state->hs_vendor_id;
2531 	hca_attr->hca_device_id	 = state->hs_device_id;
2532 	hca_attr->hca_version_id = state->hs_revision_id;
2533 
2534 	/*
2535 	 * Determine number of available QPs and max QP size.  Number of
2536 	 * available QPs is determined by subtracting the number of
2537 	 * "reserved QPs" (i.e. reserved for firmware use) from the
2538 	 * total number configured.
2539 	 */
2540 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2541 	hca_attr->hca_max_qp = val - ((uint64_t)1 <<
2542 	    state->hs_devlim.log_rsvd_qp);
2543 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
2544 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
2545 	if (val > maxval) {
2546 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2547 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2548 		    "soft_state_init_maxqpsz_toobig_fail");
2549 		return (DDI_FAILURE);
2550 	}
2551 	/* we need to reduce this by the max space needed for headroom */
2552 	hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
2553 	    HERMON_QP_WQE_LOG_MINIMUM) - 1;
2554 
2555 	/*
2556 	 * Determine max scatter-gather size in WQEs. The HCA has split
2557 	 * the max sgl into rec'v Q and send Q values. Use the least.
2558 	 *
2559 	 * This is mainly useful for legacy clients.  Smart clients
2560 	 * such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
2561 	 */
2562 	if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
2563 		maxval = state->hs_devlim.max_sg_rq;
2564 	} else {
2565 		maxval = state->hs_devlim.max_sg_sq;
2566 	}
2567 	val	= state->hs_cfg_profile->cp_wqe_max_sgl;
2568 	if (val > maxval) {
2569 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2570 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2571 		    "soft_state_init_toomanysgl_fail");
2572 		return (DDI_FAILURE);
2573 	}
2574 	/* If the rounded value for max SGL is too large, cap it */
2575 	if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
2576 		state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
2577 		val = maxval;
2578 	} else {
2579 		val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
2580 	}
2581 
2582 	hca_attr->hca_max_sgl	 = (uint_t)val;
2583 	hca_attr->hca_max_rd_sgl = 0;	/* zero because RD is unsupported */
2584 
2585 	/*
2586 	 * Determine number of available CQs and max CQ size. Number of
2587 	 * available CQs is determined by subtracting the number of
2588 	 * "reserved CQs" (i.e. reserved for firmware use) from the
2589 	 * total number configured.
2590 	 */
2591 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
2592 	hca_attr->hca_max_cq = val - ((uint64_t)1 <<
2593 	    state->hs_devlim.log_rsvd_cq);
2594 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
2595 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
2596 	if (val > maxval) {
2597 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2598 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2599 		    "soft_state_init_maxcqsz_toobig_fail");
2600 		return (DDI_FAILURE);
2601 	}
2602 	hca_attr->hca_max_cq_sz = (uint_t)val;
2603 
2604 	/*
2605 	 * Determine number of available SRQs and max SRQ size. Number of
2606 	 * available SRQs is determined by subtracting the number of
2607 	 * "reserved SRQs" (i.e. reserved for firmware use) from the
2608 	 * total number configured.
2609 	 */
2610 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
2611 	hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
2612 	    state->hs_devlim.log_rsvd_srq);
2613 	maxval  = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
2614 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);
2615 
2616 	if (val > maxval) {
2617 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2618 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2619 		    "soft_state_init_maxsrqsz_toobig_fail");
2620 		return (DDI_FAILURE);
2621 	}
2622 	hca_attr->hca_max_srqs_sz = (uint_t)val;
2623 
2624 	val	= hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
2625 	maxval	= state->hs_devlim.max_sg_rq - 1;
2626 	if (val > maxval) {
2627 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2628 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2629 		    "soft_state_init_toomanysrqsgl_fail");
2630 		return (DDI_FAILURE);
2631 	}
2632 	hca_attr->hca_max_srq_sgl = (uint_t)val;
2633 
2634 	/*
2635 	 * Determine supported HCA page sizes
2636 	 * XXX
2637 	 * For now we simply return the system pagesize as the only supported
2638 	 * pagesize
2639 	 */
2640 	hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
2641 	    IBT_PAGE_4K);
2642 
2643 	/*
2644 	 * Determine number of available MemReg, MemWin, and their max size.
2645 	 * Number of available MRs and MWs is determined by subtracting
2646 	 * the number of "reserved MPTs" (i.e. reserved for firmware use)
2647 	 * from the total number configured for each.
2648 	 */
2649 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
2650 	hca_attr->hca_max_memr	  = val - ((uint64_t)1 <<
2651 	    state->hs_devlim.log_rsvd_dmpt);
2652 	hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
2653 	    ((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
2654 	maxval	= state->hs_devlim.log_max_mrw_sz;
2655 	val	= state->hs_cfg_profile->cp_log_max_mrw_sz;
2656 	if (val > maxval) {
2657 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2658 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2659 		    "soft_state_init_maxmrwsz_toobig_fail");
2660 		return (DDI_FAILURE);
2661 	}
2662 	hca_attr->hca_max_memr_len = ((uint64_t)1 << val);
2663 
2664 	/* Determine RDMA/Atomic properties */
2665 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
2666 	hca_attr->hca_max_rsc = (uint_t)val;
2667 	val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
2668 	hca_attr->hca_max_rdma_in_qp  = (uint8_t)val;
2669 	val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
2670 	hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
2671 	hca_attr->hca_max_rdma_in_ee  = 0;
2672 	hca_attr->hca_max_rdma_out_ee = 0;
2673 
2674 	/*
2675 	 * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
2676 	 * because neither type of raw QP is supported
2677 	 */
2678 	hca_attr->hca_max_ipv6_qp  = 0;
2679 	hca_attr->hca_max_ether_qp = 0;
2680 
2681 	/* Determine max number of MCGs and max QP-per-MCG */
2682 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
2683 	hca_attr->hca_max_mcg_qps   = (uint_t)val;
2684 	val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
2685 	hca_attr->hca_max_mcg	    = (uint_t)val;
2686 	val = state->hs_cfg_profile->cp_num_qp_per_mcg;
2687 	hca_attr->hca_max_qp_per_mcg = (uint_t)val;
2688 
2689 	/* Determine max number partitions (i.e. PKeys) */
2690 	maxval	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2691 	    state->hs_queryport.log_max_pkey);
2692 	val	= ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
2693 	    state->hs_cfg_profile->cp_log_max_pkeytbl);
2694 
2695 	if (val > maxval) {
2696 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2697 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2698 		    "soft_state_init_toomanypkey_fail");
2699 		return (DDI_FAILURE);
2700 	}
2701 	hca_attr->hca_max_partitions = (uint16_t)val;
2702 
2703 	/* Determine number of ports */
2704 	maxval = state->hs_devlim.num_ports;
2705 	val = state->hs_cfg_profile->cp_num_ports;
2706 	if ((val > maxval) || (val == 0)) {
2707 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2708 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2709 		    "soft_state_init_toomanyports_fail");
2710 		return (DDI_FAILURE);
2711 	}
2712 	hca_attr->hca_nports = (uint8_t)val;
2713 
2714 	/* Copy NodeGUID and SystemImageGUID from softstate */
2715 	hca_attr->hca_node_guid = state->hs_nodeguid;
2716 	hca_attr->hca_si_guid	= state->hs_sysimgguid;
2717 
2718 	/*
2719 	 * Determine local ACK delay.  Use the value suggested by the Hermon
2720 	 * hardware (from the QUERY_DEV_CAP command)
2721 	 */
2722 	hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;
2723 
2724 	/* Determine max SGID table and PKey table sizes */
2725 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
2726 	hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
2727 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
2728 	hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;
2729 
2730 	/* Determine max number of PDs */
2731 	maxval	= ((uint64_t)1 << state->hs_devlim.log_max_pd);
2732 	val	= ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
2733 	if (val > maxval) {
2734 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2735 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2736 		    "soft_state_init_toomanypd_fail");
2737 		return (DDI_FAILURE);
2738 	}
2739 	hca_attr->hca_max_pd = (uint_t)val;
2740 
2741 	/* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
2742 	hca_attr->hca_max_ah = 0;
2743 
2744 	/* No RDDs or EECs (since Reliable Datagram is not supported) */
2745 	hca_attr->hca_max_rdd = 0;
2746 	hca_attr->hca_max_eec = 0;
2747 
2748 	/* Initialize lock for reserved UAR page access */
2749 	mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
2750 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2751 
2752 	/* Initialize the flash fields */
2753 	state->hs_fw_flashstarted = 0;
2754 	mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
2755 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2756 
2757 	/* Initialize the lock for the info ioctl */
2758 	mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
2759 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
2760 
2761 	/* Initialize the AVL tree for QP number support */
2762 	hermon_qpn_avl_init(state);
2763 
2764 	/* Initialize the cq_sched info structure */
2765 	status = hermon_cq_sched_init(state);
2766 	if (status != DDI_SUCCESS) {
2767 		hermon_qpn_avl_fini(state);
2768 		mutex_destroy(&state->hs_info_lock);
2769 		mutex_destroy(&state->hs_fw_flashlock);
2770 		mutex_destroy(&state->hs_uar_lock);
2771 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2772 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2773 		    "soft_state_init_cqsched_init_fail");
2774 		return (DDI_FAILURE);
2775 	}
2776 
2777 	/* Initialize the fcoib info structure */
2778 	status = hermon_fcoib_init(state);
2779 	if (status != DDI_SUCCESS) {
2780 		hermon_cq_sched_fini(state);
2781 		hermon_qpn_avl_fini(state);
2782 		mutex_destroy(&state->hs_info_lock);
2783 		mutex_destroy(&state->hs_fw_flashlock);
2784 		mutex_destroy(&state->hs_uar_lock);
2785 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2786 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2787 		    "soft_state_init_fcoibinit_fail");
2788 		return (DDI_FAILURE);
2789 	}
2790 
2791 	/* Initialize the kstat info structure */
2792 	status = hermon_kstat_init(state);
2793 	if (status != DDI_SUCCESS) {
2794 		hermon_fcoib_fini(state);
2795 		hermon_cq_sched_fini(state);
2796 		hermon_qpn_avl_fini(state);
2797 		mutex_destroy(&state->hs_info_lock);
2798 		mutex_destroy(&state->hs_fw_flashlock);
2799 		mutex_destroy(&state->hs_uar_lock);
2800 		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
2801 		HERMON_ATTACH_MSG(state->hs_attach_buf,
2802 		    "soft_state_init_kstatinit_fail");
2803 		return (DDI_FAILURE);
2804 	}
2805 
2806 	return (DDI_SUCCESS);
2807 }
2808 
2809 
2810 /*
2811  * hermon_soft_state_fini()
2812  *    Context: Called only from detach() path context
2813  */
2814 static void
2815 hermon_soft_state_fini(hermon_state_t *state)
2816 {
2817 
2818 	/* Teardown the kstat info */
2819 	hermon_kstat_fini(state);
2820 
2821 	/* Teardown the fcoib info */
2822 	hermon_fcoib_fini(state);
2823 
2824 	/* Teardown the cq_sched info */
2825 	hermon_cq_sched_fini(state);
2826 
2827 	/* Teardown the AVL tree for QP number support */
2828 	hermon_qpn_avl_fini(state);
2829 
2830 	/* Free up info ioctl mutex */
2831 	mutex_destroy(&state->hs_info_lock);
2832 
2833 	/* Free up flash mutex */
2834 	mutex_destroy(&state->hs_fw_flashlock);
2835 
2836 	/* Free up the UAR page access mutex */
2837 	mutex_destroy(&state->hs_uar_lock);
2838 
2839 	/* Free up the hca_attr struct */
2840 	kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
2841 
2842 }
2843 
2844 /*
2845  * hermon_icm_config_setup()
2846  *    Context: Only called from attach() path context
2847  */
2848 static int
2849 hermon_icm_config_setup(hermon_state_t *state,
2850     hermon_hw_initqueryhca_t *inithca)
2851 {
2852 	hermon_hw_querydevlim_t	*devlim;
2853 	hermon_cfg_profile_t	*cfg;
2854 	hermon_icm_table_t	*icm_p[HERMON_NUM_ICM_RESOURCES];
2855 	hermon_icm_table_t	*icm;
2856 	hermon_icm_table_t	*tmp;
2857 	uint64_t		icm_addr;
2858 	uint64_t		icm_size;
2859 	int			status, i, j;
2860 
2861 
2862 	/* Bring in local devlims, cfg_profile and hs_icm table list */
2863 	devlim = &state->hs_devlim;
2864 	cfg = state->hs_cfg_profile;
2865 	icm = state->hs_icm;
2866 
2867 	/*
2868 	 * Assign each ICM table's entry size from data in the devlims,
2869 	 * except for RDB and MCG sizes, which are not returned in devlims
2870 	 * but do have a fixed size, and the UAR context entry size, which
2871 	 * we determine. For this, we use the "cp_num_pgs_per_uce" value
2872 	 * from our hs_cfg_profile.
2873 	 */
2874 	icm[HERMON_CMPT].object_size	= devlim->cmpt_entry_sz;
2875 	icm[HERMON_CMPT_QPC].object_size	= devlim->cmpt_entry_sz;
2876 	icm[HERMON_CMPT_SRQC].object_size	= devlim->cmpt_entry_sz;
2877 	icm[HERMON_CMPT_CQC].object_size	= devlim->cmpt_entry_sz;
2878 	icm[HERMON_CMPT_EQC].object_size	= devlim->cmpt_entry_sz;
2879 	icm[HERMON_MTT].object_size	= devlim->mtt_entry_sz;
2880 	icm[HERMON_DMPT].object_size	= devlim->dmpt_entry_sz;
2881 	icm[HERMON_QPC].object_size	= devlim->qpc_entry_sz;
2882 	icm[HERMON_CQC].object_size	= devlim->cqc_entry_sz;
2883 	icm[HERMON_SRQC].object_size	= devlim->srq_entry_sz;
2884 	icm[HERMON_EQC].object_size	= devlim->eqc_entry_sz;
2885 	icm[HERMON_RDB].object_size	= devlim->rdmardc_entry_sz *
2886 	    cfg->cp_hca_max_rdma_in_qp;
2887 	icm[HERMON_MCG].object_size	= HERMON_MCGMEM_SZ(state);
2888 	icm[HERMON_ALTC].object_size	= devlim->altc_entry_sz;
2889 	icm[HERMON_AUXC].object_size	= devlim->aux_entry_sz;
2890 
2891 	/* Assign each ICM table's log2 number of entries */
2892 	icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
2893 	icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
2894 	icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
2895 	icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
2896 	icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
2897 	icm[HERMON_MTT].log_num_entries	= cfg->cp_log_num_mtt;
2898 	icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
2899 	icm[HERMON_QPC].log_num_entries	= cfg->cp_log_num_qp;
2900 	icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
2901 	icm[HERMON_CQC].log_num_entries	= cfg->cp_log_num_cq;
2902 	icm[HERMON_EQC].log_num_entries	= HERMON_NUM_EQ_SHIFT;
2903 	icm[HERMON_RDB].log_num_entries	= cfg->cp_log_num_qp;
2904 	icm[HERMON_MCG].log_num_entries	= cfg->cp_log_num_mcg;
2905 	icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
2906 	icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;
2907 
2908 	/* Initialize the ICM tables */
2909 	hermon_icm_tables_init(state);
2910 
2911 	/*
2912 	 * ICM tables must be aligned on their size in the ICM address
2913 	 * space. So, here we order the tables from largest total table
2914 	 * size to the smallest. All tables are a power of 2 in size, so
2915 	 * this will ensure that all tables are aligned on their own size
2916 	 * without wasting space in the ICM.
2917 	 *
2918 	 * In order to easily set the ICM addresses without needing to
2919 	 * worry about the ordering of our table indices as relates to
2920 	 * the hermon_rsrc_type_t enum, we will use a list of pointers
2921 	 * representing the tables for the sort, then assign ICM addresses
2922 	 * below using it.
2923 	 */
2924 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2925 		icm_p[i] = &icm[i];
2926 	}
2927 	for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
2928 		switch (i) {
2929 		case HERMON_CMPT_QPC:
2930 		case HERMON_CMPT_SRQC:
2931 		case HERMON_CMPT_CQC:
2932 		case HERMON_CMPT_EQC:
2933 			continue;
2934 		}
2935 		for (j = 1; j < i; j++) {
2936 			if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
2937 				tmp		= icm_p[j];
2938 				icm_p[j]	= icm_p[j - 1];
2939 				icm_p[j - 1]	= tmp;
2940 			}
2941 		}
2942 	}
2943 
2944 	/* Initialize the ICM address and ICM size */
2945 	icm_addr = icm_size = 0;
2946 
2947 	/*
2948 	 * Set the ICM base address of each table, using our sorted
2949 	 * list of pointers from above.
2950 	 */
2951 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
2952 		j = icm_p[i]->icm_type;
2953 		switch (j) {
2954 		case HERMON_CMPT_QPC:
2955 		case HERMON_CMPT_SRQC:
2956 		case HERMON_CMPT_CQC:
2957 		case HERMON_CMPT_EQC:
2958 			continue;
2959 		}
2960 		if (icm[j].table_size) {
2961 			/*
2962 			 * Set the ICM base address in the table, save the
2963 			 * ICM offset in the rsrc pool and increment the
2964 			 * total ICM allocation.
2965 			 */
2966 			icm[j].icm_baseaddr = icm_addr;
2967 			if (hermon_verbose) {
2968 				IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
2969 				    " size %llx", j, icm[j].icm_baseaddr,
2970 				    icm[j].table_size);
2971 			}
2972 			icm_size += icm[j].table_size;
2973 		}
2974 
2975 		/* Verify that we don't exceed maximum ICM size */
2976 		if (icm_size > devlim->max_icm_size) {
2977 			/* free the ICM table memory resources */
2978 			hermon_icm_tables_fini(state);
2979 			cmn_err(CE_WARN, "ICM configuration exceeds maximum "
2980 			    "configuration: max (0x%lx) requested (0x%lx)\n",
2981 			    (ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
2982 			HERMON_ATTACH_MSG(state->hs_attach_buf,
2983 			    "icm_config_toobig_fail");
2984 			return (DDI_FAILURE);
2985 		}
2986 
2987 		/* assign address to the 4 pieces of the CMPT */
2988 		if (j == HERMON_CMPT) {
2989 			uint64_t cmpt_size = icm[j].table_size >> 2;
2990 #define	init_cmpt_icm_baseaddr(rsrc, indx)				\
2991 	icm[rsrc].icm_baseaddr	= icm_addr + (indx * cmpt_size);
2992 			init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
2993 			init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
2994 			init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
2995 			init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
2996 		}
2997 
2998 		/* Increment the ICM address for the next table */
2999 		icm_addr += icm[j].table_size;
3000 	}
3001 
3002 	/* Populate the structure for the INIT_HCA command */
3003 	hermon_inithca_set(state, inithca);
3004 
3005 	/*
3006 	 * Prior to invoking INIT_HCA, we must have ICM memory in place
3007 	 * for the reserved objects in each table. We will allocate and map
3008 	 * this initial ICM memory here. Note that given the assignment
3009 	 * of span_size above, tables that are smaller or equal in total
3010 	 * size to the default span_size will be mapped in full.
3011 	 */
3012 	status = hermon_icm_dma_init(state);
3013 	if (status != DDI_SUCCESS) {
3014 		/* free the ICM table memory resources */
3015 		hermon_icm_tables_fini(state);
3016 		HERMON_WARNING(state, "Failed to allocate initial ICM");
3017 		HERMON_ATTACH_MSG(state->hs_attach_buf,
3018 		    "icm_config_dma_init_fail");
3019 		return (DDI_FAILURE);
3020 	}
3021 
3022 	return (DDI_SUCCESS);
3023 }
3024 
3025 /*
3026  * hermon_inithca_set()
3027  *    Context: Only called from attach() path context
3028  */
3029 static void
3030 hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
3031 {
3032 	hermon_cfg_profile_t	*cfg;
3033 	hermon_icm_table_t	*icm;
3034 	int			i;
3035 
3036 
3037 	/* Populate the INIT_HCA structure */
3038 	icm = state->hs_icm;
3039 	cfg = state->hs_cfg_profile;
3040 
3041 	/* set version */
3042 	inithca->version = 0x02;	/* PRM 0.36 */
3043 	/* set cacheline - log2 in 16-byte chunks */
3044 	inithca->log2_cacheline = 0x2;	/* optimized for 64 byte cache */
3045 
3046 	/* we need to update the inithca info with thie UAR info too */
3047 	inithca->uar.log_max_uars = highbit(cfg->cp_log_num_uar);
3048 	inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;
3049 
3050 	/* Set endianess */
3051 #ifdef	_LITTLE_ENDIAN
3052 	inithca->big_endian	= 0;
3053 #else
3054 	inithca->big_endian	= 1;
3055 #endif
3056 
3057 	/* Port Checking is on by default */
3058 	inithca->udav_port_chk	= HERMON_UDAV_PORTCHK_ENABLED;
3059 
3060 	/* Enable IPoIB checksum */
3061 	if (state->hs_devlim.ipoib_cksm)
3062 		inithca->chsum_en = 1;
3063 
3064 	/* Set each ICM table's attributes */
3065 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3066 		switch (icm[i].icm_type) {
3067 		case HERMON_CMPT:
3068 			inithca->tpt.cmpt_baseaddr = icm[i].icm_baseaddr;
3069 			break;
3070 
3071 		case HERMON_MTT:
3072 			inithca->tpt.mtt_baseaddr = icm[i].icm_baseaddr;
3073 			break;
3074 
3075 		case HERMON_DMPT:
3076 			inithca->tpt.dmpt_baseaddr = icm[i].icm_baseaddr;
3077 			inithca->tpt.log_dmpt_sz   = icm[i].log_num_entries;
3078 			inithca->tpt.pgfault_rnr_to = 0; /* just in case */
3079 			break;
3080 
3081 		case HERMON_QPC:
3082 			inithca->context.log_num_qp = icm[i].log_num_entries;
3083 			inithca->context.qpc_baseaddr_h =
3084 			    icm[i].icm_baseaddr >> 32;
3085 			inithca->context.qpc_baseaddr_l =
3086 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
3087 			break;
3088 
3089 		case HERMON_CQC:
3090 			inithca->context.log_num_cq = icm[i].log_num_entries;
3091 			inithca->context.cqc_baseaddr_h =
3092 			    icm[i].icm_baseaddr >> 32;
3093 			inithca->context.cqc_baseaddr_l =
3094 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
3095 			break;
3096 
3097 		case HERMON_SRQC:
3098 			inithca->context.log_num_srq = icm[i].log_num_entries;
3099 			inithca->context.srqc_baseaddr_h =
3100 			    icm[i].icm_baseaddr >> 32;
3101 			inithca->context.srqc_baseaddr_l =
3102 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
3103 			break;
3104 
3105 		case HERMON_EQC:
3106 			inithca->context.log_num_eq = icm[i].log_num_entries;
3107 			inithca->context.eqc_baseaddr_h =
3108 			    icm[i].icm_baseaddr >> 32;
3109 			inithca->context.eqc_baseaddr_l =
3110 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
3111 			break;
3112 
3113 		case HERMON_RDB:
3114 			inithca->context.rdmardc_baseaddr_h =
3115 			    icm[i].icm_baseaddr >> 32;
3116 			inithca->context.rdmardc_baseaddr_l =
3117 			    (icm[i].icm_baseaddr & 0xFFFFFFFF) >> 5;
3118 			inithca->context.log_num_rdmardc =
3119 			    cfg->cp_log_num_rdb - cfg->cp_log_num_qp;
3120 			break;
3121 
3122 		case HERMON_MCG:
3123 			inithca->multi.mc_baseaddr    = icm[i].icm_baseaddr;
3124 			inithca->multi.log_mc_tbl_sz  = icm[i].log_num_entries;
3125 			inithca->multi.log_mc_tbl_ent =
3126 			    highbit(HERMON_MCGMEM_SZ(state)) - 1;
3127 			inithca->multi.log_mc_tbl_hash_sz =
3128 			    cfg->cp_log_num_mcg_hash;
3129 			inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
3130 			break;
3131 
3132 		case HERMON_ALTC:
3133 			inithca->context.altc_baseaddr = icm[i].icm_baseaddr;
3134 			break;
3135 
3136 		case HERMON_AUXC:
3137 			inithca->context.auxc_baseaddr = icm[i].icm_baseaddr;
3138 			break;
3139 
3140 		default:
3141 			break;
3142 
3143 		}
3144 	}
3145 
3146 }
3147 
3148 /*
3149  * hermon_icm_tables_init()
3150  *    Context: Only called from attach() path context
3151  *
3152  * Dynamic ICM breaks the various ICM tables into "span_size" chunks
3153  * to enable allocation of backing memory on demand.  Arbel used a
3154  * fixed size ARBEL_ICM_SPAN_SIZE (initially was 512KB) as the
3155  * span_size for all ICM chunks.  Hermon has other considerations,
3156  * so the span_size used differs from Arbel.
3157  *
3158  * The basic considerations for why Hermon differs are:
3159  *
3160  *	1) ICM memory is in units of HERMON pages.
3161  *
3162  *	2) The AUXC table is approximately 1 byte per QP.
3163  *
3164  *	3) ICM memory for AUXC, ALTC, and RDB is allocated when
3165  *	the ICM memory for the corresponding QPC is allocated.
3166  *
3167  *	4) ICM memory for the CMPT corresponding to the various primary
3168  *	resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
3169  *	memory for the primary resource is allocated.
3170  *
3171  * One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
3172  * So, the minimum chunk for the various QPC related ICM memory should
3173  * all be allocated to support the 4K QPs.  Currently, this means the
3174  * amount of memory for the various QP chunks is:
3175  *
3176  *	QPC	256*4K bytes
3177  *	RDB	128*4K bytes
3178  *	CMPT	 64*4K bytes
3179  *	ALTC	 64*4K bytes
3180  *	AUXC	  1*4K bytes
3181  *
3182  * The span_size chosen for the QP resource is 4KB of AUXC entries,
3183  * or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
3184  *
3185  * Other ICM resources can have their span_size be more arbitrary.
3186  * This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
3187  */
3188 
/*
 * init_dependent(rsrc, dep)
 *    Copy the span geometry (span, num_spans, split_shift, span_mask,
 *    span_shift and rsrc_mask) of ICM table "rsrc" into the dependent ICM
 *    table "dep", and trace the result when hermon_verbose is set.
 *
 *    Expects a table array named "icm" to be in scope where it is used.
 *    The body is wrapped in do { } while (0) and the arguments are
 *    parenthesized so that the macro acts as one statement (safe under an
 *    unbraced if/else) and accepts arbitrary index expressions.
 */
#define	init_dependent(rsrc, dep)					\
	do {								\
		icm[(dep)].span		= icm[(rsrc)].span;		\
		icm[(dep)].num_spans	= icm[(rsrc)].num_spans;	\
		icm[(dep)].split_shift	= icm[(rsrc)].split_shift;	\
		icm[(dep)].span_mask	= icm[(rsrc)].span_mask;	\
		icm[(dep)].span_shift	= icm[(rsrc)].span_shift;	\
		icm[(dep)].rsrc_mask	= icm[(rsrc)].rsrc_mask;	\
		if (hermon_verbose) {					\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "rsrc (0x%x) size (0x%lx) span (0x%x) "	\
			    "num_spans (0x%x)", (dep),			\
			    icm[(dep)].table_size,			\
			    icm[(dep)].span, icm[(dep)].num_spans);	\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "span_shift (0x%x) split_shift (0x%x)",	\
			    icm[(dep)].span_shift,			\
			    icm[(dep)].split_shift);			\
			IBTF_DPRINTF_L2("hermon", "tables_init: "	\
			    "span_mask (0x%x)  rsrc_mask   (0x%x)",	\
			    icm[(dep)].span_mask,			\
			    icm[(dep)].rsrc_mask);			\
		}							\
	} while (0)
3209 
3210 static void
3211 hermon_icm_tables_init(hermon_state_t *state)
3212 {
3213 	hermon_icm_table_t	*icm;
3214 	int			i, k;
3215 	uint32_t		per_split;
3216 
3217 
3218 	icm = state->hs_icm;
3219 
3220 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3221 		icm[i].icm_type		= i;
3222 		icm[i].num_entries	= 1 << icm[i].log_num_entries;
3223 		icm[i].log_object_size	= highbit(icm[i].object_size) - 1;
3224 		icm[i].table_size	= icm[i].num_entries <<
3225 		    icm[i].log_object_size;
3226 
3227 		/* deal with "dependent" resource types */
3228 		switch (i) {
3229 		case HERMON_AUXC:
3230 #ifdef HERMON_FW_WORKAROUND
3231 			icm[i].table_size = 0x80000000ull;
3232 #endif
3233 			/* FALLTHROUGH */
3234 		case HERMON_CMPT_QPC:
3235 		case HERMON_RDB:
3236 		case HERMON_ALTC:
3237 			init_dependent(HERMON_QPC, i);
3238 			continue;
3239 		case HERMON_CMPT_SRQC:
3240 			init_dependent(HERMON_SRQC, i);
3241 			continue;
3242 		case HERMON_CMPT_CQC:
3243 			init_dependent(HERMON_CQC, i);
3244 			continue;
3245 		case HERMON_CMPT_EQC:
3246 			init_dependent(HERMON_EQC, i);
3247 			continue;
3248 		}
3249 
3250 		icm[i].span = HERMON_ICM_SPAN;	/* default #rsrc's in 1 span */
3251 		if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
3252 			icm[i].span = HERMON_ICM_SPAN * 16;
3253 		icm[i].num_spans = icm[i].num_entries / icm[i].span;
3254 		if (icm[i].num_spans == 0) {
3255 			icm[i].span = icm[i].num_entries;
3256 			per_split = 1;
3257 			icm[i].num_spans = icm[i].num_entries / icm[i].span;
3258 		} else {
3259 			per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
3260 			if (per_split == 0) {
3261 				per_split = 1;
3262 			}
3263 		}
3264 		if (hermon_verbose)
3265 			IBTF_DPRINTF_L2("ICM", "rsrc %x  span %x  num_spans %x",
3266 			    i, icm[i].span, icm[i].num_spans);
3267 
3268 		/*
3269 		 * Ensure a minimum table size of an ICM page, and a
3270 		 * maximum span size of the ICM table size.  This ensures
3271 		 * that we don't have less than an ICM page to map, which is
3272 		 * impossible, and that we will map an entire table at
3273 		 * once if it's total size is less than the span size.
3274 		 */
3275 		icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);
3276 
3277 		icm[i].span_shift = 0;
3278 		for (k = icm[i].span; k != 1; k >>= 1)
3279 			icm[i].span_shift++;
3280 		icm[i].split_shift = icm[i].span_shift;
3281 		for (k = per_split; k != 1; k >>= 1)
3282 			icm[i].split_shift++;
3283 		icm[i].span_mask = (1 << icm[i].split_shift) -
3284 		    (1 << icm[i].span_shift);
3285 		icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;
3286 
3287 
3288 		/* Initialize the table lock */
3289 		mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
3290 		    DDI_INTR_PRI(state->hs_intrmsi_pri));
3291 		cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);
3292 
3293 		if (hermon_verbose) {
3294 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3295 			    "rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
3296 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3297 			    "span (0x%x) num_spans (0x%x)",
3298 			    icm[i].span, icm[i].num_spans);
3299 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3300 			    "span_shift (0x%x) split_shift (0x%x)",
3301 			    icm[i].span_shift, icm[i].split_shift);
3302 			IBTF_DPRINTF_L2("hermon", "tables_init: "
3303 			    "span_mask (0x%x)  rsrc_mask   (0x%x)",
3304 			    icm[i].span_mask, icm[i].rsrc_mask);
3305 		}
3306 	}
3307 
3308 }
3309 
3310 /*
3311  * hermon_icm_tables_fini()
3312  *    Context: Only called from attach() path context
3313  *
3314  * Clean up all icm_tables.  Free the bitmap and dma_info arrays.
3315  */
3316 static void
3317 hermon_icm_tables_fini(hermon_state_t *state)
3318 {
3319 	hermon_icm_table_t	*icm;
3320 	int			nspans;
3321 	int			i, j;
3322 
3323 
3324 	icm = state->hs_icm;
3325 
3326 	for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
3327 
3328 		mutex_enter(&icm[i].icm_table_lock);
3329 		nspans = icm[i].num_spans;
3330 
3331 		for (j = 0; j < HERMON_ICM_SPLIT; j++) {
3332 			if (icm[i].icm_dma[j])
3333 				/* Free the ICM DMA slots */
3334 				kmem_free(icm[i].icm_dma[j],
3335 				    nspans * sizeof (hermon_dma_info_t));
3336 
3337 			if (icm[i].icm_bitmap[j])
3338 				/* Free the table bitmap */
3339 				kmem_free(icm[i].icm_bitmap[j],
3340 				    (nspans + 7) / 8);
3341 		}
3342 		/* Destroy the table lock */
3343 		cv_destroy(&icm[i].icm_table_cv);
3344 		mutex_exit(&icm[i].icm_table_lock);
3345 		mutex_destroy(&icm[i].icm_table_lock);
3346 	}
3347 
3348 }
3349 
3350 /*
3351  * hermon_icm_dma_init()
3352  *    Context: Only called from attach() path context
3353  */
3354 static int
3355 hermon_icm_dma_init(hermon_state_t *state)
3356 {
3357 	hermon_icm_table_t	*icm;
3358 	hermon_rsrc_type_t	type;
3359 	int			status;
3360 
3361 
3362 	/*
3363 	 * This routine will allocate initial ICM DMA resources for ICM
3364 	 * tables that have reserved ICM objects. This is the only routine
3365 	 * where we should have to allocate ICM outside of hermon_rsrc_alloc().
3366 	 * We need to allocate ICM here explicitly, rather than in
3367 	 * hermon_rsrc_alloc(), because we've not yet completed the resource
3368 	 * pool initialization. When the resource pools are initialized
3369 	 * (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
3370 	 * information), resource preallocations will be invoked to match
3371 	 * the ICM allocations seen here. We will then be able to use the
3372 	 * normal allocation path.  Note we don't need to set a refcnt on
3373 	 * these initial allocations because that will be done in the calls
3374 	 * to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
3375 	 * "prealloc" objects (see hermon_rsrc.c for more information).
3376 	 */
3377 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3378 
3379 		/* ICM for these is allocated within hermon_icm_alloc() */
3380 		switch (type) {
3381 		case HERMON_CMPT:
3382 		case HERMON_CMPT_QPC:
3383 		case HERMON_CMPT_SRQC:
3384 		case HERMON_CMPT_CQC:
3385 		case HERMON_CMPT_EQC:
3386 		case HERMON_AUXC:
3387 		case HERMON_ALTC:
3388 		case HERMON_RDB:
3389 			continue;
3390 		}
3391 
3392 		icm = &state->hs_icm[type];
3393 
3394 		mutex_enter(&icm->icm_table_lock);
3395 		status = hermon_icm_alloc(state, type, 0, 0);
3396 		mutex_exit(&icm->icm_table_lock);
3397 		if (status != DDI_SUCCESS) {
3398 			while (type--) {
3399 				icm = &state->hs_icm[type];
3400 				mutex_enter(&icm->icm_table_lock);
3401 				hermon_icm_free(state, type, 0, 0);
3402 				mutex_exit(&icm->icm_table_lock);
3403 			}
3404 			return (DDI_FAILURE);
3405 		}
3406 
3407 		if (hermon_verbose) {
3408 			IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
3409 			    "table (0x%x) index (0x%x) allocated", type, 0);
3410 		}
3411 	}
3412 
3413 	return (DDI_SUCCESS);
3414 }
3415 
3416 /*
3417  * hermon_icm_dma_fini()
3418  *    Context: Only called from attach() path context
3419  *
3420  * ICM has been completely unmapped.  We just free the memory here.
3421  */
3422 static void
3423 hermon_icm_dma_fini(hermon_state_t *state)
3424 {
3425 	hermon_icm_table_t	*icm;
3426 	hermon_dma_info_t	*dma_info;
3427 	hermon_rsrc_type_t	type;
3428 	int			index1, index2;
3429 
3430 
3431 	for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {
3432 		icm = &state->hs_icm[type];
3433 		for (index1 = 0; index1 < HERMON_ICM_SPLIT; index1++) {
3434 			dma_info = icm->icm_dma[index1];
3435 			if (dma_info == NULL)
3436 				continue;
3437 			for (index2 = 0; index2 < icm->num_spans; index2++) {
3438 				if (dma_info[index2].dma_hdl)
3439 					hermon_dma_free(&dma_info[index2]);
3440 				dma_info[index2].dma_hdl = NULL;
3441 			}
3442 		}
3443 	}
3444 
3445 }
3446 
3447 /*
3448  * hermon_hca_port_init()
3449  *    Context: Only called from attach() path context
3450  */
3451 static int
3452 hermon_hca_port_init(hermon_state_t *state)
3453 {
3454 	hermon_hw_set_port_t	*portinits, *initport;
3455 	hermon_cfg_profile_t	*cfgprof;
3456 	uint_t			num_ports;
3457 	int			i = 0, status;
3458 	uint64_t		maxval, val;
3459 	uint64_t		sysimgguid, nodeguid, portguid;
3460 
3461 
3462 	cfgprof = state->hs_cfg_profile;
3463 
3464 	/* Get number of HCA ports */
3465 	num_ports = cfgprof->cp_num_ports;
3466 
3467 	/* Allocate space for Hermon set port  struct(s) */
3468 	portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
3469 	    sizeof (hermon_hw_set_port_t), KM_SLEEP);
3470 
3471 
3472 
3473 	/* Post commands to initialize each Hermon HCA port */
3474 	/*
3475 	 * In Hermon, the process is different than in previous HCAs.
3476 	 * Here, you have to:
3477 	 *	QUERY_PORT - to get basic information from the HCA
3478 	 *	set the fields accordingly
3479 	 *	SET_PORT - to change/set everything as desired
3480 	 *	INIT_PORT - to bring the port up
3481 	 *
3482 	 * Needs to be done for each port in turn
3483 	 */
3484 
3485 	for (i = 0; i < num_ports; i++) {
3486 		bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
3487 		status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
3488 		    (i + 1), &state->hs_queryport,
3489 		    sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
3490 		if (status != HERMON_CMD_SUCCESS) {
3491 			cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
3492 			    "command failed: %08x\n", i + 1, status);
3493 			goto init_ports_fail;
3494 		}
3495 		initport = &portinits[i];
3496 		state->hs_initport = &portinits[i];
3497 
3498 		bzero(initport, sizeof (hermon_hw_query_port_t));
3499 
3500 		/*
3501 		 * Determine whether we need to override the firmware's
3502 		 * default SystemImageGUID setting.
3503 		 */
3504 		sysimgguid = cfgprof->cp_sysimgguid;
3505 		if (sysimgguid != 0) {
3506 			initport->sig		= 1;
3507 			initport->sys_img_guid	= sysimgguid;
3508 		}
3509 
3510 		/*
3511 		 * Determine whether we need to override the firmware's
3512 		 * default NodeGUID setting.
3513 		 */
3514 		nodeguid = cfgprof->cp_nodeguid;
3515 		if (nodeguid != 0) {
3516 			initport->ng		= 1;
3517 			initport->node_guid	= nodeguid;
3518 		}
3519 
3520 		/*
3521 		 * Determine whether we need to override the firmware's
3522 		 * default PortGUID setting.
3523 		 */
3524 		portguid = cfgprof->cp_portguid[i];
3525 		if (portguid != 0) {
3526 			initport->g0		= 1;
3527 			initport->guid0		= portguid;
3528 		}
3529 
3530 		/* Validate max MTU size */
3531 		maxval  = state->hs_queryport.ib_mtu;
3532 		val	= cfgprof->cp_max_mtu;
3533 		if (val > maxval) {
3534 			goto init_ports_fail;
3535 		}
3536 
3537 		/* Set mtu_cap to 4096 bytes */
3538 		initport->mmc = 1;	/* set the change bit */
3539 		initport->mtu_cap = 5;	/* for 4096 bytes */
3540 
3541 		/* Validate the max port width */
3542 		maxval  = state->hs_queryport.ib_port_wid;
3543 		val	= cfgprof->cp_max_port_width;
3544 		if (val > maxval) {
3545 			goto init_ports_fail;
3546 		}
3547 
3548 		/* Validate max VL cap size */
3549 		maxval  = state->hs_queryport.max_vl;
3550 		val	= cfgprof->cp_max_vlcap;
3551 		if (val > maxval) {
3552 			goto init_ports_fail;
3553 		}
3554 
3555 		/* Since we're doing mtu_cap, cut vl_cap down */
3556 		initport->mvc = 1;	/* set this change bit */
3557 		initport->vl_cap = 3;	/* 3 means vl0-vl3, 4 total */
3558 
3559 		/* Validate max GID table size */
3560 		maxval  = ((uint64_t)1 << state->hs_queryport.log_max_gid);
3561 		val	= ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
3562 		if (val > maxval) {
3563 			goto init_ports_fail;
3564 		}
3565 		initport->max_gid = (uint16_t)val;
3566 		initport->mg = 1;
3567 
3568 		/* Validate max PKey table size */
3569 		maxval	= ((uint64_t)1 << state->hs_queryport.log_max_pkey);
3570 		val	= ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
3571 		if (val > maxval) {
3572 			goto init_ports_fail;
3573 		}
3574 		initport->max_pkey = (uint16_t)val;
3575 		initport->mp = 1;
3576 		/*
3577 		 * Post the SET_PORT cmd to Hermon firmware. This sets
3578 		 * the parameters of the port.
3579 		 */
3580 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3581 		    HERMON_CMD_NOSLEEP_SPIN);
3582 		if (status != HERMON_CMD_SUCCESS) {
3583 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3584 			    "failed: %08x\n", i + 1, status);
3585 			goto init_ports_fail;
3586 		}
3587 		/* issue another SET_PORT cmd - performance fix/workaround */
3588 		/* XXX - need to discuss with Mellanox */
3589 		bzero(initport, sizeof (hermon_hw_query_port_t));
3590 		initport->cap_mask = 0x02500868;
3591 		status = hermon_set_port_cmd_post(state, initport, i + 1,
3592 		    HERMON_CMD_NOSLEEP_SPIN);
3593 		if (status != HERMON_CMD_SUCCESS) {
3594 			cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
3595 			    "failed: %08x\n", i + 1, status);
3596 			goto init_ports_fail;
3597 		}
3598 	}
3599 
3600 	/*
3601 	 * Finally, do the INIT_PORT for each port in turn
3602 	 * When this command completes, the corresponding Hermon port
3603 	 * will be physically "Up" and initialized.
3604 	 */
3605 	for (i = 0; i < num_ports; i++) {
3606 		status = hermon_init_port_cmd_post(state, i + 1,
3607 		    HERMON_CMD_NOSLEEP_SPIN);
3608 		if (status != HERMON_CMD_SUCCESS) {
3609 			cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
3610 			    "comman failed: %08x\n", i + 1, status);
3611 			goto init_ports_fail;
3612 		}
3613 	}
3614 
3615 	/* Free up the memory for Hermon port init struct(s), return success */
3616 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3617 	return (DDI_SUCCESS);
3618 
3619 init_ports_fail:
3620 	/*
3621 	 * Free up the memory for Hermon port init struct(s), shutdown any
3622 	 * successfully initialized ports, and return failure
3623 	 */
3624 	kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
3625 	(void) hermon_hca_ports_shutdown(state, i);
3626 
3627 	return (DDI_FAILURE);
3628 }
3629 
3630 
3631 /*
3632  * hermon_hca_ports_shutdown()
3633  *    Context: Only called from attach() and/or detach() path contexts
3634  */
3635 static int
3636 hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
3637 {
3638 	int	i, status;
3639 
3640 	/*
3641 	 * Post commands to shutdown all init'd Hermon HCA ports.  Note: if
3642 	 * any of these commands fail for any reason, it would be entirely
3643 	 * unexpected and probably indicative a serious problem (HW or SW).
3644 	 * Although we do return void from this function, this type of failure
3645 	 * should not go unreported.  That is why we have the warning message.
3646 	 */
3647 	for (i = 0; i < num_init; i++) {
3648 		status = hermon_close_port_cmd_post(state, i + 1,
3649 		    HERMON_CMD_NOSLEEP_SPIN);
3650 		if (status != HERMON_CMD_SUCCESS) {
3651 			HERMON_WARNING(state, "failed to shutdown HCA port");
3652 			return (status);
3653 		}
3654 	}
3655 	return (HERMON_CMD_SUCCESS);
3656 }
3657 
3658 
3659 /*
3660  * hermon_internal_uarpg_init
3661  *    Context: Only called from attach() path context
3662  */
3663 static int
3664 hermon_internal_uarpg_init(hermon_state_t *state)
3665 {
3666 	int	status;
3667 	hermon_dbr_info_t 	*info;
3668 
3669 	/*
3670 	 * Allocate the UAR page for kernel use. This UAR page is
3671 	 * the privileged UAR page through which all kernel generated
3672 	 * doorbells will be rung. There are a number of UAR pages
3673 	 * reserved by hardware at the front of the UAR BAR, indicated
3674 	 * by DEVCAP.num_rsvd_uar, which we have already allocated. So,
3675 	 * the kernel page, or UAR page index num_rsvd_uar, will be
3676 	 * allocated here for kernel use.
3677 	 */
3678 
3679 	status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
3680 	    &state->hs_uarkpg_rsrc);
3681 	if (status != DDI_SUCCESS) {
3682 		return (DDI_FAILURE);
3683 	}
3684 
3685 	/* Setup pointer to kernel UAR page */
3686 	state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;
3687 
3688 	/* need to set up DBr tracking as well */
3689 	status = hermon_dbr_page_alloc(state, &info);
3690 	if (status != DDI_SUCCESS) {
3691 		return (DDI_FAILURE);
3692 	}
3693 	state->hs_kern_dbr = info;
3694 	return (DDI_SUCCESS);
3695 }
3696 
3697 
3698 /*
3699  * hermon_internal_uarpg_fini
3700  *    Context: Only called from attach() and/or detach() path contexts
3701  */
3702 static void
3703 hermon_internal_uarpg_fini(hermon_state_t *state)
3704 {
3705 	/* Free up Hermon UAR page #1 (kernel driver doorbells) */
3706 	hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
3707 }
3708 
3709 
3710 /*
3711  * hermon_special_qp_contexts_reserve()
3712  *    Context: Only called from attach() path context
3713  */
3714 static int
3715 hermon_special_qp_contexts_reserve(hermon_state_t *state)
3716 {
3717 	hermon_rsrc_t	*qp0_rsrc, *qp1_rsrc, *qp_resvd;
3718 	int		status;
3719 
3720 	/* Initialize the lock used for special QP rsrc management */
3721 	mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
3722 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3723 
3724 	/*
3725 	 * Reserve contexts for QP0.  These QP contexts will be setup to
3726 	 * act as aliases for the real QP0.  Note: We are required to grab
3727 	 * two QPs (one per port) even if we are operating in single-port
3728 	 * mode.
3729 	 */
3730 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3731 	    HERMON_SLEEP, &qp0_rsrc);
3732 	if (status != DDI_SUCCESS) {
3733 		mutex_destroy(&state->hs_spec_qplock);
3734 		return (DDI_FAILURE);
3735 	}
3736 	state->hs_spec_qp0 = qp0_rsrc;
3737 
3738 	/*
3739 	 * Reserve contexts for QP1.  These QP contexts will be setup to
3740 	 * act as aliases for the real QP1.  Note: We are required to grab
3741 	 * two QPs (one per port) even if we are operating in single-port
3742 	 * mode.
3743 	 */
3744 	status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
3745 	    HERMON_SLEEP, &qp1_rsrc);
3746 	if (status != DDI_SUCCESS) {
3747 		hermon_rsrc_free(state, &qp0_rsrc);
3748 		mutex_destroy(&state->hs_spec_qplock);
3749 		return (DDI_FAILURE);
3750 	}
3751 	state->hs_spec_qp1 = qp1_rsrc;
3752 
3753 	status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
3754 	    HERMON_SLEEP, &qp_resvd);
3755 	if (status != DDI_SUCCESS) {
3756 		hermon_rsrc_free(state, &qp1_rsrc);
3757 		hermon_rsrc_free(state, &qp0_rsrc);
3758 		mutex_destroy(&state->hs_spec_qplock);
3759 		return (DDI_FAILURE);
3760 	}
3761 	state->hs_spec_qp_unused = qp_resvd;
3762 
3763 	return (DDI_SUCCESS);
3764 }
3765 
3766 
3767 /*
3768  * hermon_special_qp_contexts_unreserve()
3769  *    Context: Only called from attach() and/or detach() path contexts
3770  */
3771 static void
3772 hermon_special_qp_contexts_unreserve(hermon_state_t *state)
3773 {
3774 
3775 	/* Unreserve contexts for spec_qp_unused */
3776 	hermon_rsrc_free(state, &state->hs_spec_qp_unused);
3777 
3778 	/* Unreserve contexts for QP1 */
3779 	hermon_rsrc_free(state, &state->hs_spec_qp1);
3780 
3781 	/* Unreserve contexts for QP0 */
3782 	hermon_rsrc_free(state, &state->hs_spec_qp0);
3783 
3784 	/* Destroy the lock used for special QP rsrc management */
3785 	mutex_destroy(&state->hs_spec_qplock);
3786 
3787 }
3788 
3789 
/*
 * hermon_sw_reset()
 *    Context: Currently called only from attach() path context
 *
 *    Performs a software reset of the device and restores its PCI config
 *    space afterwards.  The steps, in order, are:
 *	1. Return DDI_SUCCESS immediately if the configured reset delay
 *	   (cp_sw_reset_delay) is zero.
 *	2. Walk the PCI capability list (which records the PCI Express
 *	   capability offset and its device/link control dwords in the
 *	   soft state).
 *	3. Poll the software semaphore register until it reads zero.
 *	4. Save config registers 0 .. HERMON_SW_RESET_NUMREGS-1, except
 *	   the two reserved registers (reg22 and reg23).
 *	5. Write HERMON_SW_RESET_START to the reset register, wait
 *	   reset_delay, then poll config offset 0 for the vendor ID.
 *	6. Restore the saved registers (all but reg1 and the two reserved
 *	   ones), then the PCI Express device/link control dwords, and
 *	   finally reg1 (command/status) last.
 *
 *    Returns DDI_SUCCESS on completion; DDI_FAILURE if the capability list
 *    walk fails, the semaphore never clears, or one of the FMA PIO checks
 *    jumps to its error label.
 */
static int
hermon_sw_reset(hermon_state_t *state)
{
	ddi_acc_handle_t	hdl = hermon_get_pcihdl(state);
	ddi_acc_handle_t	cmdhdl = hermon_get_cmdhdl(state);
	uint32_t		reset_delay;
	int			status, i;
	uint32_t		sem;
	uint_t			offset;
	uint32_t		data32;		/* for devctl & linkctl */
	int			loopcnt;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
	hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);

	/*
	 * If the configured software reset delay is set to zero, then we
	 * will not attempt a software reset of the Hermon device.
	 */
	reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
	if (reset_delay == 0) {
		return (DDI_SUCCESS);
	}

	/*
	 * the FMA retry loop starts.  Two loops are opened: the outer one
	 * covers the command register handle, the inner one the PCI config
	 * handle.  They are closed in the opposite order at the bottom.
	 */
	hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test);
	hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
	    fm_test2);

	/* Query the PCI capabilities of the HCA device */
	/* but don't process the VPD until after reset */
	/*
	 * NOTE(review): this return, and the semaphore-timeout return below,
	 * leave the function after hermon_pio_start() without passing through
	 * hermon_pio_end() - confirm that is acceptable for the FMA macros.
	 */
	status = hermon_pci_capability_list(state, hdl);
	if (status != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
		    status);
		return (DDI_FAILURE);
	}

	/*
	 * Read all PCI config info (reg0...reg63).  Note: According to the
	 * Hermon software reset application note, we should not read or
	 * restore the values in reg22 and reg23.
	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
	 * register LAST, and technically, you need to restore the
	 * PCIE Capability "device control" and "link control" (word-sized,
	 * at offsets 0x08 and 0x10 from the capability ID respectively).
	 * We hold off restoring the command register - offset 0x4 - till last
	 */

	/*
	 * 1st, wait for the semaphore assure accessibility - per PRM
	 *
	 * status doubles as the "semaphore seen clear" flag: it stays -1
	 * unless a read of the semaphore returns zero.
	 *
	 * NOTE(review): the loop makes NANOSEC/MICROSEC iterations of a
	 * 1 usec wait; if those constants are 10^9 and 10^6 that is about
	 * 1 msec in total rather than the "1sec" stated inline - confirm.
	 */
	status = -1;
	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
		if (sem == 0) {
			status = 0;
			break;
		}
		drv_usecwait(1);
	}

	/* Check if timeout happens */
	if (status == -1) {
		/*
		 * Remove this acc handle from Hermon, then log
		 * the error.
		 */
		hermon_pci_config_teardown(state, &hdl);

		cmn_err(CE_WARN, "hermon_sw_reset timeout: "
		    "failed to get the semaphore(0x%p)\n",
		    (void *)state->hs_cmd_regs.sw_semaphore);

		hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
		return (DDI_FAILURE);
	}

	/*
	 * Save the config space one dword at a time (register index i lives
	 * at byte offset i << 2), skipping the two reserved registers.
	 */
	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
		    (i != HERMON_SW_RESET_REG23_RSVD)) {
			state->hs_cfg_data[i]  = pci_config_get32(hdl, i << 2);
		}
	}

	/*
	 * Perform the software reset (by writing 1 at offset 0xF0010)
	 */
	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);

	/*
	 * This delay is required so as not to cause a panic here. If the
	 * device is accessed too soon after reset it will not respond to
	 * config cycles, causing a Master Abort and panic.
	 */
	drv_usecwait(reset_delay);

	/*
	 * Poll waiting for the device to finish resetting.  The device is
	 * considered back once the low 16 bits of config offset 0 read as
	 * PCI_VENID_MLX.  Running out of retries is only logged; the restore
	 * below is attempted regardless.
	 */
	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
	while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
		if (--loopcnt == 0)
			break;	/* just in case, break and go on */
	}
	if (loopcnt == 0)
		cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
		    pci_config_get32(hdl, 0));

	/*
	 * Restore the config info
	 */
	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
		if (i == 1) continue;	/* skip the status/ctrl reg */
		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
		    (i != HERMON_SW_RESET_REG23_RSVD)) {
			pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
		}
	}

	/*
	 * PCI Express Capability - we saved during capability list, and
	 * we'll restore them here.
	 */
	offset = state->hs_pci_cap_offset;
	data32 = state->hs_pci_cap_devctl;
	pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
	data32 = state->hs_pci_cap_lnkctl;
	pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);

	/*
	 * Restore the command/status dword (offset 0x04) last, with bits
	 * 0x0006 forced on - presumably memory space enable and bus master
	 * enable; confirm against the PCI command register layout.
	 */
	pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));

	/* the FMA retry loop ends. */
	hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
	    fm_test2);
	hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test);

	return (DDI_SUCCESS);

	/* Both PIO error labels share one ereport and failure return */
pio_error2:
	/* fall through */
pio_error:
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
	return (DDI_FAILURE);
}
3941 
3942 
3943 /*
3944  * hermon_mcg_init()
3945  *    Context: Only called from attach() path context
3946  */
3947 static int
3948 hermon_mcg_init(hermon_state_t *state)
3949 {
3950 	uint_t		mcg_tmp_sz;
3951 
3952 
3953 	/*
3954 	 * Allocate space for the MCG temporary copy buffer.  This is
3955 	 * used by the Attach/Detach Multicast Group code
3956 	 */
3957 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3958 	state->hs_mcgtmp = kmem_zalloc(mcg_tmp_sz, KM_SLEEP);
3959 
3960 	/*
3961 	 * Initialize the multicast group mutex.  This ensures atomic
3962 	 * access to add, modify, and remove entries in the multicast
3963 	 * group hash lists.
3964 	 */
3965 	mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
3966 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
3967 
3968 	return (DDI_SUCCESS);
3969 }
3970 
3971 
3972 /*
3973  * hermon_mcg_fini()
3974  *    Context: Only called from attach() and/or detach() path contexts
3975  */
3976 static void
3977 hermon_mcg_fini(hermon_state_t *state)
3978 {
3979 	uint_t		mcg_tmp_sz;
3980 
3981 
3982 	/* Free up the space used for the MCG temporary copy buffer */
3983 	mcg_tmp_sz = HERMON_MCGMEM_SZ(state);
3984 	kmem_free(state->hs_mcgtmp, mcg_tmp_sz);
3985 
3986 	/* Destroy the multicast group mutex */
3987 	mutex_destroy(&state->hs_mcglock);
3988 
3989 }
3990 
3991 
3992 /*
3993  * hermon_fw_version_check()
3994  *    Context: Only called from attach() path context
3995  */
3996 static int
3997 hermon_fw_version_check(hermon_state_t *state)
3998 {
3999 
4000 	uint_t	hermon_fw_ver_major;
4001 	uint_t	hermon_fw_ver_minor;
4002 	uint_t	hermon_fw_ver_subminor;
4003 
4004 #ifdef FMA_TEST
4005 	if (hermon_test_num == -1) {
4006 		return (DDI_FAILURE);
4007 	}
4008 #endif
4009 
4010 	/*
4011 	 * Depending on which version of driver we have attached, and which
4012 	 * HCA we've attached, the firmware version checks will be different.
4013 	 * We set up the comparison values for both Arbel and Sinai HCAs.
4014 	 */
4015 	switch (state->hs_operational_mode) {
4016 	case HERMON_HCA_MODE:
4017 		hermon_fw_ver_major = HERMON_FW_VER_MAJOR;
4018 		hermon_fw_ver_minor = HERMON_FW_VER_MINOR;
4019 		hermon_fw_ver_subminor = HERMON_FW_VER_SUBMINOR;
4020 		break;
4021 
4022 	default:
4023 		return (DDI_FAILURE);
4024 	}
4025 
4026 	/*
4027 	 * If FW revision major number is less than acceptable,
4028 	 * return failure, else if greater return success.  If
4029 	 * the major numbers are equal than check the minor number
4030 	 */
4031 	if (state->hs_fw.fw_rev_major < hermon_fw_ver_major) {
4032 		return (DDI_FAILURE);
4033 	} else if (state->hs_fw.fw_rev_major > hermon_fw_ver_major) {
4034 		return (DDI_SUCCESS);
4035 	}
4036 
4037 	/*
4038 	 * Do the same check as above, except for minor revision numbers
4039 	 * If the minor numbers are equal than check the subminor number
4040 	 */
4041 	if (state->hs_fw.fw_rev_minor < hermon_fw_ver_minor) {
4042 		return (DDI_FAILURE);
4043 	} else if (state->hs_fw.fw_rev_minor > hermon_fw_ver_minor) {
4044 		return (DDI_SUCCESS);
4045 	}
4046 
4047 	/*
4048 	 * Once again we do the same check as above, except for the subminor
4049 	 * revision number.  If the subminor numbers are equal here, then
4050 	 * these are the same firmware version, return success
4051 	 */
4052 	if (state->hs_fw.fw_rev_subminor < hermon_fw_ver_subminor) {
4053 		return (DDI_FAILURE);
4054 	} else if (state->hs_fw.fw_rev_subminor > hermon_fw_ver_subminor) {
4055 		return (DDI_SUCCESS);
4056 	}
4057 
4058 	return (DDI_SUCCESS);
4059 }
4060 
4061 
4062 /*
4063  * hermon_device_info_report()
4064  *    Context: Only called from attach() path context
4065  */
4066 static void
4067 hermon_device_info_report(hermon_state_t *state)
4068 {
4069 
4070 	cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
4071 	    "HW rev: %02d\n", state->hs_instance, state->hs_fw.fw_rev_major,
4072 	    state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
4073 	    state->hs_revision_id);
4074 	cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
4075 	    state->hs_instance, state->hs_nodedesc, state->hs_nodeguid);
4076 
4077 }
4078 
4079 
4080 /*
4081  * hermon_pci_capability_list()
4082  *    Context: Only called from attach() path context
4083  */
4084 static int
4085 hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
4086 {
4087 	uint_t		offset, data;
4088 	uint32_t	data32;
4089 
4090 	state->hs_pci_cap_offset = 0;		/* make sure it's cleared */
4091 
4092 	/*
4093 	 * Check for the "PCI Capabilities" bit in the "Status Register".
4094 	 * Bit 4 in this register indicates the presence of a "PCI
4095 	 * Capabilities" list.
4096 	 *
4097 	 * PCI-Express requires this bit to be set to 1.
4098 	 */
4099 	data = pci_config_get16(hdl, 0x06);
4100 	if ((data & 0x10) == 0) {
4101 		return (DDI_FAILURE);
4102 	}
4103 
4104 	/*
4105 	 * Starting from offset 0x34 in PCI config space, find the
4106 	 * head of "PCI capabilities" list, and walk the list.  If
4107 	 * capabilities of a known type are encountered (e.g.
4108 	 * "PCI-X Capability"), then call the appropriate handler
4109 	 * function.
4110 	 */
4111 	offset = pci_config_get8(hdl, 0x34);
4112 	while (offset != 0x0) {
4113 		data = pci_config_get8(hdl, offset);
4114 		/*
4115 		 * Check for known capability types.  Hermon has the
4116 		 * following:
4117 		 *    o Power Mgmt	 (0x02)
4118 		 *    o VPD Capability   (0x03)
4119 		 *    o PCI-E Capability (0x10)
4120 		 *    o MSIX Capability  (0x11)
4121 		 */
4122 		switch (data) {
4123 		case 0x01:
4124 			/* power mgmt handling */
4125 			break;
4126 		case 0x03:
4127 
4128 /*
4129  * Reading the PCIe VPD is inconsistent - that is, sometimes causes
4130  * problems on (mostly) X64, though we've also seen problems w/ Sparc
4131  * and Tavor --- so, for now until it's root caused, don't try and
4132  * read it
4133  */
4134 #ifdef HERMON_VPD_WORKS
4135 			hermon_pci_capability_vpd(state, hdl, offset);
4136 #else
4137 			delay(100);
4138 			hermon_pci_capability_vpd(state, hdl, offset);
4139 #endif
4140 			break;
4141 		case 0x10:
4142 			/*
4143 			 * PCI Express Capability - save offset & contents
4144 			 * for later in reset
4145 			 */
4146 			state->hs_pci_cap_offset = offset;
4147 			data32 = pci_config_get32(hdl,
4148 			    offset + HERMON_PCI_CAP_DEV_OFFS);
4149 			state->hs_pci_cap_devctl = data32;
4150 			data32 = pci_config_get32(hdl,
4151 			    offset + HERMON_PCI_CAP_LNK_OFFS);
4152 			state->hs_pci_cap_lnkctl = data32;
4153 			break;
4154 		case 0x11:
4155 			/*
4156 			 * MSIX support - nothing to do, taken care of in the
4157 			 * MSI/MSIX interrupt frameworkd
4158 			 */
4159 			break;
4160 		default:
4161 			/* just go on to the next */
4162 			break;
4163 		}
4164 
4165 		/* Get offset of next entry in list */
4166 		offset = pci_config_get8(hdl, offset + 1);
4167 	}
4168 
4169 	return (DDI_SUCCESS);
4170 }
4171 
4172 /*
4173  * hermon_pci_read_vpd()
4174  *    Context: Only called from attach() path context
4175  *    utility routine for hermon_pci_capability_vpd()
4176  */
4177 static int
4178 hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
4179     uint32_t *data)
4180 {
4181 	int		retry = 40;  /* retry counter for EEPROM poll */
4182 	uint32_t	val;
4183 	int		vpd_addr = offset + 2;
4184 	int		vpd_data = offset + 4;
4185 
4186 	/*
4187 	 * In order to read a 32-bit value from VPD, we are to write down
4188 	 * the address (offset in the VPD itself) to the address register.
4189 	 * To signal the read, we also clear bit 31.  We then poll on bit 31
4190 	 * and when it is set, we can then read our 4 bytes from the data
4191 	 * register.
4192 	 */
4193 	(void) pci_config_put32(hdl, offset, addr << 16);
4194 	do {
4195 		drv_usecwait(1000);
4196 		val = pci_config_get16(hdl, vpd_addr);
4197 		if (val & 0x8000) {		/* flag bit set */
4198 			*data = pci_config_get32(hdl, vpd_data);
4199 			return (DDI_SUCCESS);
4200 		}
4201 	} while (--retry);
4202 	/* read of flag failed write one message but count the failures */
4203 	if (debug_vpd == 0)
4204 		cmn_err(CE_NOTE,
4205 		    "!Failed to see flag bit after VPD addr write\n");
4206 	debug_vpd++;
4207 
4208 
4209 vpd_read_fail:
4210 	return (DDI_FAILURE);
4211 }
4212 
4213 
4214 
4215 /*
4216  *   hermon_pci_capability_vpd()
4217  *    Context: Only called from attach() path context
4218  */
4219 static void
4220 hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
4221     uint_t offset)
4222 {
4223 	uint8_t			name_length;
4224 	uint8_t			pn_length;
4225 	int			i, err = 0;
4226 	int			vpd_str_id = 0;
4227 	int			vpd_ro_desc;
4228 	int			vpd_ro_pn_desc;
4229 #ifdef _BIG_ENDIAN
4230 	uint32_t		data32;
4231 #endif /* _BIG_ENDIAN */
4232 	union {
4233 		uint32_t	vpd_int[HERMON_VPD_HDR_DWSIZE];
4234 		uchar_t		vpd_char[HERMON_VPD_HDR_BSIZE];
4235 	} vpd;
4236 
4237 
4238 	/*
4239 	 * Read in the Vital Product Data (VPD) to the extend needed
4240 	 * by the fwflash utility
4241 	 */
4242 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4243 		err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
4244 		if (err != DDI_SUCCESS) {
4245 			cmn_err(CE_NOTE, "!VPD read failed\n");
4246 			goto out;
4247 		}
4248 	}
4249 
4250 #ifdef _BIG_ENDIAN
4251 	/* Need to swap bytes for big endian. */
4252 	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
4253 		data32 = vpd.vpd_int[i];
4254 		vpd.vpd_char[(i << 2) + 3] =
4255 		    (uchar_t)((data32 & 0xFF000000) >> 24);
4256 		vpd.vpd_char[(i << 2) + 2] =
4257 		    (uchar_t)((data32 & 0x00FF0000) >> 16);
4258 		vpd.vpd_char[(i << 2) + 1] =
4259 		    (uchar_t)((data32 & 0x0000FF00) >> 8);
4260 		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
4261 	}
4262 #endif	/* _BIG_ENDIAN */
4263 
4264 	/* Check for VPD String ID Tag */
4265 	if (vpd.vpd_char[vpd_str_id] == 0x82) {
4266 		/* get the product name */
4267 		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
4268 		if (name_length > sizeof (state->hs_hca_name)) {
4269 			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
4270 			    name_length);
4271 			goto out;
4272 		}
4273 		(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
4274 		    name_length);
4275 		state->hs_hca_name[name_length] = 0;
4276 
4277 		/* get the part number */
4278 		vpd_ro_desc = name_length + 3; /* read-only tag location */
4279 		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */
4280 
4281 		/* Verify read-only tag and Part Number keyword. */
4282 		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
4283 		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' &&
4284 		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
4285 			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
4286 			goto out;
4287 		}
4288 
4289 		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
4290 		if (pn_length > sizeof (state->hs_hca_pn)) {
4291 			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
4292 			    name_length);
4293 			goto out;
4294 		}
4295 		(void) memcpy(state->hs_hca_pn,
4296 		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
4297 		    pn_length);
4298 		state->hs_hca_pn[pn_length] = 0;
4299 		state->hs_hca_pn_len = pn_length;
4300 		cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
4301 	} else {
4302 		/* Wrong VPD String ID Tag */
4303 		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
4304 		    vpd.vpd_char[0]);
4305 		goto out;
4306 	}
4307 	return;
4308 out:
4309 	state->hs_hca_pn_len = 0;
4310 }
4311 
4312 
4313 
4314 /*
4315  * hermon_intr_or_msi_init()
4316  *    Context: Only called from attach() path context
4317  */
4318 static int
4319 hermon_intr_or_msi_init(hermon_state_t *state)
4320 {
4321 	int	status;
4322 
4323 	/* Query for the list of supported interrupt event types */
4324 	status = ddi_intr_get_supported_types(state->hs_dip,
4325 	    &state->hs_intr_types_avail);
4326 	if (status != DDI_SUCCESS) {
4327 		return (DDI_FAILURE);
4328 	}
4329 
4330 	/*
4331 	 * If Hermon supports MSI-X in this system (and, if it
4332 	 * hasn't been overridden by a configuration variable), then
4333 	 * the default behavior is to use a single MSI-X.  Otherwise,
4334 	 * fallback to using legacy interrupts.  Also, if MSI-X is chosen,
4335 	 * but fails for whatever reasons, then next try MSI
4336 	 */
4337 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4338 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4339 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSIX);
4340 		if (status == DDI_SUCCESS) {
4341 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSIX;
4342 			return (DDI_SUCCESS);
4343 		}
4344 	}
4345 
4346 	/*
4347 	 * If Hermon supports MSI in this system (and, if it
4348 	 * hasn't been overridden by a configuration variable), then
4349 	 * the default behavior is to use a single MSIX.  Otherwise,
4350 	 * fallback to using legacy interrupts.  Also, if MSI is chosen,
4351 	 * but fails for whatever reasons, then fallback to using legacy
4352 	 * interrupts.
4353 	 */
4354 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4355 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSI)) {
4356 		status = hermon_add_intrs(state, DDI_INTR_TYPE_MSI);
4357 		if (status == DDI_SUCCESS) {
4358 			state->hs_intr_type_chosen = DDI_INTR_TYPE_MSI;
4359 			return (DDI_SUCCESS);
4360 		}
4361 	}
4362 
4363 	/*
4364 	 * MSI interrupt allocation failed, or was not available.  Fallback to
4365 	 * legacy interrupt support.
4366 	 */
4367 	if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
4368 		status = hermon_add_intrs(state, DDI_INTR_TYPE_FIXED);
4369 		if (status == DDI_SUCCESS) {
4370 			state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
4371 			return (DDI_SUCCESS);
4372 		}
4373 	}
4374 
4375 	/*
4376 	 * None of MSI, MSI-X, nor legacy interrupts were successful.
4377 	 * Return failure.
4378 	 */
4379 	return (DDI_FAILURE);
4380 }
4381 
4382 /* ARGSUSED */
4383 static int
4384 hermon_intr_cb_handler(dev_info_t *dip, ddi_cb_action_t action, void *cbarg,
4385     void *arg1, void *arg2)
4386 {
4387 	hermon_state_t *state = (hermon_state_t *)arg1;
4388 
4389 	IBTF_DPRINTF_L2("hermon", "interrupt callback: instance %d, "
4390 	    "action %d, cbarg %d\n", state->hs_instance, action,
4391 	    (uint32_t)(uintptr_t)cbarg);
4392 	return (DDI_SUCCESS);
4393 }
4394 
4395 /*
4396  * hermon_add_intrs()
4397  *    Context: Only called from attach() patch context
4398  */
4399 static int
4400 hermon_add_intrs(hermon_state_t *state, int intr_type)
4401 {
4402 	int	status;
4403 
4404 	if (state->hs_intr_cb_hdl == NULL) {
4405 		status = ddi_cb_register(state->hs_dip, DDI_CB_FLAG_INTR,
4406 		    hermon_intr_cb_handler, state, NULL,
4407 		    &state->hs_intr_cb_hdl);
4408 		if (status != DDI_SUCCESS) {
4409 			cmn_err(CE_CONT, "ddi_cb_register failed: 0x%x\n",
4410 			    status);
4411 			state->hs_intr_cb_hdl = NULL;
4412 			return (DDI_FAILURE);
4413 		}
4414 	}
4415 
4416 	/* Get number of interrupts/MSI supported */
4417 	status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
4418 	    &state->hs_intrmsi_count);
4419 	if (status != DDI_SUCCESS) {
4420 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4421 		state->hs_intr_cb_hdl = NULL;
4422 		return (DDI_FAILURE);
4423 	}
4424 
4425 	/* Get number of available interrupts/MSI */
4426 	status = ddi_intr_get_navail(state->hs_dip, intr_type,
4427 	    &state->hs_intrmsi_avail);
4428 	if (status != DDI_SUCCESS) {
4429 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4430 		state->hs_intr_cb_hdl = NULL;
4431 		return (DDI_FAILURE);
4432 	}
4433 
4434 	/* Ensure that we have at least one (1) usable MSI or interrupt */
4435 	if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1)) {
4436 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4437 		state->hs_intr_cb_hdl = NULL;
4438 		return (DDI_FAILURE);
4439 	}
4440 
4441 	/*
4442 	 * Allocate the #interrupt/MSI handles.
4443 	 * The number we request is the minimum of these three values:
4444 	 *	HERMON_MSIX_MAX			driver maximum (array size)
4445 	 *	hermon_msix_max			/etc/system override to...
4446 	 *						HERMON_MSIX_MAX
4447 	 *	state->hs_intrmsi_avail		Maximum the ddi provides.
4448 	 */
4449 	status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
4450 	    intr_type, 0, min(min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
4451 	    hermon_msix_max), &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
4452 	if (status != DDI_SUCCESS) {
4453 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4454 		state->hs_intr_cb_hdl = NULL;
4455 		return (DDI_FAILURE);
4456 	}
4457 
4458 	/* Ensure that we have allocated at least one (1) MSI or interrupt */
4459 	if (state->hs_intrmsi_allocd < 1) {
4460 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4461 		state->hs_intr_cb_hdl = NULL;
4462 		return (DDI_FAILURE);
4463 	}
4464 
4465 	/*
4466 	 * Extract the priority for the allocated interrupt/MSI.  This
4467 	 * will be used later when initializing certain mutexes.
4468 	 */
4469 	status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
4470 	    &state->hs_intrmsi_pri);
4471 	if (status != DDI_SUCCESS) {
4472 		/* Free the allocated interrupt/MSI handle */
4473 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4474 
4475 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4476 		state->hs_intr_cb_hdl = NULL;
4477 		return (DDI_FAILURE);
4478 	}
4479 
4480 	/* Make sure the interrupt/MSI priority is below 'high level' */
4481 	if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri()) {
4482 		/* Free the allocated interrupt/MSI handle */
4483 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4484 
4485 		return (DDI_FAILURE);
4486 	}
4487 
4488 	/* Get add'l capability information regarding interrupt/MSI */
4489 	status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
4490 	    &state->hs_intrmsi_cap);
4491 	if (status != DDI_SUCCESS) {
4492 		/* Free the allocated interrupt/MSI handle */
4493 		(void) ddi_intr_free(state->hs_intrmsi_hdl[0]);
4494 
4495 		return (DDI_FAILURE);
4496 	}
4497 
4498 	return (DDI_SUCCESS);
4499 }
4500 
4501 
4502 /*
4503  * hermon_intr_or_msi_fini()
4504  *    Context: Only called from attach() and/or detach() path contexts
4505  */
4506 static int
4507 hermon_intr_or_msi_fini(hermon_state_t *state)
4508 {
4509 	int	status;
4510 	int	intr;
4511 
4512 	for (intr = 0; intr < state->hs_intrmsi_allocd; intr++) {
4513 
4514 		/* Free the allocated interrupt/MSI handle */
4515 		status = ddi_intr_free(state->hs_intrmsi_hdl[intr]);
4516 		if (status != DDI_SUCCESS) {
4517 			return (DDI_FAILURE);
4518 		}
4519 	}
4520 	if (state->hs_intr_cb_hdl) {
4521 		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
4522 		state->hs_intr_cb_hdl = NULL;
4523 	}
4524 	return (DDI_SUCCESS);
4525 }
4526 
4527 
4528 /*ARGSUSED*/
4529 void
4530 hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
4531     uint_t offset)
4532 {
4533 	uint32_t	msix_data;
4534 	uint16_t	msg_cntr;
4535 	uint32_t	t_offset;	/* table offset */
4536 	uint32_t	t_bir;
4537 	uint32_t	p_offset;	/* pba */
4538 	uint32_t	p_bir;
4539 	int		t_size;		/* size in entries - each is 4 dwords */
4540 
4541 	/* come in with offset pointing at the capability structure */
4542 
4543 	msix_data = pci_config_get32(hdl, offset);
4544 	cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
4545 	msg_cntr =  pci_config_get16(hdl, offset+2);
4546 	cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
4547 	offset += 4;
4548 	msix_data = pci_config_get32(hdl, offset);	/* table info */
4549 	t_offset = (msix_data & 0xFFF8) >> 3;
4550 	t_bir = msix_data & 0x07;
4551 	offset += 4;
4552 	cmn_err(CE_CONT, "  table %X --offset = %X, bir(bar) = %X\n",
4553 	    msix_data, t_offset, t_bir);
4554 	msix_data = pci_config_get32(hdl, offset);	/* PBA info */
4555 	p_offset = (msix_data & 0xFFF8) >> 3;
4556 	p_bir = msix_data & 0x07;
4557 
4558 	cmn_err(CE_CONT, "  PBA   %X --offset = %X, bir(bar) = %X\n",
4559 	    msix_data, p_offset, p_bir);
4560 	t_size = msg_cntr & 0x7FF;		/* low eleven bits */
4561 	cmn_err(CE_CONT, "    table size = %X entries\n", t_size);
4562 
4563 	offset = t_offset;		/* reuse this for offset from BAR */
4564 #ifdef HERMON_SUPPORTS_MSIX_BAR
4565 	cmn_err(CE_CONT, "First 2 table entries behind BAR2 \n");
4566 	for (i = 0; i < 2; i++) {
4567 		for (j = 0; j < 4; j++, offset += 4) {
4568 			msix_data = ddi_get32(state->hs_reg_msihdl,
4569 			    (uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
4570 			    + offset));
4571 			cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
4572 			    i, j, msix_data);
4573 		}
4574 	}
4575 #endif
4576 
4577 }
4578 
4579 /*
4580  * X86 fastreboot support functions.
4581  * These functions are used to save/restore MSI-X table/PBA and also
4582  * to disable MSI-X interrupts in hermon_quiesce().
4583  */
4584 
4585 /* Return the message control for MSI-X */
4586 static ushort_t
4587 get_msix_ctrl(dev_info_t *dip)
4588 {
4589 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4590 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4591 	    DEVI(dip)->devi_instance);
4592 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4593 	ASSERT(pci_cfg_hdl != NULL);
4594 
4595 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4596 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4597 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4598 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4599 			return (0);
4600 	}
4601 	ASSERT(msix_ctrl != 0);
4602 
4603 	return (msix_ctrl);
4604 }
4605 
4606 /* Return the MSI-X table size */
4607 static size_t
4608 get_msix_tbl_size(dev_info_t *dip)
4609 {
4610 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4611 	ASSERT(msix_ctrl != 0);
4612 
4613 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4614 	    PCI_MSIX_VECTOR_SIZE);
4615 }
4616 
4617 /* Return the MSI-X PBA size */
4618 static size_t
4619 get_msix_pba_size(dev_info_t *dip)
4620 {
4621 	ushort_t msix_ctrl = get_msix_ctrl(dip);
4622 	ASSERT(msix_ctrl != 0);
4623 
4624 	return (((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8);
4625 }
4626 
4627 /* Set up the MSI-X table/PBA save area */
4628 static void
4629 hermon_set_msix_info(hermon_state_t *state)
4630 {
4631 	uint_t			rnumber, breg, nregs;
4632 	ushort_t		caps_ctrl, msix_ctrl;
4633 	pci_regspec_t		*rp;
4634 	int			reg_size, addr_space, offset, *regs_list, i;
4635 
4636 	/*
4637 	 * MSI-X BIR Index Table:
4638 	 * BAR indicator register (BIR) to Base Address register.
4639 	 */
4640 	uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
4641 	    0x20, 0x24, 0xff, 0xff};
4642 
4643 	/* Fastreboot data access  attribute */
4644 	ddi_device_acc_attr_t	dev_attr = {
4645 		0,				/* version */
4646 		DDI_STRUCTURE_LE_ACC,
4647 		DDI_STRICTORDER_ACC,		/* attr access */
4648 		0
4649 	};
4650 
4651 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4652 	ASSERT(pci_cfg_hdl != NULL);
4653 
4654 	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4655 	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4656 		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL, caps_ctrl,
4657 		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4658 			return;
4659 	}
4660 	ASSERT(msix_ctrl != 0);
4661 
4662 	state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4663 	    PCI_MSIX_TBL_OFFSET);
4664 
4665 	/* Get the BIR for MSI-X table */
4666 	breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
4667 	    PCI_MSIX_TBL_BIR_MASK];
4668 	ASSERT(breg != 0xFF);
4669 
4670 	/* Set the MSI-X table offset */
4671 	state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
4672 	    ~PCI_MSIX_TBL_BIR_MASK;
4673 
4674 	/* Set the MSI-X table size */
4675 	state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
4676 	    PCI_MSIX_VECTOR_SIZE;
4677 
4678 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
4679 	    DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
4680 	    DDI_PROP_SUCCESS) {
4681 		return;
4682 	}
4683 	reg_size = sizeof (pci_regspec_t) / sizeof (int);
4684 
4685 	/* Check the register number for MSI-X table */
4686 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4687 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4688 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4689 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4690 
4691 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4692 		    (addr_space == PCI_ADDR_MEM64))) {
4693 			rnumber = i;
4694 			break;
4695 		}
4696 	}
4697 	ASSERT(rnumber != 0);
4698 	state->hs_msix_tbl_rnumber = rnumber;
4699 
4700 	/* Set device attribute version and access according to Hermon FM */
4701 	dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
4702 	dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);
4703 
4704 	/* Map the entire MSI-X vector table */
4705 	if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
4706 	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
4707 	    state->hs_msix_tbl_size, &dev_attr,
4708 	    &state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
4709 		return;
4710 	}
4711 
4712 	state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, NULL, caps_ctrl,
4713 	    PCI_MSIX_PBA_OFFSET);
4714 
4715 	/* Get the BIR for MSI-X PBA */
4716 	breg = pci_msix_bir_index[state->hs_msix_pba_offset &
4717 	    PCI_MSIX_PBA_BIR_MASK];
4718 	ASSERT(breg != 0xFF);
4719 
4720 	/* Set the MSI-X PBA offset */
4721 	state->hs_msix_pba_offset = state->hs_msix_pba_offset &
4722 	    ~PCI_MSIX_PBA_BIR_MASK;
4723 
4724 	/* Set the MSI-X PBA size */
4725 	state->hs_msix_pba_size =
4726 	    ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;
4727 
4728 	/* Check the register number for MSI-X PBA */
4729 	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
4730 		rp = (pci_regspec_t *)&regs_list[i * reg_size];
4731 		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
4732 		offset = PCI_REG_REG_G(rp->pci_phys_hi);
4733 
4734 		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
4735 		    (addr_space == PCI_ADDR_MEM64))) {
4736 			rnumber = i;
4737 			break;
4738 		}
4739 	}
4740 	ASSERT(rnumber != 0);
4741 	state->hs_msix_pba_rnumber = rnumber;
4742 	ddi_prop_free(regs_list);
4743 
4744 	/* Map in the MSI-X Pending Bit Array */
4745 	if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
4746 	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
4747 	    state->hs_msix_pba_size, &dev_attr,
4748 	    &state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
4749 		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
4750 		state->hs_fm_msix_tblhdl = NULL;
4751 		return;
4752 	}
4753 
4754 	/* Set the MSI-X table save area */
4755 	state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
4756 	    KM_SLEEP);
4757 
4758 	/* Set the MSI-X PBA save area */
4759 	state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
4760 	    KM_SLEEP);
4761 }
4762 
4763 /* Disable Hermon interrupts */
4764 static int
4765 hermon_intr_disable(hermon_state_t *state)
4766 {
4767 	ushort_t msix_ctrl = 0, caps_ctrl = 0;
4768 	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
4769 	ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
4770 	int i, j;
4771 	ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
4772 	ASSERT(state->hs_intr_types_avail &
4773 	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));
4774 
4775 	/*
4776 	 * Check if MSI-X interrupts are used. If so, disable MSI-X interupts.
4777 	 * If not, since Hermon doesn't support MSI interrupts, assuming the
4778 	 * legacy interrupt is used instead, disable the legacy interrupt.
4779 	 */
4780 	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
4781 	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {
4782 
4783 		if ((PCI_CAP_LOCATE(pci_cfg_hdl,
4784 		    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
4785 			if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, NULL,
4786 			    caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
4787 				return (DDI_FAILURE);
4788 		}
4789 		ASSERT(msix_ctrl != 0);
4790 
4791 		if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
4792 			return (DDI_SUCCESS);
4793 
4794 		/* Clear all inums in MSI-X table */
4795 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4796 		    i += PCI_MSIX_VECTOR_SIZE) {
4797 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4798 				char *addr = state->hs_msix_tbl_addr + i + j;
4799 				ddi_put32(msix_tblhdl,
4800 				    (uint32_t *)(uintptr_t)addr, 0x0);
4801 			}
4802 		}
4803 
4804 		/* Disable MSI-X interrupts */
4805 		msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
4806 		PCI_CAP_PUT16(pci_cfg_hdl, NULL, caps_ctrl, PCI_MSIX_CTRL,
4807 		    msix_ctrl);
4808 
4809 	} else {
4810 		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
4811 		ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);
4812 
4813 		/* Disable the legacy interrupts */
4814 		cmdreg |= PCI_COMM_INTX_DISABLE;
4815 		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
4816 	}
4817 
4818 	return (DDI_SUCCESS);
4819 }
4820 
4821 /* Hermon quiesce(9F) entry */
4822 static int
4823 hermon_quiesce(dev_info_t *dip)
4824 {
4825 	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
4826 	    DEVI(dip)->devi_instance);
4827 	ddi_acc_handle_t pcihdl = hermon_get_pcihdl(state);
4828 	ddi_acc_handle_t cmdhdl = hermon_get_cmdhdl(state);
4829 	ddi_acc_handle_t msix_tbl_hdl = hermon_get_msix_tblhdl(state);
4830 	ddi_acc_handle_t msix_pba_hdl = hermon_get_msix_pbahdl(state);
4831 	uint32_t sem, reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
4832 	uint64_t data64;
4833 	uint32_t data32;
4834 	int status, i, j, loopcnt;
4835 	uint_t offset;
4836 
4837 	ASSERT(state != NULL);
4838 
4839 	/* start fastreboot */
4840 	state->hs_quiescing = B_TRUE;
4841 
4842 	/* If it's in maintenance mode, do nothing but return with SUCCESS */
4843 	if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
4844 		return (DDI_SUCCESS);
4845 	}
4846 
4847 	/* suppress Hermon FM ereports */
4848 	if (hermon_get_state(state) & HCA_EREPORT_FM) {
4849 		hermon_clr_state_nolock(state, HCA_EREPORT_FM);
4850 	}
4851 
4852 	/* Shutdown HCA ports */
4853 	if (hermon_hca_ports_shutdown(state,
4854 	    state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
4855 		state->hs_quiescing = B_FALSE;
4856 		return (DDI_FAILURE);
4857 	}
4858 
4859 	/* Close HCA */
4860 	if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
4861 	    HERMON_CMD_SUCCESS) {
4862 		state->hs_quiescing = B_FALSE;
4863 		return (DDI_FAILURE);
4864 	}
4865 
4866 	/* Disable interrupts */
4867 	if (hermon_intr_disable(state) != DDI_SUCCESS) {
4868 		state->hs_quiescing = B_FALSE;
4869 		return (DDI_FAILURE);
4870 	}
4871 
4872 	/*
4873 	 * Query the PCI capabilities of the HCA device, but don't process
4874 	 * the VPD until after reset.
4875 	 */
4876 	if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
4877 		state->hs_quiescing = B_FALSE;
4878 		return (DDI_FAILURE);
4879 	}
4880 
4881 	/*
4882 	 * Read all PCI config info (reg0...reg63).  Note: According to the
4883 	 * Hermon software reset application note, we should not read or
4884 	 * restore the values in reg22 and reg23.
4885 	 * NOTE:  For Hermon (and Arbel too) it says to restore the command
4886 	 * register LAST, and technically, you need to restore the
4887 	 * PCIE Capability "device control" and "link control" (word-sized,
4888 	 * at offsets 0x08 and 0x10 from the capbility ID respectively).
4889 	 * We hold off restoring the command register - offset 0x4 - till last
4890 	 */
4891 
4892 	/* 1st, wait for the semaphore assure accessibility - per PRM */
4893 	status = -1;
4894 	for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
4895 		sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
4896 		if (sem == 0) {
4897 			status = 0;
4898 			break;
4899 		}
4900 		drv_usecwait(1);
4901 	}
4902 
4903 	/* Check if timeout happens */
4904 	if (status == -1) {
4905 		state->hs_quiescing = B_FALSE;
4906 		return (DDI_FAILURE);
4907 	}
4908 
4909 	/* MSI-X interrupts are used, save the MSI-X table */
4910 	if (msix_tbl_hdl && msix_pba_hdl) {
4911 		/* save MSI-X table */
4912 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4913 		    i += PCI_MSIX_VECTOR_SIZE) {
4914 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4915 				char *addr = state->hs_msix_tbl_addr + i + j;
4916 				data32 = ddi_get32(msix_tbl_hdl,
4917 				    (uint32_t *)(uintptr_t)addr);
4918 				*(uint32_t *)(uintptr_t)(state->
4919 				    hs_msix_tbl_entries + i + j) = data32;
4920 			}
4921 		}
4922 		/* save MSI-X PBA */
4923 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4924 			char *addr = state->hs_msix_pba_addr + i;
4925 			data64 = ddi_get64(msix_pba_hdl,
4926 			    (uint64_t *)(uintptr_t)addr);
4927 			*(uint64_t *)(uintptr_t)(state->
4928 			    hs_msix_pba_entries + i) = data64;
4929 		}
4930 	}
4931 
4932 	/* save PCI config space */
4933 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4934 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4935 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4936 			state->hs_cfg_data[i]  =
4937 			    pci_config_get32(pcihdl, i << 2);
4938 		}
4939 	}
4940 
4941 	/* SW-reset HCA */
4942 	ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);
4943 
4944 	/*
4945 	 * This delay is required so as not to cause a panic here. If the
4946 	 * device is accessed too soon after reset it will not respond to
4947 	 * config cycles, causing a Master Abort and panic.
4948 	 */
4949 	drv_usecwait(reset_delay);
4950 
4951 	/* Poll waiting for the device to finish resetting */
4952 	loopcnt = 100;	/* 100 times @ 100 usec - total delay 10 msec */
4953 	while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
4954 		drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
4955 		if (--loopcnt == 0)
4956 			break;	/* just in case, break and go on */
4957 	}
4958 	if (loopcnt == 0) {
4959 		state->hs_quiescing = B_FALSE;
4960 		return (DDI_FAILURE);
4961 	}
4962 
4963 	/* Restore the config info */
4964 	for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
4965 		if (i == 1) continue;	/* skip the status/ctrl reg */
4966 		if ((i != HERMON_SW_RESET_REG22_RSVD) &&
4967 		    (i != HERMON_SW_RESET_REG23_RSVD)) {
4968 			pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
4969 		}
4970 	}
4971 
4972 	/* If MSI-X interrupts are used, restore the MSI-X table */
4973 	if (msix_tbl_hdl && msix_pba_hdl) {
4974 		/* restore MSI-X PBA */
4975 		for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
4976 			char *addr = state->hs_msix_pba_addr + i;
4977 			data64 = *(uint64_t *)(uintptr_t)
4978 			    (state->hs_msix_pba_entries + i);
4979 			ddi_put64(msix_pba_hdl,
4980 			    (uint64_t *)(uintptr_t)addr, data64);
4981 		}
4982 		/* restore MSI-X table */
4983 		for (i = 0; i < get_msix_tbl_size(state->hs_dip);
4984 		    i += PCI_MSIX_VECTOR_SIZE) {
4985 			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
4986 				char *addr = state->hs_msix_tbl_addr + i + j;
4987 				data32 = *(uint32_t *)(uintptr_t)
4988 				    (state->hs_msix_tbl_entries + i + j);
4989 				ddi_put32(msix_tbl_hdl,
4990 				    (uint32_t *)(uintptr_t)addr, data32);
4991 			}
4992 		}
4993 	}
4994 
4995 	/*
4996 	 * PCI Express Capability - we saved during capability list, and
4997 	 * we'll restore them here.
4998 	 */
4999 	offset = state->hs_pci_cap_offset;
5000 	data32 = state->hs_pci_cap_devctl;
5001 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
5002 	data32 = state->hs_pci_cap_lnkctl;
5003 	pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);
5004 
5005 	/* restore the command register */
5006 	pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));
5007 
5008 	return (DDI_SUCCESS);
5009 }
5010