xref: /titanic_52/usr/src/uts/common/io/ib/adapters/hermon/hermon_cq.c (revision 17a2b317610f531d565bf4e940433aab2d9e6985)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * hermon_cq.c
28  *    Hermon Completion Queue Processing Routines
29  *
30  *    Implements all the routines necessary for allocating, freeing, resizing,
31  *    and handling the completion type events that the Hermon hardware can
32  *    generate.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/sysmacros.h>
42 
43 #include <sys/ib/adapters/hermon/hermon.h>
44 
45 int hermon_should_panic = 0;	/* debugging aid */
46 
47 #define	hermon_cq_update_ci_doorbell(cq)				\
48 	/* Build the doorbell record data (low 24 bits only) */		\
49 	HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr,			\
50 	    cq->cq_consindx & 0x00FFFFFF)
51 
52 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
53     uint_t cmd);
54 #pragma inline(hermon_cq_arm_doorbell)
55 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
56 #pragma inline(hermon_arm_cq_dbr_init)
57 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
58     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
59 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
60     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
61 
62 
63 /*
64  * hermon_cq_alloc()
65  *    Context: Can be called only from user or kernel context.
66  */
67 int
68 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
69     ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
70     uint_t sleepflag)
71 {
72 	hermon_rsrc_t		*cqc, *rsrc;
73 	hermon_umap_db_entry_t	*umapdb;
74 	hermon_hw_cqc_t		cqc_entry;
75 	hermon_cqhdl_t		cq;
76 	ibt_mr_attr_t		mr_attr;
77 	hermon_mr_options_t	op;
78 	hermon_pdhdl_t		pd;
79 	hermon_mrhdl_t		mr;
80 	hermon_hw_cqe_t		*buf;
81 	uint64_t		value;
82 	uint32_t		log_cq_size, uarpg;
83 	uint_t			cq_is_umap;
84 	uint32_t		status, flag;
85 	hermon_cq_sched_t	*cq_schedp;
86 
87 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))
88 
89 	/*
90 	 * Determine whether CQ is being allocated for userland access or
91 	 * whether it is being allocated for kernel access.  If the CQ is
92 	 * being allocated for userland access, then lookup the UAR
93 	 * page number for the current process.  Note:  If this is not found
94 	 * (e.g. if the process has not previously open()'d the Hermon driver),
95 	 * then an error is returned.
96 	 */
97 	cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
98 	if (cq_is_umap) {
99 		status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
100 		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
101 		if (status != DDI_SUCCESS) {
102 			status = IBT_INVALID_PARAM;
103 			goto cqalloc_fail;
104 		}
105 		uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
106 	} else {
107 		uarpg = state->hs_kernel_uar_index;
108 	}
109 
110 	/* Use the internal protection domain (PD) for setting up CQs */
111 	pd = state->hs_pdhdl_internal;
112 
113 	/* Increment the reference count on the protection domain (PD) */
114 	hermon_pd_refcnt_inc(pd);
115 
116 	/*
117 	 * Allocate an CQ context entry.  This will be filled in with all
118 	 * the necessary parameters to define the Completion Queue.  And then
119 	 * ownership will be passed to the hardware in the final step
120 	 * below.  If we fail here, we must undo the protection domain
121 	 * reference count.
122 	 */
123 	status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
124 	if (status != DDI_SUCCESS) {
125 		status = IBT_INSUFF_RESOURCE;
126 		goto cqalloc_fail1;
127 	}
128 
129 	/*
130 	 * Allocate the software structure for tracking the completion queue
131 	 * (i.e. the Hermon Completion Queue handle).  If we fail here, we must
132 	 * undo the protection domain reference count and the previous
133 	 * resource allocation.
134 	 */
135 	status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
136 	if (status != DDI_SUCCESS) {
137 		status = IBT_INSUFF_RESOURCE;
138 		goto cqalloc_fail2;
139 	}
140 	cq = (hermon_cqhdl_t)rsrc->hr_addr;
141 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
142 	cq->cq_is_umap = cq_is_umap;
143 	cq->cq_cqnum = cqc->hr_indx;	/* just use index, implicit in Hermon */
144 	cq->cq_intmod_count = 0;
145 	cq->cq_intmod_usec = 0;
146 
147 	/*
148 	 * If this will be a user-mappable CQ, then allocate an entry for
149 	 * the "userland resources database".  This will later be added to
150 	 * the database (after all further CQ operations are successful).
151 	 * If we fail here, we must undo the reference counts and the
152 	 * previous resource allocation.
153 	 */
154 	if (cq->cq_is_umap) {
155 		umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
156 		    MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
157 		if (umapdb == NULL) {
158 			status = IBT_INSUFF_RESOURCE;
159 			goto cqalloc_fail3;
160 		}
161 	}
162 
163 
164 	/*
165 	 * Allocate the doorbell record.  We'll need one for the CQ, handling
166 	 * both consumer index (SET CI) and the CQ state (CQ ARM).
167 	 */
168 
169 	status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
170 	    &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
171 	if (status != DDI_SUCCESS) {
172 		status = IBT_INSUFF_RESOURCE;
173 		goto cqalloc_fail4;
174 	}
175 
176 	/*
177 	 * Calculate the appropriate size for the completion queue.
178 	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
179 	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
180 	 * to round the requested size up to the next highest power-of-2
181 	 */
182 	cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
183 	log_cq_size = highbit(cq_attr->cq_size);
184 
185 	/*
186 	 * Next we verify that the rounded-up size is valid (i.e. consistent
187 	 * with the device limits and/or software-configured limits)
188 	 */
189 	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
190 		status = IBT_HCA_CQ_EXCEEDED;
191 		goto cqalloc_fail4a;
192 	}
193 
194 	/*
195 	 * Allocate the memory for Completion Queue.
196 	 *
197 	 * Note: Although we use the common queue allocation routine, we
198 	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
199 	 * kernel system memory) for kernel CQs because it would be
200 	 * inefficient to have CQs located in DDR memory.  This is primarily
201 	 * because CQs are read from (by software) more than they are written
202 	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
203 	 * user-mappable CQs for a similar reason.)
204 	 * It is also worth noting that, unlike Hermon QP work queues,
205 	 * completion queues do not have the same strict alignment
206 	 * requirements.  It is sufficient for the CQ memory to be both
207 	 * aligned to and bound to addresses which are a multiple of CQE size.
208 	 */
209 	cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
210 
211 	cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
212 	cq->cq_cqinfo.qa_bind_align  = PAGESIZE;
213 	if (cq->cq_is_umap) {
214 		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
215 	} else {
216 		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
217 		hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
218 	}
219 	status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
220 	if (status != DDI_SUCCESS) {
221 		status = IBT_INSUFF_RESOURCE;
222 		goto cqalloc_fail4;
223 	}
224 	buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
225 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
226 
227 	/*
228 	 * The ownership bit of the CQE's is set by the HW during the process
229 	 * of transferrring ownership of the CQ (PRM 09.35c, 14.2.1, note D1
230 	 *
231 	 */
232 
233 	/*
234 	 * Register the memory for the CQ.  The memory for the CQ must
235 	 * be registered in the Hermon TPT tables.  This gives us the LKey
236 	 * to specify in the CQ context below.  Note: If this is a user-
237 	 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
238 	 */
239 	flag = (sleepflag == HERMON_SLEEP) ?  IBT_MR_SLEEP : IBT_MR_NOSLEEP;
240 	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
241 	mr_attr.mr_len	 = cq->cq_cqinfo.qa_size;
242 	mr_attr.mr_as	 = NULL;
243 	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
244 	op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
245 	op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
246 	op.mro_bind_override_addr = 0;
247 	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
248 	    HERMON_CQ_CMPT);
249 	if (status != DDI_SUCCESS) {
250 		status = IBT_INSUFF_RESOURCE;
251 		goto cqalloc_fail5;
252 	}
253 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
254 
255 	cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
256 	if (cq_attr->cq_flags & IBT_CQ_HID) {
257 		if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
258 			IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
259 			    cq_attr->cq_hid);
260 			status = IBT_INVALID_PARAM;
261 			goto cqalloc_fail5;
262 		}
263 		cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
264 		IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
265 	} else {
266 		cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
267 		if (cq_schedp == NULL) {
268 			cq_schedp = &state->hs_cq_sched_default;
269 		} else if (cq_schedp != &state->hs_cq_sched_default) {
270 			int i;
271 			hermon_cq_sched_t *tmp;
272 
273 			tmp = state->hs_cq_sched_array;
274 			for (i = 0; i < state->hs_cq_sched_array_size; i++)
275 				if (cq_schedp == &tmp[i])
276 					break;	/* found it */
277 			if (i >= state->hs_cq_sched_array_size) {
278 				cmn_err(CE_CONT, "!Invalid cq_sched argument: "
279 				    "ignored\n");
280 				cq_schedp = &state->hs_cq_sched_default;
281 			}
282 		}
283 		cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
284 		    HERMON_CQSCHED_NEXT_HID(cq_schedp));
285 		IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
286 		    "eqn %d", cq_schedp->cqs_start_hid - 1,
287 		    cq_schedp->cqs_len, cq->cq_eqnum);
288 	}
289 
290 	/*
291 	 * Fill in the CQC entry.  This is the final step before passing
292 	 * ownership of the CQC entry to the Hermon hardware.  We use all of
293 	 * the information collected/calculated above to fill in the
294 	 * requisite portions of the CQC.  Note: If this CQ is going to be
295 	 * used for userland access, then we need to set the UAR page number
296 	 * appropriately (otherwise it's a "don't care")
297 	 */
298 	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
299 
300 	cqc_entry.state		= HERMON_CQ_DISARMED;
301 	cqc_entry.pg_offs	= cq->cq_cqinfo.qa_pgoffs >> 5;
302 	cqc_entry.log_cq_sz	= log_cq_size;
303 	cqc_entry.usr_page	= uarpg;
304 	cqc_entry.c_eqn		= cq->cq_eqnum;
305 	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
306 	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
307 	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
308 	cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
309 	cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
310 
311 	/*
312 	 * Write the CQC entry to hardware - we pass ownership of
313 	 * the entry to the hardware (using the Hermon SW2HW_CQ firmware
314 	 * command).  Note: In general, this operation shouldn't fail.  But
315 	 * if it does, we have to undo everything we've done above before
316 	 * returning error.
317 	 */
318 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
319 	    sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
320 	if (status != HERMON_CMD_SUCCESS) {
321 		cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
322 		    status);
323 		if (status == HERMON_CMD_INVALID_STATUS) {
324 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
325 		}
326 		status = ibc_get_ci_failure(0);
327 		goto cqalloc_fail6;
328 	}
329 
330 	/*
331 	 * Fill in the rest of the Hermon Completion Queue handle.  Having
332 	 * successfully transferred ownership of the CQC, we can update the
333 	 * following fields for use in further operations on the CQ.
334 	 */
335 	cq->cq_resize_hdl = 0;
336 	cq->cq_cqcrsrcp	  = cqc;
337 	cq->cq_rsrcp	  = rsrc;
338 	cq->cq_consindx	  = 0;
339 		/* least restrictive */
340 	cq->cq_buf	  = buf;
341 	cq->cq_bufsz	  = (1 << log_cq_size);
342 	cq->cq_log_cqsz	  = log_cq_size;
343 	cq->cq_mrhdl	  = mr;
344 	cq->cq_refcnt	  = 0;
345 	cq->cq_is_special = 0;
346 	cq->cq_uarpg	  = uarpg;
347 	cq->cq_umap_dhp	  = (devmap_cookie_t)NULL;
348 	avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
349 	    sizeof (struct hermon_workq_avl_s),
350 	    offsetof(struct hermon_workq_avl_s, wqa_link));
351 
352 	cq->cq_hdlrarg	  = (void *)ibt_cqhdl;
353 
354 	/*
355 	 * Put CQ handle in Hermon CQNum-to-CQHdl list.  Then fill in the
356 	 * "actual_size" and "cqhdl" and return success
357 	 */
358 	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);
359 
360 	/*
361 	 * If this is a user-mappable CQ, then we need to insert the previously
362 	 * allocated entry into the "userland resources database".  This will
363 	 * allow for later lookup during devmap() (i.e. mmap()) calls.
364 	 */
365 	if (cq->cq_is_umap) {
366 		hermon_umap_db_add(umapdb);
367 	}
368 
369 	/*
370 	 * Fill in the return arguments (if necessary).  This includes the
371 	 * real completion queue size.
372 	 */
373 	if (actual_size != NULL) {
374 		*actual_size = (1 << log_cq_size) - 1;
375 	}
376 	*cqhdl = cq;
377 
378 	return (DDI_SUCCESS);
379 
380 /*
381  * The following is cleanup for all possible failure cases in this routine
382  */
383 cqalloc_fail6:
384 	if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
385 	    sleepflag) != DDI_SUCCESS) {
386 		HERMON_WARNING(state, "failed to deregister CQ memory");
387 	}
388 cqalloc_fail5:
389 	hermon_queue_free(&cq->cq_cqinfo);
390 cqalloc_fail4a:
391 	hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
392 cqalloc_fail4:
393 	if (cq_is_umap) {
394 		hermon_umap_db_free(umapdb);
395 	}
396 cqalloc_fail3:
397 	hermon_rsrc_free(state, &rsrc);
398 cqalloc_fail2:
399 	hermon_rsrc_free(state, &cqc);
400 cqalloc_fail1:
401 	hermon_pd_refcnt_dec(pd);
402 cqalloc_fail:
403 	return (status);
404 }
405 
406 
/*
 * hermon_cq_free()
 *    Context: Can be called only from user or kernel context.
 *
 *    Tears down a completion queue: removes any userland mapping,
 *    reclaims CQC ownership from the hardware (HW2SW_CQ), deregisters and
 *    frees the CQE buffer (including any in-progress resize buffer), and
 *    releases the doorbell record, software handle, CQC entry, and the
 *    internal PD reference taken at allocation time.  Fails with
 *    IBT_CQ_BUSY if work queues are still attached to the CQ.
 */
/* ARGSUSED */
int
hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
{
	hermon_rsrc_t		*cqc, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_cqc_t		cqc_entry;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_cqhdl_t		cq, resize;
	uint32_t		cqnum;
	uint64_t		value;
	uint_t			maxprot;
	int			status;

	/*
	 * Pull all the necessary information from the Hermon Completion Queue
	 * handle.  This is necessary here because the resource for the
	 * CQ handle is going to be freed up as part of this operation.
	 */
	cq	= *cqhdl;
	mutex_enter(&cq->cq_lock);
	cqc	= cq->cq_cqcrsrcp;
	rsrc	= cq->cq_rsrcp;
	pd	= state->hs_pdhdl_internal;
	mr	= cq->cq_mrhdl;
	cqnum	= cq->cq_cqnum;

	resize = cq->cq_resize_hdl;		/* save the handle for later */

	/*
	 * If there are work queues still associated with the CQ, then return
	 * an error.  Otherwise, we will be holding the CQ lock.
	 */
	if (cq->cq_refcnt != 0) {
		mutex_exit(&cq->cq_lock);
		return (IBT_CQ_BUSY);
	}

	/*
	 * If this was a user-mappable CQ, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
	 * We also need to invalidate the CQ tracking information for the
	 * user mapping.
	 */
	if (cq->cq_is_umap) {
		status = hermon_umap_db_find(state->hs_instance, cqnum,
		    MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			HERMON_WARNING(state, "failed to find in database");
			return (ibc_get_ci_failure(0));
		}
		hermon_umap_db_free(umapdb);
		if (cq->cq_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(cq->cq_umap_dhp,
			    state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&cq->cq_lock);
				HERMON_WARNING(state, "failed in CQ memory "
				    "devmap_devmem_remap()");
				return (ibc_get_ci_failure(0));
			}
			cq->cq_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
	 * in-progress events to detect that the CQ corresponding to this
	 * number has been freed.
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);

	mutex_exit(&cq->cq_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))

	/*
	 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
	 * firmware command).  If the ownership transfer fails for any reason,
	 * then it is an indication that something (either in HW or SW) has
	 * gone seriously wrong.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
	    sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to reclaim CQC ownership");
		cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * From here on, we start relinquishing resources - but check to see
	 * if a resize was in progress - if so, we need to relinquish those
	 * resources as well
	 */


	/*
	 * Deregister the memory for the Completion Queue.  If this fails
	 * for any reason, then it is an indication that something (either
	 * in HW or SW) has gone seriously wrong.  So we print a warning
	 * message and return.
	 */
	status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister CQ memory");
		return (ibc_get_ci_failure(0));
	}

	if (resize)	{	/* there was a pointer to a handle */
		mr = resize->cq_mrhdl;	/* reuse the pointer to the region */
		status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag);
		if (status != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister resize CQ "
			    "memory");
			return (ibc_get_ci_failure(0));
		}
	}

	/* Free the memory for the CQ */
	hermon_queue_free(&cq->cq_cqinfo);
	if (resize)	{
		hermon_queue_free(&resize->cq_cqinfo);
		/* and the temporary handle allocated by hermon_cq_resize() */
		kmem_free(resize, sizeof (struct hermon_sw_cq_s));
	}

	/* everything else does not matter for the resize in progress */

	/* Free the dbr */
	hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);

	/* Free the Hermon Completion Queue handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the CQC entry resource */
	hermon_rsrc_free(state, &cqc);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the cqhdl pointer to NULL and return success */
	*cqhdl = NULL;

	return (DDI_SUCCESS);
}
569 
570 
/*
 * hermon_cq_resize()
 *    Context: Can be called only from user or kernel context.
 *
 *    Begins a CQ resize: allocates and registers a new CQE buffer, posts
 *    the RESIZE_CQ firmware command, and stashes the new buffer's state
 *    in a "resize handle" hung off the CQ.  The actual switch-over to the
 *    new buffer happens later, in the polling path, when the special
 *    "resize" CQE (opcode 0x16) is consumed (see hermon_cq_poll()).
 *    Returns IBT_CQ_BUSY if a resize is already pending.
 */
int
hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
    uint_t *actual_size, uint_t sleepflag)
{
	hermon_hw_cqc_t		cqc_entry;
	hermon_cqhdl_t		resize_hdl;
	hermon_qalloc_info_t	new_cqinfo;
	ibt_mr_attr_t		mr_attr;
	hermon_mr_options_t	op;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_hw_cqe_t		*buf;
	uint32_t		new_prod_indx;
	uint_t			log_cq_size;
	int			status, flag;

	/*
	 * NOTE(review): this check is performed without holding cq_lock;
	 * two concurrent resize requests could conceivably both pass it —
	 * confirm callers serialize resize operations on a given CQ.
	 */
	if (cq->cq_resize_hdl != 0) {	/* already in process */
		status = IBT_CQ_BUSY;
		goto cqresize_fail;
	}


	/* Use the internal protection domain (PD) for CQs */
	pd = state->hs_pdhdl_internal;

	/*
	 * Calculate the appropriate size for the new resized completion queue.
	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	req_size = max(req_size, HERMON_CQ_MIN_SIZE);
	log_cq_size = highbit(req_size);

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits)
	 */
	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
		status = IBT_HCA_CQ_EXCEEDED;
		goto cqresize_fail;
	}

	/*
	 * Allocate the memory for newly resized Completion Queue.
	 *
	 * Note: Although we use the common queue allocation routine, we
	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
	 * kernel system memory) for kernel CQs because it would be
	 * inefficient to have CQs located in DDR memory.  This is the same
	 * as we do when we first allocate completion queues primarily
	 * because CQs are read from (by software) more than they are written
	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
	 * user-mappable CQs for a similar reason.)
	 * It is also worth noting that, unlike Hermon QP work queues,
	 * completion queues do not have the same strict alignment
	 * requirements.  It is sufficient for the CQ memory to be both
	 * aligned to and bound to addresses which are a multiple of CQE size.
	 */

	/*
	 * First, alloc the resize_handle.
	 * NOTE(review): KM_SLEEP is used unconditionally here even though
	 * the caller passes a sleepflag — confirm this path can never be
	 * reached from a non-sleepable context.
	 */
	resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);

	new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
	new_cqinfo.qa_alloc_align = PAGESIZE;
	new_cqinfo.qa_bind_align  = PAGESIZE;
	if (cq->cq_is_umap) {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
	} else {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
	}
	status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

	/*
	 * No initialization of the cq is needed - the command will do it
	 */

	/*
	 * Register the memory for the CQ.  The memory for the CQ must
	 * be registered in the Hermon TPT tables.  This gives us the LKey
	 * to specify in the CQ context below.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
	mr_attr.mr_len	 = new_cqinfo.qa_size;
	mr_attr.mr_as	 = NULL;
	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
	op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
	op.mro_bind_override_addr = 0;
	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
	    HERMON_CQ_CMPT);
	if (status != DDI_SUCCESS) {
		hermon_queue_free(&new_cqinfo);
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Now we grab the CQ lock.  Since we will be updating the actual
	 * CQ location and the producer/consumer indexes, we should hold
	 * the lock.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN below, though, because we are
	 * holding the "cq_lock" and if we got raised to interrupt level
	 * by priority inversion, we would not want to block in this routine
	 * waiting for success.
	 */
	mutex_enter(&cq->cq_lock);

	/*
	 * Fill in the CQC entry.  For the resize operation this is the
	 * final step before attempting the resize operation on the CQC entry.
	 * We use all of the information collected/calculated above to fill
	 * in the requisite portions of the CQC.
	 */
	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
	cqc_entry.log_cq_sz	= log_cq_size;
	cqc_entry.pg_offs	= new_cqinfo.qa_pgoffs >> 5;
	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;

	/*
	 * Write the CQC entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
	 * command).  Note: In general, this operation shouldn't fail.  But
	 * if it does, we have to undo everything we've done above before
	 * returning error.  Also note that the status returned may indicate
	 * the code to return to the IBTF.
	 */
	status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
	    &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		/* Resize attempt has failed, drop CQ lock and cleanup */
		mutex_exit(&cq->cq_lock);
		if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister CQ memory");
		}
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		hermon_queue_free(&new_cqinfo);
		if (status == HERMON_CMD_BAD_SIZE) {
			return (IBT_CQ_SZ_INSUFFICIENT);
		} else {
			cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * For Hermon, we've alloc'd another handle structure and save off the
	 * important things in it. Then, in polling we check to see if there's
	 * a "resizing handle" and if so we look for the "special CQE", opcode
	 * 0x16, that indicates the transition to the new buffer.
	 *
	 * At that point, we'll adjust everything - including dereg and
	 * freeing of the original buffer, updating all the necessary fields
	 * in the cq_hdl, and setting up for the next cqe polling
	 */

	resize_hdl->cq_buf 	= buf;
	resize_hdl->cq_bufsz	= (1 << log_cq_size);
	resize_hdl->cq_mrhdl	= mr;
	resize_hdl->cq_log_cqsz = log_cq_size;

	bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
	    sizeof (struct hermon_qalloc_info_s));

	/* now, save the address in the cq_handle */
	cq->cq_resize_hdl = resize_hdl;

	/*
	 * Drop the CQ lock now.
	 */

	mutex_exit(&cq->cq_lock);
	/*
	 * Fill in the return arguments (if necessary).  This includes the
	 * real new completion queue size.
	 */
	if (actual_size != NULL) {
		*actual_size = (1 << log_cq_size) - 1;
	}

	return (DDI_SUCCESS);

cqresize_fail:
	return (status);
}
781 
782 
783 /*
784  * hermon_cq_modify()
785  *    Context: Can be called base context.
786  */
787 /* ARGSUSED */
788 int
789 hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
790     uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
791 {
792 	int	status;
793 	hermon_hw_cqc_t		cqc_entry;
794 
795 	mutex_enter(&cq->cq_lock);
796 	if (count != cq->cq_intmod_count ||
797 	    usec != cq->cq_intmod_usec) {
798 		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
799 		cqc_entry.cq_max_cnt = count;
800 		cqc_entry.cq_period = usec;
801 		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
802 		    cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
803 		if (status != HERMON_CMD_SUCCESS) {
804 			mutex_exit(&cq->cq_lock);
805 			cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
806 			    "command failed: %08x\n", status);
807 			if (status == HERMON_CMD_INVALID_STATUS) {
808 				hermon_fm_ereport(state, HCA_SYS_ERR,
809 				    HCA_ERR_SRV_LOST);
810 			}
811 			return (ibc_get_ci_failure(0));
812 		}
813 		cq->cq_intmod_count = count;
814 		cq->cq_intmod_usec = usec;
815 	}
816 	if (hid && (hid - 1 != cq->cq_eqnum)) {
817 		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
818 		cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
819 		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
820 		    cq->cq_cqnum, MODIFY_EQN, sleepflag);
821 		if (status != HERMON_CMD_SUCCESS) {
822 			mutex_exit(&cq->cq_lock);
823 			cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
824 			    "%08x\n", status);
825 			if (status == HERMON_CMD_INVALID_STATUS) {
826 				hermon_fm_ereport(state, HCA_SYS_ERR,
827 				    HCA_ERR_SRV_LOST);
828 			}
829 			return (ibc_get_ci_failure(0));
830 		}
831 		cq->cq_eqnum = hid - 1;
832 	}
833 	mutex_exit(&cq->cq_lock);
834 	return (DDI_SUCCESS);
835 }
836 
837 /*
838  * hermon_cq_notify()
839  *    Context: Can be called from interrupt or base context.
840  */
841 int
842 hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
843     ibt_cq_notify_flags_t flags)
844 {
845 	uint_t	cmd;
846 	ibt_status_t status;
847 
848 	/* Validate IBT flags and call doorbell routine. */
849 	if (flags == IBT_NEXT_COMPLETION) {
850 		cmd = HERMON_CQDB_NOTIFY_CQ;
851 	} else if (flags == IBT_NEXT_SOLICITED) {
852 		cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
853 	} else {
854 		return (IBT_CQ_NOTIFY_TYPE_INVALID);
855 	}
856 
857 	status = hermon_cq_arm_doorbell(state, cq, cmd);
858 	return (status);
859 }
860 
861 
862 /*
863  * hermon_cq_poll()
864  *    Context: Can be called from interrupt or base context.
865  */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	hermon_hw_cqe_t	*cqe;
	uint_t		opcode;
	uint32_t	cons_indx, wrap_around_mask, shift, mask;
	uint32_t	polled_cnt, spec_op = 0;
	int		status;

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the user.
	 * If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/* Get the consumer index and cache the CQ size parameters */
	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Hermon completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as the CQEs are owned by SW, process
	 * each entry by calling hermon_cq_cqe_consume() and updating the CQ
	 * consumer index.  Note:  We only update the consumer index if
	 * hermon_cq_cqe_consume() returns HERMON_CQ_SYNC_AND_DB.  Otherwise,
	 * it indicates that we are going to "recycle" the CQE (probably
	 * because it is a error CQE and corresponds to more than one
	 * completion).
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
		if (cq->cq_resize_hdl != 0) {	/* in midst of resize */
			/* peek at the opcode */
			opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
			if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
				/*
				 * Special CQE marking the crossover point
				 * into the resized buffer; swap in the new
				 * buffer state before continuing.
				 */
				hermon_cq_resize_helper(state, cq);

				/* Increment the consumer index */
				cons_indx = (cons_indx + 1);
				spec_op = 1; /* plus one for the limiting CQE */

				/*
				 * Recompute the wrap mask for the resized
				 * buffer.  NOTE(review): "shift" and "mask"
				 * cached above are not refreshed here even
				 * though cq_log_cqsz/cq_bufsz changed in the
				 * resize -- confirm the ownership test is
				 * still valid after a resize.
				 */
				wrap_around_mask = (cq->cq_bufsz - 1);

				/* Update the pointer to the next CQ entry */
				cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

				continue;
			}
		}	/* in resizing CQ */

		/*
		 * either resizing and not the special opcode, or
		 * not resizing at all
		 */
		hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

		/* Increment the consumer index */
		cons_indx = (cons_indx + 1);

		/* Update the pointer to the next CQ entry */
		cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled of the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		hermon_cq_update_ci_doorbell(cq);

	} else if (polled_cnt == 0) {
		if (spec_op != 0) {
			/* if we got the special opcode, update the consindx */
			cq->cq_consindx = cons_indx;
			hermon_cq_update_ci_doorbell(cq);
		}
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Hermon interrupt service routine.  This step is necessary here
	 * because we might be in a polled I/O mode and without the call to
	 * hermon_isr() - and its subsequent calls to poll and rearm each
	 * event queue - we might overflow our EQs and render the system
	 * unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) hermon_isr((caddr_t)state, (caddr_t)NULL);
	}
	return (status);
}
1000 
1001 /*
1002  *	cmd_sn must be initialized to 1 to enable proper reenabling
1003  *	by hermon_arm_cq_dbr_update().
1004  */
1005 static void
1006 hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
1007 {
1008 	uint32_t *target;
1009 
1010 	target = (uint32_t *)cq_arm_dbr + 1;
1011 	*target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
1012 }
1013 
1014 
1015 /*
1016  *	User cmd_sn needs help from this kernel function to know
1017  *	when it should be incremented (modulo 4).  We do an atomic
1018  *	update of the arm_cq dbr to communicate this fact.  We retry
1019  *	in the case that user library is racing with us.  We zero
1020  *	out the cmd field so that the user library can use the cmd
1021  *	field to track the last command it issued (solicited verses any).
1022  */
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t tmp, cmp, new;
	uint32_t old_cmd_sn, new_cmd_sn;
	uint32_t *target;
	int retries = 0;

	/* The arm word is the second 32-bit word of the doorbell record */
	target = (uint32_t *)cq_arm_dbr + 1;
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* to host byte order (htonl == ntohl) */
	/* Extract the 2-bit cmd_sn field and advance it modulo 4 */
	old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
	    (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/*
	 * The 0x37 mask appears to clear both the cmd field (0x7 at
	 * CMD_SHIFT) and the old cmd_sn bits (0x30 at CMD_SHIFT, i.e. 0x3
	 * at CMDSN_SHIFT, assuming CMDSN_SHIFT == CMD_SHIFT + 4 -- TODO
	 * confirm against hermon.h) before OR-ing in the new cmd_sn.
	 * Zeroing cmd lets userland track the last command it issued.
	 */
	new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
	tmp = atomic_cas_32(target, cmp, new);
	if (tmp != cmp) {	/* cas failed, so need to retry */
		drv_usecwait(retries & 0xff);   /* avoid race */
		if (++retries > 100000) {
			/* warn (throttled) but keep trying forever */
			cmn_err(CE_CONT, "cas failed in hermon\n");
			retries = 0;
		}
		goto retry;
	}
}
1049 
1050 
1051 /*
1052  * hermon_cq_handler()
1053  *    Context: Only called from interrupt context
1054  */
1055 /* ARGSUSED */
1056 int
1057 hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1058     hermon_hw_eqe_t *eqe)
1059 {
1060 	hermon_cqhdl_t		cq;
1061 	uint_t			cqnum;
1062 
1063 	/* Get the CQ handle from CQ number in event descriptor */
1064 	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1065 	cq = hermon_cqhdl_from_cqnum(state, cqnum);
1066 
1067 	/*
1068 	 * If the CQ handle is NULL, this is probably an indication
1069 	 * that the CQ has been freed already.  In which case, we
1070 	 * should not deliver this event.
1071 	 *
1072 	 * We also check that the CQ number in the handle is the
1073 	 * same as the CQ number in the event queue entry.  This
1074 	 * extra check allows us to handle the case where a CQ was
1075 	 * freed and then allocated again in the time it took to
1076 	 * handle the event queue processing.  By constantly incrementing
1077 	 * the non-constrained portion of the CQ number every time
1078 	 * a new CQ is allocated, we mitigate (somewhat) the chance
1079 	 * that a stale event could be passed to the client's CQ
1080 	 * handler.
1081 	 *
1082 	 * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
1083 	 * means that we've have either received this event before we
1084 	 * finished attaching to the IBTF or we've received it while we
1085 	 * are in the process of detaching.
1086 	 */
1087 	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1088 	    (state->hs_ibtfpriv != NULL)) {
1089 		hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
1090 		HERMON_DO_IBTF_CQ_CALLB(state, cq);
1091 	}
1092 
1093 	return (DDI_SUCCESS);
1094 }
1095 
1096 
1097 /*
1098  * hermon_cq_err_handler()
1099  *    Context: Only called from interrupt context
1100  */
1101 /* ARGSUSED */
1102 int
1103 hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1104     hermon_hw_eqe_t *eqe)
1105 {
1106 	hermon_cqhdl_t		cq;
1107 	uint_t			cqnum;
1108 	ibc_async_event_t	event;
1109 	ibt_async_code_t	type;
1110 
1111 	HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
1112 	/* Get the CQ handle from CQ number in event descriptor */
1113 	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1114 	cq = hermon_cqhdl_from_cqnum(state, cqnum);
1115 
1116 	/*
1117 	 * If the CQ handle is NULL, this is probably an indication
1118 	 * that the CQ has been freed already.  In which case, we
1119 	 * should not deliver this event.
1120 	 *
1121 	 * We also check that the CQ number in the handle is the
1122 	 * same as the CQ number in the event queue entry.  This
1123 	 * extra check allows us to handle the case where a CQ was
1124 	 * freed and then allocated again in the time it took to
1125 	 * handle the event queue processing.  By constantly incrementing
1126 	 * the non-constrained portion of the CQ number every time
1127 	 * a new CQ is allocated, we mitigate (somewhat) the chance
1128 	 * that a stale event could be passed to the client's CQ
1129 	 * handler.
1130 	 *
1131 	 * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
1132 	 * means that we've have either received this event before we
1133 	 * finished attaching to the IBTF or we've received it while we
1134 	 * are in the process of detaching.
1135 	 */
1136 	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1137 	    (state->hs_ibtfpriv != NULL)) {
1138 		event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
1139 		type		= IBT_ERROR_CQ;
1140 		HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
1141 	}
1142 
1143 	return (DDI_SUCCESS);
1144 }
1145 
1146 
1147 /*
1148  * hermon_cq_refcnt_inc()
1149  *    Context: Can be called from interrupt or base context.
1150  */
1151 int
1152 hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
1153 {
1154 	/*
1155 	 * Increment the completion queue's reference count.  Note: In order
1156 	 * to ensure compliance with IBA C11-15, we must ensure that a given
1157 	 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1158 	 * This is accomplished here by keeping track of how the referenced
1159 	 * CQ is being used.
1160 	 */
1161 	mutex_enter(&cq->cq_lock);
1162 	if (cq->cq_refcnt == 0) {
1163 		cq->cq_is_special = is_special;
1164 	} else {
1165 		if (cq->cq_is_special != is_special) {
1166 			mutex_exit(&cq->cq_lock);
1167 			return (DDI_FAILURE);
1168 		}
1169 	}
1170 	cq->cq_refcnt++;
1171 	mutex_exit(&cq->cq_lock);
1172 	return (DDI_SUCCESS);
1173 }
1174 
1175 
1176 /*
1177  * hermon_cq_refcnt_dec()
1178  *    Context: Can be called from interrupt or base context.
1179  */
1180 void
1181 hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
1182 {
1183 	/* Decrement the completion queue's reference count */
1184 	mutex_enter(&cq->cq_lock);
1185 	cq->cq_refcnt--;
1186 	mutex_exit(&cq->cq_lock);
1187 }
1188 
1189 
1190 /*
1191  * hermon_cq_arm_doorbell()
1192  *    Context: Can be called from interrupt or base context.
1193  */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
	uint32_t	cq_num;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;
	ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	cq_num = cq->cq_cqnum;
	/* arm/cmd_sn word is the second 32-bit word of the doorbell record */
	target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

	/* the FMA retry loop starts for Hermon doorbell register. */
	hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* to host byte order (htonl == ntohl) */
	/* Extract the current command and command sequence number fields */
	old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
	cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
		/*
		 * Arm for any completion.  Only publish the new state and
		 * ring the UAR doorbell if the CQ is not already armed for
		 * any completion; otherwise this request is a no-op.
		 */
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			/* CAS publishes the record; retry if we raced */
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	} else {
		ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
		/*
		 * Arm for solicited completions only.  An existing
		 * NOTIFY_CQ arm is strictly stronger, so skip the ring in
		 * that case as well as when already armed for solicited.
		 */
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
		    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	}

	/* the FMA retry loop ends. */
	hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);

	return (IBT_SUCCESS);

pio_error:
	/* PIO access failed; report service loss and fail the arm request */
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
1254 
1255 
1256 /*
1257  * hermon_cqhdl_from_cqnum()
1258  *    Context: Can be called from interrupt or base context.
1259  *
1260  *    This routine is important because changing the unconstrained
1261  *    portion of the CQ number is critical to the detection of a
1262  *    potential race condition in the CQ handler code (i.e. the case
1263  *    where a CQ is freed and alloc'd again before an event for the
1264  *    "old" CQ can be handled).
1265  *
1266  *    While this is not a perfect solution (not sure that one exists)
1267  *    it does help to mitigate the chance that this race condition will
1268  *    cause us to deliver a "stale" event to the new CQ owner.  Note:
1269  *    this solution does not scale well because the number of constrained
1270  *    bits increases (and, hence, the number of unconstrained bits
1271  *    decreases) as the number of supported CQs grows.  For small and
1272  *    intermediate values, it should hopefully provide sufficient
1273  *    protection.
1274  */
1275 hermon_cqhdl_t
1276 hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
1277 {
1278 	uint_t	cqindx, cqmask;
1279 
1280 	/* Calculate the CQ table index from the cqnum */
1281 	cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
1282 	cqindx = cqnum & cqmask;
1283 	return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
1284 }
1285 
1286 /*
1287  * hermon_cq_cqe_consume()
1288  *    Context: Can be called from interrupt or base context.
1289  */
static void
hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		opcode, qpnum, qp1_indx;
	ibt_wc_flags_t	flags;
	ibt_wrc_opcode_t type;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call hermon_cq_errcqe_consume() and return
	 * whatever status it returns.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
	    (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
		hermon_cq_errcqe_consume(state, cq, cqe, wc);
		return;
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running))
	if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {

		/* Send CQE: map the hardware opcode to the IBT work type */
		switch (opcode) {
		case HERMON_CQE_SND_RDMAWR_IMM:
		case HERMON_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case HERMON_CQE_SND_SEND_INV:
		case HERMON_CQE_SND_SEND_IMM:
		case HERMON_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case HERMON_CQE_SND_LSO:
			type = IBT_WRC_SEND_LSO;
			break;

		case HERMON_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case HERMON_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case HERMON_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case HERMON_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		case HERMON_CQE_SND_FRWR:
			type = IBT_WRC_FAST_REG_PMR;
			break;

		case HERMON_CQE_SND_LCL_INV:
			type = IBT_WRC_LOCAL_INVALIDATE;
			break;

		default:
			/* Unrecognized opcode: report a local QP op error */
			HERMON_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	} else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
	    hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
		/*
		 * Receive completion on an FCoIB FEXCH QP: fill in the
		 * FC-specific work completion fields from the CQE.
		 */
		type = IBT_WRC_RECV;
		if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
			flags |= IBT_WC_DIF_ERROR;
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
		wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
		wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
		wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
		wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
		wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
		    IBT_WC_DETAIL_FC_MATCH_MASK;
		wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
	} else {
		/*
		 * Parse the remaining contents of the CQE into the work
		 * completion.  This means filling in SL, QP number, SLID,
		 * immediate data, etc.
		 *
		 * Note: Not all of these fields are valid in a given
		 * completion.  Many of them depend on the actual type of
		 * completion.  So we fill in all of the fields and leave
		 * it up to the IBTF and consumer to sort out which are
		 * valid based on their context.
		 */
		wc->wc_sl	  = HERMON_CQE_SL_GET(cq, cqe);
		wc->wc_qpn	  = HERMON_CQE_DQPN_GET(cq, cqe);
		wc->wc_slid	  = HERMON_CQE_DLID_GET(cq, cqe);
		wc->wc_immed_data =
		    HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
		wc->wc_pkey_ix	  = (wc->wc_immed_data &
		    ((1 << state->hs_queryport.log_max_pkey) - 1));
		/*
		 * Fill in "bytes transferred" as appropriate.  Also,
		 * if necessary, fill in the "path bits" field.
		 */
		wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);

		/*
		 * Check for GRH, update the flags, then fill in "wc_flags"
		 * field in the work completion
		 */
		if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
			flags |= IBT_WC_GRH_PRESENT;
		}

		/* Receive CQE */
		switch (opcode) {
		case HERMON_CQE_RCV_SEND_IMM:
			/*
			 * Note:  According to the PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum	 = HERMON_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->hs_spec_qp1->hr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */

		case HERMON_CQE_RCV_SEND:
			type = IBT_WRC_RECV;
			/* If HW validated the IP checksum, report it OK */
			if (HERMON_CQE_IS_IPOK(cq, cqe)) {
				wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
				flags |= IBT_WC_CKSUM_OK;
				wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
				    HERMON_CQE_IPOIB_STATUS(cq, cqe);
			}
			break;

		case HERMON_CQE_RCV_SEND_INV:
			type = IBT_WRC_RECV;
			flags |= IBT_WC_RKEY_INVALIDATED;
			wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
			break;

		case HERMON_CQE_RCV_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:
			/* Unrecognized opcode: report a local QP op error */
			HERMON_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	}
	/* Successful completion: record type, flags and success status */
	wc->wc_type = type;
	wc->wc_flags = flags;
	wc->wc_status = IBT_WC_SUCCESS;
}
1470 
1471 /*
1472  * hermon_cq_errcqe_consume()
1473  *    Context: Can be called from interrupt or base context.
1474  */
static void
hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint32_t		imm_eth_pkey_cred;
	uint_t			status;
	ibt_wc_status_t		ibt_status;

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status/syndrome here.
	 */
	imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
	status = imm_eth_pkey_cred;
	/* Log every error except flushes (expected during QP teardown) */
	if (status != HERMON_CQE_WR_FLUSHED_ERR)
		IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x  "
		    "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
		    HERMON_CQE_WQECNTR_GET(cq, cqe), status,
		    HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
	/* Map the hardware syndrome onto the corresponding IBT status */
	switch (status) {
	case HERMON_CQE_LOC_LEN_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCLEN);
		ibt_status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case HERMON_CQE_LOC_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case HERMON_CQE_LOC_PROT_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCPROT);
		ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
		IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
		/* Optional debugging aid: panic on protection errors */
		if (hermon_should_panic) {
			cmn_err(CE_PANIC, "Hermon intentional PANIC - "
			    "Local Protection Error\n");
		}
		break;

	case HERMON_CQE_WR_FLUSHED_ERR:
		ibt_status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case HERMON_CQE_MW_BIND_ERR:
		HERMON_WARNING(state, HERMON_FMA_MWBIND);
		ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case HERMON_CQE_BAD_RESPONSE_ERR:
		HERMON_WARNING(state, HERMON_FMA_RESP);
		ibt_status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case HERMON_CQE_LOCAL_ACCESS_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCACC);
		ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_INV_REQ_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMREQ);
		ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case HERMON_CQE_REM_ACC_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMACC);
		ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMOP);
		ibt_status = IBT_WC_REMOTE_OP_ERR;
		break;

	case HERMON_CQE_TRANS_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
		ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case HERMON_CQE_RNRNAK_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_RNRCNT);
		ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Hermon driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
	 *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
	 *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
	 *    case HERMON_CQE_INV_EEC_NUM_ERR:
	 *    case HERMON_CQE_INV_EEC_STATE_ERR:
	 *    case HERMON_CQE_LOC_EEC_ERR:
	 */

	default:
		HERMON_WARNING(state, "unknown error CQE status");
		HERMON_FMANOTE(state, HERMON_FMA_UNKN);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;
	}

	wc->wc_status = ibt_status;
}
1586 
1587 
1588 /*
1589  * hermon_cq_resize_helper()
1590  *    Context: Can be called only from user or kernel context.
1591  */
1592 void
1593 hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
1594 {
1595 	hermon_cqhdl_t 		resize_hdl;
1596 	int			status;
1597 
1598 	/*
1599 	 * we're here because we found the special cqe opcode, so we have
1600 	 * to update the cq_handle, release the old resources, clear the
1601 	 * flag in the cq_hdl, and release the resize_hdl.  When we return
1602 	 * above, it will take care of the rest
1603 	 */
1604 	ASSERT(MUTEX_HELD(&cq->cq_lock));
1605 
1606 	resize_hdl = cq->cq_resize_hdl;
1607 
1608 	/*
1609 	 * Deregister the memory for the old Completion Queue.  Note: We
1610 	 * really can't return error here because we have no good way to
1611 	 * cleanup.  Plus, the deregistration really shouldn't ever happen.
1612 	 * So, if it does, it is an indication that something has gone
1613 	 * seriously wrong.  So we print a warning message and return error
1614 	 * (knowing, of course, that the "old" CQ memory will be leaked)
1615 	 */
1616 	status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
1617 	    HERMON_SLEEP);
1618 	if (status != DDI_SUCCESS) {
1619 		HERMON_WARNING(state, "failed to deregister old CQ memory");
1620 	}
1621 
1622 	/* Next, free the memory from the old CQ buffer */
1623 	hermon_queue_free(&cq->cq_cqinfo);
1624 
1625 	/* now we can update the cq_hdl with the new things saved */
1626 
1627 	cq->cq_buf   = resize_hdl->cq_buf;
1628 	cq->cq_mrhdl = resize_hdl->cq_mrhdl;
1629 	cq->cq_bufsz = resize_hdl->cq_bufsz;
1630 	cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
1631 	cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
1632 	cq->cq_resize_hdl = 0;
1633 	bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
1634 	    sizeof (struct hermon_qalloc_info_s));
1635 
1636 	/* finally, release the resizing handle */
1637 	kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
1638 }
1639 
1640 
1641 /*
1642  * hermon_cq_entries_flush()
1643  * Context: Can be called from interrupt or base context.
1644  */
/* ARGSUSED */
void
hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_cqhdl_t		cq;
	hermon_hw_cqe_t		*cqe, *next_cqe;
	hermon_srqhdl_t		srq;
	hermon_workq_hdr_t	*wq;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, qpnum;
	uint32_t		shift, mask;
	int			outstanding_cqes;

	/*
	 * Remove all CQEs belonging to this QP from its CQ(s), compacting
	 * the remaining (other QPs') CQEs toward the tail.  The recv CQ is
	 * processed first, then (if different) the send CQ.
	 *
	 * NOTE(review): no lock is taken here -- presumably the caller
	 * serializes access to the CQ; confirm against the call sites.
	 */
	qpnum = qp->qp_qpnum;
	if ((srq = qp->qp_srqhdl) != NULL)
		wq = qp->qp_srqhdl->srq_wq_wqhdr;
	else
		wq = NULL;
	cq = qp->qp_rq_cqhdl;

	if (cq == NULL) {
		cq = qp->qp_sq_cqhdl;
	}

do_send_cq:	/* loop back to here if send_cq is not the same as recv_cq */
	if (cq == NULL)
		return;

	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;
	/* works because all Hermon CQ sizes are powers of 2 */
	wrap_around_mask = mask - 1;

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx++;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
	 */
	check_indx = new_indx = (tail_cons_indx - 1);

	/*
	 * Walk backward from the tail: entries for this QP are consumed
	 * (and, for SRQ recv completions, their WQEs returned to the SRQ
	 * free list); entries for other QPs are slid toward the tail.
	 */
	while (--outstanding_cqes >= 0) {
		cqe = &cq->cq_buf[check_indx & wrap_around_mask];

		/*
		 * If the QP number is the same in the CQE as the QP, then
		 * we must "consume" it.  If it is for an SRQ wqe, then we
		 * also must free the wqe back onto the free list of the SRQ.
		 */
		if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
			if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
			    HERMON_COMPLETION_RECV)) {
				uint64_t *desc;
				int indx;

				/* Add wqe back to SRQ free list */
				indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
				    wq->wq_mask;
				desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
				((uint16_t *)desc)[1] = htons(indx);
				wq->wq_tail = indx;
			}
		} else {	/* CQEs for other QPNs need to remain */
			if (check_indx != new_indx) {
				next_cqe =
				    &cq->cq_buf[new_indx & wrap_around_mask];
				/* Copy the CQE into the "next_cqe" pointer. */
				bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
			}
			new_indx--;	/* move index to next CQE to fill */
		}
		check_indx--;		/* move index to next CQE to check */
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1);

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if (cq->cq_consindx != cons_indx) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;

		hermon_cq_update_ci_doorbell(cq);

	}
	/* If the send CQ differs from the one just flushed, flush it too */
	if (cq != qp->qp_sq_cqhdl) {
		cq = qp->qp_sq_cqhdl;
		goto do_send_cq;
	}
}
1770 
1771 /*
1772  * hermon_get_cq_sched_list()
1773  *    Context: Only called from attach() path context
1774  *
1775  * Read properties, creating entries in hs_cq_sched_list with
1776  * information about the requested "expected" and "minimum"
1777  * number of MSI-X interrupt vectors per list entry.
1778  */
1779 static int
1780 hermon_get_cq_sched_list(hermon_state_t *state)
1781 {
1782 	char **listp, ulp_prop[HERMON_CQH_MAX + 4];
1783 	uint_t nlist, i, j, ndata;
1784 	int *data;
1785 	size_t len;
1786 	hermon_cq_sched_t *cq_schedp;
1787 
1788 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
1789 	    DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
1790 	    DDI_PROP_SUCCESS)
1791 		return (0);
1792 
1793 	state->hs_cq_sched_array_size = nlist;
1794 	state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
1795 	    sizeof (hermon_cq_sched_t), KM_SLEEP);
1796 	for (i = 0; i < nlist; i++) {
1797 		if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
1798 			cmn_err(CE_CONT, "'cqh' property name too long\n");
1799 			goto game_over;
1800 		}
1801 		for (j = 0; j < i; j++) {
1802 			if (strcmp(listp[j], listp[i]) == 0) {
1803 				cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
1804 				goto game_over;
1805 			}
1806 		}
1807 		(void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
1808 		ulp_prop[0] = 'c';
1809 		ulp_prop[1] = 'q';
1810 		ulp_prop[2] = 'h';
1811 		ulp_prop[3] = '-';
1812 		(void) strncpy(ulp_prop + 4, listp[i], len + 1);
1813 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1814 		    DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
1815 		    DDI_PROP_SUCCESS) {
1816 			cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
1817 			goto game_over;
1818 		}
1819 		if (ndata != 2) {
1820 			cmn_err(CE_CONT, "property '%s' does not "
1821 			    "have 2 integers\n", ulp_prop);
1822 			goto game_over_free_data;
1823 		}
1824 		cq_schedp[i].cqs_desired = data[0];
1825 		cq_schedp[i].cqs_minimum = data[1];
1826 		cq_schedp[i].cqs_refcnt = 0;
1827 		ddi_prop_free(data);
1828 	}
1829 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1830 	    DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
1831 	    DDI_PROP_SUCCESS) {
1832 		cmn_err(CE_CONT, "property 'cqh-default' not found\n");
1833 		goto game_over;
1834 	}
1835 	if (ndata != 2) {
1836 		cmn_err(CE_CONT, "property 'cqh-default' does not "
1837 		    "have 2 integers\n");
1838 		goto game_over_free_data;
1839 	}
1840 	cq_schedp = &state->hs_cq_sched_default;
1841 	cq_schedp->cqs_desired = data[0];
1842 	cq_schedp->cqs_minimum = data[1];
1843 	cq_schedp->cqs_refcnt = 0;
1844 	ddi_prop_free(data);
1845 	ddi_prop_free(listp);
1846 	return (1);		/* game on */
1847 
1848 game_over_free_data:
1849 	ddi_prop_free(data);
1850 game_over:
1851 	cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
1852 	cmn_err(CE_CONT, "completion handler groups not being used\n");
1853 	kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
1854 	state->hs_cq_sched_array_size = 0;
1855 	ddi_prop_free(listp);
1856 	return (0);
1857 }
1858 
1859 /*
1860  * hermon_cq_sched_init()
1861  *    Context: Only called from attach() path context
1862  *
1863  * Read the hermon.conf properties looking for cq_sched info,
1864  * creating reserved pools of MSI-X interrupt ranges for the
1865  * specified ULPs.
1866  */
1867 int
1868 hermon_cq_sched_init(hermon_state_t *state)
1869 {
1870 	hermon_cq_sched_t *cq_schedp, *defp;
1871 	int i, desired, array_size;
1872 
1873 	mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
1874 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
1875 
1876 	mutex_enter(&state->hs_cq_sched_lock);
1877 	state->hs_cq_sched_array = NULL;
1878 
1879 	/* initialize cq_sched_default */
1880 	defp = &state->hs_cq_sched_default;
1881 	defp->cqs_start_hid = 1;
1882 	defp->cqs_len = state->hs_intrmsi_allocd;
1883 	defp->cqs_next_alloc = defp->cqs_len - 1;
1884 	(void) strncpy(defp->cqs_name, "default", 8);
1885 
1886 	/* Read properties to determine which ULPs use cq_sched */
1887 	if (hermon_get_cq_sched_list(state) == 0)
1888 		goto done;
1889 
1890 	/* Determine if we have enough vectors, or if we have to scale down */
1891 	desired = defp->cqs_desired;	/* default desired (from hermon.conf) */
1892 	if (desired <= 0)
1893 		goto done;		/* all interrupts in the default pool */
1894 	cq_schedp = state->hs_cq_sched_array;
1895 	array_size = state->hs_cq_sched_array_size;
1896 	for (i = 0; i < array_size; i++)
1897 		desired += cq_schedp[i].cqs_desired;
1898 	if (desired > state->hs_intrmsi_allocd) {
1899 		cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
1900 		    "the #interrupts desired (%d)\n",
1901 		    state->hs_intrmsi_allocd, desired);
1902 		cmn_err(CE_CONT, "completion handler groups not being used\n");
1903 		goto done;		/* all interrupts in the default pool */
1904 	}
1905 	/* Game on.  For each cq_sched group, reserve the MSI-X range */
1906 	for (i = 0; i < array_size; i++) {
1907 		desired = cq_schedp[i].cqs_desired;
1908 		cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
1909 		cq_schedp[i].cqs_len = desired;
1910 		cq_schedp[i].cqs_next_alloc = desired - 1;
1911 		defp->cqs_len -= desired;
1912 		defp->cqs_start_hid += desired;
1913 	}
1914 	/* reset default's start allocation seed */
1915 	state->hs_cq_sched_default.cqs_next_alloc =
1916 	    state->hs_cq_sched_default.cqs_len - 1;
1917 
1918 done:
1919 	mutex_exit(&state->hs_cq_sched_lock);
1920 	return (IBT_SUCCESS);
1921 }
1922 
1923 void
1924 hermon_cq_sched_fini(hermon_state_t *state)
1925 {
1926 	mutex_enter(&state->hs_cq_sched_lock);
1927 	if (state->hs_cq_sched_array_size) {
1928 		kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
1929 		    state->hs_cq_sched_array_size);
1930 		state->hs_cq_sched_array_size = 0;
1931 		state->hs_cq_sched_array = NULL;
1932 	}
1933 	mutex_exit(&state->hs_cq_sched_lock);
1934 	mutex_destroy(&state->hs_cq_sched_lock);
1935 }
1936 
1937 int
1938 hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
1939     hermon_cq_sched_t **cq_sched_pp)
1940 {
1941 	hermon_cq_sched_t	*cq_schedp;
1942 	int			i;
1943 	char			*name;
1944 	ibt_cq_sched_flags_t	flags;
1945 
1946 	flags = attr->cqs_flags;
1947 	if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
1948 		*cq_sched_pp = NULL;
1949 		return (IBT_SUCCESS);
1950 	}
1951 	name = attr->cqs_pool_name;
1952 
1953 	mutex_enter(&state->hs_cq_sched_lock);
1954 	cq_schedp = state->hs_cq_sched_array;
1955 	for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
1956 		if (strcmp(name, cq_schedp->cqs_name) == 0) {
1957 			if (cq_schedp->cqs_len != 0)
1958 				cq_schedp->cqs_refcnt++;
1959 			break;	/* found it */
1960 		}
1961 	}
1962 	if ((i == state->hs_cq_sched_array_size) ||	/* not found, or */
1963 	    (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
1964 		cq_schedp = NULL;
1965 	mutex_exit(&state->hs_cq_sched_lock);
1966 
1967 	*cq_sched_pp = cq_schedp;	/* set to valid hdl, or to NULL */
1968 	if ((cq_schedp == NULL) &&
1969 	    (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
1970 		return (IBT_CQ_NO_SCHED_GROUP);
1971 	else
1972 		return (IBT_SUCCESS);
1973 }
1974 
1975 int
1976 hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
1977 {
1978 	if (cq_schedp != NULL) {
1979 		/* Just decrement refcnt */
1980 		mutex_enter(&state->hs_cq_sched_lock);
1981 		if (cq_schedp->cqs_refcnt == 0)
1982 			HERMON_WARNING(state, "cq_sched free underflow\n");
1983 		else
1984 			cq_schedp->cqs_refcnt--;
1985 		mutex_exit(&state->hs_cq_sched_lock);
1986 	}
1987 	return (IBT_SUCCESS);
1988 }
1989