xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/hermon/hermon_cq.c (revision fcdb3229a31dd4ff700c69238814e326aad49098)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * hermon_cq.c
28  *    Hermon Completion Queue Processing Routines
29  *
30  *    Implements all the routines necessary for allocating, freeing, resizing,
31  *    and handling the completion type events that the Hermon hardware can
32  *    generate.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/sysmacros.h>
42 
43 #include <sys/ib/adapters/hermon/hermon.h>
44 
/*
 * When nonzero, selected error paths panic the system instead of merely
 * warning -- a debugging aid only; never set in production.
 */
45 int hermon_should_panic = 0;	/* debugging aid */
46 
/*
 * Ring the CQ "SET_CI" doorbell record: publish the current software
 * consumer index (truncated to the low 24 bits, as required by the
 * hardware doorbell-record format) so the HCA knows which CQEs have
 * been polled and may be reused.
 */
47 #define	hermon_cq_update_ci_doorbell(cq)				\
48 	/* Build the doorbell record data (low 24 bits only) */		\
49 	HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr,			\
50 	    cq->cq_consindx & 0x00FFFFFF)
51 
/* Forward declarations for helpers local to this file */
52 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
53     uint_t cmd);
54 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
55 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
56     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
57 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
58     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
59 
60 
61 /*
62  * hermon_cq_alloc()
63  *    Context: Can be called only from user or kernel context.
64  */
65 int
hermon_cq_alloc(hermon_state_t * state,ibt_cq_hdl_t ibt_cqhdl,ibt_cq_attr_t * cq_attr,uint_t * actual_size,hermon_cqhdl_t * cqhdl,uint_t sleepflag)66 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
67     ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
68     uint_t sleepflag)
69 {
70 	hermon_rsrc_t		*cqc, *rsrc;
71 	hermon_umap_db_entry_t	*umapdb;
72 	hermon_hw_cqc_t		cqc_entry;
73 	hermon_cqhdl_t		cq;
74 	ibt_mr_attr_t		mr_attr;
75 	hermon_mr_options_t	op;
76 	hermon_pdhdl_t		pd;
77 	hermon_mrhdl_t		mr;
78 	hermon_hw_cqe_t		*buf;
79 	uint64_t		value;
80 	uint32_t		log_cq_size, uarpg;
81 	uint_t			cq_is_umap;
82 	uint32_t		status, flag;
83 	hermon_cq_sched_t	*cq_schedp;
84 
85 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))
86 
87 	/*
88 	 * Determine whether CQ is being allocated for userland access or
89 	 * whether it is being allocated for kernel access.  If the CQ is
90 	 * being allocated for userland access, then lookup the UAR
91 	 * page number for the current process.  Note:  If this is not found
92 	 * (e.g. if the process has not previously open()'d the Hermon driver),
93 	 * then an error is returned.
94 	 */
95 	cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
96 	if (cq_is_umap) {
97 		status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
98 		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
99 		if (status != DDI_SUCCESS) {
100 			status = IBT_INVALID_PARAM;
101 			goto cqalloc_fail;
102 		}
103 		uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
104 	} else {
105 		uarpg = state->hs_kernel_uar_index;
106 	}
107 
108 	/* Use the internal protection domain (PD) for setting up CQs */
109 	pd = state->hs_pdhdl_internal;
110 
111 	/* Increment the reference count on the protection domain (PD) */
112 	hermon_pd_refcnt_inc(pd);
113 
114 	/*
115 	 * Allocate an CQ context entry.  This will be filled in with all
116 	 * the necessary parameters to define the Completion Queue.  And then
117 	 * ownership will be passed to the hardware in the final step
118 	 * below.  If we fail here, we must undo the protection domain
119 	 * reference count.
120 	 */
121 	status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
122 	if (status != DDI_SUCCESS) {
123 		status = IBT_INSUFF_RESOURCE;
124 		goto cqalloc_fail1;
125 	}
126 
127 	/*
128 	 * Allocate the software structure for tracking the completion queue
129 	 * (i.e. the Hermon Completion Queue handle).  If we fail here, we must
130 	 * undo the protection domain reference count and the previous
131 	 * resource allocation.
132 	 */
133 	status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
134 	if (status != DDI_SUCCESS) {
135 		status = IBT_INSUFF_RESOURCE;
136 		goto cqalloc_fail2;
137 	}
138 	cq = (hermon_cqhdl_t)rsrc->hr_addr;
139 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
140 	cq->cq_is_umap = cq_is_umap;
141 	cq->cq_cqnum = cqc->hr_indx;	/* just use index, implicit in Hermon */
142 	cq->cq_intmod_count = 0;
143 	cq->cq_intmod_usec = 0;
144 
145 	/*
146 	 * If this will be a user-mappable CQ, then allocate an entry for
147 	 * the "userland resources database".  This will later be added to
148 	 * the database (after all further CQ operations are successful).
149 	 * If we fail here, we must undo the reference counts and the
150 	 * previous resource allocation.
151 	 */
152 	if (cq->cq_is_umap) {
153 		umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
154 		    MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
155 		if (umapdb == NULL) {
156 			status = IBT_INSUFF_RESOURCE;
157 			goto cqalloc_fail3;
158 		}
159 	}
160 
161 
162 	/*
163 	 * Allocate the doorbell record.  We'll need one for the CQ, handling
164 	 * both consumer index (SET CI) and the CQ state (CQ ARM).
165 	 */
166 
167 	status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
168 	    &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
169 	if (status != DDI_SUCCESS) {
170 		status = IBT_INSUFF_RESOURCE;
171 		goto cqalloc_fail4;
172 	}
173 
174 	/*
175 	 * Calculate the appropriate size for the completion queue.
176 	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
177 	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
178 	 * to round the requested size up to the next highest power-of-2
179 	 */
180 	cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
181 	log_cq_size = highbit(cq_attr->cq_size);
182 
183 	/*
184 	 * Next we verify that the rounded-up size is valid (i.e. consistent
185 	 * with the device limits and/or software-configured limits)
186 	 */
187 	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
188 		status = IBT_HCA_CQ_EXCEEDED;
189 		goto cqalloc_fail4a;
190 	}
191 
192 	/*
193 	 * Allocate the memory for Completion Queue.
194 	 *
195 	 * Note: Although we use the common queue allocation routine, we
196 	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
197 	 * kernel system memory) for kernel CQs because it would be
198 	 * inefficient to have CQs located in DDR memory.  This is primarily
199 	 * because CQs are read from (by software) more than they are written
200 	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
201 	 * user-mappable CQs for a similar reason.)
202 	 * It is also worth noting that, unlike Hermon QP work queues,
203 	 * completion queues do not have the same strict alignment
204 	 * requirements.  It is sufficient for the CQ memory to be both
205 	 * aligned to and bound to addresses which are a multiple of CQE size.
206 	 */
207 	cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
208 
209 	cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
210 	cq->cq_cqinfo.qa_bind_align  = PAGESIZE;
211 	if (cq->cq_is_umap) {
212 		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
213 	} else {
214 		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
215 		hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
216 	}
217 	status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
218 	if (status != DDI_SUCCESS) {
219 		status = IBT_INSUFF_RESOURCE;
220 		goto cqalloc_fail4;
221 	}
222 	buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
223 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
224 
225 	/*
226 	 * The ownership bit of the CQE's is set by the HW during the process
227 	 * of transferrring ownership of the CQ (PRM 09.35c, 14.2.1, note D1
228 	 *
229 	 */
230 
231 	/*
232 	 * Register the memory for the CQ.  The memory for the CQ must
233 	 * be registered in the Hermon TPT tables.  This gives us the LKey
234 	 * to specify in the CQ context below.  Note: If this is a user-
235 	 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
236 	 */
237 	flag = (sleepflag == HERMON_SLEEP) ?  IBT_MR_SLEEP : IBT_MR_NOSLEEP;
238 	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
239 	mr_attr.mr_len	 = cq->cq_cqinfo.qa_size;
240 	mr_attr.mr_as	 = NULL;
241 	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
242 	op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
243 	op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
244 	op.mro_bind_override_addr = 0;
245 	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
246 	    HERMON_CQ_CMPT);
247 	if (status != DDI_SUCCESS) {
248 		status = IBT_INSUFF_RESOURCE;
249 		goto cqalloc_fail5;
250 	}
251 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
252 
253 	cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
254 	if (cq_attr->cq_flags & IBT_CQ_HID) {
255 		if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
256 			IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
257 			    cq_attr->cq_hid);
258 			status = IBT_INVALID_PARAM;
259 			goto cqalloc_fail5;
260 		}
261 		cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
262 		IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
263 	} else {
264 		cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
265 		if (cq_schedp == NULL) {
266 			cq_schedp = &state->hs_cq_sched_default;
267 		} else if (cq_schedp != &state->hs_cq_sched_default) {
268 			int i;
269 			hermon_cq_sched_t *tmp;
270 
271 			tmp = state->hs_cq_sched_array;
272 			for (i = 0; i < state->hs_cq_sched_array_size; i++)
273 				if (cq_schedp == &tmp[i])
274 					break;	/* found it */
275 			if (i >= state->hs_cq_sched_array_size) {
276 				cmn_err(CE_CONT, "!Invalid cq_sched argument: "
277 				    "ignored\n");
278 				cq_schedp = &state->hs_cq_sched_default;
279 			}
280 		}
281 		cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
282 		    HERMON_CQSCHED_NEXT_HID(cq_schedp));
283 		IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
284 		    "eqn %d", cq_schedp->cqs_start_hid - 1,
285 		    cq_schedp->cqs_len, cq->cq_eqnum);
286 	}
287 
288 	/*
289 	 * Fill in the CQC entry.  This is the final step before passing
290 	 * ownership of the CQC entry to the Hermon hardware.  We use all of
291 	 * the information collected/calculated above to fill in the
292 	 * requisite portions of the CQC.  Note: If this CQ is going to be
293 	 * used for userland access, then we need to set the UAR page number
294 	 * appropriately (otherwise it's a "don't care")
295 	 */
296 	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
297 
298 	cqc_entry.state		= HERMON_CQ_DISARMED;
299 	cqc_entry.pg_offs	= cq->cq_cqinfo.qa_pgoffs >> 5;
300 	cqc_entry.log_cq_sz	= log_cq_size;
301 	cqc_entry.usr_page	= uarpg;
302 	cqc_entry.c_eqn		= cq->cq_eqnum;
303 	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
304 	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
305 	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
306 	cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
307 	cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
308 
309 	/*
310 	 * Write the CQC entry to hardware - we pass ownership of
311 	 * the entry to the hardware (using the Hermon SW2HW_CQ firmware
312 	 * command).  Note: In general, this operation shouldn't fail.  But
313 	 * if it does, we have to undo everything we've done above before
314 	 * returning error.
315 	 */
316 	status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
317 	    sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
318 	if (status != HERMON_CMD_SUCCESS) {
319 		cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
320 		    status);
321 		if (status == HERMON_CMD_INVALID_STATUS) {
322 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
323 		}
324 		status = ibc_get_ci_failure(0);
325 		goto cqalloc_fail6;
326 	}
327 
328 	/*
329 	 * Fill in the rest of the Hermon Completion Queue handle.  Having
330 	 * successfully transferred ownership of the CQC, we can update the
331 	 * following fields for use in further operations on the CQ.
332 	 */
333 	cq->cq_resize_hdl = 0;
334 	cq->cq_cqcrsrcp	  = cqc;
335 	cq->cq_rsrcp	  = rsrc;
336 	cq->cq_consindx	  = 0;
337 		/* least restrictive */
338 	cq->cq_buf	  = buf;
339 	cq->cq_bufsz	  = (1 << log_cq_size);
340 	cq->cq_log_cqsz	  = log_cq_size;
341 	cq->cq_mrhdl	  = mr;
342 	cq->cq_refcnt	  = 0;
343 	cq->cq_is_special = 0;
344 	cq->cq_uarpg	  = uarpg;
345 	cq->cq_umap_dhp	  = (devmap_cookie_t)NULL;
346 	avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
347 	    sizeof (struct hermon_workq_avl_s),
348 	    offsetof(struct hermon_workq_avl_s, wqa_link));
349 
350 	cq->cq_hdlrarg	  = (void *)ibt_cqhdl;
351 
352 	/*
353 	 * Put CQ handle in Hermon CQNum-to-CQHdl list.  Then fill in the
354 	 * "actual_size" and "cqhdl" and return success
355 	 */
356 	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);
357 
358 	/*
359 	 * If this is a user-mappable CQ, then we need to insert the previously
360 	 * allocated entry into the "userland resources database".  This will
361 	 * allow for later lookup during devmap() (i.e. mmap()) calls.
362 	 */
363 	if (cq->cq_is_umap) {
364 		hermon_umap_db_add(umapdb);
365 	}
366 
367 	/*
368 	 * Fill in the return arguments (if necessary).  This includes the
369 	 * real completion queue size.
370 	 */
371 	if (actual_size != NULL) {
372 		*actual_size = (1 << log_cq_size) - 1;
373 	}
374 	*cqhdl = cq;
375 
376 	return (DDI_SUCCESS);
377 
378 /*
379  * The following is cleanup for all possible failure cases in this routine
380  */
381 cqalloc_fail6:
382 	if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
383 	    sleepflag) != DDI_SUCCESS) {
384 		HERMON_WARNING(state, "failed to deregister CQ memory");
385 	}
386 cqalloc_fail5:
387 	hermon_queue_free(&cq->cq_cqinfo);
388 cqalloc_fail4a:
389 	hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
390 cqalloc_fail4:
391 	if (cq_is_umap) {
392 		hermon_umap_db_free(umapdb);
393 	}
394 cqalloc_fail3:
395 	hermon_rsrc_free(state, &rsrc);
396 cqalloc_fail2:
397 	hermon_rsrc_free(state, &cqc);
398 cqalloc_fail1:
399 	hermon_pd_refcnt_dec(pd);
400 cqalloc_fail:
401 	return (status);
402 }
403 
404 
405 /*
406  * hermon_cq_free()
407  *    Context: Can be called only from user or kernel context.
408  */
409 /* ARGSUSED */
410 int
hermon_cq_free(hermon_state_t * state,hermon_cqhdl_t * cqhdl,uint_t sleepflag)411 hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
412 {
413 	hermon_rsrc_t		*cqc, *rsrc;
414 	hermon_umap_db_entry_t	*umapdb;
415 	hermon_hw_cqc_t		cqc_entry;
416 	hermon_pdhdl_t		pd;
417 	hermon_mrhdl_t		mr;
418 	hermon_cqhdl_t		cq, resize;
419 	uint32_t		cqnum;
420 	uint64_t		value;
421 	uint_t			maxprot;
422 	int			status;
423 
424 	/*
425 	 * Pull all the necessary information from the Hermon Completion Queue
426 	 * handle.  This is necessary here because the resource for the
427 	 * CQ handle is going to be freed up as part of this operation.
428 	 */
429 	cq	= *cqhdl;
430 	mutex_enter(&cq->cq_lock);
431 	cqc	= cq->cq_cqcrsrcp;
432 	rsrc	= cq->cq_rsrcp;
433 	pd	= state->hs_pdhdl_internal;
434 	mr	= cq->cq_mrhdl;
435 	cqnum	= cq->cq_cqnum;
436 
437 	resize = cq->cq_resize_hdl;		/* save the handle for later */
438 
439 	/*
440 	 * If there are work queues still associated with the CQ, then return
441 	 * an error.  Otherwise, we will be holding the CQ lock.
442 	 */
443 	if (cq->cq_refcnt != 0) {
444 		mutex_exit(&cq->cq_lock);
445 		return (IBT_CQ_BUSY);
446 	}
447 
448 	/*
449 	 * If this was a user-mappable CQ, then we need to remove its entry
450 	 * from the "userland resources database".  If it is also currently
451 	 * mmap()'d out to a user process, then we need to call
452 	 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
453 	 * We also need to invalidate the CQ tracking information for the
454 	 * user mapping.
455 	 */
456 	if (cq->cq_is_umap) {
457 		status = hermon_umap_db_find(state->hs_instance, cqnum,
458 		    MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
459 		    &umapdb);
460 		if (status != DDI_SUCCESS) {
461 			mutex_exit(&cq->cq_lock);
462 			HERMON_WARNING(state, "failed to find in database");
463 			return (ibc_get_ci_failure(0));
464 		}
465 		hermon_umap_db_free(umapdb);
466 		if (cq->cq_umap_dhp != NULL) {
467 			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
468 			status = devmap_devmem_remap(cq->cq_umap_dhp,
469 			    state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
470 			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
471 			if (status != DDI_SUCCESS) {
472 				mutex_exit(&cq->cq_lock);
473 				HERMON_WARNING(state, "failed in CQ memory "
474 				    "devmap_devmem_remap()");
475 				return (ibc_get_ci_failure(0));
476 			}
477 			cq->cq_umap_dhp = (devmap_cookie_t)NULL;
478 		}
479 	}
480 
481 	/*
482 	 * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
483 	 * in-progress events to detect that the CQ corresponding to this
484 	 * number has been freed.
485 	 */
486 	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);
487 
488 	mutex_exit(&cq->cq_lock);
489 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
490 
491 	/*
492 	 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
493 	 * firmware command).  If the ownership transfer fails for any reason,
494 	 * then it is an indication that something (either in HW or SW) has
495 	 * gone seriously wrong.
496 	 */
497 	status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
498 	    sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
499 	if (status != HERMON_CMD_SUCCESS) {
500 		HERMON_WARNING(state, "failed to reclaim CQC ownership");
501 		cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
502 		    status);
503 		if (status == HERMON_CMD_INVALID_STATUS) {
504 			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
505 		}
506 		return (ibc_get_ci_failure(0));
507 	}
508 
509 	/*
510 	 * From here on, we start relinquishing resources - but check to see
511 	 * if a resize was in progress - if so, we need to relinquish those
512 	 * resources as well
513 	 */
514 
515 	/*
516 	 * Deregister the memory for the Completion Queue.  If this fails
517 	 * for any reason, then it is an indication that something (either
518 	 * in HW or SW) has gone seriously wrong.  So we print a warning
519 	 * message and return.
520 	 */
521 	status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
522 	    sleepflag);
523 	if (status != DDI_SUCCESS) {
524 		HERMON_WARNING(state, "failed to deregister CQ memory");
525 		return (ibc_get_ci_failure(0));
526 	}
527 
528 	/* A non-NULL resize handle means a resize never completed */
529 	if (resize)	{	/* there was a pointer to a handle */
530 		mr = resize->cq_mrhdl;	/* reuse the pointer to the region */
531 		status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
532 		    sleepflag);
533 		if (status != DDI_SUCCESS) {
534 			HERMON_WARNING(state, "failed to deregister resize CQ "
535 			    "memory");
536 			return (ibc_get_ci_failure(0));
537 		}
538 	}
539 
540 	/* Free the memory for the CQ */
541 	hermon_queue_free(&cq->cq_cqinfo);
542 	if (resize)	{
543 		hermon_queue_free(&resize->cq_cqinfo);
544 		/* and the temporary handle */
545 		kmem_free(resize, sizeof (struct hermon_sw_cq_s));
546 	}
547 
548 	/* everything else does not matter for the resize in progress */
549 
550 	/* Free the dbr */
551 	hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);
552 
553 	/* Free the Hermon Completion Queue handle */
554 	hermon_rsrc_free(state, &rsrc);
555 
556 	/* Free up the CQC entry resource */
557 	hermon_rsrc_free(state, &cqc);
558 
559 	/* Decrement the reference count on the protection domain (PD) */
560 	hermon_pd_refcnt_dec(pd);
561 
562 	/* Set the cqhdl pointer to NULL and return success */
563 	*cqhdl = NULL;
564 
565 	return (DDI_SUCCESS);
566 }
567 
568 
569 /*
570  * hermon_cq_resize()
571  *    Context: Can be called only from user or kernel context.
572  */
573 int
hermon_cq_resize(hermon_state_t * state,hermon_cqhdl_t cq,uint_t req_size,uint_t * actual_size,uint_t sleepflag)574 hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
575     uint_t *actual_size, uint_t sleepflag)
576 {
577 	hermon_hw_cqc_t		cqc_entry;
578 	hermon_cqhdl_t		resize_hdl;
579 	hermon_qalloc_info_t	new_cqinfo;
580 	ibt_mr_attr_t		mr_attr;
581 	hermon_mr_options_t	op;
582 	hermon_pdhdl_t		pd;
583 	hermon_mrhdl_t		mr;
584 	hermon_hw_cqe_t		*buf;
585 	uint32_t		new_prod_indx;
586 	uint_t			log_cq_size;
587 	int			status, flag;
588 
589 	if (cq->cq_resize_hdl != 0) {	/* already in process */
590 		status = IBT_CQ_BUSY;
591 		goto cqresize_fail;
592 	}
593 
594 
595 	/* Use the internal protection domain (PD) for CQs */
596 	pd = state->hs_pdhdl_internal;
597 
598 	/*
599 	 * Calculate the appropriate size for the new resized completion queue.
600 	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
601 	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
602 	 * to round the requested size up to the next highest power-of-2
603 	 */
604 	req_size = max(req_size, HERMON_CQ_MIN_SIZE);
605 	log_cq_size = highbit(req_size);
606 
607 	/*
608 	 * Next we verify that the rounded-up size is valid (i.e. consistent
609 	 * with the device limits and/or software-configured limits)
610 	 */
611 	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
612 		status = IBT_HCA_CQ_EXCEEDED;
613 		goto cqresize_fail;
614 	}
615 
616 	/*
617 	 * Allocate the memory for newly resized Completion Queue.
618 	 *
619 	 * Note: Although we use the common queue allocation routine, we
620 	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
621 	 * kernel system memory) for kernel CQs because it would be
622 	 * inefficient to have CQs located in DDR memory.  This is the same
623 	 * as we do when we first allocate completion queues primarily
624 	 * because CQs are read from (by software) more than they are written
625 	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
626 	 * user-mappable CQs for a similar reason.)
627 	 * It is also worth noting that, unlike Hermon QP work queues,
628 	 * completion queues do not have the same strict alignment
629 	 * requirements.  It is sufficient for the CQ memory to be both
630 	 * aligned to and bound to addresses which are a multiple of CQE size.
631 	 */
632 
633 	/* first, alloc the resize_handle */
	/*
	 * NOTE(review): KM_SLEEP is used here regardless of the caller's
	 * sleepflag -- confirm no caller requires a non-blocking resize.
	 */
634 	resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);
635 
636 	new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
637 	new_cqinfo.qa_alloc_align = PAGESIZE;
638 	new_cqinfo.qa_bind_align  = PAGESIZE;
639 	if (cq->cq_is_umap) {
640 		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
641 	} else {
642 		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
643 	}
644 	status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
645 	if (status != DDI_SUCCESS) {
646 		/* free the resize handle */
647 		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
648 		status = IBT_INSUFF_RESOURCE;
649 		goto cqresize_fail;
650 	}
651 	buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
652 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
653 
654 	/*
655 	 * No initialization of the cq is needed - the command will do it
656 	 */
657 
658 	/*
659 	 * Register the memory for the CQ.  The memory for the CQ must
660 	 * be registered in the Hermon TPT tables.  This gives us the LKey
661 	 * to specify in the CQ context below.
662 	 */
663 	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
664 	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
665 	mr_attr.mr_len	 = new_cqinfo.qa_size;
666 	mr_attr.mr_as	 = NULL;
667 	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
668 	op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
669 	op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
670 	op.mro_bind_override_addr = 0;
671 	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
672 	    HERMON_CQ_CMPT);
673 	if (status != DDI_SUCCESS) {
674 		hermon_queue_free(&new_cqinfo);
675 		/* free the resize handle */
676 		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
677 		status = IBT_INSUFF_RESOURCE;
678 		goto cqresize_fail;
679 	}
680 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
681 
682 	/*
683 	 * Now we grab the CQ lock.  Since we will be updating the actual
684 	 * CQ location and the producer/consumer indexes, we should hold
685 	 * the lock.
686 	 *
687 	 * We use HERMON_CMD_NOSLEEP_SPIN below, though, because we are
688 	 * holding the "cq_lock" and if we got raised to interrupt level
689 	 * by priority inversion, we would not want to block in this routine
690 	 * waiting for success.
691 	 */
692 	mutex_enter(&cq->cq_lock);
693 
694 	/*
695 	 * Fill in the CQC entry.  For the resize operation this is the
696 	 * final step before attempting the resize operation on the CQC entry.
697 	 * We use all of the information collected/calculated above to fill
698 	 * in the requisite portions of the CQC.
699 	 */
700 	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
701 	cqc_entry.log_cq_sz	= log_cq_size;
702 	cqc_entry.pg_offs	= new_cqinfo.qa_pgoffs >> 5;
703 	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
704 	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
705 	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
706 
707 	/*
708 	 * Write the CQC entry to hardware.  Lastly, we pass ownership of
709 	 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
710 	 * command).  Note: In general, this operation shouldn't fail.  But
711 	 * if it does, we have to undo everything we've done above before
712 	 * returning error.  Also note that the status returned may indicate
713 	 * the code to return to the IBTF.
714 	 */
715 	status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
716 	    &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
717 	if (status != HERMON_CMD_SUCCESS) {
718 		/* Resize attempt has failed, drop CQ lock and cleanup */
719 		mutex_exit(&cq->cq_lock);
720 		if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
721 		    sleepflag) != DDI_SUCCESS) {
722 			HERMON_WARNING(state, "failed to deregister CQ memory");
723 		}
724 		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
725 		hermon_queue_free(&new_cqinfo);
726 		if (status == HERMON_CMD_BAD_SIZE) {
727 			return (IBT_CQ_SZ_INSUFFICIENT);
728 		} else {
729 			cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
730 			    "%08x\n", status);
731 			if (status == HERMON_CMD_INVALID_STATUS) {
732 				hermon_fm_ereport(state, HCA_SYS_ERR,
733 				    HCA_ERR_SRV_LOST);
734 			}
735 			return (ibc_get_ci_failure(0));
736 		}
737 	}
738 
739 	/*
740 	 * For Hermon, we've alloc'd another handle structure and save off the
741 	 * important things in it. Then, in polling we check to see if there's
742 	 * a "resizing handle" and if so we look for the "special CQE", opcode
743 	 * 0x16, that indicates the transition to the new buffer.
744 	 *
745 	 * At that point, we'll adjust everything - including dereg and
746 	 * freeing of the original buffer, updating all the necessary fields
747 	 * in the cq_hdl, and setting up for the next cqe polling
748 	 */
749 
750 	resize_hdl->cq_buf	= buf;
751 	resize_hdl->cq_bufsz	= (1 << log_cq_size);
752 	resize_hdl->cq_mrhdl	= mr;
753 	resize_hdl->cq_log_cqsz = log_cq_size;
754 
755 	bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
756 	    sizeof (struct hermon_qalloc_info_s));
757 
758 	/* now, save the address in the cq_handle */
759 	cq->cq_resize_hdl = resize_hdl;
760 
761 	/*
762 	 * Drop the CQ lock now.
763 	 */
764 
765 	mutex_exit(&cq->cq_lock);
766 	/*
767 	 * Fill in the return arguments (if necessary).  This includes the
768 	 * real new completion queue size.
769 	 */
770 	if (actual_size != NULL) {
771 		*actual_size = (1 << log_cq_size) - 1;
772 	}
773 
774 	return (DDI_SUCCESS);
775 
776 cqresize_fail:
777 	return (status);
778 }
779 
780 
781 /*
782  * hermon_cq_modify()
783  *    Context: Can be called base context.
784  */
785 /* ARGSUSED */
786 int
hermon_cq_modify(hermon_state_t * state,hermon_cqhdl_t cq,uint_t count,uint_t usec,ibt_cq_handler_id_t hid,uint_t sleepflag)787 hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
788     uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
789 {
790 	int	status;
791 	hermon_hw_cqc_t		cqc_entry;
792 
793 	mutex_enter(&cq->cq_lock);
794 	if (count != cq->cq_intmod_count ||
795 	    usec != cq->cq_intmod_usec) {
796 		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
797 		cqc_entry.cq_max_cnt = count;
798 		cqc_entry.cq_period = usec;
799 		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
800 		    cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
801 		if (status != HERMON_CMD_SUCCESS) {
802 			mutex_exit(&cq->cq_lock);
803 			cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
804 			    "command failed: %08x\n", status);
805 			if (status == HERMON_CMD_INVALID_STATUS) {
806 				hermon_fm_ereport(state, HCA_SYS_ERR,
807 				    HCA_ERR_SRV_LOST);
808 			}
809 			return (ibc_get_ci_failure(0));
810 		}
811 		cq->cq_intmod_count = count;
812 		cq->cq_intmod_usec = usec;
813 	}
814 	if (hid && (hid - 1 != cq->cq_eqnum)) {
815 		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
816 		cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
817 		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
818 		    cq->cq_cqnum, MODIFY_EQN, sleepflag);
819 		if (status != HERMON_CMD_SUCCESS) {
820 			mutex_exit(&cq->cq_lock);
821 			cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
822 			    "%08x\n", status);
823 			if (status == HERMON_CMD_INVALID_STATUS) {
824 				hermon_fm_ereport(state, HCA_SYS_ERR,
825 				    HCA_ERR_SRV_LOST);
826 			}
827 			return (ibc_get_ci_failure(0));
828 		}
829 		cq->cq_eqnum = hid - 1;
830 	}
831 	mutex_exit(&cq->cq_lock);
832 	return (DDI_SUCCESS);
833 }
834 
835 /*
836  * hermon_cq_notify()
837  *    Context: Can be called from interrupt or base context.
838  */
839 int
hermon_cq_notify(hermon_state_t * state,hermon_cqhdl_t cq,ibt_cq_notify_flags_t flags)840 hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
841     ibt_cq_notify_flags_t flags)
842 {
843 	uint_t	cmd;
844 	ibt_status_t status;
845 
846 	/* Validate IBT flags and call doorbell routine. */
847 	if (flags == IBT_NEXT_COMPLETION) {
848 		cmd = HERMON_CQDB_NOTIFY_CQ;
849 	} else if (flags == IBT_NEXT_SOLICITED) {
850 		cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
851 	} else {
852 		return (IBT_CQ_NOTIFY_TYPE_INVALID);
853 	}
854 
855 	status = hermon_cq_arm_doorbell(state, cq, cmd);
856 	return (status);
857 }
858 
859 
/*
 * hermon_cq_poll()
 *    Context: Can be called from interrupt or base context.
 *
 *    Pulls up to "num_wc" completed entries off of the CQ, converting each
 *    hardware CQE into an ibt_wc_t in the caller-supplied "wc_p" array.  On
 *    return, "*num_polled" (if non-NULL) holds the number of work
 *    completions actually delivered.  Returns IBT_CQ_EMPTY when no CQE was
 *    available, IBT_CQ_HDL_INVALID for user-mapped CQs, or DDI_SUCCESS.
 */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	hermon_hw_cqe_t	*cqe;
	uint_t		opcode;
	uint32_t	cons_indx, wrap_around_mask, shift, mask;
	uint32_t	polled_cnt, spec_op = 0;
	int		status;

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the user.
	 * If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Hermon completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as there the CQE's owned by SW, process
	 * each entry by calling hermon_cq_cqe_consume() and updating the CQ
	 * consumer index.  Note:  We only update the consumer index if
	 * hermon_cq_cqe_consume() returns HERMON_CQ_SYNC_AND_DB.  Otherwise,
	 * it indicates that we are going to "recycle" the CQE (probably
	 * because it is a error CQE and corresponds to more than one
	 * completion).
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
		if (cq->cq_resize_hdl != 0) {	/* in midst of resize */
			/* peek at the opcode */
			opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
			if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
				/* Switch the CQ over to the resized buffer */
				hermon_cq_resize_helper(state, cq);

				/* Increment the consumer index */
				cons_indx = (cons_indx + 1);
				spec_op = 1; /* plus one for the limiting CQE */

				/* Buffer size may have changed; recompute */
				wrap_around_mask = (cq->cq_bufsz - 1);

				/* Update the pointer to the next CQ entry */
				cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

				continue;
			}
		}	/* in resizing CQ */

		/*
		 * either resizing and not the special opcode, or
		 * not resizing at all
		 */
		hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

		/* Increment the consumer index */
		cons_indx = (cons_indx + 1);

		/* Update the pointer to the next CQ entry */
		cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled of the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		hermon_cq_update_ci_doorbell(cq);

	} else if (polled_cnt == 0) {
		if (spec_op != 0) {
			/* if we got the special opcode, update the consindx */
			cq->cq_consindx = cons_indx;
			hermon_cq_update_ci_doorbell(cq);
		}
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Hermon interrupt service routine.  This step is necessary here
	 * because we might be in a polled I/O mode and without the call to
	 * hermon_isr() - and its subsequent calls to poll and rearm each
	 * event queue - we might overflow our EQs and render the system
	 * unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) hermon_isr((caddr_t)state, (caddr_t)NULL);
	}
	return (status);
}
998 
999 /*
1000  *	cmd_sn must be initialized to 1 to enable proper reenabling
1001  *	by hermon_arm_cq_dbr_update().
1002  */
1003 static void
hermon_arm_cq_dbr_init(hermon_dbr_t * cq_arm_dbr)1004 hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
1005 {
1006 	uint32_t *target;
1007 
1008 	target = (uint32_t *)cq_arm_dbr + 1;
1009 	*target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
1010 }
1011 
1012 
/*
 *	User cmd_sn needs help from this kernel function to know
 *	when it should be incremented (modulo 4).  We do an atomic
 *	update of the arm_cq dbr to communicate this fact.  We retry
 *	in the case that user library is racing with us.  We zero
 *	out the cmd field so that the user library can use the cmd
 *	field to track the last command it issued (solicited verses any).
 */
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t tmp, cmp, new;
	uint32_t old_cmd_sn, new_cmd_sn;
	uint32_t *target;
	int retries = 0;

	/* The "arm" word is the second 32-bit word of the doorbell record */
	target = (uint32_t *)cq_arm_dbr + 1;
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* swap to host order (htonl is its own inverse) */
	old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/* Advance the 2-bit command sequence number, wrapping modulo 4 */
	new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
	    (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/*
	 * The 0x37 mask clears the 3-bit cmd field (0x7) and, presumably,
	 * the old cmd_sn bits as well (0x30 here lines up with the cmd_sn
	 * field iff CMDSN_SHIFT == CMD_SHIFT + 4 -- verify in hermon.h);
	 * the new cmd_sn is then OR'd back in.
	 */
	new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
	tmp = atomic_cas_32(target, cmp, new);
	if (tmp != cmp) {	/* cas failed, so need to retry */
		drv_usecwait(retries & 0xff);   /* avoid race */
		if (++retries > 100000) {
			/* complain periodically but keep retrying forever */
			cmn_err(CE_CONT, "cas failed in hermon\n");
			retries = 0;
		}
		goto retry;
	}
}
1047 
1048 
1049 /*
1050  * hermon_cq_handler()
1051  *    Context: Only called from interrupt context
1052  */
1053 /* ARGSUSED */
1054 int
hermon_cq_handler(hermon_state_t * state,hermon_eqhdl_t eq,hermon_hw_eqe_t * eqe)1055 hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1056     hermon_hw_eqe_t *eqe)
1057 {
1058 	hermon_cqhdl_t		cq;
1059 	uint_t			cqnum;
1060 
1061 	/* Get the CQ handle from CQ number in event descriptor */
1062 	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1063 	cq = hermon_cqhdl_from_cqnum(state, cqnum);
1064 
1065 	/*
1066 	 * If the CQ handle is NULL, this is probably an indication
1067 	 * that the CQ has been freed already.  In which case, we
1068 	 * should not deliver this event.
1069 	 *
1070 	 * We also check that the CQ number in the handle is the
1071 	 * same as the CQ number in the event queue entry.  This
1072 	 * extra check allows us to handle the case where a CQ was
1073 	 * freed and then allocated again in the time it took to
1074 	 * handle the event queue processing.  By constantly incrementing
1075 	 * the non-constrained portion of the CQ number every time
1076 	 * a new CQ is allocated, we mitigate (somewhat) the chance
1077 	 * that a stale event could be passed to the client's CQ
1078 	 * handler.
1079 	 *
1080 	 * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
1081 	 * means that we've have either received this event before we
1082 	 * finished attaching to the IBTF or we've received it while we
1083 	 * are in the process of detaching.
1084 	 */
1085 	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1086 	    (state->hs_ibtfpriv != NULL)) {
1087 		hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
1088 		HERMON_DO_IBTF_CQ_CALLB(state, cq);
1089 	}
1090 
1091 	return (DDI_SUCCESS);
1092 }
1093 
1094 
1095 /*
1096  * hermon_cq_err_handler()
1097  *    Context: Only called from interrupt context
1098  */
1099 /* ARGSUSED */
1100 int
hermon_cq_err_handler(hermon_state_t * state,hermon_eqhdl_t eq,hermon_hw_eqe_t * eqe)1101 hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1102     hermon_hw_eqe_t *eqe)
1103 {
1104 	hermon_cqhdl_t		cq;
1105 	uint_t			cqnum;
1106 	ibc_async_event_t	event;
1107 	ibt_async_code_t	type;
1108 
1109 	HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
1110 	/* Get the CQ handle from CQ number in event descriptor */
1111 	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1112 	cq = hermon_cqhdl_from_cqnum(state, cqnum);
1113 
1114 	/*
1115 	 * If the CQ handle is NULL, this is probably an indication
1116 	 * that the CQ has been freed already.  In which case, we
1117 	 * should not deliver this event.
1118 	 *
1119 	 * We also check that the CQ number in the handle is the
1120 	 * same as the CQ number in the event queue entry.  This
1121 	 * extra check allows us to handle the case where a CQ was
1122 	 * freed and then allocated again in the time it took to
1123 	 * handle the event queue processing.  By constantly incrementing
1124 	 * the non-constrained portion of the CQ number every time
1125 	 * a new CQ is allocated, we mitigate (somewhat) the chance
1126 	 * that a stale event could be passed to the client's CQ
1127 	 * handler.
1128 	 *
1129 	 * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
1130 	 * means that we've have either received this event before we
1131 	 * finished attaching to the IBTF or we've received it while we
1132 	 * are in the process of detaching.
1133 	 */
1134 	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1135 	    (state->hs_ibtfpriv != NULL)) {
1136 		event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
1137 		type		= IBT_ERROR_CQ;
1138 		HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
1139 	}
1140 
1141 	return (DDI_SUCCESS);
1142 }
1143 
1144 
1145 /*
1146  * hermon_cq_refcnt_inc()
1147  *    Context: Can be called from interrupt or base context.
1148  */
1149 int
hermon_cq_refcnt_inc(hermon_cqhdl_t cq,uint_t is_special)1150 hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
1151 {
1152 	/*
1153 	 * Increment the completion queue's reference count.  Note: In order
1154 	 * to ensure compliance with IBA C11-15, we must ensure that a given
1155 	 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1156 	 * This is accomplished here by keeping track of how the referenced
1157 	 * CQ is being used.
1158 	 */
1159 	mutex_enter(&cq->cq_lock);
1160 	if (cq->cq_refcnt == 0) {
1161 		cq->cq_is_special = is_special;
1162 	} else {
1163 		if (cq->cq_is_special != is_special) {
1164 			mutex_exit(&cq->cq_lock);
1165 			return (DDI_FAILURE);
1166 		}
1167 	}
1168 	cq->cq_refcnt++;
1169 	mutex_exit(&cq->cq_lock);
1170 	return (DDI_SUCCESS);
1171 }
1172 
1173 
1174 /*
1175  * hermon_cq_refcnt_dec()
1176  *    Context: Can be called from interrupt or base context.
1177  */
1178 void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)1179 hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
1180 {
1181 	/* Decrement the completion queue's reference count */
1182 	mutex_enter(&cq->cq_lock);
1183 	cq->cq_refcnt--;
1184 	mutex_exit(&cq->cq_lock);
1185 }
1186 
1187 
/*
 * hermon_cq_arm_doorbell()
 *    Context: Can be called from interrupt or base context.
 *
 *    Arms the CQ for event generation by atomically updating the arm word
 *    of the CQ doorbell record and then ringing the UAR CQ doorbell.  The
 *    compare-and-swap loop keeps the record update race-free with respect
 *    to a user library touching the same record.  The doorbell is skipped
 *    entirely when the record shows an equal-or-stronger arm is already
 *    pending (NOTIFY_CQ covers NOTIFY_CQ_SOLICIT).  Wrapped in the FMA
 *    PIO retry protocol; on an unrecoverable PIO fault an ereport is
 *    posted and a CI failure is returned.
 */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
	uint32_t	cq_num;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;
	ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	cq_num = cq->cq_cqnum;
	/* The "arm" word is the second 32-bit word of the doorbell record */
	target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

	/* the FMA retry loop starts for Hermon doorbell register. */
	hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* swap to host order (htonl is its own inverse) */
	old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
	cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
			    HERMON_CQDB_CMD_SHIFT);
			/* consumer index occupies the low 24 bits */
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			/* Record updated; now ring the UAR CQ doorbell */
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	} else {
		ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
		    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
			    HERMON_CQDB_CMD_SHIFT);
			/* consumer index occupies the low 24 bits */
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			/* Record updated; now ring the UAR CQ doorbell */
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	}

	/* the FMA retry loop ends. */
	hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);

	return (IBT_SUCCESS);

pio_error:
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}
1252 
1253 
1254 /*
1255  * hermon_cqhdl_from_cqnum()
1256  *    Context: Can be called from interrupt or base context.
1257  *
1258  *    This routine is important because changing the unconstrained
1259  *    portion of the CQ number is critical to the detection of a
1260  *    potential race condition in the CQ handler code (i.e. the case
1261  *    where a CQ is freed and alloc'd again before an event for the
1262  *    "old" CQ can be handled).
1263  *
1264  *    While this is not a perfect solution (not sure that one exists)
1265  *    it does help to mitigate the chance that this race condition will
1266  *    cause us to deliver a "stale" event to the new CQ owner.  Note:
1267  *    this solution does not scale well because the number of constrained
1268  *    bits increases (and, hence, the number of unconstrained bits
1269  *    decreases) as the number of supported CQs grows.  For small and
1270  *    intermediate values, it should hopefully provide sufficient
1271  *    protection.
1272  */
1273 hermon_cqhdl_t
hermon_cqhdl_from_cqnum(hermon_state_t * state,uint_t cqnum)1274 hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
1275 {
1276 	uint_t	cqindx, cqmask;
1277 
1278 	/* Calculate the CQ table index from the cqnum */
1279 	cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
1280 	cqindx = cqnum & cqmask;
1281 	return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
1282 }
1283 
/*
 * hermon_cq_cqe_consume()
 *    Context: Can be called from interrupt or base context.
 *
 *    Converts one successful hardware CQE into the caller's ibt_wc_t,
 *    filling in the work request ID, completion type, flags, and the
 *    fields appropriate to a send, FEXCH receive, or normal receive
 *    completion.  Error CQEs are detected up front and handed off to
 *    hermon_cq_errcqe_consume().
 */
static void
hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		opcode, qpnum, qp1_indx;
	ibt_wc_flags_t	flags;
	ibt_wrc_opcode_t type;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call hermon_cq_errcqe_consume() and return
	 * whatever status it returns.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
	    (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
		hermon_cq_errcqe_consume(state, cq, cqe, wc);
		return;
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running))
	if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {

		/* Send CQE: map hardware opcode to the IBT work type */
		switch (opcode) {
		case HERMON_CQE_SND_RDMAWR_IMM:
		case HERMON_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case HERMON_CQE_SND_SEND_INV:
		case HERMON_CQE_SND_SEND_IMM:
		case HERMON_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case HERMON_CQE_SND_LSO:
			type = IBT_WRC_SEND_LSO;
			break;

		case HERMON_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case HERMON_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case HERMON_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case HERMON_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		case HERMON_CQE_SND_FRWR:
			type = IBT_WRC_FAST_REG_PMR;
			break;

		case HERMON_CQE_SND_LCL_INV:
			type = IBT_WRC_LOCAL_INVALIDATE;
			break;

		default:
			/* Unrecognized opcode: flag the WC as a QP error */
			HERMON_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	} else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
	    hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
		/* FCoIB FEXCH receive: fill in the FC-specific WC fields */
		type = IBT_WRC_RECV;
		if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
			flags |= IBT_WC_DIF_ERROR;
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
		wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
		wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
		wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
		wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
		wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
		    IBT_WC_DETAIL_FC_MATCH_MASK;
		wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
	} else {
		/*
		 * Parse the remaining contents of the CQE into the work
		 * completion.  This means filling in SL, QP number, SLID,
		 * immediate data, etc.
		 *
		 * Note: Not all of these fields are valid in a given
		 * completion.  Many of them depend on the actual type of
		 * completion.  So we fill in all of the fields and leave
		 * it up to the IBTF and consumer to sort out which are
		 * valid based on their context.
		 */
		wc->wc_sl	  = HERMON_CQE_SL_GET(cq, cqe);
		wc->wc_qpn	  = HERMON_CQE_DQPN_GET(cq, cqe);
		wc->wc_slid	  = HERMON_CQE_DLID_GET(cq, cqe);
		wc->wc_immed_data =
		    HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
		wc->wc_pkey_ix	  = (wc->wc_immed_data &
		    ((1 << state->hs_queryport.log_max_pkey) - 1));
		/*
		 * Fill in "bytes transferred" as appropriate.  Also,
		 * if necessary, fill in the "path bits" field.
		 */
		wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);

		/*
		 * Check for GRH, update the flags, then fill in "wc_flags"
		 * field in the work completion
		 */
		if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
			flags |= IBT_WC_GRH_PRESENT;
		}

		/* Receive CQE: map hardware opcode to the IBT work type */
		switch (opcode) {
		case HERMON_CQE_RCV_SEND_IMM:
			/*
			 * Note:  According to the PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum	 = HERMON_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->hs_spec_qp1->hr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */

		case HERMON_CQE_RCV_SEND:
			type = IBT_WRC_RECV;
			/* Pass checksum-offload results through if valid */
			if (HERMON_CQE_IS_IPOK(cq, cqe)) {
				wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
				flags |= IBT_WC_CKSUM_OK;
				wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
				    HERMON_CQE_IPOIB_STATUS(cq, cqe);
			}
			break;

		case HERMON_CQE_RCV_SEND_INV:
			type = IBT_WRC_RECV;
			flags |= IBT_WC_RKEY_INVALIDATED;
			wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
			break;

		case HERMON_CQE_RCV_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:

			/* Unrecognized opcode: flag the WC as a QP error */
			HERMON_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	}
	wc->wc_type = type;
	wc->wc_flags = flags;
	wc->wc_status = IBT_WC_SUCCESS;
}
1468 
/*
 * hermon_cq_errcqe_consume()
 *    Context: Can be called from interrupt or base context.
 *
 *    Converts one error CQE into the caller's ibt_wc_t: fetches the work
 *    request ID, extracts the hardware error syndrome, and maps it to the
 *    corresponding IBT work completion status.  Non-flush errors are
 *    logged (and noted for FMA) since they usually indicate a real
 *    hardware or protocol problem.
 */
static void
hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint32_t		imm_eth_pkey_cred;
	uint_t			status;
	ibt_wc_status_t		ibt_status;

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status/syndrome here.
	 */
	imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
	status = imm_eth_pkey_cred;
	/* Flush errors are routine during QP teardown; don't log those */
	if (status != HERMON_CQE_WR_FLUSHED_ERR)
		IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x  "
		    "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
		    HERMON_CQE_WQECNTR_GET(cq, cqe), status,
		    HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
	/* Map the hardware syndrome to the IBT work completion status */
	switch (status) {
	case HERMON_CQE_LOC_LEN_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCLEN);
		ibt_status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case HERMON_CQE_LOC_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case HERMON_CQE_LOC_PROT_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCPROT);
		ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
		IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
		/* Debug aid: optionally panic so the error can be examined */
		if (hermon_should_panic) {
			cmn_err(CE_PANIC, "Hermon intentional PANIC - "
			    "Local Protection Error\n");
		}
		break;

	case HERMON_CQE_WR_FLUSHED_ERR:
		ibt_status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case HERMON_CQE_MW_BIND_ERR:
		HERMON_WARNING(state, HERMON_FMA_MWBIND);
		ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case HERMON_CQE_BAD_RESPONSE_ERR:
		HERMON_WARNING(state, HERMON_FMA_RESP);
		ibt_status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case HERMON_CQE_LOCAL_ACCESS_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCACC);
		ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_INV_REQ_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMREQ);
		ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case HERMON_CQE_REM_ACC_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMACC);
		ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMOP);
		ibt_status = IBT_WC_REMOTE_OP_ERR;
		break;

	case HERMON_CQE_TRANS_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
		ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case HERMON_CQE_RNRNAK_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_RNRCNT);
		ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Hermon driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
	 *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
	 *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
	 *    case HERMON_CQE_INV_EEC_NUM_ERR:
	 *    case HERMON_CQE_INV_EEC_STATE_ERR:
	 *    case HERMON_CQE_LOC_EEC_ERR:
	 */

	default:
		HERMON_WARNING(state, "unknown error CQE status");
		HERMON_FMANOTE(state, HERMON_FMA_UNKN);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;
	}

	wc->wc_status = ibt_status;
}
1584 
/*
 * hermon_cq_resize_helper()
 *    Context: Can be called only from user or kernel context.
 *
 *    Completes a CQ resize once the special "resize" CQE has been seen:
 *    tears down the old CQ buffer, copies the new buffer's description
 *    out of the resize handle into the live CQ handle, clears the
 *    in-resize flag, and frees the resize handle.  Must be called with
 *    cq->cq_lock held.
 */
void
hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
{
	hermon_cqhdl_t		resize_hdl;
	int			status;

	/*
	 * we're here because we found the special cqe opcode, so we have
	 * to update the cq_handle, release the old resources, clear the
	 * flag in the cq_hdl, and release the resize_hdl.  When we return
	 * above, it will take care of the rest
	 */
	ASSERT(MUTEX_HELD(&cq->cq_lock));

	resize_hdl = cq->cq_resize_hdl;

	/*
	 * Deregister the memory for the old Completion Queue.  Note: We
	 * really can't return error here because we have no good way to
	 * cleanup.  Plus, the deregistration really shouldn't ever happen.
	 * So, if it does, it is an indication that something has gone
	 * seriously wrong.  So we print a warning message and return error
	 * (knowing, of course, that the "old" CQ memory will be leaked)
	 */
	status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
	    HERMON_SLEEP);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister old CQ memory");
	}

	/* Next, free the memory from the old CQ buffer */
	hermon_queue_free(&cq->cq_cqinfo);

	/* now we can update the cq_hdl with the new things saved */

	cq->cq_buf   = resize_hdl->cq_buf;
	cq->cq_mrhdl = resize_hdl->cq_mrhdl;
	cq->cq_bufsz = resize_hdl->cq_bufsz;
	cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
	cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
	/* Clearing this marks the resize as complete */
	cq->cq_resize_hdl = 0;
	bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
	    sizeof (struct hermon_qalloc_info_s));

	/* finally, release the resizing handle */
	kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
}
1637 
1638 
/*
 * hermon_cq_entries_flush()
 * Context: Can be called from interrupt or base context.
 *
 *    Removes all software-owned CQEs belonging to the given QP from its
 *    receive CQ and then its send CQ (if different), compacting the
 *    remaining CQEs in place so entries for other QPs are preserved.
 *    For SRQ receive completions, the flushed WQE is also returned to
 *    the SRQ free list.  The consumer index and its doorbell record are
 *    updated only if any entries were actually removed.
 */
/* ARGSUSED */
void
hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_cqhdl_t		cq;
	hermon_hw_cqe_t		*cqe, *next_cqe;
	hermon_srqhdl_t		srq;
	hermon_workq_hdr_t	*wq;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, qpnum;
	uint32_t		shift, mask;
	int			outstanding_cqes;

	qpnum = qp->qp_qpnum;
	if ((srq = qp->qp_srqhdl) != NULL)
		wq = qp->qp_srqhdl->srq_wq_wqhdr;
	else
		wq = NULL;
	/* Start with the receive CQ; fall back to the send CQ if none */
	cq = qp->qp_rq_cqhdl;

	if (cq == NULL) {
		cq = qp->qp_sq_cqhdl;
	}

do_send_cq:	/* loop back to here if send_cq is not the same as recv_cq */
	if (cq == NULL)
		return;

	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;
	/* Works because Hermon CQ sizes are always powers of two */
	wrap_around_mask = mask - 1;

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx++;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
	 */
	check_indx = new_indx = (tail_cons_indx - 1);

	/* Walk backwards, compacting surviving CQEs toward the tail */
	while (--outstanding_cqes >= 0) {
		cqe = &cq->cq_buf[check_indx & wrap_around_mask];

		/*
		 * If the QP number is the same in the CQE as the QP, then
		 * we must "consume" it.  If it is for an SRQ wqe, then we
		 * also must free the wqe back onto the free list of the SRQ.
		 */
		if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
			if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
			    HERMON_COMPLETION_RECV)) {
				uint64_t *desc;
				int indx;

				/* Add wqe back to SRQ free list */
				indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
				    wq->wq_mask;
				desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
				((uint16_t *)desc)[1] = htons(indx);
				wq->wq_tail = indx;
			}
		} else {	/* CQEs for other QPNs need to remain */
			if (check_indx != new_indx) {
				next_cqe =
				    &cq->cq_buf[new_indx & wrap_around_mask];
				/* Copy the CQE into the "next_cqe" pointer. */
				bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
			}
			new_indx--;	/* move index to next CQE to fill */
		}
		check_indx--;		/* move index to next CQE to check */
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1);

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if (cq->cq_consindx != cons_indx) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;

		hermon_cq_update_ci_doorbell(cq);

	}
	/* Repeat once more for the send CQ if it differs from the recv CQ */
	if (cq != qp->qp_sq_cqhdl) {
		cq = qp->qp_sq_cqhdl;
		goto do_send_cq;
	}
}
1768 
1769 /*
1770  * hermon_get_cq_sched_list()
1771  *    Context: Only called from attach() path context
1772  *
1773  * Read properties, creating entries in hs_cq_sched_list with
1774  * information about the requested "expected" and "minimum"
1775  * number of MSI-X interrupt vectors per list entry.
1776  */
1777 static int
hermon_get_cq_sched_list(hermon_state_t * state)1778 hermon_get_cq_sched_list(hermon_state_t *state)
1779 {
1780 	char **listp, ulp_prop[HERMON_CQH_MAX + 4];
1781 	uint_t nlist, i, j, ndata;
1782 	int *data;
1783 	size_t len;
1784 	hermon_cq_sched_t *cq_schedp;
1785 
1786 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
1787 	    DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
1788 	    DDI_PROP_SUCCESS)
1789 		return (0);
1790 
1791 	state->hs_cq_sched_array_size = nlist;
1792 	state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
1793 	    sizeof (hermon_cq_sched_t), KM_SLEEP);
1794 	for (i = 0; i < nlist; i++) {
1795 		if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
1796 			cmn_err(CE_CONT, "'cqh' property name too long\n");
1797 			goto game_over;
1798 		}
1799 		for (j = 0; j < i; j++) {
1800 			if (strcmp(listp[j], listp[i]) == 0) {
1801 				cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
1802 				goto game_over;
1803 			}
1804 		}
1805 		(void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
1806 		ulp_prop[0] = 'c';
1807 		ulp_prop[1] = 'q';
1808 		ulp_prop[2] = 'h';
1809 		ulp_prop[3] = '-';
1810 		(void) strncpy(ulp_prop + 4, listp[i], len + 1);
1811 		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1812 		    DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
1813 		    DDI_PROP_SUCCESS) {
1814 			cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
1815 			goto game_over;
1816 		}
1817 		if (ndata != 2) {
1818 			cmn_err(CE_CONT, "property '%s' does not "
1819 			    "have 2 integers\n", ulp_prop);
1820 			goto game_over_free_data;
1821 		}
1822 		cq_schedp[i].cqs_desired = data[0];
1823 		cq_schedp[i].cqs_minimum = data[1];
1824 		cq_schedp[i].cqs_refcnt = 0;
1825 		ddi_prop_free(data);
1826 	}
1827 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1828 	    DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
1829 	    DDI_PROP_SUCCESS) {
1830 		cmn_err(CE_CONT, "property 'cqh-default' not found\n");
1831 		goto game_over;
1832 	}
1833 	if (ndata != 2) {
1834 		cmn_err(CE_CONT, "property 'cqh-default' does not "
1835 		    "have 2 integers\n");
1836 		goto game_over_free_data;
1837 	}
1838 	cq_schedp = &state->hs_cq_sched_default;
1839 	cq_schedp->cqs_desired = data[0];
1840 	cq_schedp->cqs_minimum = data[1];
1841 	cq_schedp->cqs_refcnt = 0;
1842 	ddi_prop_free(data);
1843 	ddi_prop_free(listp);
1844 	return (1);		/* game on */
1845 
1846 game_over_free_data:
1847 	ddi_prop_free(data);
1848 game_over:
1849 	cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
1850 	cmn_err(CE_CONT, "completion handler groups not being used\n");
1851 	kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
1852 	state->hs_cq_sched_array_size = 0;
1853 	ddi_prop_free(listp);
1854 	return (0);
1855 }
1856 
1857 /*
1858  * hermon_cq_sched_init()
1859  *    Context: Only called from attach() path context
1860  *
1861  * Read the hermon.conf properties looking for cq_sched info,
1862  * creating reserved pools of MSI-X interrupt ranges for the
1863  * specified ULPs.
1864  */
1865 int
hermon_cq_sched_init(hermon_state_t * state)1866 hermon_cq_sched_init(hermon_state_t *state)
1867 {
1868 	hermon_cq_sched_t *cq_schedp, *defp;
1869 	int i, desired, array_size;
1870 
1871 	mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
1872 	    DDI_INTR_PRI(state->hs_intrmsi_pri));
1873 
1874 	mutex_enter(&state->hs_cq_sched_lock);
1875 	state->hs_cq_sched_array = NULL;
1876 
1877 	/* initialize cq_sched_default */
1878 	defp = &state->hs_cq_sched_default;
1879 	defp->cqs_start_hid = 1;
1880 	defp->cqs_len = state->hs_intrmsi_allocd;
1881 	defp->cqs_next_alloc = defp->cqs_len - 1;
1882 	(void) strncpy(defp->cqs_name, "default", 8);
1883 
1884 	/* Read properties to determine which ULPs use cq_sched */
1885 	if (hermon_get_cq_sched_list(state) == 0)
1886 		goto done;
1887 
1888 	/* Determine if we have enough vectors, or if we have to scale down */
1889 	desired = defp->cqs_desired;	/* default desired (from hermon.conf) */
1890 	if (desired <= 0)
1891 		goto done;		/* all interrupts in the default pool */
1892 	cq_schedp = state->hs_cq_sched_array;
1893 	array_size = state->hs_cq_sched_array_size;
1894 	for (i = 0; i < array_size; i++)
1895 		desired += cq_schedp[i].cqs_desired;
1896 	if (desired > state->hs_intrmsi_allocd) {
1897 		cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
1898 		    "the #interrupts desired (%d)\n",
1899 		    state->hs_intrmsi_allocd, desired);
1900 		cmn_err(CE_CONT, "completion handler groups not being used\n");
1901 		goto done;		/* all interrupts in the default pool */
1902 	}
1903 	/* Game on.  For each cq_sched group, reserve the MSI-X range */
1904 	for (i = 0; i < array_size; i++) {
1905 		desired = cq_schedp[i].cqs_desired;
1906 		cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
1907 		cq_schedp[i].cqs_len = desired;
1908 		cq_schedp[i].cqs_next_alloc = desired - 1;
1909 		defp->cqs_len -= desired;
1910 		defp->cqs_start_hid += desired;
1911 	}
1912 	/* reset default's start allocation seed */
1913 	state->hs_cq_sched_default.cqs_next_alloc =
1914 	    state->hs_cq_sched_default.cqs_len - 1;
1915 
1916 done:
1917 	mutex_exit(&state->hs_cq_sched_lock);
1918 	return (IBT_SUCCESS);
1919 }
1920 
1921 void
hermon_cq_sched_fini(hermon_state_t * state)1922 hermon_cq_sched_fini(hermon_state_t *state)
1923 {
1924 	mutex_enter(&state->hs_cq_sched_lock);
1925 	if (state->hs_cq_sched_array_size) {
1926 		kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
1927 		    state->hs_cq_sched_array_size);
1928 		state->hs_cq_sched_array_size = 0;
1929 		state->hs_cq_sched_array = NULL;
1930 	}
1931 	mutex_exit(&state->hs_cq_sched_lock);
1932 	mutex_destroy(&state->hs_cq_sched_lock);
1933 }
1934 
1935 int
hermon_cq_sched_alloc(hermon_state_t * state,ibt_cq_sched_attr_t * attr,hermon_cq_sched_t ** cq_sched_pp)1936 hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
1937     hermon_cq_sched_t **cq_sched_pp)
1938 {
1939 	hermon_cq_sched_t	*cq_schedp;
1940 	int			i;
1941 	char			*name;
1942 	ibt_cq_sched_flags_t	flags;
1943 
1944 	flags = attr->cqs_flags;
1945 	if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
1946 		*cq_sched_pp = NULL;
1947 		return (IBT_SUCCESS);
1948 	}
1949 	name = attr->cqs_pool_name;
1950 
1951 	mutex_enter(&state->hs_cq_sched_lock);
1952 	cq_schedp = state->hs_cq_sched_array;
1953 	for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
1954 		if (strcmp(name, cq_schedp->cqs_name) == 0) {
1955 			if (cq_schedp->cqs_len != 0)
1956 				cq_schedp->cqs_refcnt++;
1957 			break;	/* found it */
1958 		}
1959 	}
1960 	if ((i == state->hs_cq_sched_array_size) ||	/* not found, or */
1961 	    (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
1962 		cq_schedp = NULL;
1963 	mutex_exit(&state->hs_cq_sched_lock);
1964 
1965 	*cq_sched_pp = cq_schedp;	/* set to valid hdl, or to NULL */
1966 	if ((cq_schedp == NULL) &&
1967 	    (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
1968 		return (IBT_CQ_NO_SCHED_GROUP);
1969 	else
1970 		return (IBT_SUCCESS);
1971 }
1972 
1973 int
hermon_cq_sched_free(hermon_state_t * state,hermon_cq_sched_t * cq_schedp)1974 hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
1975 {
1976 	if (cq_schedp != NULL) {
1977 		/* Just decrement refcnt */
1978 		mutex_enter(&state->hs_cq_sched_lock);
1979 		if (cq_schedp->cqs_refcnt == 0)
1980 			HERMON_WARNING(state, "cq_sched free underflow\n");
1981 		else
1982 			cq_schedp->cqs_refcnt--;
1983 		mutex_exit(&state->hs_cq_sched_lock);
1984 	}
1985 	return (IBT_SUCCESS);
1986 }
1987