/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_cq.c
 *    Hermon Completion Queue Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, resizing,
 *    and handling the completion type events that the Hermon hardware can
 *    generate.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/hermon/hermon.h>

int hermon_should_panic = 0;    /* debugging aid */

#define hermon_cq_update_ci_doorbell(cq) \
    /* Build the doorbell record data (low 24 bits only) */ \
    HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr, \
        cq->cq_consindx & 0x00FFFFFF)
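
/*
 * For reference, a minimal sketch (an inference from how this file uses
 * the record, not a quote from the PRM) of the 8-byte CQ doorbell record
 * that the macro above and hermon_cq_arm_doorbell() below manipulate:
 *
 *    uint32_t *dbr = (uint32_t *)cq->cq_arm_ci_vdbr;
 *    dbr[0] is the SET_CI word: the consumer index (low 24 bits,
 *           big-endian), written by hermon_cq_update_ci_doorbell();
 *    dbr[1] is the ARM word: cmd_sn (bits 29:28), cmd (bits 26:24), and
 *           the consumer index at arm time (low 24 bits).
 */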

static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
    uint_t cmd);
static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc);


/*
 * hermon_cq_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
    ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
    uint_t sleepflag)
{
    hermon_rsrc_t *cqc, *rsrc;
    hermon_umap_db_entry_t *umapdb;
    hermon_hw_cqc_t cqc_entry;
    hermon_cqhdl_t cq;
    ibt_mr_attr_t mr_attr;
    hermon_mr_options_t op;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;
    hermon_hw_cqe_t *buf;
    uint64_t value;
    uint32_t log_cq_size, uarpg;
    uint_t cq_is_umap;
    uint32_t status, flag;
    hermon_cq_sched_t *cq_schedp;

    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))

    /*
     * Determine whether the CQ is being allocated for userland access or
     * whether it is being allocated for kernel access.  If the CQ is
     * being allocated for userland access, then look up the UAR
     * page number for the current process.  Note: If this is not found
     * (e.g. if the process has not previously open()'d the Hermon driver),
     * then an error is returned.
     */
    cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
    if (cq_is_umap) {
        status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
            MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
        if (status != DDI_SUCCESS) {
            status = IBT_INVALID_PARAM;
            goto cqalloc_fail;
        }
        uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
    } else {
        uarpg = state->hs_kernel_uar_index;
    }

    /* Use the internal protection domain (PD) for setting up CQs */
    pd = state->hs_pdhdl_internal;

    /* Increment the reference count on the protection domain (PD) */
    hermon_pd_refcnt_inc(pd);

    /*
     * Allocate a CQ context entry.  This will be filled in with all
     * the necessary parameters to define the Completion Queue, and then
     * ownership will be passed to the hardware in the final step
     * below.  If we fail here, we must undo the protection domain
     * reference count.
     */
    status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto cqalloc_fail1;
    }

    /*
     * Allocate the software structure for tracking the completion queue
     * (i.e. the Hermon Completion Queue handle).  If we fail here, we must
     * undo the protection domain reference count and the previous
     * resource allocation.
     */
    status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto cqalloc_fail2;
    }
    cq = (hermon_cqhdl_t)rsrc->hr_addr;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
    cq->cq_is_umap = cq_is_umap;
    cq->cq_cqnum = cqc->hr_indx;    /* just use index, implicit in Hermon */
    cq->cq_intmod_count = 0;
    cq->cq_intmod_usec = 0;

    /*
     * If this will be a user-mappable CQ, then allocate an entry for
     * the "userland resources database".  This will later be added to
     * the database (after all further CQ operations are successful).
     * If we fail here, we must undo the reference counts and the
     * previous resource allocation.
     */
    if (cq->cq_is_umap) {
        umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
            MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
        if (umapdb == NULL) {
            status = IBT_INSUFF_RESOURCE;
            goto cqalloc_fail3;
        }
    }

    /*
     * Allocate the doorbell record.  We'll need one for the CQ, handling
     * both the consumer index (SET CI) and the CQ state (CQ ARM).
     */
    status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
        &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto cqalloc_fail4;
    }

    /*
     * Calculate the appropriate size for the completion queue.
     * Note: All Hermon CQs must be a power-of-2 minus 1 in size.  Also
     * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step
     * rounds the requested size up to the next highest power-of-2.
     */
    cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
    log_cq_size = highbit(cq_attr->cq_size);
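
    /*
     * A worked example of the rounding above, using the illumos
     * highbit() semantics (1-based position of the highest bit set):
     * a request for 511 CQEs gives highbit(511) == 9, so the queue is
     * sized at 2^9 == 512 entries and the caller sees an actual_size
     * of 511; a request for 512 gives highbit(512) == 10, i.e. 1024
     * entries and an actual_size of 1023.
     */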

    /*
     * Next we verify that the rounded-up size is valid (i.e. consistent
     * with the device limits and/or software-configured limits).
     */
    if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
        status = IBT_HCA_CQ_EXCEEDED;
        goto cqalloc_fail4a;
    }

    /*
     * Allocate the memory for the Completion Queue.
     *
     * Note: Although we use the common queue allocation routine, we
     * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
     * kernel system memory) for kernel CQs because it would be
     * inefficient to have CQs located in DDR memory.  This is primarily
     * because CQs are read from (by software) more than they are written
     * to.  (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
     * user-mappable CQs for a similar reason.)
     * It is also worth noting that, unlike Hermon QP work queues,
     * completion queues do not have the same strict alignment
     * requirements.  It is sufficient for the CQ memory to be both
     * aligned to and bound to addresses which are a multiple of CQE size.
     */
    cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);

    cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
    cq->cq_cqinfo.qa_bind_align = PAGESIZE;
    if (cq->cq_is_umap) {
        cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
    } else {
        cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
        hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
    }
    status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto cqalloc_fail4a;
    }
    buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

    /*
     * The ownership bit of the CQEs is set by the HW during the process
     * of transferring ownership of the CQ (PRM 09.35c, 14.2.1, note D1).
     */

    /*
     * Register the memory for the CQ.  The memory for the CQ must
     * be registered in the Hermon TPT tables.  This gives us the LKey
     * to specify in the CQ context below.  Note: If this is a user-
     * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
     */
    flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
    mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
    mr_attr.mr_len = cq->cq_cqinfo.qa_size;
    mr_attr.mr_as = NULL;
    mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
    op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
    op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
    op.mro_bind_override_addr = 0;
    status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
        HERMON_CQ_CMPT);
    if (status != DDI_SUCCESS) {
        status = IBT_INSUFF_RESOURCE;
        goto cqalloc_fail5;
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
    if (cq_attr->cq_flags & IBT_CQ_HID) {
        if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
            IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
                cq_attr->cq_hid);
            status = IBT_INVALID_PARAM;
            goto cqalloc_fail6;
        }
        cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
        IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
    } else {
        cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
        if (cq_schedp == NULL) {
            cq_schedp = &state->hs_cq_sched_default;
        } else if (cq_schedp != &state->hs_cq_sched_default) {
            int i;
            hermon_cq_sched_t *tmp;

            tmp = state->hs_cq_sched_array;
            for (i = 0; i < state->hs_cq_sched_array_size; i++)
                if (cq_schedp == &tmp[i])
                    break;    /* found it */
            if (i >= state->hs_cq_sched_array_size) {
                cmn_err(CE_CONT, "!Invalid cq_sched argument: "
                    "ignored\n");
                cq_schedp = &state->hs_cq_sched_default;
            }
        }
        cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
            HERMON_CQSCHED_NEXT_HID(cq_schedp));
        IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
            "eqn %d", cq_schedp->cqs_start_hid - 1,
            cq_schedp->cqs_len, cq->cq_eqnum);
    }

    /*
     * Fill in the CQC entry.  This is the final step before passing
     * ownership of the CQC entry to the Hermon hardware.  We use all of
     * the information collected/calculated above to fill in the
     * requisite portions of the CQC.  Note: If this CQ is going to be
     * used for userland access, then we need to set the UAR page number
     * appropriately (otherwise it's a "don't care").
     */
    bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));

    cqc_entry.state = HERMON_CQ_DISARMED;
    cqc_entry.pg_offs = cq->cq_cqinfo.qa_pgoffs >> 5;
    cqc_entry.log_cq_sz = log_cq_size;
    cqc_entry.usr_page = uarpg;
    cqc_entry.c_eqn = cq->cq_eqnum;
    cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
    cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
    cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
    cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
    cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
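
    /*
     * For clarity: the two address pairs above split 64-bit,
     * 8-byte-aligned physical addresses across CQC fields (field widths
     * inferred from the shifts and masks used here, not from the PRM).
     * For a doorbell record at physical address 0x123456788, dbr_addrh
     * holds bits 63:32 (0x1) and dbr_addrl holds the address >> 3
     * (0x2468ACF1); the low 3 bits are implicitly zero due to 8-byte
     * alignment.  The MTT base address is split the same way, except
     * that only 8 bits of the high word are kept (the & 0xFF).
     */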

    /*
     * Write the CQC entry to hardware - we pass ownership of
     * the entry to the hardware (using the Hermon SW2HW_CQ firmware
     * command).  Note: In general, this operation shouldn't fail.  But
     * if it does, we have to undo everything we've done above before
     * returning error.
     */
    status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
        sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
    if (status != HERMON_CMD_SUCCESS) {
        cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        status = ibc_get_ci_failure(0);
        goto cqalloc_fail6;
    }

    /*
     * Fill in the rest of the Hermon Completion Queue handle.  Having
     * successfully transferred ownership of the CQC, we can update the
     * following fields for use in further operations on the CQ.
     */
    cq->cq_resize_hdl = 0;
    cq->cq_cqcrsrcp = cqc;
    cq->cq_rsrcp = rsrc;
    cq->cq_consindx = 0;
        /* least restrictive */
    cq->cq_buf = buf;
    cq->cq_bufsz = (1 << log_cq_size);
    cq->cq_log_cqsz = log_cq_size;
    cq->cq_mrhdl = mr;
    cq->cq_refcnt = 0;
    cq->cq_is_special = 0;
    cq->cq_uarpg = uarpg;
    cq->cq_umap_dhp = (devmap_cookie_t)NULL;
    avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
        sizeof (struct hermon_workq_avl_s),
        offsetof(struct hermon_workq_avl_s, wqa_link));

    cq->cq_hdlrarg = (void *)ibt_cqhdl;

    /*
     * Put the CQ handle in the Hermon CQNum-to-CQHdl list.  Then fill in
     * the "actual_size" and "cqhdl" arguments and return success.
     */
    hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);

    /*
     * If this is a user-mappable CQ, then we need to insert the previously
     * allocated entry into the "userland resources database".  This will
     * allow for later lookup during devmap() (i.e. mmap()) calls.
     */
    if (cq->cq_is_umap) {
        hermon_umap_db_add(umapdb);
    }

    /*
     * Fill in the return arguments (if necessary).  This includes the
     * real completion queue size.
     */
    if (actual_size != NULL) {
        *actual_size = (1 << log_cq_size) - 1;
    }
    *cqhdl = cq;

    return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
cqalloc_fail6:
    if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
        sleepflag) != DDI_SUCCESS) {
        HERMON_WARNING(state, "failed to deregister CQ memory");
    }
cqalloc_fail5:
    hermon_queue_free(&cq->cq_cqinfo);
cqalloc_fail4a:
    hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
cqalloc_fail4:
    if (cq_is_umap) {
        hermon_umap_db_free(umapdb);
    }
cqalloc_fail3:
    hermon_rsrc_free(state, &rsrc);
cqalloc_fail2:
    hermon_rsrc_free(state, &cqc);
cqalloc_fail1:
    hermon_pd_refcnt_dec(pd);
cqalloc_fail:
    return (status);
}


/*
 * hermon_cq_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
{
    hermon_rsrc_t *cqc, *rsrc;
    hermon_umap_db_entry_t *umapdb;
    hermon_hw_cqc_t cqc_entry;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;
    hermon_cqhdl_t cq, resize;
    uint32_t cqnum;
    uint64_t value;
    uint_t maxprot;
    int status;

    /*
     * Pull all the necessary information from the Hermon Completion Queue
     * handle.  This is necessary here because the resource for the
     * CQ handle is going to be freed up as part of this operation.
     */
    cq = *cqhdl;
    mutex_enter(&cq->cq_lock);
    cqc = cq->cq_cqcrsrcp;
    rsrc = cq->cq_rsrcp;
    pd = state->hs_pdhdl_internal;
    mr = cq->cq_mrhdl;
    cqnum = cq->cq_cqnum;

    resize = cq->cq_resize_hdl;    /* save the handle for later */

    /*
     * If there are work queues still associated with the CQ, then return
     * an error.  Otherwise, we will be holding the CQ lock.
     */
    if (cq->cq_refcnt != 0) {
        mutex_exit(&cq->cq_lock);
        return (IBT_CQ_BUSY);
    }

    /*
     * If this was a user-mappable CQ, then we need to remove its entry
     * from the "userland resources database".  If it is also currently
     * mmap()'d out to a user process, then we need to call
     * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
     * We also need to invalidate the CQ tracking information for the
     * user mapping.
     */
    if (cq->cq_is_umap) {
        status = hermon_umap_db_find(state->hs_instance, cqnum,
            MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
            &umapdb);
        if (status != DDI_SUCCESS) {
            mutex_exit(&cq->cq_lock);
            HERMON_WARNING(state, "failed to find in database");
            return (ibc_get_ci_failure(0));
        }
        hermon_umap_db_free(umapdb);
        if (cq->cq_umap_dhp != NULL) {
            maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
            status = devmap_devmem_remap(cq->cq_umap_dhp,
                state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
                maxprot, DEVMAP_MAPPING_INVALID, NULL);
            if (status != DDI_SUCCESS) {
                mutex_exit(&cq->cq_lock);
                HERMON_WARNING(state, "failed in CQ memory "
                    "devmap_devmem_remap()");
                return (ibc_get_ci_failure(0));
            }
            cq->cq_umap_dhp = (devmap_cookie_t)NULL;
        }
    }

    /*
     * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
     * in-progress events to detect that the CQ corresponding to this
     * number has been freed.
     */
    hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);

    mutex_exit(&cq->cq_lock);
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))

    /*
     * Reclaim the CQC entry from hardware (using the Hermon HW2SW_CQ
     * firmware command).  If the ownership transfer fails for any reason,
     * then it is an indication that something (either in HW or SW) has
     * gone seriously wrong.
     */
    status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
        sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
    if (status != HERMON_CMD_SUCCESS) {
        HERMON_WARNING(state, "failed to reclaim CQC ownership");
        cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
            status);
        if (status == HERMON_CMD_INVALID_STATUS) {
            hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        return (ibc_get_ci_failure(0));
    }

    /*
     * From here on, we start relinquishing resources - but check to see
     * if a resize was in progress - if so, we need to relinquish those
     * resources as well.
     */

    /*
     * Deregister the memory for the Completion Queue.  If this fails
     * for any reason, then it is an indication that something (either
     * in HW or SW) has gone seriously wrong.  So we print a warning
     * message and return.
     */
    status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
        sleepflag);
    if (status != DDI_SUCCESS) {
        HERMON_WARNING(state, "failed to deregister CQ memory");
        return (ibc_get_ci_failure(0));
    }

    if (resize) {    /* there was a pointer to a handle */
        mr = resize->cq_mrhdl;    /* reuse the pointer to the region */
        status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
            sleepflag);
        if (status != DDI_SUCCESS) {
            HERMON_WARNING(state, "failed to deregister resize CQ "
                "memory");
            return (ibc_get_ci_failure(0));
        }
    }

    /* Free the memory for the CQ */
    hermon_queue_free(&cq->cq_cqinfo);
    if (resize) {
        hermon_queue_free(&resize->cq_cqinfo);
        /* and the temporary handle */
        kmem_free(resize, sizeof (struct hermon_sw_cq_s));
    }

    /* everything else does not matter for the resize in progress */

    /* Free the dbr */
    hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);

    /* Free the Hermon Completion Queue handle */
    hermon_rsrc_free(state, &rsrc);

    /* Free up the CQC entry resource */
    hermon_rsrc_free(state, &cqc);

    /* Decrement the reference count on the protection domain (PD) */
    hermon_pd_refcnt_dec(pd);

    /* Set the cqhdl pointer to NULL and return success */
    *cqhdl = NULL;

    return (DDI_SUCCESS);
}


/*
 * hermon_cq_resize()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
    uint_t *actual_size, uint_t sleepflag)
{
    hermon_hw_cqc_t cqc_entry;
    hermon_cqhdl_t resize_hdl;
    hermon_qalloc_info_t new_cqinfo;
    ibt_mr_attr_t mr_attr;
    hermon_mr_options_t op;
    hermon_pdhdl_t pd;
    hermon_mrhdl_t mr;
    hermon_hw_cqe_t *buf;
    uint32_t new_prod_indx;
    uint_t log_cq_size;
    int status, flag;

    if (cq->cq_resize_hdl != 0) {    /* already in progress */
        status = IBT_CQ_BUSY;
        goto cqresize_fail;
    }

    /* Use the internal protection domain (PD) for CQs */
    pd = state->hs_pdhdl_internal;

    /*
     * Calculate the appropriate size for the new resized completion queue.
     * Note: All Hermon CQs must be a power-of-2 minus 1 in size.  Also
     * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step
     * rounds the requested size up to the next highest power-of-2.
     */
    req_size = max(req_size, HERMON_CQ_MIN_SIZE);
    log_cq_size = highbit(req_size);

    /*
     * Next we verify that the rounded-up size is valid (i.e. consistent
     * with the device limits and/or software-configured limits).
     */
    if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
        status = IBT_HCA_CQ_EXCEEDED;
        goto cqresize_fail;
    }

    /*
     * Allocate the memory for the newly resized Completion Queue.
     *
     * Note: Although we use the common queue allocation routine, we
     * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
     * kernel system memory) for kernel CQs because it would be
     * inefficient to have CQs located in DDR memory.  This is the same
     * as we do when we first allocate completion queues, primarily
     * because CQs are read from (by software) more than they are written
     * to.  (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
     * user-mappable CQs for a similar reason.)
     * It is also worth noting that, unlike Hermon QP work queues,
     * completion queues do not have the same strict alignment
     * requirements.  It is sufficient for the CQ memory to be both
     * aligned to and bound to addresses which are a multiple of CQE size.
     */

    /* first, alloc the resize_handle */
    resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);

    new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
    new_cqinfo.qa_alloc_align = PAGESIZE;
    new_cqinfo.qa_bind_align = PAGESIZE;
    if (cq->cq_is_umap) {
        new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
    } else {
        new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
    }
    status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
    if (status != DDI_SUCCESS) {
        /* free the resize handle */
        kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
        status = IBT_INSUFF_RESOURCE;
        goto cqresize_fail;
    }
    buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

    /*
     * No initialization of the cq is needed - the command will do it.
     */

    /*
     * Register the memory for the CQ.  The memory for the CQ must
     * be registered in the Hermon TPT tables.  This gives us the LKey
     * to specify in the CQ context below.
     */
    flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
    mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
    mr_attr.mr_len = new_cqinfo.qa_size;
    mr_attr.mr_as = NULL;
    mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
    op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
    op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
    op.mro_bind_override_addr = 0;
    status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
        HERMON_CQ_CMPT);
    if (status != DDI_SUCCESS) {
        hermon_queue_free(&new_cqinfo);
        /* free the resize handle */
        kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
        status = IBT_INSUFF_RESOURCE;
        goto cqresize_fail;
    }
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

    /*
     * Now we grab the CQ lock.  Since we will be updating the actual
     * CQ location and the producer/consumer indexes, we should hold
     * the lock.
     *
     * We use HERMON_CMD_NOSLEEP_SPIN here (and below), though, because
     * we are holding the "cq_lock" and, if we were raised to interrupt
     * level by priority inversion, we would not want to block in this
     * routine waiting for success.
     */
    mutex_enter(&cq->cq_lock);

    /*
     * Fill in the CQC entry.  For the resize operation this is the
     * final step before attempting the resize operation on the CQC entry.
     * We use all of the information collected/calculated above to fill
     * in the requisite portions of the CQC.
     */
    bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
    cqc_entry.log_cq_sz = log_cq_size;
    cqc_entry.pg_offs = new_cqinfo.qa_pgoffs >> 5;
    cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
    cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
    cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;

    /*
     * Write the CQC entry to hardware.  Lastly, we pass ownership of
     * the entry to the hardware (using the Hermon RESIZE_CQ firmware
     * command).  Note: In general, this operation shouldn't fail.  But
     * if it does, we have to undo everything we've done above before
     * returning error.  Also note that the status returned may indicate
     * the code to return to the IBTF.
     */
    status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
        &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
    if (status != HERMON_CMD_SUCCESS) {
        /* Resize attempt has failed, drop CQ lock and cleanup */
        mutex_exit(&cq->cq_lock);
        if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
            sleepflag) != DDI_SUCCESS) {
            HERMON_WARNING(state, "failed to deregister CQ memory");
        }
        kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
        hermon_queue_free(&new_cqinfo);
        if (status == HERMON_CMD_BAD_SIZE) {
            return (IBT_CQ_SZ_INSUFFICIENT);
        } else {
            cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }
    }

    /*
     * For Hermon, we've alloc'd another handle structure and saved off
     * the important things in it.  Then, in polling we check to see if
     * there's a "resizing handle" and, if so, we look for the "special
     * CQE", opcode 0x16, that indicates the transition to the new buffer.
     *
     * At that point, we'll adjust everything - including dereg and
     * freeing of the original buffer, updating all the necessary fields
     * in the cq_hdl, and setting up for the next cqe polling.
     */
    resize_hdl->cq_buf = buf;
    resize_hdl->cq_bufsz = (1 << log_cq_size);
    resize_hdl->cq_mrhdl = mr;
    resize_hdl->cq_log_cqsz = log_cq_size;

    bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
        sizeof (struct hermon_qalloc_info_s));

    /* now, save the address in the cq_handle */
    cq->cq_resize_hdl = resize_hdl;

    /*
     * Drop the CQ lock now.
     */
    mutex_exit(&cq->cq_lock);

    /*
     * Fill in the return arguments (if necessary).  This includes the
     * real new completion queue size.
     */
    if (actual_size != NULL) {
        *actual_size = (1 << log_cq_size) - 1;
    }

    return (DDI_SUCCESS);

cqresize_fail:
    return (status);
}


/*
 * hermon_cq_modify()
 *    Context: Can be called from base context.
 */
/* ARGSUSED */
int
hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
    uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
{
    int status;
    hermon_hw_cqc_t cqc_entry;

    mutex_enter(&cq->cq_lock);
    if (count != cq->cq_intmod_count ||
        usec != cq->cq_intmod_usec) {
        bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
        cqc_entry.cq_max_cnt = count;
        cqc_entry.cq_period = usec;
        status = hermon_modify_cq_cmd_post(state, &cqc_entry,
            cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
        if (status != HERMON_CMD_SUCCESS) {
            mutex_exit(&cq->cq_lock);
            cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
                "command failed: %08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }
        cq->cq_intmod_count = count;
        cq->cq_intmod_usec = usec;
    }
    if (hid && (hid - 1 != cq->cq_eqnum)) {
        bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
        cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
        status = hermon_modify_cq_cmd_post(state, &cqc_entry,
            cq->cq_cqnum, MODIFY_EQN, sleepflag);
        if (status != HERMON_CMD_SUCCESS) {
            mutex_exit(&cq->cq_lock);
            cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
                "%08x\n", status);
            if (status == HERMON_CMD_INVALID_STATUS) {
                hermon_fm_ereport(state, HCA_SYS_ERR,
                    HCA_ERR_SRV_LOST);
            }
            return (ibc_get_ci_failure(0));
        }
        cq->cq_eqnum = hid - 1;
    }
    mutex_exit(&cq->cq_lock);
    return (DDI_SUCCESS);
}

/*
 * hermon_cq_notify()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
    ibt_cq_notify_flags_t flags)
{
    uint_t cmd;
    ibt_status_t status;

    /* Validate IBT flags and call doorbell routine. */
    if (flags == IBT_NEXT_COMPLETION) {
        cmd = HERMON_CQDB_NOTIFY_CQ;
    } else if (flags == IBT_NEXT_SOLICITED) {
        cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
    } else {
        return (IBT_CQ_NOTIFY_TYPE_INVALID);
    }

    status = hermon_cq_arm_doorbell(state, cq, cmd);
    return (status);
}


/*
 * hermon_cq_poll()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
    hermon_hw_cqe_t *cqe;
    uint_t opcode;
    uint32_t cons_indx, wrap_around_mask, shift, mask;
    uint32_t polled_cnt, spec_op = 0;
    int status;

    /*
     * Check for user-mappable CQ memory.  Note: We do not allow kernel
     * clients to poll CQ memory that is accessible directly by the user.
     * If the CQ memory is user accessible, then return an error.
     */
    if (cq->cq_is_umap) {
        return (IBT_CQ_HDL_INVALID);
    }

    mutex_enter(&cq->cq_lock);

    /* Get the consumer index */
    cons_indx = cq->cq_consindx;
    shift = cq->cq_log_cqsz;
    mask = cq->cq_bufsz;

    /*
     * Calculate the wrap around mask.  Note: This operation only works
     * because all Hermon completion queues have power-of-2 sizes.
     */
    wrap_around_mask = (cq->cq_bufsz - 1);
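
    /*
     * Example, for illustration only: with a 1024-entry CQ,
     * wrap_around_mask is 0x3FF, so a monotonically increasing
     * consumer index of 1025 selects buffer slot (1025 & 0x3FF) == 1.
     * The index itself is never reduced modulo the queue size; only
     * the buffer offset is.
     */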

    /* Calculate the pointer to the first CQ entry */
    cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

    /*
     * Keep pulling entries from the CQ until we find an entry owned by
     * the hardware.  As long as there are CQEs owned by software,
     * process each entry by calling hermon_cq_cqe_consume() and
     * updating the CQ consumer index.  If the CQ is in the midst of a
     * resize, also watch for the special CQE opcode that marks the
     * transition to the resized buffer (handled by
     * hermon_cq_resize_helper()).
     */
    polled_cnt = 0;
    while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
        if (cq->cq_resize_hdl != 0) {    /* in midst of resize */
            /* peek at the opcode */
            opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
            if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
                hermon_cq_resize_helper(state, cq);

                /* Increment the consumer index */
                cons_indx = (cons_indx + 1);
                spec_op = 1;    /* plus one for the limiting CQE */

                wrap_around_mask = (cq->cq_bufsz - 1);

                /* Update the pointer to the next CQ entry */
                cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

                continue;
            }
        }    /* in resizing CQ */

        /*
         * either resizing and not the special opcode, or
         * not resizing at all
         */
        hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

        /* Increment the consumer index */
        cons_indx = (cons_indx + 1);

        /* Update the pointer to the next CQ entry */
        cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

        /*
         * If we have run out of space to store work completions,
         * then stop and return the ones we have pulled off the CQ.
         */
        if (polled_cnt >= num_wc) {
            break;
        }
    }

    /*
     * Now we only ring the doorbell (to update the consumer index) if
     * we've actually consumed a CQ entry.
     */
    if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
        /*
         * Update the consumer index in both the CQ handle and the
         * doorbell record.
         */
        cq->cq_consindx = cons_indx;
        hermon_cq_update_ci_doorbell(cq);

    } else if (polled_cnt == 0) {
        if (spec_op != 0) {
            /* if we got the special opcode, update the consindx */
            cq->cq_consindx = cons_indx;
            hermon_cq_update_ci_doorbell(cq);
        }
    }

    mutex_exit(&cq->cq_lock);

    /* Set "num_polled" (if necessary) */
    if (num_polled != NULL) {
        *num_polled = polled_cnt;
    }

    /* Set CQ_EMPTY condition if needed, otherwise return success */
    if (polled_cnt == 0) {
        status = IBT_CQ_EMPTY;
    } else {
        status = DDI_SUCCESS;
    }

    /*
     * Check if the system is currently panicking.  If it is, then call
     * the Hermon interrupt service routine.  This step is necessary here
     * because we might be in a polled I/O mode and without the call to
     * hermon_isr() - and its subsequent calls to poll and rearm each
     * event queue - we might overflow our EQs and render the system
     * unable to sync/dump.
     */
    if (ddi_in_panic() != 0) {
        (void) hermon_isr((caddr_t)state, (caddr_t)NULL);
    }
    return (status);
}

/*
 * cmd_sn must be initialized to 1 to enable proper reenabling
 * by hermon_arm_cq_dbr_update().
 */
static void
hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
{
    uint32_t *target;

    target = (uint32_t *)cq_arm_dbr + 1;
    *target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
}


/*
 * User cmd_sn needs help from this kernel function to know
 * when it should be incremented (modulo 4).  We do an atomic
 * update of the arm_cq dbr to communicate this fact.  We retry
 * in the case that the user library is racing with us.  We zero
 * out the cmd field so that the user library can use the cmd
 * field to track the last command it issued (solicited versus any).
 */
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
    uint32_t tmp, cmp, new;
    uint32_t old_cmd_sn, new_cmd_sn;
    uint32_t *target;
    int retries = 0;

    target = (uint32_t *)cq_arm_dbr + 1;
retry:
    cmp = *target;
    tmp = htonl(cmp);
    old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
    new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
        (0x3 << HERMON_CQDB_CMDSN_SHIFT);
    new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
    tmp = atomic_cas_32(target, cmp, new);
    if (tmp != cmp) {    /* cas failed, so need to retry */
        drv_usecwait(retries & 0xff);    /* avoid race */
        if (++retries > 100000) {
            cmn_err(CE_CONT, "cas failed in hermon\n");
            retries = 0;
        }
        goto retry;
    }
}
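
/*
 * Worked example of the arithmetic above (bit positions inferred from
 * the shifts and masks used in this file): if the ARM word currently
 * holds cmd_sn == 3 (bits 29:28 == 0b11), the update computes
 * (3 + 1) & 3 == 0, clears both the cmd_sn and cmd fields with the
 * ~(0x37 << HERMON_CQDB_CMD_SHIFT) mask, and ORs in the new cmd_sn,
 * wrapping the sequence number back to 0.
 */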


/*
 * hermon_cq_handler()
 *    Context: Only called from interrupt context
 */
/* ARGSUSED */
int
hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
    hermon_cqhdl_t cq;
    uint_t cqnum;

    /* Get the CQ handle from the CQ number in the event descriptor */
    cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
    cq = hermon_cqhdl_from_cqnum(state, cqnum);

    /*
     * If the CQ handle is NULL, this is probably an indication
     * that the CQ has been freed already.  In that case, we
     * should not deliver this event.
     *
     * We also check that the CQ number in the handle is the
     * same as the CQ number in the event queue entry.  This
     * extra check allows us to handle the case where a CQ was
     * freed and then allocated again in the time it took to
     * handle the event queue processing.  By constantly incrementing
     * the non-constrained portion of the CQ number every time
     * a new CQ is allocated, we mitigate (somewhat) the chance
     * that a stale event could be passed to the client's CQ
     * handler.
     *
     * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is, then it
     * means that we have either received this event before we
     * finished attaching to the IBTF or we've received it while we
     * are in the process of detaching.
     */
    if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
        (state->hs_ibtfpriv != NULL)) {
        hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
        HERMON_DO_IBTF_CQ_CALLB(state, cq);
    }

    return (DDI_SUCCESS);
}


/*
 * hermon_cq_err_handler()
 *    Context: Only called from interrupt context
 */
/* ARGSUSED */
int
hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
    hermon_cqhdl_t cq;
    uint_t cqnum;
    ibc_async_event_t event;
    ibt_async_code_t type;

    HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
    /* Get the CQ handle from the CQ number in the event descriptor */
    cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
    cq = hermon_cqhdl_from_cqnum(state, cqnum);

    /*
     * If the CQ handle is NULL, this is probably an indication
     * that the CQ has been freed already.  In that case, we
     * should not deliver this event.
     *
     * We also check that the CQ number in the handle is the
     * same as the CQ number in the event queue entry.  This
     * extra check allows us to handle the case where a CQ was
     * freed and then allocated again in the time it took to
     * handle the event queue processing.  By constantly incrementing
     * the non-constrained portion of the CQ number every time
     * a new CQ is allocated, we mitigate (somewhat) the chance
     * that a stale event could be passed to the client's CQ
     * handler.
     *
     * And then we check if "hs_ibtfpriv" is NULL.  If it is, then it
     * means that we have either received this event before we
     * finished attaching to the IBTF or we've received it while we
     * are in the process of detaching.
     */
    if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
        (state->hs_ibtfpriv != NULL)) {
        event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
        type = IBT_ERROR_CQ;
        HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
    }

    return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
{
    /*
     * Increment the completion queue's reference count.  Note: In order
     * to ensure compliance with IBA C11-15, we must ensure that a given
     * CQ is not used for both special (SMI/GSI) QP and non-special QP.
     * This is accomplished here by keeping track of how the referenced
     * CQ is being used.
     */
    mutex_enter(&cq->cq_lock);
    if (cq->cq_refcnt == 0) {
        cq->cq_is_special = is_special;
    } else {
        if (cq->cq_is_special != is_special) {
            mutex_exit(&cq->cq_lock);
            return (DDI_FAILURE);
        }
    }
    cq->cq_refcnt++;
    mutex_exit(&cq->cq_lock);
    return (DDI_SUCCESS);
}
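
/*
 * A minimal usage sketch (hypothetical caller, for illustration): a QP
 * being associated with this CQ calls hermon_cq_refcnt_inc(cq, is_special)
 * with is_special set for SMI/GSI QPs.  The first association fixes the
 * CQ's flavor; a later attempt to associate a QP of the other flavor gets
 * DDI_FAILURE, which is how the IBA C11-15 restriction is enforced.
 */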


/*
 * hermon_cq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
{
    /* Decrement the completion queue's reference count */
    mutex_enter(&cq->cq_lock);
    cq->cq_refcnt--;
    mutex_exit(&cq->cq_lock);
}


/*
 * hermon_cq_arm_doorbell()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
    uint32_t cq_num;
    uint32_t *target;
    uint32_t old_cmd, cmp, new, tmp, cmd_sn;
    ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

    cq_num = cq->cq_cqnum;
    target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

    /* the FMA retry loop starts for Hermon doorbell register. */
    hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
        fm_test_num);
retry:
    cmp = *target;
    tmp = htonl(cmp);
    old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
    cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
    if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
        if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
            cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
                HERMON_CQDB_CMD_SHIFT);
            new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
            tmp = atomic_cas_32(target, cmp, new);
            if (tmp != cmp)
                goto retry;
            HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
                &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
                32) | (cq->cq_consindx & 0xFFFFFF));
        }    /* else it's already armed */
    } else {
        ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
        if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
            old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
            cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
                HERMON_CQDB_CMD_SHIFT);
            new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
            tmp = atomic_cas_32(target, cmp, new);
            if (tmp != cmp)
                goto retry;
            HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
                &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
                32) | (cq->cq_consindx & 0xFFFFFF));
        }    /* else it's already armed */
    }

    /* the FMA retry loop ends. */
    hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
        fm_test_num);

    return (IBT_SUCCESS);

pio_error:
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}


/*
 * hermon_cqhdl_from_cqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the CQ number is critical to the detection of a
 *    potential race condition in the CQ handler code (i.e. the case
 *    where a CQ is freed and alloc'd again before an event for the
 *    "old" CQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new CQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported CQs grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
hermon_cqhdl_t
hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
{
    uint_t cqindx, cqmask;

    /* Calculate the CQ table index from the cqnum */
    cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
    cqindx = cqnum & cqmask;
    return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
}
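
/*
 * Example, for illustration only: with cp_log_num_cq == 16, cqmask is
 * 0xFFFF, so an event naming CQ number 0x12345 selects CQC table index
 * 0x2345.  The bits above the constrained (table-index) portion are the
 * unconstrained bits discussed above.
 */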

/*
 * hermon_cq_cqe_consume()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
    uint_t opcode, qpnum, qp1_indx;
    ibt_wc_flags_t flags;
    ibt_wrc_opcode_t type;

    /*
     * Determine if this is an "error" CQE by examining "opcode".  If it
     * is an error CQE, then call hermon_cq_errcqe_consume() to handle it
     * and return.  Otherwise, this is a successful completion.
     */
    opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
    if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
        (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
        hermon_cq_errcqe_consume(state, cq, cqe, wc);
        return;
    }

    /*
     * Fetch the Work Request ID using the information in the CQE.
     * See hermon_wr.c for more details.
     */
    wc->wc_id = hermon_wrid_get_entry(cq, cqe);

    /*
     * Parse the CQE opcode to determine completion type.  This will set
     * not only the type of the completion, but also any flags that might
     * be associated with it (e.g. whether immediate data is present).
     */
    flags = IBT_WC_NO_FLAGS;
    _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running))
    if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {

        /* Send CQE */
        switch (opcode) {
        case HERMON_CQE_SND_RDMAWR_IMM:
        case HERMON_CQE_SND_RDMAWR:
            type = IBT_WRC_RDMAW;
            break;

        case HERMON_CQE_SND_SEND_INV:
        case HERMON_CQE_SND_SEND_IMM:
        case HERMON_CQE_SND_SEND:
            type = IBT_WRC_SEND;
            break;

        case HERMON_CQE_SND_LSO:
            type = IBT_WRC_SEND_LSO;
            break;

        case HERMON_CQE_SND_RDMARD:
            type = IBT_WRC_RDMAR;
            break;

        case HERMON_CQE_SND_ATOMIC_CS:
            type = IBT_WRC_CSWAP;
            break;

        case HERMON_CQE_SND_ATOMIC_FA:
            type = IBT_WRC_FADD;
            break;

        case HERMON_CQE_SND_BIND_MW:
            type = IBT_WRC_BIND;
            break;

        case HERMON_CQE_SND_FRWR:
            type = IBT_WRC_FAST_REG_PMR;
            break;

        case HERMON_CQE_SND_LCL_INV:
            type = IBT_WRC_LOCAL_INVALIDATE;
            break;

        default:
            HERMON_WARNING(state, "unknown send CQE type");
            wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
            return;
        }
    } else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
        hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
        type = IBT_WRC_RECV;
        if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
            flags |= IBT_WC_DIF_ERROR;
        wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
        wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
        wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
        wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
        wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
        wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
            IBT_WC_DETAIL_FC_MATCH_MASK;
        wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
        flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
    } else {
        /*
         * Parse the remaining contents of the CQE into the work
         * completion.  This means filling in SL, QP number, SLID,
         * immediate data, etc.
         *
         * Note: Not all of these fields are valid in a given
         * completion.  Many of them depend on the actual type of
         * completion.  So we fill in all of the fields and leave
         * it up to the IBTF and consumer to sort out which are
         * valid based on their context.
         */
        wc->wc_sl = HERMON_CQE_SL_GET(cq, cqe);
        wc->wc_qpn = HERMON_CQE_DQPN_GET(cq, cqe);
        wc->wc_slid = HERMON_CQE_DLID_GET(cq, cqe);
        wc->wc_immed_data =
            HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
        wc->wc_ethertype = (wc->wc_immed_data & 0xFFFF);
        wc->wc_pkey_ix = (wc->wc_immed_data &
            ((1 << state->hs_queryport.log_max_pkey) - 1));
        /*
         * Fill in "bytes transferred" as appropriate.  Also,
         * if necessary, fill in the "path bits" field.
         */
        wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
        wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);

        /*
         * Check for GRH, update the flags, then fill in the "wc_flags"
         * field in the work completion.
         */
        if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
            flags |= IBT_WC_GRH_PRESENT;
        }

        /* Receive CQE */
        switch (opcode) {
        case HERMON_CQE_RCV_SEND_IMM:
            /*
             * Note: According to the PRM, all QP1 recv
             * completions look like the result of a Send with
             * Immediate.  They are not, however (MADs are Send
             * Only), so we need to check the QP number and set
             * the flag only if it is non-QP1.
             */
            qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
            qp1_indx = state->hs_spec_qp1->hr_indx;
            if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
                flags |= IBT_WC_IMMED_DATA_PRESENT;
            }
            /* FALLTHROUGH */

        case HERMON_CQE_RCV_SEND:
            type = IBT_WRC_RECV;
            if (HERMON_CQE_IS_IPOK(cq, cqe)) {
                wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
                flags |= IBT_WC_CKSUM_OK;
                wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
                    HERMON_CQE_IPOIB_STATUS(cq, cqe);
            }
            break;

        case HERMON_CQE_RCV_SEND_INV:
            type = IBT_WRC_RECV;
            flags |= IBT_WC_RKEY_INVALIDATED;
            wc->wc_rkey = wc->wc_immed_data;    /* same field in cqe */
            break;

        case HERMON_CQE_RCV_RDMAWR_IMM:
            flags |= IBT_WC_IMMED_DATA_PRESENT;
            type = IBT_WRC_RECV_RDMAWI;
            break;

        default:
            HERMON_WARNING(state, "unknown recv CQE type");
            wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
            return;
        }
    }
    wc->wc_type = type;
    wc->wc_flags = flags;
    wc->wc_status = IBT_WC_SUCCESS;
}

/*
 * hermon_cq_errcqe_consume()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
    uint32_t imm_eth_pkey_cred;
    uint_t status;
    ibt_wc_status_t ibt_status;

    /*
     * Fetch the Work Request ID using the information in the CQE.
     * See hermon_wr.c for more details.
     */
    wc->wc_id = hermon_wrid_get_entry(cq, cqe);

    /*
     * Parse the CQE opcode to determine completion type.  We know that
     * the CQE is an error completion, so we extract only the completion
     * status/syndrome here.
     */
    imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
    status = imm_eth_pkey_cred;
    if (status != HERMON_CQE_WR_FLUSHED_ERR)
        IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x "
            "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
            HERMON_CQE_WQECNTR_GET(cq, cqe), status,
            HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
    switch (status) {
    case HERMON_CQE_LOC_LEN_ERR:
        HERMON_WARNING(state, HERMON_FMA_LOCLEN);
        ibt_status = IBT_WC_LOCAL_LEN_ERR;
        break;

    case HERMON_CQE_LOC_OP_ERR:
        HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
        ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
        break;

    case HERMON_CQE_LOC_PROT_ERR:
        HERMON_WARNING(state, HERMON_FMA_LOCPROT);
        ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
        IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
        if (hermon_should_panic) {
            cmn_err(CE_PANIC, "Hermon intentional PANIC - "
                "Local Protection Error\n");
        }
        break;

    case HERMON_CQE_WR_FLUSHED_ERR:
        ibt_status = IBT_WC_WR_FLUSHED_ERR;
        break;

    case HERMON_CQE_MW_BIND_ERR:
        HERMON_WARNING(state, HERMON_FMA_MWBIND);
        ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
        break;

    case HERMON_CQE_BAD_RESPONSE_ERR:
        HERMON_WARNING(state, HERMON_FMA_RESP);
        ibt_status = IBT_WC_BAD_RESPONSE_ERR;
        break;

    case HERMON_CQE_LOCAL_ACCESS_ERR:
        HERMON_WARNING(state, HERMON_FMA_LOCACC);
        ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
        break;

    case HERMON_CQE_REM_INV_REQ_ERR:
        HERMON_WARNING(state, HERMON_FMA_REMREQ);
        ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
        break;

    case HERMON_CQE_REM_ACC_ERR:
        HERMON_WARNING(state, HERMON_FMA_REMACC);
        ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
        break;

    case HERMON_CQE_REM_OP_ERR:
        HERMON_WARNING(state, HERMON_FMA_REMOP);
        ibt_status = IBT_WC_REMOTE_OP_ERR;
        break;

    case HERMON_CQE_TRANS_TO_ERR:
        HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
        ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
        break;

    case HERMON_CQE_RNRNAK_TO_ERR:
        HERMON_WARNING(state, HERMON_FMA_RNRCNT);
        ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
        break;

    /*
     * The following error codes are not supported in the Hermon driver
     * as they relate only to Reliable Datagram completion statuses:
     *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
     *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
     *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
     *    case HERMON_CQE_INV_EEC_NUM_ERR:
     *    case HERMON_CQE_INV_EEC_STATE_ERR:
     *    case HERMON_CQE_LOC_EEC_ERR:
     */

    default:
        HERMON_WARNING(state, "unknown error CQE status");
        HERMON_FMANOTE(state, HERMON_FMA_UNKN);
        ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
        break;
    }

    wc->wc_status = ibt_status;
}


/*
 * hermon_cq_resize_helper()
 *    Context: Can be called only from user or kernel context.
 */
void
hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
{
    hermon_cqhdl_t resize_hdl;
    int status;

    /*
     * We're here because we found the special cqe opcode, so we have
     * to update the cq_handle, release the old resources, clear the
     * flag in the cq_hdl, and release the resize_hdl.  When we return
     * to the caller, it will take care of the rest.
     */
    ASSERT(MUTEX_HELD(&cq->cq_lock));

    resize_hdl = cq->cq_resize_hdl;

    /*
     * Deregister the memory for the old Completion Queue.  Note: We
     * really can't return error here because we have no good way to
     * cleanup.  Plus, the deregistration really shouldn't ever fail.
     * So, if it does, it is an indication that something has gone
     * seriously wrong.  So we print a warning message and press on
     * (knowing, of course, that the "old" CQ memory will be leaked).
     */
    status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
        HERMON_SLEEP);
    if (status != DDI_SUCCESS) {
        HERMON_WARNING(state, "failed to deregister old CQ memory");
    }

    /* Next, free the memory from the old CQ buffer */
    hermon_queue_free(&cq->cq_cqinfo);

    /* now we can update the cq_hdl with the new things saved */

    cq->cq_buf = resize_hdl->cq_buf;
    cq->cq_mrhdl = resize_hdl->cq_mrhdl;
    cq->cq_bufsz = resize_hdl->cq_bufsz;
    cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
    cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
    cq->cq_resize_hdl = 0;
    bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
        sizeof (struct hermon_qalloc_info_s));

    /* finally, release the resizing handle */
    kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
}


/*
 * hermon_cq_entries_flush()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
void
hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
{
    hermon_cqhdl_t cq;
    hermon_hw_cqe_t *cqe, *next_cqe;
    hermon_srqhdl_t srq;
    hermon_workq_hdr_t *wq;
    uint32_t cons_indx, tail_cons_indx, wrap_around_mask;
    uint32_t new_indx, check_indx, qpnum;
    uint32_t shift, mask;
    int outstanding_cqes;

    qpnum = qp->qp_qpnum;
    if ((srq = qp->qp_srqhdl) != NULL)
        wq = qp->qp_srqhdl->srq_wq_wqhdr;
    else
        wq = NULL;
    cq = qp->qp_rq_cqhdl;

    if (cq == NULL) {
        cq = qp->qp_sq_cqhdl;
    }

do_send_cq:    /* loop back to here if send_cq is not the same as recv_cq */
    if (cq == NULL)
        return;

    cons_indx = cq->cq_consindx;
    shift = cq->cq_log_cqsz;
    mask = cq->cq_bufsz;
    wrap_around_mask = mask - 1;

    /* Calculate the pointer to the first CQ entry */
    cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

    /*
     * Loop through the CQ looking for entries owned by software.  If an
     * entry is owned by software then we increment an 'outstanding_cqes'
     * count to know how many entries total we have on our CQ.  We use
     * this value further down to know how many entries to loop through
     * looking for our same QP number.
     */
    outstanding_cqes = 0;
    tail_cons_indx = cons_indx;
    while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
        /* increment total cqes count */
        outstanding_cqes++;

        /* increment the consumer index */
        tail_cons_indx++;

        /* update the pointer to the next cq entry */
        cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
    }

    /*
     * Using the 'tail_cons_indx' that was just set, we now know how many
     * total CQEs are possible.  Set the 'check_indx' and the 'new_indx'
     * to the last entry identified by 'tail_cons_indx'.
     */
    check_indx = new_indx = (tail_cons_indx - 1);
1705
1706 while (--outstanding_cqes >= 0) {
1707 cqe = &cq->cq_buf[check_indx & wrap_around_mask];
1708
1709 /*
1710 * If the QP number is the same in the CQE as the QP, then
1711 * we must "consume" it. If it is for an SRQ wqe, then we
1712 * also must free the wqe back onto the free list of the SRQ.
1713 */
1714 if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
1715 if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
1716 HERMON_COMPLETION_RECV)) {
1717 uint64_t *desc;
1718 int indx;
1719
1720 /* Add wqe back to SRQ free list */
1721 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
1722 wq->wq_mask;
1723 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
1724 ((uint16_t *)desc)[1] = htons(indx);
1725 wq->wq_tail = indx;
1726 }
1727 } else { /* CQEs for other QPNs need to remain */
1728 if (check_indx != new_indx) {
1729 next_cqe =
1730 &cq->cq_buf[new_indx & wrap_around_mask];
1731 /* Copy the CQE into the "next_cqe" pointer. */
1732 bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
1733 }
1734 new_indx--; /* move index to next CQE to fill */
1735 }
1736 check_indx--; /* move index to next CQE to check */
1737 }
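
	/*
	 * Illustration (made-up indices, not from the hardware): suppose
	 * the five SW-owned CQEs at indices 5..9 are [A, B, C, B, D],
	 * where the two B entries belong to the flushed QP.  The loop
	 * above scans from index 9 down to 5, consuming both Bs and
	 * copying the survivors toward the tail, so indices 7..9 end up
	 * holding [A, C, D] (original order preserved) and 'new_indx'
	 * finishes at 6.
	 */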

	/*
	 * Update the consumer index to 'new_indx' plus one, which moves it
	 * past all of the removed entries.  'new_indx' finishes just below
	 * the oldest CQE that was kept, so adding 1 points 'cons_indx' at
	 * the first remaining SW-owned entry (or at the first HW-owned
	 * entry if every CQE was consumed).
	 */
	cons_indx = (new_indx + 1);

	/*
	 * We only ring the doorbell (to update the consumer index) if we
	 * actually consumed a CQ entry.  If no QP number matched above,
	 * then nothing was removed and the doorbell can be skipped.
	 */
	if (cq->cq_consindx != cons_indx) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		hermon_cq_update_ci_doorbell(cq);
	}

	if (cq != qp->qp_sq_cqhdl) {
		cq = qp->qp_sq_cqhdl;
		goto do_send_cq;
	}
}

/*
 * hermon_get_cq_sched_list()
 *    Context: Only called from attach() path context
 *
 * Read properties, creating entries in hs_cq_sched_array with
 * information about the requested "desired" and "minimum"
 * number of MSI-X interrupt vectors per list entry.
 */
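/*
 * For illustration, a hypothetical hermon.conf fragment in the shape this
 * routine expects (the group names and counts here are made up; each
 * "cqh-<name>" pair is <desired>, <minimum>):
 *
 *	cqh-group-list = "storage", "ipoib";
 *	cqh-storage = 4, 2;
 *	cqh-ipoib = 2, 1;
 *	cqh-default = 2, 1;
 */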
static int
hermon_get_cq_sched_list(hermon_state_t *state)
{
	char **listp, ulp_prop[HERMON_CQH_MAX + 4];
	uint_t nlist, i, j, ndata;
	int *data;
	size_t len;
	hermon_cq_sched_t *cq_schedp;

	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
	    DDI_PROP_SUCCESS)
		return (0);

	state->hs_cq_sched_array_size = nlist;
	state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
	    sizeof (hermon_cq_sched_t), KM_SLEEP);
	for (i = 0; i < nlist; i++) {
		if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
			cmn_err(CE_CONT, "'cqh' property name too long\n");
			goto game_over;
		}
		for (j = 0; j < i; j++) {
			if (strcmp(listp[j], listp[i]) == 0) {
				cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
				goto game_over;
			}
		}
		(void) strncpy(cq_schedp[i].cqs_name, listp[i],
		    HERMON_CQH_MAX);
		/* Build the "cqh-<name>" property name for this entry */
		ulp_prop[0] = 'c';
		ulp_prop[1] = 'q';
		ulp_prop[2] = 'h';
		ulp_prop[3] = '-';
		(void) strncpy(ulp_prop + 4, listp[i], len + 1);
		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
		    DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
		    DDI_PROP_SUCCESS) {
			cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
			goto game_over;
		}
		if (ndata != 2) {
			cmn_err(CE_CONT, "property '%s' does not "
			    "have 2 integers\n", ulp_prop);
			goto game_over_free_data;
		}
		cq_schedp[i].cqs_desired = data[0];
		cq_schedp[i].cqs_minimum = data[1];
		cq_schedp[i].cqs_refcnt = 0;
		ddi_prop_free(data);
	}
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
	    DDI_PROP_SUCCESS) {
		cmn_err(CE_CONT, "property 'cqh-default' not found\n");
		goto game_over;
	}
	if (ndata != 2) {
		cmn_err(CE_CONT, "property 'cqh-default' does not "
		    "have 2 integers\n");
		goto game_over_free_data;
	}
	cq_schedp = &state->hs_cq_sched_default;
	cq_schedp->cqs_desired = data[0];
	cq_schedp->cqs_minimum = data[1];
	cq_schedp->cqs_refcnt = 0;
	ddi_prop_free(data);
	ddi_prop_free(listp);
	return (1);	/* game on */

game_over_free_data:
	ddi_prop_free(data);
game_over:
	cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
	cmn_err(CE_CONT, "completion handler groups not being used\n");
	kmem_free(state->hs_cq_sched_array,
	    nlist * sizeof (hermon_cq_sched_t));
	state->hs_cq_sched_array = NULL;	/* avoid a dangling pointer */
	state->hs_cq_sched_array_size = 0;
	ddi_prop_free(listp);
	return (0);
}

/*
 * hermon_cq_sched_init()
 *    Context: Only called from attach() path context
 *
 * Read the hermon.conf properties looking for cq_sched info,
 * creating reserved pools of MSI-X interrupt ranges for the
 * specified ULPs.
 */
int
hermon_cq_sched_init(hermon_state_t *state)
{
	hermon_cq_sched_t *cq_schedp, *defp;
	int i, desired, array_size;

	mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	mutex_enter(&state->hs_cq_sched_lock);
	state->hs_cq_sched_array = NULL;

	/* initialize cq_sched_default */
	defp = &state->hs_cq_sched_default;
	defp->cqs_start_hid = 1;
	defp->cqs_len = state->hs_intrmsi_allocd;
	defp->cqs_next_alloc = defp->cqs_len - 1;
	(void) strncpy(defp->cqs_name, "default", 8);

	/* Read properties to determine which ULPs use cq_sched */
	if (hermon_get_cq_sched_list(state) == 0)
		goto done;

	/* Determine if we have enough vectors, or if we have to scale down */
	desired = defp->cqs_desired;	/* default desired (from hermon.conf) */
	if (desired <= 0)
		goto done;	/* all interrupts in the default pool */
	cq_schedp = state->hs_cq_sched_array;
	array_size = state->hs_cq_sched_array_size;
	for (i = 0; i < array_size; i++)
		desired += cq_schedp[i].cqs_desired;
	if (desired > state->hs_intrmsi_allocd) {
		cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
		    "the #interrupts desired (%d)\n",
		    state->hs_intrmsi_allocd, desired);
		cmn_err(CE_CONT, "completion handler groups not being used\n");
		goto done;	/* all interrupts in the default pool */
	}
	/* Game on.  For each cq_sched group, reserve the MSI-X range */
	for (i = 0; i < array_size; i++) {
		desired = cq_schedp[i].cqs_desired;
		cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
		cq_schedp[i].cqs_len = desired;
		cq_schedp[i].cqs_next_alloc = desired - 1;
		defp->cqs_len -= desired;
		defp->cqs_start_hid += desired;
	}
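
	/*
	 * Illustrative numbers (made up, not from the source): with 32
	 * MSI-X vectors allocated and two groups desiring 4 and 2, the
	 * loop above reserves hid ranges [1..4] and [5..6] for the
	 * groups, leaving the default pool with cqs_start_hid == 7 and
	 * cqs_len == 26.
	 */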
	/* reset default's start allocation seed */
	state->hs_cq_sched_default.cqs_next_alloc =
	    state->hs_cq_sched_default.cqs_len - 1;

done:
	mutex_exit(&state->hs_cq_sched_lock);
	return (IBT_SUCCESS);
}

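/*
 * hermon_cq_sched_fini()
 *    Context: expected to be called only from attach() and detach() path
 *    contexts, as the teardown counterpart of hermon_cq_sched_init().
 */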
void
hermon_cq_sched_fini(hermon_state_t *state)
{
	mutex_enter(&state->hs_cq_sched_lock);
	if (state->hs_cq_sched_array_size) {
		kmem_free(state->hs_cq_sched_array,
		    sizeof (hermon_cq_sched_t) *
		    state->hs_cq_sched_array_size);
		state->hs_cq_sched_array_size = 0;
		state->hs_cq_sched_array = NULL;
	}
	mutex_exit(&state->hs_cq_sched_lock);
	mutex_destroy(&state->hs_cq_sched_lock);
}

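/*
 * hermon_cq_sched_alloc()
 *    Context: Can be called from user or kernel context.
 *
 *    If a scheduling group was requested, look up the completion handler
 *    group named in the attributes and return it with its reference count
 *    bumped.  A NULL handle returned in 'cq_sched_pp' means the default
 *    pool will be used.
 */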
int
hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
    hermon_cq_sched_t **cq_sched_pp)
{
	hermon_cq_sched_t *cq_schedp;
	int i;
	char *name;
	ibt_cq_sched_flags_t flags;

	flags = attr->cqs_flags;
	if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
		*cq_sched_pp = NULL;
		return (IBT_SUCCESS);
	}
	name = attr->cqs_pool_name;

	mutex_enter(&state->hs_cq_sched_lock);
	cq_schedp = state->hs_cq_sched_array;
	for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
		if (strcmp(name, cq_schedp->cqs_name) == 0) {
			if (cq_schedp->cqs_len != 0)
				cq_schedp->cqs_refcnt++;
			break;	/* found it */
		}
	}
	if ((i == state->hs_cq_sched_array_size) ||	/* not found, or */
	    (cq_schedp->cqs_len == 0))	/* defined, but no dedicated intr's */
		cq_schedp = NULL;
	mutex_exit(&state->hs_cq_sched_lock);

	*cq_sched_pp = cq_schedp;	/* set to valid hdl, or to NULL */
	if ((cq_schedp == NULL) &&
	    (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
		return (IBT_CQ_NO_SCHED_GROUP);
	else
		return (IBT_SUCCESS);
}

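/*
 * hermon_cq_sched_free()
 *    Context: Can be called from user or kernel context.
 *
 *    Release a reference on a completion handler group previously
 *    obtained from hermon_cq_sched_alloc().
 */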
int
hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
{
	if (cq_schedp != NULL) {
		/* Just decrement refcnt */
		mutex_enter(&state->hs_cq_sched_lock);
		if (cq_schedp->cqs_refcnt == 0)
			HERMON_WARNING(state, "cq_sched free underflow\n");
		else
			cq_schedp->cqs_refcnt--;
		mutex_exit(&state->hs_cq_sched_lock);
	}
	return (IBT_SUCCESS);
}