1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_qp.c
28 * Hermon Queue Pair Processing Routines
29 *
30 * Implements all the routines necessary for allocating, freeing, and
31 * querying the Hermon queue pairs.
32 */
33
34 #include <sys/types.h>
35 #include <sys/conf.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/modctl.h>
39 #include <sys/bitmap.h>
40 #include <sys/sysmacros.h>
41
42 #include <sys/ib/adapters/hermon/hermon.h>
43 #include <sys/ib/ib_pkt_hdrs.h>
44
45 static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
46 hermon_rsrc_t *qpc);
47 static int hermon_qpn_avl_compare(const void *q, const void *e);
48 static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
49 ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
50 static int hermon_special_qp_rsrc_free(hermon_state_t *state,
51 ibt_sqp_type_t type, uint_t port);
52 static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
53 uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
54 uint_t *logwqesz, uint_t *max_sgl);
55
56 /*
57 * hermon_qp_alloc()
58 * Context: Can be called only from user or kernel context.
59 */
60 int
hermon_qp_alloc(hermon_state_t * state,hermon_qp_info_t * qpinfo,uint_t sleepflag)61 hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
62 uint_t sleepflag)
63 {
64 hermon_rsrc_t *qpc, *rsrc;
65 hermon_rsrc_type_t rsrc_type;
66 hermon_umap_db_entry_t *umapdb;
67 hermon_qphdl_t qp;
68 ibt_qp_alloc_attr_t *attr_p;
69 ibt_qp_alloc_flags_t alloc_flags;
70 ibt_qp_type_t type;
71 hermon_qp_wq_type_t swq_type;
72 ibtl_qp_hdl_t ibt_qphdl;
73 ibt_chan_sizes_t *queuesz_p;
74 ib_qpn_t *qpn;
75 hermon_qphdl_t *qphdl;
76 ibt_mr_attr_t mr_attr;
77 hermon_mr_options_t mr_op;
78 hermon_srqhdl_t srq;
79 hermon_pdhdl_t pd;
80 hermon_cqhdl_t sq_cq, rq_cq;
81 hermon_mrhdl_t mr;
82 uint64_t value, qp_desc_off;
83 uint64_t *thewqe, thewqesz;
84 uint32_t *sq_buf, *rq_buf;
85 uint32_t log_qp_sq_size, log_qp_rq_size;
86 uint32_t sq_size, rq_size;
87 uint32_t sq_depth, rq_depth;
88 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
89 uint32_t max_sgl, max_recv_sgl, uarpg;
90 uint_t qp_is_umap;
91 uint_t qp_srq_en, i, j;
92 int status, flag;
93
94 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
95
96 /*
97 * Extract the necessary info from the hermon_qp_info_t structure
98 */
99 attr_p = qpinfo->qpi_attrp;
100 type = qpinfo->qpi_type;
101 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
102 queuesz_p = qpinfo->qpi_queueszp;
103 qpn = qpinfo->qpi_qpn;
104 qphdl = &qpinfo->qpi_qphdl;
105 alloc_flags = attr_p->qp_alloc_flags;
106
107 /*
108 * Verify correctness of alloc_flags.
109 *
110 * 1. FEXCH and RSS are only allocated via qp_range.
111 */
112 if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
113 return (IBT_INVALID_PARAM);
114 }
115 rsrc_type = HERMON_QPC;
116 qp_is_umap = 0;
117
118 /* 2. Make sure only one of these flags is set. */
119 switch (alloc_flags &
120 (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) {
121 case IBT_QP_USER_MAP:
122 qp_is_umap = 1;
123 break;
124 case IBT_QP_USES_RFCI:
125 if (type != IBT_UD_RQP)
126 return (IBT_INVALID_PARAM);
127
128 switch (attr_p->qp_fc.fc_hca_port) {
129 case 1:
130 rsrc_type = HERMON_QPC_RFCI_PORT1;
131 break;
132 case 2:
133 rsrc_type = HERMON_QPC_RFCI_PORT2;
134 break;
135 default:
136 return (IBT_INVALID_PARAM);
137 }
138 break;
139 case IBT_QP_USES_FCMD:
140 if (type != IBT_UD_RQP)
141 return (IBT_INVALID_PARAM);
142 break;
143 case 0:
144 break;
145 default:
146 return (IBT_INVALID_PARAM); /* conflicting flags set */
147 }
148
149 /*
150 * Determine whether QP is being allocated for userland access or
151 * whether it is being allocated for kernel access. If the QP is
152 * being allocated for userland access, then lookup the UAR
153 * page number for the current process. Note: If this is not found
154 * (e.g. if the process has not previously open()'d the Hermon driver),
155 * then an error is returned.
156 */
157 if (qp_is_umap) {
158 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
159 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
160 if (status != DDI_SUCCESS) {
161 return (IBT_INVALID_PARAM);
162 }
163 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
164 } else {
165 uarpg = state->hs_kernel_uar_index;
166 }
167
168 /*
169 * Determine whether QP is being associated with an SRQ
170 */
171 qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
172 if (qp_srq_en) {
173 /*
174 * Check for valid SRQ handle pointers
175 */
176 if (attr_p->qp_ibc_srq_hdl == NULL) {
177 status = IBT_SRQ_HDL_INVALID;
178 goto qpalloc_fail;
179 }
180 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
181 }
182
183 /*
184 * Check for valid QP service type (only UD/RC/UC supported)
185 */
186 if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
187 (type != IBT_UC_RQP))) {
188 status = IBT_QP_SRV_TYPE_INVALID;
189 goto qpalloc_fail;
190 }
191
192
193 /*
194 * Check for valid PD handle pointer
195 */
196 if (attr_p->qp_pd_hdl == NULL) {
197 status = IBT_PD_HDL_INVALID;
198 goto qpalloc_fail;
199 }
200 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
201
202 /*
203 * If on an SRQ, check to make sure the PD is the same
204 */
205 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
206 status = IBT_PD_HDL_INVALID;
207 goto qpalloc_fail;
208 }
209
210 /* Increment the reference count on the protection domain (PD) */
211 hermon_pd_refcnt_inc(pd);
212
213 /*
214 * Check for valid CQ handle pointers
215 *
216 * FCMD QPs do not require a receive cq handle.
217 */
218 if (attr_p->qp_ibc_scq_hdl == NULL) {
219 status = IBT_CQ_HDL_INVALID;
220 goto qpalloc_fail1;
221 }
222 sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
223 if ((attr_p->qp_ibc_rcq_hdl == NULL)) {
224 if ((alloc_flags & IBT_QP_USES_FCMD) == 0) {
225 status = IBT_CQ_HDL_INVALID;
226 goto qpalloc_fail1;
227 }
228 rq_cq = sq_cq; /* just use the send cq */
229 } else
230 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
231
232 /*
233 * Increment the reference count on the CQs. One or both of these
234 * could return error if we determine that the given CQ is already
235 * being used with a special (SMI/GSI) QP.
236 */
237 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
238 if (status != DDI_SUCCESS) {
239 status = IBT_CQ_HDL_INVALID;
240 goto qpalloc_fail1;
241 }
242 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
243 if (status != DDI_SUCCESS) {
244 status = IBT_CQ_HDL_INVALID;
245 goto qpalloc_fail2;
246 }
247
248 /*
249 * Allocate an QP context entry. This will be filled in with all
250 * the necessary parameters to define the Queue Pair. Unlike
251 * other Hermon hardware resources, ownership is not immediately
252 * given to hardware in the final step here. Instead, we must
253 * wait until the QP is later transitioned to the "Init" state before
254 * passing the QP to hardware. If we fail here, we must undo all
255 * the reference count (CQ and PD).
256 */
257 status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc);
258 if (status != DDI_SUCCESS) {
259 status = IBT_INSUFF_RESOURCE;
260 goto qpalloc_fail3;
261 }
262
263 /*
264 * Allocate the software structure for tracking the queue pair
265 * (i.e. the Hermon Queue Pair handle). If we fail here, we must
266 * undo the reference counts and the previous resource allocation.
267 */
268 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
269 if (status != DDI_SUCCESS) {
270 status = IBT_INSUFF_RESOURCE;
271 goto qpalloc_fail4;
272 }
273 qp = (hermon_qphdl_t)rsrc->hr_addr;
274 bzero(qp, sizeof (struct hermon_sw_qp_s));
275 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
276
277 qp->qp_alloc_flags = alloc_flags;
278
279 /*
280 * Calculate the QP number from QPC index. This routine handles
281 * all of the operations necessary to keep track of used, unused,
282 * and released QP numbers.
283 */
284 if (type == IBT_UD_RQP) {
285 qp->qp_qpnum = qpc->hr_indx;
286 qp->qp_ring = qp->qp_qpnum << 8;
287 qp->qp_qpn_hdl = NULL;
288 } else {
289 status = hermon_qp_create_qpn(state, qp, qpc);
290 if (status != DDI_SUCCESS) {
291 status = IBT_INSUFF_RESOURCE;
292 goto qpalloc_fail5;
293 }
294 }
295
296 /*
297 * If this will be a user-mappable QP, then allocate an entry for
298 * the "userland resources database". This will later be added to
299 * the database (after all further QP operations are successful).
300 * If we fail here, we must undo the reference counts and the
301 * previous resource allocation.
302 */
303 if (qp_is_umap) {
304 umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum,
305 MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
306 if (umapdb == NULL) {
307 status = IBT_INSUFF_RESOURCE;
308 goto qpalloc_fail6;
309 }
310 }
311
312 /*
313 * Allocate the doorbell record. Hermon just needs one for the RQ,
314 * if the QP is not associated with an SRQ, and use uarpg (above) as
315 * the uar index
316 */
317
318 if (!qp_srq_en) {
319 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
320 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
321 if (status != DDI_SUCCESS) {
322 status = IBT_INSUFF_RESOURCE;
323 goto qpalloc_fail6;
324 }
325 }
326
327 qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
328
329 /*
330 * We verify that the requested number of SGL is valid (i.e.
331 * consistent with the device limits and/or software-configured
332 * limits). If not, then obviously the same cleanup needs to be done.
333 */
334 if (type == IBT_UD_RQP) {
335 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
336 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
337 } else {
338 max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz;
339 swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN;
340 }
341 max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
342 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
343 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
344 status = IBT_HCA_SGL_EXCEEDED;
345 goto qpalloc_fail7;
346 }
347
348 /*
349 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
350 * This will depend on the requested number of SGLs. Note: this
351 * has the side-effect of also calculating the real number of SGLs
352 * (for the calculated WQE size).
353 *
354 * For QP's on an SRQ, we set these to 0.
355 */
356 if (qp_srq_en) {
357 qp->qp_rq_log_wqesz = 0;
358 qp->qp_rq_sgl = 0;
359 } else {
360 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
361 max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
362 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
363 }
364 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
365 max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
366
367 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
368
369 /* NOTE: currently policy in driver, later maybe IBTF interface */
370 qp->qp_no_prefetch = 0;
371
372 /*
373 * for prefetching, we need to add the number of wqes in
374 * the 2k area plus one to the number requested, but
375 * ONLY for send queue. If no_prefetch == 1 (prefetch off)
376 * it's exactly TWO wqes for the headroom
377 */
378 if (qp->qp_no_prefetch)
379 qp->qp_sq_headroom = 2 * sq_wqe_size;
380 else
381 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
382 /*
383 * hdrm wqes must be integral since both sq_wqe_size &
384 * HERMON_QP_OH_SIZE are power of 2
385 */
386 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
387
388
389 /*
390 * Calculate the appropriate size for the work queues.
391 * For send queue, add in the headroom wqes to the calculation.
392 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
393 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
394 * to round the requested size up to the next highest power-of-2
395 */
396 /* first, adjust to a minimum and tell the caller the change */
397 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
398 HERMON_QP_MIN_SIZE);
399 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
400 HERMON_QP_MIN_SIZE);
401 /*
402 * now, calculate the alloc size, taking into account
403 * the headroom for the sq
404 */
405 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
406 /* if the total is a power of two, reduce it */
407 if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)) {
408 log_qp_sq_size = log_qp_sq_size - 1;
409 }
410
411 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
412 if (ISP2(attr_p->qp_sizes.cs_rq)) {
413 log_qp_rq_size = log_qp_rq_size - 1;
414 }
415
416 /*
417 * Next we verify that the rounded-up size is valid (i.e. consistent
418 * with the device limits and/or software-configured limits). If not,
419 * then obviously we have a lot of cleanup to do before returning.
420 *
421 * NOTE: the first condition deals with the (test) case of cs_sq
422 * being just less than 2^32. In this case, the headroom addition
423 * to the requested cs_sq will pass the test when it should not.
424 * This test no longer lets that case slip through the check.
425 */
426 if ((attr_p->qp_sizes.cs_sq >
427 (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
428 (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
429 (!qp_srq_en && (log_qp_rq_size >
430 state->hs_cfg_profile->cp_log_max_qp_sz))) {
431 status = IBT_HCA_WR_EXCEEDED;
432 goto qpalloc_fail7;
433 }
434
435 /*
436 * Allocate the memory for QP work queues. Since Hermon work queues
437 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
438 * the work queue memory is very important. We used to allocate
439 * work queues (the combined receive and send queues) so that they
440 * would be aligned on their combined size. That alignment guaranteed
441 * that they would never cross the 4GB boundary (Hermon work queues
442 * are on the order of MBs at maximum). Now we are able to relax
443 * this alignment constraint by ensuring that the IB address assigned
444 * to the queue memory (as a result of the hermon_mr_register() call)
445 * is offset from zero.
446 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
447 * guarantee the alignment, but when attempting to use IOMMU bypass
448 * mode we found that we were not allowed to specify any alignment
449 * that was more restrictive than the system page size.
450 * So we avoided this constraint by passing two alignment values,
451 * one for the memory allocation itself and the other for the DMA
452 * handle (for later bind). This used to cause more memory than
453 * necessary to be allocated (in order to guarantee the more
454 * restrictive alignment contraint). But by guaranteeing the
455 * zero-based IB virtual address for the queue, we are able to
456 * conserve this memory.
457 */
458 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
459 sq_depth = 1 << log_qp_sq_size;
460 sq_size = sq_depth * sq_wqe_size;
461
462 /* QP on SRQ sets these to 0 */
463 if (qp_srq_en) {
464 rq_wqe_size = 0;
465 rq_size = 0;
466 } else {
467 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
468 rq_depth = 1 << log_qp_rq_size;
469 rq_size = rq_depth * rq_wqe_size;
470 }
471
472 qp->qp_wqinfo.qa_size = sq_size + rq_size;
473
474 qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
475 qp->qp_wqinfo.qa_bind_align = PAGESIZE;
476
477 if (qp_is_umap) {
478 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
479 } else {
480 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
481 }
482 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
483 if (status != DDI_SUCCESS) {
484 status = IBT_INSUFF_RESOURCE;
485 goto qpalloc_fail7;
486 }
487
488 /*
489 * Sort WQs in memory according to stride (*q_wqe_size), largest first
490 * If they are equal, still put the SQ first
491 */
492 qp->qp_sq_baseaddr = 0;
493 qp->qp_rq_baseaddr = 0;
494 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
495 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
496
497 /* if this QP is on an SRQ, set the rq_buf to NULL */
498 if (qp_srq_en) {
499 rq_buf = NULL;
500 } else {
501 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
502 qp->qp_rq_baseaddr = sq_size;
503 }
504 } else {
505 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
506 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
507 qp->qp_sq_baseaddr = rq_size;
508 }
509
510 if (qp_is_umap == 0) {
511 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
512 if (qp->qp_sq_wqhdr == NULL) {
513 status = IBT_INSUFF_RESOURCE;
514 goto qpalloc_fail8;
515 }
516 if (qp_srq_en) {
517 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
518 qp->qp_rq_wqavl.wqa_srq_en = 1;
519 qp->qp_rq_wqavl.wqa_srq = srq;
520 } else {
521 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
522 if (qp->qp_rq_wqhdr == NULL) {
523 status = IBT_INSUFF_RESOURCE;
524 goto qpalloc_fail8;
525 }
526 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
527 }
528 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
529 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
530 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
531 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
532 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
533 }
534
535 /*
536 * Register the memory for the QP work queues. The memory for the
537 * QP must be registered in the Hermon cMPT tables. This gives us the
538 * LKey to specify in the QP context later. Note: The memory for
539 * Hermon work queues (both Send and Recv) must be contiguous and
540 * registered as a single memory region. Note: If the QP memory is
541 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
542 * meet the alignment restriction, we pass the "mro_bind_override_addr"
543 * flag in the call to hermon_mr_register(). This guarantees that the
544 * resulting IB vaddr will be zero-based (modulo the offset into the
545 * first page). If we fail here, we still have the bunch of resource
546 * and reference count cleanup to do.
547 */
548 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
549 IBT_MR_NOSLEEP;
550 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
551 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
552 mr_attr.mr_as = NULL;
553 mr_attr.mr_flags = flag;
554 if (qp_is_umap) {
555 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
556 } else {
557 /* HERMON_QUEUE_LOCATION_NORMAL */
558 mr_op.mro_bind_type =
559 state->hs_cfg_profile->cp_iommu_bypass;
560 }
561 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
562 mr_op.mro_bind_override_addr = 1;
563 status = hermon_mr_register(state, pd, &mr_attr, &mr,
564 &mr_op, HERMON_QP_CMPT);
565 if (status != DDI_SUCCESS) {
566 status = IBT_INSUFF_RESOURCE;
567 goto qpalloc_fail9;
568 }
569
570 /*
571 * Calculate the offset between the kernel virtual address space
572 * and the IB virtual address space. This will be used when
573 * posting work requests to properly initialize each WQE.
574 */
575 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
576 (uint64_t)mr->mr_bindinfo.bi_addr;
577
578 /*
579 * Fill in all the return arguments (if necessary). This includes
580 * real work queue sizes (in wqes), real SGLs, and QP number
581 */
582 if (queuesz_p != NULL) {
583 queuesz_p->cs_sq =
584 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
585 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
586
587 /* if this QP is on an SRQ, set these to 0 */
588 if (qp_srq_en) {
589 queuesz_p->cs_rq = 0;
590 queuesz_p->cs_rq_sgl = 0;
591 } else {
592 queuesz_p->cs_rq = (1 << log_qp_rq_size);
593 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
594 }
595 }
596 if (qpn != NULL) {
597 *qpn = (ib_qpn_t)qp->qp_qpnum;
598 }
599
600 /*
601 * Fill in the rest of the Hermon Queue Pair handle.
602 */
603 qp->qp_qpcrsrcp = qpc;
604 qp->qp_rsrcp = rsrc;
605 qp->qp_state = HERMON_QP_RESET;
606 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
607 qp->qp_pdhdl = pd;
608 qp->qp_mrhdl = mr;
609 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
610 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
611 qp->qp_is_special = 0;
612 qp->qp_uarpg = uarpg;
613 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
614 qp->qp_sq_cqhdl = sq_cq;
615 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
616 qp->qp_sq_logqsz = log_qp_sq_size;
617 qp->qp_sq_buf = sq_buf;
618 qp->qp_desc_off = qp_desc_off;
619 qp->qp_rq_cqhdl = rq_cq;
620 qp->qp_rq_buf = rq_buf;
621 qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
622 0;
623
624 /* if this QP is on an SRQ, set rq_bufsz to 0 */
625 if (qp_srq_en) {
626 qp->qp_rq_bufsz = 0;
627 qp->qp_rq_logqsz = 0;
628 } else {
629 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
630 qp->qp_rq_logqsz = log_qp_rq_size;
631 }
632
633 qp->qp_forward_sqd_event = 0;
634 qp->qp_sqd_still_draining = 0;
635 qp->qp_hdlrarg = (void *)ibt_qphdl;
636 qp->qp_mcg_refcnt = 0;
637
638 /*
639 * If this QP is to be associated with an SRQ, set the SRQ handle
640 */
641 if (qp_srq_en) {
642 qp->qp_srqhdl = srq;
643 hermon_srq_refcnt_inc(qp->qp_srqhdl);
644 } else {
645 qp->qp_srqhdl = NULL;
646 }
647
648 /* Determine the QP service type */
649 qp->qp_type = type;
650 if (type == IBT_RC_RQP) {
651 qp->qp_serv_type = HERMON_QP_RC;
652 } else if (type == IBT_UD_RQP) {
653 if (alloc_flags & IBT_QP_USES_RFCI)
654 qp->qp_serv_type = HERMON_QP_RFCI;
655 else if (alloc_flags & IBT_QP_USES_FCMD)
656 qp->qp_serv_type = HERMON_QP_FCMND;
657 else
658 qp->qp_serv_type = HERMON_QP_UD;
659 } else {
660 qp->qp_serv_type = HERMON_QP_UC;
661 }
662
663 /*
664 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
665 */
666
667 /*
668 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
669 * set the quadword to all F's - high-order bit is owner (init to one)
670 * and the rest for the headroom definition of prefetching
671 *
672 */
673 wqesz_shift = qp->qp_sq_log_wqesz;
674 thewqesz = 1 << wqesz_shift;
675 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
676 if (qp_is_umap == 0) {
677 for (i = 0; i < sq_depth; i++) {
678 /*
679 * for each stride, go through and every 64 bytes
680 * write the init value - having set the address
681 * once, just keep incrementing it
682 */
683 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
684 *(uint32_t *)thewqe = 0xFFFFFFFF;
685 }
686 }
687 }
688
689 /* Zero out the QP context */
690 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
691
692 /*
693 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
694 * "qphdl" and return success
695 */
696 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp);
697
698 /*
699 * If this is a user-mappable QP, then we need to insert the previously
700 * allocated entry into the "userland resources database". This will
701 * allow for later lookup during devmap() (i.e. mmap()) calls.
702 */
703 if (qp_is_umap) {
704 hermon_umap_db_add(umapdb);
705 }
706 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
707 DDI_INTR_PRI(state->hs_intrmsi_pri));
708
709 *qphdl = qp;
710
711 return (DDI_SUCCESS);
712
713 /*
714 * The following is cleanup for all possible failure cases in this routine
715 */
716 qpalloc_fail9:
717 hermon_queue_free(&qp->qp_wqinfo);
718 qpalloc_fail8:
719 if (qp->qp_sq_wqhdr)
720 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
721 if (qp->qp_rq_wqhdr)
722 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
723 qpalloc_fail7:
724 if (qp_is_umap) {
725 hermon_umap_db_free(umapdb);
726 }
727 if (!qp_srq_en) {
728 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
729 }
730
731 qpalloc_fail6:
732 /*
733 * Releasing the QPN will also free up the QPC context. Update
734 * the QPC context pointer to indicate this.
735 */
736 if (qp->qp_qpn_hdl) {
737 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
738 HERMON_QPN_RELEASE);
739 } else {
740 hermon_rsrc_free(state, &qpc);
741 }
742 qpc = NULL;
743 qpalloc_fail5:
744 hermon_rsrc_free(state, &rsrc);
745 qpalloc_fail4:
746 if (qpc) {
747 hermon_rsrc_free(state, &qpc);
748 }
749 qpalloc_fail3:
750 hermon_cq_refcnt_dec(rq_cq);
751 qpalloc_fail2:
752 hermon_cq_refcnt_dec(sq_cq);
753 qpalloc_fail1:
754 hermon_pd_refcnt_dec(pd);
755 qpalloc_fail:
756 return (status);
757 }
758
759
760
761 /*
762 * hermon_special_qp_alloc()
763 * Context: Can be called only from user or kernel context.
764 */
765 int
hermon_special_qp_alloc(hermon_state_t * state,hermon_qp_info_t * qpinfo,uint_t sleepflag)766 hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
767 uint_t sleepflag)
768 {
769 hermon_rsrc_t *qpc, *rsrc;
770 hermon_qphdl_t qp;
771 ibt_qp_alloc_attr_t *attr_p;
772 ibt_sqp_type_t type;
773 uint8_t port;
774 ibtl_qp_hdl_t ibt_qphdl;
775 ibt_chan_sizes_t *queuesz_p;
776 hermon_qphdl_t *qphdl;
777 ibt_mr_attr_t mr_attr;
778 hermon_mr_options_t mr_op;
779 hermon_pdhdl_t pd;
780 hermon_cqhdl_t sq_cq, rq_cq;
781 hermon_mrhdl_t mr;
782 uint64_t qp_desc_off;
783 uint64_t *thewqe, thewqesz;
784 uint32_t *sq_buf, *rq_buf;
785 uint32_t log_qp_sq_size, log_qp_rq_size;
786 uint32_t sq_size, rq_size, max_sgl;
787 uint32_t uarpg;
788 uint32_t sq_depth;
789 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
790 int status, flag, i, j;
791
792 /*
793 * Extract the necessary info from the hermon_qp_info_t structure
794 */
795 attr_p = qpinfo->qpi_attrp;
796 type = qpinfo->qpi_type;
797 port = qpinfo->qpi_port;
798 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
799 queuesz_p = qpinfo->qpi_queueszp;
800 qphdl = &qpinfo->qpi_qphdl;
801
802 /*
803 * Check for valid special QP type (only SMI & GSI supported)
804 */
805 if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
806 status = IBT_QP_SPECIAL_TYPE_INVALID;
807 goto spec_qpalloc_fail;
808 }
809
810 /*
811 * Check for valid port number
812 */
813 if (!hermon_portnum_is_valid(state, port)) {
814 status = IBT_HCA_PORT_INVALID;
815 goto spec_qpalloc_fail;
816 }
817 port = port - 1;
818
819 /*
820 * Check for valid PD handle pointer
821 */
822 if (attr_p->qp_pd_hdl == NULL) {
823 status = IBT_PD_HDL_INVALID;
824 goto spec_qpalloc_fail;
825 }
826 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
827
828 /* Increment the reference count on the PD */
829 hermon_pd_refcnt_inc(pd);
830
831 /*
832 * Check for valid CQ handle pointers
833 */
834 if ((attr_p->qp_ibc_scq_hdl == NULL) ||
835 (attr_p->qp_ibc_rcq_hdl == NULL)) {
836 status = IBT_CQ_HDL_INVALID;
837 goto spec_qpalloc_fail1;
838 }
839 sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
840 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
841
842 /*
843 * Increment the reference count on the CQs. One or both of these
844 * could return error if we determine that the given CQ is already
845 * being used with a non-special QP (i.e. a normal QP).
846 */
847 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL);
848 if (status != DDI_SUCCESS) {
849 status = IBT_CQ_HDL_INVALID;
850 goto spec_qpalloc_fail1;
851 }
852 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL);
853 if (status != DDI_SUCCESS) {
854 status = IBT_CQ_HDL_INVALID;
855 goto spec_qpalloc_fail2;
856 }
857
858 /*
859 * Allocate the special QP resources. Essentially, this allocation
860 * amounts to checking if the request special QP has already been
861 * allocated. If successful, the QP context return is an actual
862 * QP context that has been "aliased" to act as a special QP of the
863 * appropriate type (and for the appropriate port). Just as in
864 * hermon_qp_alloc() above, ownership for this QP context is not
865 * immediately given to hardware in the final step here. Instead, we
866 * wait until the QP is later transitioned to the "Init" state before
867 * passing the QP to hardware. If we fail here, we must undo all
868 * the reference count (CQ and PD).
869 */
870 status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc);
871 if (status != DDI_SUCCESS) {
872 goto spec_qpalloc_fail3;
873 }
874
875 /*
876 * Allocate the software structure for tracking the special queue
877 * pair (i.e. the Hermon Queue Pair handle). If we fail here, we
878 * must undo the reference counts and the previous resource allocation.
879 */
880 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
881 if (status != DDI_SUCCESS) {
882 status = IBT_INSUFF_RESOURCE;
883 goto spec_qpalloc_fail4;
884 }
885 qp = (hermon_qphdl_t)rsrc->hr_addr;
886
887 bzero(qp, sizeof (struct hermon_sw_qp_s));
888
889 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
890 qp->qp_alloc_flags = attr_p->qp_alloc_flags;
891
892 /*
893 * Actual QP number is a combination of the index of the QPC and
894 * the port number. This is because the special QP contexts must
895 * be allocated two-at-a-time.
896 */
897 qp->qp_qpnum = qpc->hr_indx + port;
898 qp->qp_ring = qp->qp_qpnum << 8;
899
900 uarpg = state->hs_kernel_uar_index; /* must be for spec qp */
901 /*
902 * Allocate the doorbell record. Hermon uses only one for the RQ so
903 * alloc a qp doorbell, using uarpg (above) as the uar index
904 */
905
906 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
907 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
908 if (status != DDI_SUCCESS) {
909 status = IBT_INSUFF_RESOURCE;
910 goto spec_qpalloc_fail5;
911 }
912 /*
913 * Calculate the appropriate size for the work queues.
914 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
915 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
916 * to round the requested size up to the next highest power-of-2
917 */
918 attr_p->qp_sizes.cs_sq =
919 max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE);
920 attr_p->qp_sizes.cs_rq =
921 max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE);
922 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
923 if (ISP2(attr_p->qp_sizes.cs_sq)) {
924 log_qp_sq_size = log_qp_sq_size - 1;
925 }
926 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
927 if (ISP2(attr_p->qp_sizes.cs_rq)) {
928 log_qp_rq_size = log_qp_rq_size - 1;
929 }
930
931 /*
932 * Next we verify that the rounded-up size is valid (i.e. consistent
933 * with the device limits and/or software-configured limits). If not,
934 * then obviously we have a bit of cleanup to do before returning.
935 */
936 if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
937 (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) {
938 status = IBT_HCA_WR_EXCEEDED;
939 goto spec_qpalloc_fail5a;
940 }
941
942 /*
943 * Next we verify that the requested number of SGL is valid (i.e.
944 * consistent with the device limits and/or software-configured
945 * limits). If not, then obviously the same cleanup needs to be done.
946 */
947 max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl;
948 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
949 (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
950 status = IBT_HCA_SGL_EXCEEDED;
951 goto spec_qpalloc_fail5a;
952 }
953
954 /*
955 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
956 * This will depend on the requested number of SGLs. Note: this
957 * has the side-effect of also calculating the real number of SGLs
958 * (for the calculated WQE size).
959 */
960 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
961 max_sgl, HERMON_QP_WQ_TYPE_RECVQ,
962 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
963 if (type == IBT_SMI_SQP) {
964 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
965 max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0,
966 &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
967 } else {
968 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
969 max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1,
970 &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
971 }
972
973 /*
974 * Allocate the memory for QP work queues. Since Hermon work queues
975 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
976 * the work queue memory is very important. We used to allocate
977 * work queues (the combined receive and send queues) so that they
978 * would be aligned on their combined size. That alignment guaranteed
979 * that they would never cross the 4GB boundary (Hermon work queues
980 * are on the order of MBs at maximum). Now we are able to relax
981 * this alignment constraint by ensuring that the IB address assigned
982 * to the queue memory (as a result of the hermon_mr_register() call)
983 * is offset from zero.
984 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
985 * guarantee the alignment, but when attempting to use IOMMU bypass
986 * mode we found that we were not allowed to specify any alignment
987 * that was more restrictive than the system page size.
988 * So we avoided this constraint by passing two alignment values,
989 * one for the memory allocation itself and the other for the DMA
990 * handle (for later bind). This used to cause more memory than
991 * necessary to be allocated (in order to guarantee the more
992 * restrictive alignment contraint). But by guaranteeing the
993 * zero-based IB virtual address for the queue, we are able to
994 * conserve this memory.
995 */
996 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
997 sq_depth = 1 << log_qp_sq_size;
998 sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
999
1000 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1001 rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
1002
1003 qp->qp_wqinfo.qa_size = sq_size + rq_size;
1004
1005 qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1006 qp->qp_wqinfo.qa_bind_align = PAGESIZE;
1007 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1008
1009 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1010 if (status != NULL) {
1011 status = IBT_INSUFF_RESOURCE;
1012 goto spec_qpalloc_fail5a;
1013 }
1014
1015 /*
1016 * Sort WQs in memory according to depth, stride (*q_wqe_size),
1017 * biggest first. If equal, the Send Queue still goes first
1018 */
1019 qp->qp_sq_baseaddr = 0;
1020 qp->qp_rq_baseaddr = 0;
1021 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1022 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1023 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1024 qp->qp_rq_baseaddr = sq_size;
1025 } else {
1026 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1027 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1028 qp->qp_sq_baseaddr = rq_size;
1029 }
1030
1031 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1032 if (qp->qp_sq_wqhdr == NULL) {
1033 status = IBT_INSUFF_RESOURCE;
1034 goto spec_qpalloc_fail6;
1035 }
1036 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size);
1037 if (qp->qp_rq_wqhdr == NULL) {
1038 status = IBT_INSUFF_RESOURCE;
1039 goto spec_qpalloc_fail6;
1040 }
1041 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1042 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1043 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1044 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1045 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1046 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1047
1048 /*
1049 * Register the memory for the special QP work queues. The memory for
1050 * the special QP must be registered in the Hermon cMPT tables. This
1051 * gives us the LKey to specify in the QP context later. Note: The
1052 * memory for Hermon work queues (both Send and Recv) must be contiguous
1053 * and registered as a single memory region. Also, in order to meet the
1054 * alignment restriction, we pass the "mro_bind_override_addr" flag in
1055 * the call to hermon_mr_register(). This guarantees that the resulting
1056 * IB vaddr will be zero-based (modulo the offset into the first page).
1057 * If we fail here, we have a bunch of resource and reference count
1058 * cleanup to do.
1059 */
1060 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1061 IBT_MR_NOSLEEP;
1062 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1063 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
1064 mr_attr.mr_as = NULL;
1065 mr_attr.mr_flags = flag;
1066
1067 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
1068 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1069 mr_op.mro_bind_override_addr = 1;
1070
1071 status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op,
1072 HERMON_QP_CMPT);
1073 if (status != DDI_SUCCESS) {
1074 status = IBT_INSUFF_RESOURCE;
1075 goto spec_qpalloc_fail6;
1076 }
1077
1078 /*
1079 * Calculate the offset between the kernel virtual address space
1080 * and the IB virtual address space. This will be used when
1081 * posting work requests to properly initialize each WQE.
1082 */
1083 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1084 (uint64_t)mr->mr_bindinfo.bi_addr;
1085
1086 /* set the prefetch - initially, not prefetching */
1087 qp->qp_no_prefetch = 1;
1088
1089 if (qp->qp_no_prefetch)
1090 qp->qp_sq_headroom = 2 * sq_wqe_size;
1091 else
1092 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1093 /*
1094 * hdrm wqes must be integral since both sq_wqe_size &
1095 * HERMON_QP_OH_SIZE are power of 2
1096 */
1097 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1098 /*
1099 * Fill in all the return arguments (if necessary). This includes
1100 * real work queue sizes, real SGLs, and QP number (which will be
1101 * either zero or one, depending on the special QP type)
1102 */
1103 if (queuesz_p != NULL) {
1104 queuesz_p->cs_sq =
1105 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1106 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1107 queuesz_p->cs_rq = (1 << log_qp_rq_size);
1108 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1109 }
1110
1111 /*
1112 * Fill in the rest of the Hermon Queue Pair handle. We can update
1113 * the following fields for use in further operations on the QP.
1114 */
1115 qp->qp_qpcrsrcp = qpc;
1116 qp->qp_rsrcp = rsrc;
1117 qp->qp_state = HERMON_QP_RESET;
1118 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1119 qp->qp_pdhdl = pd;
1120 qp->qp_mrhdl = mr;
1121 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1122 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1123 qp->qp_is_special = (type == IBT_SMI_SQP) ?
1124 HERMON_QP_SMI : HERMON_QP_GSI;
1125 qp->qp_uarpg = uarpg;
1126 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1127 qp->qp_sq_cqhdl = sq_cq;
1128 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
1129 qp->qp_sq_buf = sq_buf;
1130 qp->qp_sq_logqsz = log_qp_sq_size;
1131 qp->qp_desc_off = qp_desc_off;
1132 qp->qp_rq_cqhdl = rq_cq;
1133 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
1134 qp->qp_rq_buf = rq_buf;
1135 qp->qp_rq_logqsz = log_qp_rq_size;
1136 qp->qp_portnum = port;
1137 qp->qp_pkeyindx = 0;
1138 qp->qp_forward_sqd_event = 0;
1139 qp->qp_sqd_still_draining = 0;
1140 qp->qp_hdlrarg = (void *)ibt_qphdl;
1141 qp->qp_mcg_refcnt = 0;
1142 qp->qp_srqhdl = NULL;
1143
1144 /* All special QPs are UD QP service type */
1145 qp->qp_type = IBT_UD_RQP;
1146 qp->qp_serv_type = HERMON_QP_UD;
1147
1148 /*
1149 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1150 */
1151
1152 /*
1153 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1154 * set the quadword to all F's - high-order bit is owner (init to one)
1155 * and the rest for the headroom definition of prefetching
1156 *
1157 */
1158
1159 wqesz_shift = qp->qp_sq_log_wqesz;
1160 thewqesz = 1 << wqesz_shift;
1161 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1162 for (i = 0; i < sq_depth; i++) {
1163 /*
1164 * for each stride, go through and every 64 bytes write the
1165 * init value - having set the address once, just keep
1166 * incrementing it
1167 */
1168 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1169 *(uint32_t *)thewqe = 0xFFFFFFFF;
1170 }
1171 }
1172
1173
1174 /* Zero out the QP context */
1175 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1176
1177 /*
1178 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
1179 * "qphdl" and return success
1180 */
1181 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp);
1182
1183 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1184 DDI_INTR_PRI(state->hs_intrmsi_pri));
1185
1186 *qphdl = qp;
1187
1188 return (DDI_SUCCESS);
1189
1190 /*
1191 * The following is cleanup for all possible failure cases in this routine
1192 */
1193 spec_qpalloc_fail6:
1194 hermon_queue_free(&qp->qp_wqinfo);
1195 if (qp->qp_sq_wqhdr)
1196 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1197 if (qp->qp_rq_wqhdr)
1198 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1199 spec_qpalloc_fail5a:
1200 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1201 spec_qpalloc_fail5:
1202 hermon_rsrc_free(state, &rsrc);
1203 spec_qpalloc_fail4:
1204 if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1205 HERMON_WARNING(state, "failed to free special QP rsrc");
1206 }
1207 spec_qpalloc_fail3:
1208 hermon_cq_refcnt_dec(rq_cq);
1209 spec_qpalloc_fail2:
1210 hermon_cq_refcnt_dec(sq_cq);
1211 spec_qpalloc_fail1:
1212 hermon_pd_refcnt_dec(pd);
1213 spec_qpalloc_fail:
1214 return (status);
1215 }
1216
1217
1218 /*
1219 * hermon_qp_alloc_range()
1220 * Context: Can be called only from user or kernel context.
1221 */
1222 int
hermon_qp_alloc_range(hermon_state_t * state,uint_t log2,hermon_qp_info_t * qpinfo,ibtl_qp_hdl_t * ibt_qphdl,ibc_cq_hdl_t * send_cq,ibc_cq_hdl_t * recv_cq,hermon_qphdl_t * qphdl,uint_t sleepflag)1223 hermon_qp_alloc_range(hermon_state_t *state, uint_t log2,
1224 hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl,
1225 ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq,
1226 hermon_qphdl_t *qphdl, uint_t sleepflag)
1227 {
1228 hermon_rsrc_t *qpc, *rsrc;
1229 hermon_rsrc_type_t rsrc_type;
1230 hermon_qphdl_t qp;
1231 hermon_qp_range_t *qp_range_p;
1232 ibt_qp_alloc_attr_t *attr_p;
1233 ibt_qp_type_t type;
1234 hermon_qp_wq_type_t swq_type;
1235 ibt_chan_sizes_t *queuesz_p;
1236 ibt_mr_attr_t mr_attr;
1237 hermon_mr_options_t mr_op;
1238 hermon_srqhdl_t srq;
1239 hermon_pdhdl_t pd;
1240 hermon_cqhdl_t sq_cq, rq_cq;
1241 hermon_mrhdl_t mr;
1242 uint64_t qp_desc_off;
1243 uint64_t *thewqe, thewqesz;
1244 uint32_t *sq_buf, *rq_buf;
1245 uint32_t log_qp_sq_size, log_qp_rq_size;
1246 uint32_t sq_size, rq_size;
1247 uint32_t sq_depth, rq_depth;
1248 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
1249 uint32_t max_sgl, max_recv_sgl, uarpg;
1250 uint_t qp_srq_en, i, j;
1251 int ii; /* loop counter for range */
1252 int status, flag;
1253 uint_t serv_type;
1254
1255 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
1256
1257 /*
1258 * Extract the necessary info from the hermon_qp_info_t structure
1259 */
1260 attr_p = qpinfo->qpi_attrp;
1261 type = qpinfo->qpi_type;
1262 queuesz_p = qpinfo->qpi_queueszp;
1263
1264 if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) {
1265 if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table)
1266 return (IBT_INSUFF_RESOURCE);
1267 rsrc_type = HERMON_QPC;
1268 serv_type = HERMON_QP_UD;
1269 } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1270 if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp)
1271 return (IBT_INSUFF_RESOURCE);
1272 switch (attr_p->qp_fc.fc_hca_port) {
1273 case 1:
1274 rsrc_type = HERMON_QPC_FEXCH_PORT1;
1275 break;
1276 case 2:
1277 rsrc_type = HERMON_QPC_FEXCH_PORT2;
1278 break;
1279 default:
1280 return (IBT_INVALID_PARAM);
1281 }
1282 serv_type = HERMON_QP_FEXCH;
1283 } else
1284 return (IBT_INVALID_PARAM);
1285
1286 /*
1287 * Determine whether QP is being allocated for userland access or
1288 * whether it is being allocated for kernel access. If the QP is
1289 * being allocated for userland access, fail (too complex for now).
1290 */
1291 if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) {
1292 return (IBT_NOT_SUPPORTED);
1293 } else {
1294 uarpg = state->hs_kernel_uar_index;
1295 }
1296
1297 /*
1298 * Determine whether QP is being associated with an SRQ
1299 */
1300 qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
1301 if (qp_srq_en) {
1302 /*
1303 * Check for valid SRQ handle pointers
1304 */
1305 if (attr_p->qp_ibc_srq_hdl == NULL) {
1306 return (IBT_SRQ_HDL_INVALID);
1307 }
1308 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
1309 }
1310
1311 /*
1312 * Check for valid QP service type (only UD supported)
1313 */
1314 if (type != IBT_UD_RQP) {
1315 return (IBT_QP_SRV_TYPE_INVALID);
1316 }
1317
1318 /*
1319 * Check for valid PD handle pointer
1320 */
1321 if (attr_p->qp_pd_hdl == NULL) {
1322 return (IBT_PD_HDL_INVALID);
1323 }
1324 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
1325
1326 /*
1327 * If on an SRQ, check to make sure the PD is the same
1328 */
1329 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
1330 return (IBT_PD_HDL_INVALID);
1331 }
1332
1333 /* set loop variable here, for freeing resources on error */
1334 ii = 0;
1335
1336 /*
1337 * Allocate 2^log2 contiguous/aligned QP context entries. This will
1338 * be filled in with all the necessary parameters to define the
1339 * Queue Pairs. Unlike other Hermon hardware resources, ownership
1340 * is not immediately given to hardware in the final step here.
1341 * Instead, we must wait until the QP is later transitioned to the
1342 * "Init" state before passing the QP to hardware. If we fail here,
1343 * we must undo all the reference count (CQ and PD).
1344 */
1345 status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag,
1346 &qpc);
1347 if (status != DDI_SUCCESS) {
1348 return (IBT_INSUFF_RESOURCE);
1349 }
1350
1351 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH)
1352 /*
1353 * Need to init the MKEYs for the FEXCH QPs.
1354 *
1355 * For FEXCH QP subranges, we return the QPN base as
1356 * "relative" to the full FEXCH QP range for the port.
1357 */
1358 *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state,
1359 attr_p->qp_fc.fc_hca_port, qpc->hr_indx);
1360 else
1361 *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx;
1362
1363 qp_range_p = kmem_alloc(sizeof (*qp_range_p),
1364 (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1365 if (qp_range_p == NULL) {
1366 status = IBT_INSUFF_RESOURCE;
1367 goto qpalloc_fail0;
1368 }
1369 mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER,
1370 DDI_INTR_PRI(state->hs_intrmsi_pri));
1371 mutex_enter(&qp_range_p->hqpr_lock);
1372 qp_range_p->hqpr_refcnt = 1 << log2;
1373 qp_range_p->hqpr_qpcrsrc = qpc;
1374 mutex_exit(&qp_range_p->hqpr_lock);
1375
1376 for_each_qp:
1377
1378 /* Increment the reference count on the protection domain (PD) */
1379 hermon_pd_refcnt_inc(pd);
1380
1381 rq_cq = (hermon_cqhdl_t)recv_cq[ii];
1382 sq_cq = (hermon_cqhdl_t)send_cq[ii];
1383 if (sq_cq == NULL) {
1384 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1385 /* if no send completions, just use rq_cq */
1386 sq_cq = rq_cq;
1387 } else {
1388 status = IBT_CQ_HDL_INVALID;
1389 goto qpalloc_fail1;
1390 }
1391 }
1392
1393 /*
1394 * Increment the reference count on the CQs. One or both of these
1395 * could return error if we determine that the given CQ is already
1396 * being used with a special (SMI/GSI) QP.
1397 */
1398 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
1399 if (status != DDI_SUCCESS) {
1400 status = IBT_CQ_HDL_INVALID;
1401 goto qpalloc_fail1;
1402 }
1403 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
1404 if (status != DDI_SUCCESS) {
1405 status = IBT_CQ_HDL_INVALID;
1406 goto qpalloc_fail2;
1407 }
1408
1409 /*
1410 * Allocate the software structure for tracking the queue pair
1411 * (i.e. the Hermon Queue Pair handle). If we fail here, we must
1412 * undo the reference counts and the previous resource allocation.
1413 */
1414 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
1415 if (status != DDI_SUCCESS) {
1416 status = IBT_INSUFF_RESOURCE;
1417 goto qpalloc_fail4;
1418 }
1419 qp = (hermon_qphdl_t)rsrc->hr_addr;
1420 bzero(qp, sizeof (struct hermon_sw_qp_s));
1421 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1422 qp->qp_alloc_flags = attr_p->qp_alloc_flags;
1423
1424 /*
1425 * Calculate the QP number from QPC index. This routine handles
1426 * all of the operations necessary to keep track of used, unused,
1427 * and released QP numbers.
1428 */
1429 qp->qp_qpnum = qpc->hr_indx + ii;
1430 qp->qp_ring = qp->qp_qpnum << 8;
1431 qp->qp_qpn_hdl = NULL;
1432
1433 /*
1434 * Allocate the doorbell record. Hermon just needs one for the RQ,
1435 * if the QP is not associated with an SRQ, and use uarpg (above) as
1436 * the uar index
1437 */
1438
1439 if (!qp_srq_en) {
1440 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
1441 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
1442 if (status != DDI_SUCCESS) {
1443 status = IBT_INSUFF_RESOURCE;
1444 goto qpalloc_fail6;
1445 }
1446 }
1447
1448 qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
1449
1450 /*
1451 * We verify that the requested number of SGL is valid (i.e.
1452 * consistent with the device limits and/or software-configured
1453 * limits). If not, then obviously the same cleanup needs to be done.
1454 */
1455 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
1456 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
1457 max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
1458 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
1459 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
1460 status = IBT_HCA_SGL_EXCEEDED;
1461 goto qpalloc_fail7;
1462 }
1463
1464 /*
1465 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
1466 * This will depend on the requested number of SGLs. Note: this
1467 * has the side-effect of also calculating the real number of SGLs
1468 * (for the calculated WQE size).
1469 *
1470 * For QP's on an SRQ, we set these to 0.
1471 */
1472 if (qp_srq_en) {
1473 qp->qp_rq_log_wqesz = 0;
1474 qp->qp_rq_sgl = 0;
1475 } else {
1476 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
1477 max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
1478 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
1479 }
1480 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
1481 max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
1482
1483 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1484
1485 /* NOTE: currently policy in driver, later maybe IBTF interface */
1486 qp->qp_no_prefetch = 0;
1487
1488 /*
1489 * for prefetching, we need to add the number of wqes in
1490 * the 2k area plus one to the number requested, but
1491 * ONLY for send queue. If no_prefetch == 1 (prefetch off)
1492 * it's exactly TWO wqes for the headroom
1493 */
1494 if (qp->qp_no_prefetch)
1495 qp->qp_sq_headroom = 2 * sq_wqe_size;
1496 else
1497 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1498 /*
1499 * hdrm wqes must be integral since both sq_wqe_size &
1500 * HERMON_QP_OH_SIZE are power of 2
1501 */
1502 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1503
1504
1505 /*
1506 * Calculate the appropriate size for the work queues.
1507 * For send queue, add in the headroom wqes to the calculation.
1508 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
1509 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
1510 * to round the requested size up to the next highest power-of-2
1511 */
1512 /* first, adjust to a minimum and tell the caller the change */
1513 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
1514 HERMON_QP_MIN_SIZE);
1515 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
1516 HERMON_QP_MIN_SIZE);
1517 /*
1518 * now, calculate the alloc size, taking into account
1519 * the headroom for the sq
1520 */
1521 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
1522 /* if the total is a power of two, reduce it */
1523 if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)) {
1524 log_qp_sq_size = log_qp_sq_size - 1;
1525 }
1526
1527 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
1528 if (ISP2(attr_p->qp_sizes.cs_rq)) {
1529 log_qp_rq_size = log_qp_rq_size - 1;
1530 }
1531
1532 /*
1533 * Next we verify that the rounded-up size is valid (i.e. consistent
1534 * with the device limits and/or software-configured limits). If not,
1535 * then obviously we have a lot of cleanup to do before returning.
1536 *
1537 * NOTE: the first condition deals with the (test) case of cs_sq
1538 * being just less than 2^32. In this case, the headroom addition
1539 * to the requested cs_sq will pass the test when it should not.
1540 * This test no longer lets that case slip through the check.
1541 */
1542 if ((attr_p->qp_sizes.cs_sq >
1543 (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
1544 (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
1545 (!qp_srq_en && (log_qp_rq_size >
1546 state->hs_cfg_profile->cp_log_max_qp_sz))) {
1547 status = IBT_HCA_WR_EXCEEDED;
1548 goto qpalloc_fail7;
1549 }
1550
1551 /*
1552 * Allocate the memory for QP work queues. Since Hermon work queues
1553 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
1554 * the work queue memory is very important. We used to allocate
1555 * work queues (the combined receive and send queues) so that they
1556 * would be aligned on their combined size. That alignment guaranteed
1557 * that they would never cross the 4GB boundary (Hermon work queues
1558 * are on the order of MBs at maximum). Now we are able to relax
1559 * this alignment constraint by ensuring that the IB address assigned
1560 * to the queue memory (as a result of the hermon_mr_register() call)
1561 * is offset from zero.
1562 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
1563 * guarantee the alignment, but when attempting to use IOMMU bypass
1564 * mode we found that we were not allowed to specify any alignment
1565 * that was more restrictive than the system page size.
1566 * So we avoided this constraint by passing two alignment values,
1567 * one for the memory allocation itself and the other for the DMA
1568 * handle (for later bind). This used to cause more memory than
1569 * necessary to be allocated (in order to guarantee the more
1570 * restrictive alignment contraint). But by guaranteeing the
1571 * zero-based IB virtual address for the queue, we are able to
1572 * conserve this memory.
1573 */
1574 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1575 sq_depth = 1 << log_qp_sq_size;
1576 sq_size = sq_depth * sq_wqe_size;
1577
1578 /* QP on SRQ sets these to 0 */
1579 if (qp_srq_en) {
1580 rq_wqe_size = 0;
1581 rq_size = 0;
1582 } else {
1583 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1584 rq_depth = 1 << log_qp_rq_size;
1585 rq_size = rq_depth * rq_wqe_size;
1586 }
1587
1588 qp->qp_wqinfo.qa_size = sq_size + rq_size;
1589 qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1590 qp->qp_wqinfo.qa_bind_align = PAGESIZE;
1591 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1592 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1593 if (status != DDI_SUCCESS) {
1594 status = IBT_INSUFF_RESOURCE;
1595 goto qpalloc_fail7;
1596 }
1597
1598 /*
1599 * Sort WQs in memory according to stride (*q_wqe_size), largest first
1600 * If they are equal, still put the SQ first
1601 */
1602 qp->qp_sq_baseaddr = 0;
1603 qp->qp_rq_baseaddr = 0;
1604 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1605 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1606
1607 /* if this QP is on an SRQ, set the rq_buf to NULL */
1608 if (qp_srq_en) {
1609 rq_buf = NULL;
1610 } else {
1611 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1612 qp->qp_rq_baseaddr = sq_size;
1613 }
1614 } else {
1615 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1616 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1617 qp->qp_sq_baseaddr = rq_size;
1618 }
1619
1620 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1621 if (qp->qp_sq_wqhdr == NULL) {
1622 status = IBT_INSUFF_RESOURCE;
1623 goto qpalloc_fail8;
1624 }
1625 if (qp_srq_en) {
1626 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
1627 qp->qp_rq_wqavl.wqa_srq_en = 1;
1628 qp->qp_rq_wqavl.wqa_srq = srq;
1629 } else {
1630 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
1631 if (qp->qp_rq_wqhdr == NULL) {
1632 status = IBT_INSUFF_RESOURCE;
1633 goto qpalloc_fail8;
1634 }
1635 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1636 }
1637 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1638 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1639 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1640 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1641 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1642
1643 /*
1644 * Register the memory for the QP work queues. The memory for the
1645 * QP must be registered in the Hermon cMPT tables. This gives us the
1646 * LKey to specify in the QP context later. Note: The memory for
1647 * Hermon work queues (both Send and Recv) must be contiguous and
1648 * registered as a single memory region. Note: If the QP memory is
1649 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
1650 * meet the alignment restriction, we pass the "mro_bind_override_addr"
1651 * flag in the call to hermon_mr_register(). This guarantees that the
1652 * resulting IB vaddr will be zero-based (modulo the offset into the
1653 * first page). If we fail here, we still have the bunch of resource
1654 * and reference count cleanup to do.
1655 */
1656 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1657 IBT_MR_NOSLEEP;
1658 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1659 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
1660 mr_attr.mr_as = NULL;
1661 mr_attr.mr_flags = flag;
1662 /* HERMON_QUEUE_LOCATION_NORMAL */
1663 mr_op.mro_bind_type =
1664 state->hs_cfg_profile->cp_iommu_bypass;
1665 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1666 mr_op.mro_bind_override_addr = 1;
1667 status = hermon_mr_register(state, pd, &mr_attr, &mr,
1668 &mr_op, HERMON_QP_CMPT);
1669 if (status != DDI_SUCCESS) {
1670 status = IBT_INSUFF_RESOURCE;
1671 goto qpalloc_fail9;
1672 }
1673
1674 /*
1675 * Calculate the offset between the kernel virtual address space
1676 * and the IB virtual address space. This will be used when
1677 * posting work requests to properly initialize each WQE.
1678 */
1679 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1680 (uint64_t)mr->mr_bindinfo.bi_addr;
1681
1682 /*
1683 * Fill in all the return arguments (if necessary). This includes
1684 * real work queue sizes (in wqes), real SGLs, and QP number
1685 */
1686 if (queuesz_p != NULL) {
1687 queuesz_p->cs_sq =
1688 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1689 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1690
1691 /* if this QP is on an SRQ, set these to 0 */
1692 if (qp_srq_en) {
1693 queuesz_p->cs_rq = 0;
1694 queuesz_p->cs_rq_sgl = 0;
1695 } else {
1696 queuesz_p->cs_rq = (1 << log_qp_rq_size);
1697 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1698 }
1699 }
1700
1701 /*
1702 * Fill in the rest of the Hermon Queue Pair handle.
1703 */
1704 qp->qp_qpcrsrcp = NULL;
1705 qp->qp_rsrcp = rsrc;
1706 qp->qp_state = HERMON_QP_RESET;
1707 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1708 qp->qp_pdhdl = pd;
1709 qp->qp_mrhdl = mr;
1710 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1711 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1712 qp->qp_is_special = 0;
1713 qp->qp_uarpg = uarpg;
1714 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1715 qp->qp_sq_cqhdl = sq_cq;
1716 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
1717 qp->qp_sq_logqsz = log_qp_sq_size;
1718 qp->qp_sq_buf = sq_buf;
1719 qp->qp_desc_off = qp_desc_off;
1720 qp->qp_rq_cqhdl = rq_cq;
1721 qp->qp_rq_buf = rq_buf;
1722 qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
1723 0;
1724
1725 /* if this QP is on an SRQ, set rq_bufsz to 0 */
1726 if (qp_srq_en) {
1727 qp->qp_rq_bufsz = 0;
1728 qp->qp_rq_logqsz = 0;
1729 } else {
1730 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
1731 qp->qp_rq_logqsz = log_qp_rq_size;
1732 }
1733
1734 qp->qp_forward_sqd_event = 0;
1735 qp->qp_sqd_still_draining = 0;
1736 qp->qp_hdlrarg = (void *)ibt_qphdl[ii];
1737 qp->qp_mcg_refcnt = 0;
1738
1739 /*
1740 * If this QP is to be associated with an SRQ, set the SRQ handle
1741 */
1742 if (qp_srq_en) {
1743 qp->qp_srqhdl = srq;
1744 hermon_srq_refcnt_inc(qp->qp_srqhdl);
1745 } else {
1746 qp->qp_srqhdl = NULL;
1747 }
1748
1749 qp->qp_type = IBT_UD_RQP;
1750 qp->qp_serv_type = serv_type;
1751
1752 /*
1753 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1754 */
1755
1756 /*
1757 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1758 * set the quadword to all F's - high-order bit is owner (init to one)
1759 * and the rest for the headroom definition of prefetching.
1760 */
1761 if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) {
1762 wqesz_shift = qp->qp_sq_log_wqesz;
1763 thewqesz = 1 << wqesz_shift;
1764 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1765 for (i = 0; i < sq_depth; i++) {
1766 /*
1767 * for each stride, go through and every 64 bytes
1768 * write the init value - having set the address
1769 * once, just keep incrementing it
1770 */
1771 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1772 *(uint32_t *)thewqe = 0xFFFFFFFF;
1773 }
1774 }
1775 }
1776
1777 /* Zero out the QP context */
1778 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1779
1780 /*
1781 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
1782 * "qphdl" and return success
1783 */
1784 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp);
1785
1786 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1787 DDI_INTR_PRI(state->hs_intrmsi_pri));
1788
1789 qp->qp_rangep = qp_range_p;
1790
1791 qphdl[ii] = qp;
1792
1793 if (++ii < (1 << log2))
1794 goto for_each_qp;
1795
1796 return (DDI_SUCCESS);
1797
1798 /*
1799 * The following is cleanup for all possible failure cases in this routine
1800 */
1801 qpalloc_fail9:
1802 hermon_queue_free(&qp->qp_wqinfo);
1803 qpalloc_fail8:
1804 if (qp->qp_sq_wqhdr)
1805 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1806 if (qp->qp_rq_wqhdr)
1807 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1808 qpalloc_fail7:
1809 if (!qp_srq_en) {
1810 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1811 }
1812
1813 qpalloc_fail6:
1814 hermon_rsrc_free(state, &rsrc);
1815 qpalloc_fail4:
1816 hermon_cq_refcnt_dec(rq_cq);
1817 qpalloc_fail2:
1818 hermon_cq_refcnt_dec(sq_cq);
1819 qpalloc_fail1:
1820 hermon_pd_refcnt_dec(pd);
1821 qpalloc_fail0:
1822 if (ii == 0) {
1823 if (qp_range_p)
1824 kmem_free(qp_range_p, sizeof (*qp_range_p));
1825 hermon_rsrc_free(state, &qpc);
1826 } else {
1827 /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */
1828
1829 mutex_enter(&qp->qp_rangep->hqpr_lock);
1830 qp_range_p->hqpr_refcnt = ii;
1831 mutex_exit(&qp->qp_rangep->hqpr_lock);
1832 while (--ii >= 0) {
1833 ibc_qpn_hdl_t qpn_hdl;
1834 int free_status;
1835
1836 free_status = hermon_qp_free(state, &qphdl[ii],
1837 IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag);
1838 if (free_status != DDI_SUCCESS)
1839 cmn_err(CE_CONT, "!qp_range: status 0x%x: "
1840 "error status %x during free",
1841 status, free_status);
1842 }
1843 }
1844
1845 return (status);
1846 }
1847
1848
1849 /*
1850 * hermon_qp_free()
1851 * This function frees up the QP resources. Depending on the value
1852 * of the "free_qp_flags", the QP number may not be released until
1853 * a subsequent call to hermon_qp_release_qpn().
1854 *
1855 * Context: Can be called only from user or kernel context.
1856 */
1857 /* ARGSUSED */
1858 int
hermon_qp_free(hermon_state_t * state,hermon_qphdl_t * qphdl,ibc_free_qp_flags_t free_qp_flags,ibc_qpn_hdl_t * qpnh,uint_t sleepflag)1859 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl,
1860 ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1861 uint_t sleepflag)
1862 {
1863 hermon_rsrc_t *qpc, *rsrc;
1864 hermon_umap_db_entry_t *umapdb;
1865 hermon_qpn_entry_t *entry;
1866 hermon_pdhdl_t pd;
1867 hermon_mrhdl_t mr;
1868 hermon_cqhdl_t sq_cq, rq_cq;
1869 hermon_srqhdl_t srq;
1870 hermon_qphdl_t qp;
1871 uint64_t value;
1872 uint_t type, port;
1873 uint_t maxprot;
1874 uint_t qp_srq_en;
1875 int status;
1876
1877 /*
1878 * Pull all the necessary information from the Hermon Queue Pair
1879 * handle. This is necessary here because the resource for the
1880 * QP handle is going to be freed up as part of this operation.
1881 */
1882 qp = *qphdl;
1883 mutex_enter(&qp->qp_lock);
1884 qpc = qp->qp_qpcrsrcp; /* NULL if part of a "range" */
1885 rsrc = qp->qp_rsrcp;
1886 pd = qp->qp_pdhdl;
1887 srq = qp->qp_srqhdl;
1888 mr = qp->qp_mrhdl;
1889 rq_cq = qp->qp_rq_cqhdl;
1890 sq_cq = qp->qp_sq_cqhdl;
1891 port = qp->qp_portnum;
1892 qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ;
1893
1894 /*
1895 * If the QP is part of an MCG, then we fail the qp_free
1896 */
1897 if (qp->qp_mcg_refcnt != 0) {
1898 mutex_exit(&qp->qp_lock);
1899 status = ibc_get_ci_failure(0);
1900 goto qpfree_fail;
1901 }
1902
1903 /*
1904 * If the QP is not already in "Reset" state, then transition to
1905 * "Reset". This is necessary because software does not reclaim
1906 * ownership of the QP context until the QP is in the "Reset" state.
1907 * If the ownership transfer fails for any reason, then it is an
1908 * indication that something (either in HW or SW) has gone seriously
1909 * wrong. So we print a warning message and return.
1910 */
1911 if (qp->qp_state != HERMON_QP_RESET) {
1912 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) {
1913 mutex_exit(&qp->qp_lock);
1914 HERMON_WARNING(state, "failed to reset QP context");
1915 status = ibc_get_ci_failure(0);
1916 goto qpfree_fail;
1917 }
1918 qp->qp_state = HERMON_QP_RESET;
1919 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1920
1921 /*
1922 * Do any additional handling necessary for the transition
1923 * to the "Reset" state (e.g. update the WRID lists)
1924 */
1925 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) {
1926 mutex_exit(&qp->qp_lock);
1927 HERMON_WARNING(state, "failed to reset QP WRID list");
1928 status = ibc_get_ci_failure(0);
1929 goto qpfree_fail;
1930 }
1931 }
1932
1933 /*
1934 * If this was a user-mappable QP, then we need to remove its entry
1935 * from the "userland resources database". If it is also currently
1936 * mmap()'d out to a user process, then we need to call
1937 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1938 * We also need to invalidate the QP tracking information for the
1939 * user mapping.
1940 */
1941 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1942 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum,
1943 MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
1944 &umapdb);
1945 if (status != DDI_SUCCESS) {
1946 mutex_exit(&qp->qp_lock);
1947 HERMON_WARNING(state, "failed to find in database");
1948 return (ibc_get_ci_failure(0));
1949 }
1950 hermon_umap_db_free(umapdb);
1951 if (qp->qp_umap_dhp != NULL) {
1952 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1953 status = devmap_devmem_remap(qp->qp_umap_dhp,
1954 state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size,
1955 maxprot, DEVMAP_MAPPING_INVALID, NULL);
1956 if (status != DDI_SUCCESS) {
1957 mutex_exit(&qp->qp_lock);
1958 HERMON_WARNING(state, "failed in QP memory "
1959 "devmap_devmem_remap()");
1960 return (ibc_get_ci_failure(0));
1961 }
1962 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1963 }
1964 }
1965
1966
1967 /*
1968 * Put NULL into the Hermon QPNum-to-QPHdl list. This will allow any
1969 * in-progress events to detect that the QP corresponding to this
1970 * number has been freed. Note: it does depend in whether we are
1971 * freeing a special QP or not.
1972 */
1973 if (qpc == NULL) {
1974 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1975 qp->qp_qpnum, NULL);
1976 } else if (qp->qp_is_special) {
1977 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1978 qpc->hr_indx + port, NULL);
1979 } else {
1980 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1981 qpc->hr_indx, NULL);
1982 }
1983
1984 /*
1985 * Drop the QP lock
1986 * At this point the lock is no longer necessary. We cannot
1987 * protect from multiple simultaneous calls to free the same QP.
1988 * In addition, since the QP lock is contained in the QP "software
1989 * handle" resource, which we will free (see below), it is
1990 * important that we have no further references to that memory.
1991 */
1992 mutex_exit(&qp->qp_lock);
1993 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
1994
1995 /*
1996 * Free the QP resources
1997 * Start by deregistering and freeing the memory for work queues.
1998 * Next free any previously allocated context information
1999 * (depending on QP type)
2000 * Finally, decrement the necessary reference counts.
2001 * If this fails for any reason, then it is an indication that
2002 * something (either in HW or SW) has gone seriously wrong. So we
2003 * print a warning message and return.
2004 */
2005 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
2006 sleepflag);
2007 if (status != DDI_SUCCESS) {
2008 HERMON_WARNING(state, "failed to deregister QP memory");
2009 status = ibc_get_ci_failure(0);
2010 goto qpfree_fail;
2011 }
2012
2013 /* Free the memory for the QP */
2014 hermon_queue_free(&qp->qp_wqinfo);
2015
2016 if (qp->qp_sq_wqhdr)
2017 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
2018 if (qp->qp_rq_wqhdr)
2019 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
2020
2021 /* Free the dbr */
2022 if (!qp_srq_en) {
2023 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr);
2024 }
2025
2026 /*
2027 * Free up the remainder of the QP resources. Note: we have a few
2028 * different resources to free up depending on whether the QP is a
2029 * special QP or not. As described above, if any of these fail for
2030 * any reason it is an indication that something (either in HW or SW)
2031 * has gone seriously wrong. So we print a warning message and
2032 * return.
2033 */
2034 if (qp->qp_is_special) {
2035 type = (qp->qp_is_special == HERMON_QP_SMI) ?
2036 IBT_SMI_SQP : IBT_GSI_SQP;
2037
2038 /* Free up resources for the special QP */
2039 status = hermon_special_qp_rsrc_free(state, type, port);
2040 if (status != DDI_SUCCESS) {
2041 HERMON_WARNING(state, "failed to free special QP rsrc");
2042 status = ibc_get_ci_failure(0);
2043 goto qpfree_fail;
2044 }
2045
2046 } else if (qp->qp_rangep) {
2047 int refcnt;
2048 mutex_enter(&qp->qp_rangep->hqpr_lock);
2049 refcnt = --qp->qp_rangep->hqpr_refcnt;
2050 mutex_exit(&qp->qp_rangep->hqpr_lock);
2051 if (refcnt == 0) {
2052 mutex_destroy(&qp->qp_rangep->hqpr_lock);
2053 hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc);
2054 kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep));
2055 }
2056 qp->qp_rangep = NULL;
2057 } else if (qp->qp_qpn_hdl == NULL) {
2058 hermon_rsrc_free(state, &qpc);
2059 } else {
2060 /*
2061 * Check the flags and determine whether to release the
2062 * QPN or not, based on their value.
2063 */
2064 if (free_qp_flags == IBC_FREE_QP_ONLY) {
2065 entry = qp->qp_qpn_hdl;
2066 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2067 HERMON_QPN_FREE_ONLY);
2068 *qpnh = (ibc_qpn_hdl_t)entry;
2069 } else {
2070 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2071 HERMON_QPN_RELEASE);
2072 }
2073 }
2074
2075 mutex_destroy(&qp->qp_sq_lock);
2076
2077 /* Free the Hermon Queue Pair handle */
2078 hermon_rsrc_free(state, &rsrc);
2079
2080 /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
2081 hermon_cq_refcnt_dec(rq_cq);
2082 hermon_cq_refcnt_dec(sq_cq);
2083 hermon_pd_refcnt_dec(pd);
2084 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
2085 hermon_srq_refcnt_dec(srq);
2086 }
2087
2088 /* Set the qphdl pointer to NULL and return success */
2089 *qphdl = NULL;
2090
2091 return (DDI_SUCCESS);
2092
2093 qpfree_fail:
2094 return (status);
2095 }
2096
2097
2098 /*
2099 * hermon_qp_query()
2100 * Context: Can be called from interrupt or base context.
2101 */
2102 int
hermon_qp_query(hermon_state_t * state,hermon_qphdl_t qp,ibt_qp_query_attr_t * attr_p)2103 hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp,
2104 ibt_qp_query_attr_t *attr_p)
2105 {
2106 ibt_cep_state_t qp_state;
2107 ibt_qp_ud_attr_t *ud;
2108 ibt_qp_rc_attr_t *rc;
2109 ibt_qp_uc_attr_t *uc;
2110 ibt_cep_flags_t enable_flags;
2111 hermon_hw_addr_path_t *qpc_path, *qpc_alt_path;
2112 ibt_cep_path_t *path_ptr, *alt_path_ptr;
2113 hermon_hw_qpc_t *qpc;
2114 int status;
2115 uint_t tmp_sched_q, tmp_alt_sched_q;
2116
2117 mutex_enter(&qp->qp_lock);
2118
2119 /*
2120 * Grab the temporary QPC entry from QP software state
2121 */
2122 qpc = &qp->qpc;
2123
2124 /* Convert the current Hermon QP state to IBTF QP state */
2125 switch (qp->qp_state) {
2126 case HERMON_QP_RESET:
2127 qp_state = IBT_STATE_RESET; /* "Reset" */
2128 break;
2129 case HERMON_QP_INIT:
2130 qp_state = IBT_STATE_INIT; /* Initialized */
2131 break;
2132 case HERMON_QP_RTR:
2133 qp_state = IBT_STATE_RTR; /* Ready to Receive */
2134 break;
2135 case HERMON_QP_RTS:
2136 qp_state = IBT_STATE_RTS; /* Ready to Send */
2137 break;
2138 case HERMON_QP_SQERR:
2139 qp_state = IBT_STATE_SQE; /* Send Queue Error */
2140 break;
2141 case HERMON_QP_SQD:
2142 if (qp->qp_sqd_still_draining) {
2143 qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */
2144 } else {
2145 qp_state = IBT_STATE_SQD; /* SQ Drained */
2146 }
2147 break;
2148 case HERMON_QP_ERR:
2149 qp_state = IBT_STATE_ERROR; /* Error */
2150 break;
2151 default:
2152 mutex_exit(&qp->qp_lock);
2153 return (ibc_get_ci_failure(0));
2154 }
2155 attr_p->qp_info.qp_state = qp_state;
2156
2157 /* SRQ Hook. */
2158 attr_p->qp_srq = NULL;
2159
2160 /*
2161 * The following QP information is always returned, regardless of
2162 * the current QP state. Note: Some special handling is necessary
2163 * for calculating the QP number on special QP (QP0 and QP1).
2164 */
2165 attr_p->qp_sq_cq =
2166 (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg;
2167 attr_p->qp_rq_cq =
2168 (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg;
2169 if (qp->qp_is_special) {
2170 attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1;
2171 } else {
2172 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
2173 }
2174 attr_p->qp_sq_sgl = qp->qp_sq_sgl;
2175 attr_p->qp_rq_sgl = qp->qp_rq_sgl;
2176 attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes;
2177 attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
2178
2179 /*
2180 * If QP is currently in the "Reset" state, then only the above are
2181 * returned
2182 */
2183 if (qp_state == IBT_STATE_RESET) {
2184 mutex_exit(&qp->qp_lock);
2185 return (DDI_SUCCESS);
2186 }
2187
2188 /*
2189 * Post QUERY_QP command to firmware
2190 *
2191 * We do a HERMON_NOSLEEP here because we are holding the "qp_lock".
2192 * Since we may be in the interrupt context (or subsequently raised
2193 * to interrupt level by priority inversion), we do not want to block
2194 * in this routine waiting for success.
2195 */
2196 tmp_sched_q = qpc->pri_addr_path.sched_q;
2197 tmp_alt_sched_q = qpc->alt_addr_path.sched_q;
2198 status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum,
2199 qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN);
2200 if (status != HERMON_CMD_SUCCESS) {
2201 mutex_exit(&qp->qp_lock);
2202 cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP "
2203 "command failed: %08x\n", state->hs_instance, status);
2204 if (status == HERMON_CMD_INVALID_STATUS) {
2205 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2206 }
2207 return (ibc_get_ci_failure(0));
2208 }
2209 qpc->pri_addr_path.sched_q = tmp_sched_q;
2210 qpc->alt_addr_path.sched_q = tmp_alt_sched_q;
2211
2212 /*
2213 * Fill in the additional QP info based on the QP's transport type.
2214 */
2215 if (qp->qp_type == IBT_UD_RQP) {
2216
2217 /* Fill in the UD-specific info */
2218 ud = &attr_p->qp_info.qp_transport.ud;
2219 ud->ud_qkey = (ib_qkey_t)qpc->qkey;
2220 ud->ud_sq_psn = qpc->next_snd_psn;
2221 ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx;
2222 /* port+1 for port 1/2 */
2223 ud->ud_port =
2224 (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1);
2225
2226 attr_p->qp_info.qp_trans = IBT_UD_SRV;
2227
2228 if (qp->qp_serv_type == HERMON_QP_FEXCH) {
2229 ibt_pmr_desc_t *pmr;
2230 uint64_t heart_beat;
2231
2232 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pmr))
2233 pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc;
2234 pmr->pmd_iova = 0;
2235 pmr->pmd_lkey = pmr->pmd_rkey =
2236 hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum);
2237 pmr->pmd_phys_buf_list_sz =
2238 state->hs_fcoib.hfc_mtts_per_mpt;
2239 pmr->pmd_sync_required = 0;
2240
2241 pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc;
2242 pmr->pmd_iova = 0;
2243 pmr->pmd_lkey = 0;
2244 pmr->pmd_rkey = 0;
2245 pmr->pmd_phys_buf_list_sz = 0;
2246 pmr->pmd_sync_required = 0;
2247
2248 attr_p->qp_query_fexch.fq_flags =
2249 ((hermon_get_heart_beat_rq_cmd_post(state,
2250 qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) &&
2251 (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK :
2252 IBT_FEXCH_NO_FLAGS;
2253
2254 ud->ud_fc = qp->qp_fc_attr;
2255 } else if (qp->qp_serv_type == HERMON_QP_FCMND ||
2256 qp->qp_serv_type == HERMON_QP_RFCI) {
2257 ud->ud_fc = qp->qp_fc_attr;
2258 }
2259
2260 } else if (qp->qp_serv_type == HERMON_QP_RC) {
2261
2262 /* Fill in the RC-specific info */
2263 rc = &attr_p->qp_info.qp_transport.rc;
2264 rc->rc_sq_psn = qpc->next_snd_psn;
2265 rc->rc_rq_psn = qpc->next_rcv_psn;
2266 rc->rc_dst_qpn = qpc->rem_qpn;
2267
2268 /* Grab the path migration state information */
2269 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2270 rc->rc_mig_state = IBT_STATE_MIGRATED;
2271 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2272 rc->rc_mig_state = IBT_STATE_REARMED;
2273 } else {
2274 rc->rc_mig_state = IBT_STATE_ARMED;
2275 }
2276 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
2277 rc->rc_rdma_ra_in = (1 << qpc->rra_max);
2278 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
2279 rc->rc_path_mtu = qpc->mtu;
2280 rc->rc_retry_cnt = qpc->retry_cnt;
2281
2282 /* Get the common primary address path fields */
2283 qpc_path = &qpc->pri_addr_path;
2284 path_ptr = &rc->rc_path;
2285 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2286 HERMON_ADDRPATH_QP);
2287
2288 /* Fill in the additional primary address path fields */
2289 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
2290 path_ptr->cep_hca_port_num =
2291 path_ptr->cep_adds_vect.av_port_num =
2292 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2293 path_ptr->cep_timeout = qpc_path->ack_timeout;
2294
2295 /* Get the common alternate address path fields */
2296 qpc_alt_path = &qpc->alt_addr_path;
2297 alt_path_ptr = &rc->rc_alt_path;
2298 hermon_get_addr_path(state, qpc_alt_path,
2299 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2300
2301 /* Fill in the additional alternate address path fields */
2302 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
2303 alt_path_ptr->cep_hca_port_num =
2304 alt_path_ptr->cep_adds_vect.av_port_num =
2305 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2306 alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout;
2307
2308 /* Get the RNR retry time from primary path */
2309 rc->rc_rnr_retry_cnt = qpc->rnr_retry;
2310
2311 /* Set the enable flags based on RDMA/Atomic enable bits */
2312 enable_flags = IBT_CEP_NO_FLAGS;
2313 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
2314 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2315 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
2316 attr_p->qp_info.qp_flags = enable_flags;
2317
2318 attr_p->qp_info.qp_trans = IBT_RC_SRV;
2319
2320 } else if (qp->qp_serv_type == HERMON_QP_UC) {
2321
2322 /* Fill in the UC-specific info */
2323 uc = &attr_p->qp_info.qp_transport.uc;
2324 uc->uc_sq_psn = qpc->next_snd_psn;
2325 uc->uc_rq_psn = qpc->next_rcv_psn;
2326 uc->uc_dst_qpn = qpc->rem_qpn;
2327
2328 /* Grab the path migration state information */
2329 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2330 uc->uc_mig_state = IBT_STATE_MIGRATED;
2331 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2332 uc->uc_mig_state = IBT_STATE_REARMED;
2333 } else {
2334 uc->uc_mig_state = IBT_STATE_ARMED;
2335 }
2336 uc->uc_path_mtu = qpc->mtu;
2337
2338 /* Get the common primary address path fields */
2339 qpc_path = &qpc->pri_addr_path;
2340 path_ptr = &uc->uc_path;
2341 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2342 HERMON_ADDRPATH_QP);
2343
2344 /* Fill in the additional primary address path fields */
2345 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
2346 path_ptr->cep_hca_port_num =
2347 path_ptr->cep_adds_vect.av_port_num =
2348 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2349
2350 /* Get the common alternate address path fields */
2351 qpc_alt_path = &qpc->alt_addr_path;
2352 alt_path_ptr = &uc->uc_alt_path;
2353 hermon_get_addr_path(state, qpc_alt_path,
2354 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2355
2356 /* Fill in the additional alternate address path fields */
2357 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
2358 alt_path_ptr->cep_hca_port_num =
2359 alt_path_ptr->cep_adds_vect.av_port_num =
2360 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2361
2362 /*
2363 * Set the enable flags based on RDMA enable bits (by
2364 * definition UC doesn't support Atomic or RDMA Read)
2365 */
2366 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2367 attr_p->qp_info.qp_flags = enable_flags;
2368
2369 attr_p->qp_info.qp_trans = IBT_UC_SRV;
2370
2371 } else {
2372 HERMON_WARNING(state, "unexpected QP transport type");
2373 mutex_exit(&qp->qp_lock);
2374 return (ibc_get_ci_failure(0));
2375 }
2376
2377 /*
2378 * Under certain circumstances it is possible for the Hermon hardware
2379 * to transition to one of the error states without software directly
2380 * knowing about it. The QueryQP() call is the one place where we
2381 * have an opportunity to sample and update our view of the QP state.
2382 */
2383 if (qpc->state == HERMON_QP_SQERR) {
2384 attr_p->qp_info.qp_state = IBT_STATE_SQE;
2385 qp->qp_state = HERMON_QP_SQERR;
2386 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR);
2387 }
2388 if (qpc->state == HERMON_QP_ERR) {
2389 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
2390 qp->qp_state = HERMON_QP_ERR;
2391 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR);
2392 }
2393 mutex_exit(&qp->qp_lock);
2394
2395 return (DDI_SUCCESS);
2396 }
2397
2398
2399 /*
2400 * hermon_qp_create_qpn()
2401 * Context: Can be called from interrupt or base context.
2402 */
2403 static int
hermon_qp_create_qpn(hermon_state_t * state,hermon_qphdl_t qp,hermon_rsrc_t * qpc)2404 hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
2405 hermon_rsrc_t *qpc)
2406 {
2407 hermon_qpn_entry_t query;
2408 hermon_qpn_entry_t *entry;
2409 avl_index_t where;
2410
2411 /*
2412 * Build a query (for the AVL tree lookup) and attempt to find
2413 * a previously added entry that has a matching QPC index. If
2414 * no matching entry is found, then allocate, initialize, and
2415 * add an entry to the AVL tree.
2416 * If a matching entry is found, then increment its QPN counter
2417 * and reference counter.
2418 */
2419 query.qpn_indx = qpc->hr_indx;
2420 mutex_enter(&state->hs_qpn_avl_lock);
2421 entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl,
2422 &query, &where);
2423 if (entry == NULL) {
2424 /*
2425 * Allocate and initialize a QPN entry, then insert
2426 * it into the AVL tree.
2427 */
2428 entry = (hermon_qpn_entry_t *)kmem_zalloc(
2429 sizeof (hermon_qpn_entry_t), KM_NOSLEEP);
2430 if (entry == NULL) {
2431 mutex_exit(&state->hs_qpn_avl_lock);
2432 return (DDI_FAILURE);
2433 }
2434 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
2435
2436 entry->qpn_indx = qpc->hr_indx;
2437 entry->qpn_refcnt = 0;
2438 entry->qpn_counter = 0;
2439
2440 avl_insert(&state->hs_qpn_avl, entry, where);
2441 }
2442
2443 /*
2444 * Make the AVL tree entry point to the QP context resource that
2445 * it will be responsible for tracking
2446 */
2447 entry->qpn_qpc = qpc;
2448
2449 /*
2450 * Setup the QP handle to point to the AVL tree entry. Then
2451 * generate the new QP number from the entry's QPN counter value
2452 * and the hardware's QP context table index.
2453 */
2454 qp->qp_qpn_hdl = entry;
2455 qp->qp_qpnum = ((entry->qpn_counter <<
2456 state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) &
2457 HERMON_QP_MAXNUMBER_MSK;
2458 qp->qp_ring = qp->qp_qpnum << 8;
2459
2460 /*
2461 * Increment the reference counter and QPN counter. The QPN
2462 * counter always indicates the next available number for use.
2463 */
2464 entry->qpn_counter++;
2465 entry->qpn_refcnt++;
2466
2467 mutex_exit(&state->hs_qpn_avl_lock);
2468
2469 return (DDI_SUCCESS);
2470 }
2471
2472
2473 /*
2474 * hermon_qp_release_qpn()
2475 * Context: Can be called only from user or kernel context.
2476 */
2477 void
hermon_qp_release_qpn(hermon_state_t * state,hermon_qpn_entry_t * entry,int flags)2478 hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry,
2479 int flags)
2480 {
2481 ASSERT(entry != NULL);
2482
2483 mutex_enter(&state->hs_qpn_avl_lock);
2484
2485 /*
2486 * If we are releasing the QP number here, then we decrement the
2487 * reference count and check for zero references. If there are
2488 * zero references, then we free the QPC context (if it hadn't
2489 * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for
2490 * reuse with another similar QP number) and remove the tracking
2491 * structure from the QP number AVL tree and free the structure.
2492 * If we are not releasing the QP number here, then, as long as we
2493 * have not exhausted the usefulness of the QPC context (that is,
2494 * re-used it too many times without the reference count having
2495 * gone to zero), we free up the QPC context for use by another
2496 * thread (which will use it to construct a different QP number
2497 * from the same QPC table index).
2498 */
2499 if (flags == HERMON_QPN_RELEASE) {
2500 entry->qpn_refcnt--;
2501
2502 /*
2503 * If the reference count is zero, then we free the QPC
2504 * context (if it hadn't already been freed in an early
2505 * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the
2506 * tracking structure from the QP number AVL tree.
2507 */
2508 if (entry->qpn_refcnt == 0) {
2509 if (entry->qpn_qpc != NULL) {
2510 hermon_rsrc_free(state, &entry->qpn_qpc);
2511 }
2512
2513 /*
2514 * If the current entry has served it's useful
2515 * purpose (i.e. been reused the maximum allowable
2516 * number of times), then remove it from QP number
2517 * AVL tree and free it up.
2518 */
2519 if (entry->qpn_counter >= (1 <<
2520 (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2521 avl_remove(&state->hs_qpn_avl, entry);
2522 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2523 }
2524 }
2525
2526 } else if (flags == HERMON_QPN_FREE_ONLY) {
2527 /*
2528 * Even if we are not freeing the QP number, that will not
2529 * always prevent us from releasing the QPC context. In fact,
2530 * since the QPC context only forms part of the whole QPN,
2531 * we want to free it up for use by other consumers. But
2532 * if the reference count is non-zero (which it will always
2533 * be when we are doing HERMON_QPN_FREE_ONLY) and the counter
2534 * has reached its maximum value, then we cannot reuse the
2535 * QPC context until the reference count eventually reaches
2536 * zero (in HERMON_QPN_RELEASE, above).
2537 */
2538 if (entry->qpn_counter < (1 <<
2539 (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2540 hermon_rsrc_free(state, &entry->qpn_qpc);
2541 }
2542 }
2543 mutex_exit(&state->hs_qpn_avl_lock);
2544 }
2545
2546
2547 /*
2548 * hermon_qpn_avl_compare()
2549 * Context: Can be called from user or kernel context.
2550 */
2551 static int
hermon_qpn_avl_compare(const void * q,const void * e)2552 hermon_qpn_avl_compare(const void *q, const void *e)
2553 {
2554 hermon_qpn_entry_t *entry, *query;
2555
2556 entry = (hermon_qpn_entry_t *)e;
2557 query = (hermon_qpn_entry_t *)q;
2558
2559 if (query->qpn_indx < entry->qpn_indx) {
2560 return (-1);
2561 } else if (query->qpn_indx > entry->qpn_indx) {
2562 return (+1);
2563 } else {
2564 return (0);
2565 }
2566 }
2567
2568
2569 /*
2570 * hermon_qpn_avl_init()
2571 * Context: Only called from attach() path context
2572 */
2573 void
hermon_qpn_avl_init(hermon_state_t * state)2574 hermon_qpn_avl_init(hermon_state_t *state)
2575 {
2576 /* Initialize the lock used for QP number (QPN) AVL tree access */
2577 mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER,
2578 DDI_INTR_PRI(state->hs_intrmsi_pri));
2579
2580 /* Initialize the AVL tree for the QP number (QPN) storage */
2581 avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare,
2582 sizeof (hermon_qpn_entry_t),
2583 offsetof(hermon_qpn_entry_t, qpn_avlnode));
2584 }
2585
2586
2587 /*
2588 * hermon_qpn_avl_fini()
2589 * Context: Only called from attach() and/or detach() path contexts
2590 */
2591 void
hermon_qpn_avl_fini(hermon_state_t * state)2592 hermon_qpn_avl_fini(hermon_state_t *state)
2593 {
2594 hermon_qpn_entry_t *entry;
2595 void *cookie;
2596
2597 /*
2598 * Empty all entries (if necessary) and destroy the AVL tree
2599 * that was used for QP number (QPN) tracking.
2600 */
2601 cookie = NULL;
2602 while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes(
2603 &state->hs_qpn_avl, &cookie)) != NULL) {
2604 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2605 }
2606 avl_destroy(&state->hs_qpn_avl);
2607
2608 /* Destroy the lock used for QP number (QPN) AVL tree access */
2609 mutex_destroy(&state->hs_qpn_avl_lock);
2610 }
2611
2612
2613 /*
2614 * hermon_qphdl_from_qpnum()
2615 * Context: Can be called from interrupt or base context.
2616 *
2617 * This routine is important because changing the unconstrained
2618 * portion of the QP number is critical to the detection of a
2619 * potential race condition in the QP event handler code (i.e. the case
2620 * where a QP is freed and alloc'd again before an event for the
2621 * "old" QP can be handled).
2622 *
2623 * While this is not a perfect solution (not sure that one exists)
2624 * it does help to mitigate the chance that this race condition will
2625 * cause us to deliver a "stale" event to the new QP owner. Note:
2626 * this solution does not scale well because the number of constrained
2627 * bits increases (and, hence, the number of unconstrained bits
2628 * decreases) as the number of supported QPs grows. For small and
2629 * intermediate values, it should hopefully provide sufficient
2630 * protection.
2631 */
2632 hermon_qphdl_t
hermon_qphdl_from_qpnum(hermon_state_t * state,uint_t qpnum)2633 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum)
2634 {
2635 uint_t qpindx, qpmask;
2636
2637 /* Calculate the QP table index from the qpnum */
2638 qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1;
2639 qpindx = qpnum & qpmask;
2640 return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx));
2641 }
2642
2643
2644 /*
2645 * hermon_special_qp_rsrc_alloc
2646 * Context: Can be called from interrupt or base context.
2647 */
2648 static int
hermon_special_qp_rsrc_alloc(hermon_state_t * state,ibt_sqp_type_t type,uint_t port,hermon_rsrc_t ** qp_rsrc)2649 hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type,
2650 uint_t port, hermon_rsrc_t **qp_rsrc)
2651 {
2652 uint_t mask, flags;
2653 int status;
2654
2655 mutex_enter(&state->hs_spec_qplock);
2656 flags = state->hs_spec_qpflags;
2657 if (type == IBT_SMI_SQP) {
2658 /*
2659 * Check here to see if the driver has been configured
2660 * to instruct the Hermon firmware to handle all incoming
2661 * SMP messages (i.e. messages sent to SMA). If so,
2662 * then we will treat QP0 as if it has already been
2663 * allocated (for internal use). Otherwise, if we allow
2664 * the allocation to happen, it will cause unexpected
2665 * behaviors (e.g. Hermon SMA becomes unresponsive).
2666 */
2667 if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) {
2668 mutex_exit(&state->hs_spec_qplock);
2669 return (IBT_QP_IN_USE);
2670 }
2671
2672 /*
2673 * If this is the first QP0 allocation, then post
2674 * a CONF_SPECIAL_QP firmware command
2675 */
2676 if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) {
2677 status = hermon_conf_special_qp_cmd_post(state,
2678 state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI,
2679 HERMON_CMD_NOSLEEP_SPIN,
2680 HERMON_CMD_SPEC_QP_OPMOD(
2681 state->hs_cfg_profile->cp_qp0_agents_in_fw,
2682 state->hs_cfg_profile->cp_qp1_agents_in_fw));
2683 if (status != HERMON_CMD_SUCCESS) {
2684 mutex_exit(&state->hs_spec_qplock);
2685 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2686 "command failed: %08x\n",
2687 state->hs_instance, status);
2688 return (IBT_INSUFF_RESOURCE);
2689 }
2690 }
2691
2692 /*
2693 * Now check (and, if necessary, modify) the flags to indicate
2694 * whether the allocation was successful
2695 */
2696 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2697 if (flags & mask) {
2698 mutex_exit(&state->hs_spec_qplock);
2699 return (IBT_QP_IN_USE);
2700 }
2701 state->hs_spec_qpflags |= mask;
2702 *qp_rsrc = state->hs_spec_qp0;
2703
2704 } else {
2705 /*
2706 * If this is the first QP1 allocation, then post
2707 * a CONF_SPECIAL_QP firmware command
2708 */
2709 if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) {
2710 status = hermon_conf_special_qp_cmd_post(state,
2711 state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI,
2712 HERMON_CMD_NOSLEEP_SPIN,
2713 HERMON_CMD_SPEC_QP_OPMOD(
2714 state->hs_cfg_profile->cp_qp0_agents_in_fw,
2715 state->hs_cfg_profile->cp_qp1_agents_in_fw));
2716 if (status != HERMON_CMD_SUCCESS) {
2717 mutex_exit(&state->hs_spec_qplock);
2718 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2719 "command failed: %08x\n",
2720 state->hs_instance, status);
2721 return (IBT_INSUFF_RESOURCE);
2722 }
2723 }
2724
2725 /*
2726 * Now check (and, if necessary, modify) the flags to indicate
2727 * whether the allocation was successful
2728 */
2729 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2730 if (flags & mask) {
2731 mutex_exit(&state->hs_spec_qplock);
2732 return (IBT_QP_IN_USE);
2733 }
2734 state->hs_spec_qpflags |= mask;
2735 *qp_rsrc = state->hs_spec_qp1;
2736 }
2737
2738 mutex_exit(&state->hs_spec_qplock);
2739 return (DDI_SUCCESS);
2740 }
2741
2742
2743 /*
2744 * hermon_special_qp_rsrc_free
2745 * Context: Can be called from interrupt or base context.
2746 */
2747 static int
hermon_special_qp_rsrc_free(hermon_state_t * state,ibt_sqp_type_t type,uint_t port)2748 hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
2749 uint_t port)
2750 {
2751 uint_t mask, flags;
2752 int status;
2753
2754 mutex_enter(&state->hs_spec_qplock);
2755 if (type == IBT_SMI_SQP) {
2756 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2757 state->hs_spec_qpflags &= ~mask;
2758 flags = state->hs_spec_qpflags;
2759
2760 /*
2761 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
2762 * NOW, If this is the last Special QP free, then post a
2763 * CONF_SPECIAL_QP firmware command - it'll stop them all
2764 */
2765 if (flags) {
2766 status = hermon_conf_special_qp_cmd_post(state, 0,
2767 HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
2768 if (status != HERMON_CMD_SUCCESS) {
2769 mutex_exit(&state->hs_spec_qplock);
2770 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2771 "command failed: %08x\n",
2772 state->hs_instance, status);
2773 if (status == HERMON_CMD_INVALID_STATUS) {
2774 hermon_fm_ereport(state, HCA_SYS_ERR,
2775 HCA_ERR_SRV_LOST);
2776 }
2777 return (ibc_get_ci_failure(0));
2778 }
2779 }
2780 } else {
2781 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2782 state->hs_spec_qpflags &= ~mask;
2783 flags = state->hs_spec_qpflags;
2784
2785 /*
2786 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
2787 * NOW, if this is the last special QP free, then post a
2788 * CONF_SPECIAL_QP firmware command - it'll stop them all
2789 */
2790 if (flags) {
2791 status = hermon_conf_special_qp_cmd_post(state, 0,
2792 HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
2793 if (status != HERMON_CMD_SUCCESS) {
2794 mutex_exit(&state->hs_spec_qplock);
2795 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2796 "command failed: %08x\n",
2797 state->hs_instance, status);
2798 if (status == HERMON_CMD_INVALID_STATUS) {
2799 hermon_fm_ereport(state, HCA_SYS_ERR,
2800 HCA_ERR_SRV_LOST);
2801 }
2802 return (ibc_get_ci_failure(0));
2803 }
2804 }
2805 }
2806
2807 mutex_exit(&state->hs_spec_qplock);
2808 return (DDI_SUCCESS);
2809 }
2810
2811
2812 /*
2813 * hermon_qp_sgl_to_logwqesz()
2814 * Context: Can be called from interrupt or base context.
2815 */
2816 static void
hermon_qp_sgl_to_logwqesz(hermon_state_t * state,uint_t num_sgl,uint_t real_max_sgl,hermon_qp_wq_type_t wq_type,uint_t * logwqesz,uint_t * max_sgl)2817 hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
2818 uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
2819 uint_t *logwqesz, uint_t *max_sgl)
2820 {
2821 uint_t max_size, log2, actual_sgl;
2822
2823 switch (wq_type) {
2824 case HERMON_QP_WQ_TYPE_SENDQ_UD:
2825 /*
2826 * Use requested maximum SGL to calculate max descriptor size
2827 * (while guaranteeing that the descriptor size is a
2828 * power-of-2 cachelines).
2829 */
2830 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2831 log2 = highbit(max_size);
2832 if (ISP2(max_size)) {
2833 log2 = log2 - 1;
2834 }
2835
2836 /* Make sure descriptor is at least the minimum size */
2837 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2838
2839 /* Calculate actual number of SGL (given WQE size) */
2840 actual_sgl = ((1 << log2) -
2841 sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
2842 break;
2843
2844 case HERMON_QP_WQ_TYPE_SENDQ_CONN:
2845 /*
2846 * Use requested maximum SGL to calculate max descriptor size
2847 * (while guaranteeing that the descriptor size is a
2848 * power-of-2 cachelines).
2849 */
2850 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2851 log2 = highbit(max_size);
2852 if (ISP2(max_size)) {
2853 log2 = log2 - 1;
2854 }
2855
2856 /* Make sure descriptor is at least the minimum size */
2857 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2858
2859 /* Calculate actual number of SGL (given WQE size) */
2860 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
2861 break;
2862
2863 case HERMON_QP_WQ_TYPE_RECVQ:
2864 /*
2865 * Same as above (except for Recv WQEs)
2866 */
2867 max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2868 log2 = highbit(max_size);
2869 if (ISP2(max_size)) {
2870 log2 = log2 - 1;
2871 }
2872
2873 /* Make sure descriptor is at least the minimum size */
2874 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2875
2876 /* Calculate actual number of SGL (given WQE size) */
2877 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
2878 break;
2879
2880 case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
2881 /*
2882 * Same as above (except for MLX transport WQEs). For these
2883 * WQEs we have to account for the space consumed by the
2884 * "inline" packet headers. (This is smaller than for QP1
2885 * below because QP0 is not allowed to send packets with a GRH.
2886 */
2887 max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2888 log2 = highbit(max_size);
2889 if (ISP2(max_size)) {
2890 log2 = log2 - 1;
2891 }
2892
2893 /* Make sure descriptor is at least the minimum size */
2894 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2895
2896 /* Calculate actual number of SGL (given WQE size) */
2897 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
2898 break;
2899
2900 case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
2901 /*
2902 * Same as above. For these WQEs we again have to account for
2903 * the space consumed by the "inline" packet headers. (This
2904 * is larger than for QP0 above because we have to account for
2905 * the possibility of a GRH in each packet - and this
2906 * introduces an alignment issue that causes us to consume
2907 * an additional 8 bytes).
2908 */
2909 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2910 log2 = highbit(max_size);
2911 if (ISP2(max_size)) {
2912 log2 = log2 - 1;
2913 }
2914
2915 /* Make sure descriptor is at least the minimum size */
2916 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2917
2918 /* Calculate actual number of SGL (given WQE size) */
2919 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
2920 break;
2921
2922 default:
2923 HERMON_WARNING(state, "unexpected work queue type");
2924 break;
2925 }
2926
2927 /* Fill in the return values */
2928 *logwqesz = log2;
2929 *max_sgl = min(real_max_sgl, actual_sgl);
2930 }
2931