/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * tavor_qp.c
 *    Tavor Queue Pair Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, and
 *    querying the Tavor queue pairs.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/tavor/tavor.h>
#include <sys/ib/ib_pkt_hdrs.h>

static int tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp,
    tavor_rsrc_t *qpc);
static int tavor_qpn_avl_compare(const void *q, const void *e);
static int tavor_special_qp_rsrc_alloc(tavor_state_t *state,
    ibt_sqp_type_t type, uint_t port, tavor_rsrc_t **qp_rsrc);
static int tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
    uint_t port);
static void tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
    tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);

/*
 * tavor_qp_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
    uint_t sleepflag, tavor_qp_options_t *op)
{
	tavor_rsrc_pool_info_t *rsrc_pool;
	tavor_rsrc_t *qpc, *rsrc, *rdb;
	tavor_umap_db_entry_t *umapdb;
	tavor_qphdl_t qp;
	ibt_qp_alloc_attr_t *attr_p;
	ibt_qp_type_t type;
	ibtl_qp_hdl_t ibt_qphdl;
	ibt_chan_sizes_t *queuesz_p;
	ib_qpn_t *qpn;
	tavor_qphdl_t *qphdl;
	ibt_mr_attr_t mr_attr;
	tavor_mr_options_t mr_op;
	tavor_srqhdl_t srq;
	tavor_pdhdl_t pd;
	tavor_cqhdl_t sq_cq, rq_cq;
	tavor_mrhdl_t mr;
	uint64_t value, qp_desc_off;
	uint32_t *sq_buf, *rq_buf;
	uint32_t log_qp_sq_size, log_qp_rq_size;
	uint32_t sq_size, rq_size;
	uint32_t sq_wqe_size, rq_wqe_size;
	uint32_t max_rdb, max_sgl, uarpg;
	uint_t wq_location, dma_xfer_mode, qp_is_umap;
	uint_t qp_srq_en;
	int status, flag;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether the QP's work queues should come from normal system
	 * memory or whether they should be allocated from DDR memory.
	 */
	if (op == NULL) {
		wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
	} else {
		wq_location = op->qpo_wq_loc;
	}

	/*
	 * Extract the necessary info from the tavor_qp_info_t structure
	 */
	attr_p = qpinfo->qpi_attrp;
	type = qpinfo->qpi_type;
	ibt_qphdl = qpinfo->qpi_ibt_qphdl;
	queuesz_p = qpinfo->qpi_queueszp;
	qpn = qpinfo->qpi_qpn;
	qphdl = &qpinfo->qpi_qphdl;

	/*
	 * Determine whether QP is being allocated for userland access or
	 * whether it is being allocated for kernel access.  If the QP is
	 * being allocated for userland access, then lookup the UAR doorbell
	 * page number for the current process.  Note:  If this is not found
	 * (e.g. if the process has not previously open()'d the Tavor driver),
	 * then an error is returned.
	 */
	qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 1 : 0;
	if (qp_is_umap) {
		status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
		if (status != DDI_SUCCESS) {
			goto qpalloc_fail;
		}
		uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
	}

	/*
	 * Determine whether QP is being associated with an SRQ
	 */
	qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
	if (qp_srq_en) {
		/*
		 * Check for valid SRQ handle pointers
		 */
		if (attr_p->qp_ibc_srq_hdl == NULL) {
			goto qpalloc_fail;
		}
		srq = (tavor_srqhdl_t)attr_p->qp_ibc_srq_hdl;
	}

	/*
	 * Check for valid QP service type (only UD/RC/UC supported)
	 */
	if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
	    (type != IBT_UC_RQP))) {
		goto qpalloc_fail;
	}

	/*
	 * Only RC is supported on an SRQ -- This is a Tavor hardware
	 * limitation.  Arbel native mode will not have this shortcoming.
	 */
	if (qp_srq_en && type != IBT_RC_RQP) {
		goto qpalloc_fail;
	}

	/*
	 * Check for valid PD handle pointer
	 */
	if (attr_p->qp_pd_hdl == NULL) {
		goto qpalloc_fail;
	}
	pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;

	/*
	 * If on an SRQ, check to make sure the PD is the same
	 */
	if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
		goto qpalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Check for valid CQ handle pointers
	 */
	if ((attr_p->qp_ibc_scq_hdl == NULL) ||
	    (attr_p->qp_ibc_rcq_hdl == NULL)) {
		goto qpalloc_fail1;
	}
	sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
	rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;

	/*
	 * Increment the reference count on the CQs.  One or both of these
	 * could return error if we determine that the given CQ is already
	 * being used with a special (SMI/GSI) QP.
	 */
	status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_NORMAL);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail1;
	}
	status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_NORMAL);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail2;
	}

	/*
	 * Allocate a QP context entry.  This will be filled in with all
	 * the necessary parameters to define the Queue Pair.  Unlike
	 * other Tavor hardware resources, ownership is not immediately
	 * given to hardware in the final step here.  Instead, we must
	 * wait until the QP is later transitioned to the "Init" state before
	 * passing the QP to hardware.  If we fail here, we must undo all
	 * the reference counts (CQ and PD).
	 */
	status = tavor_rsrc_alloc(state, TAVOR_QPC, 1, sleepflag, &qpc);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail3;
	}

	/*
	 * Allocate the software structure for tracking the queue pair
	 * (i.e. the Tavor Queue Pair handle).  If we fail here, we must
	 * undo the reference counts and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail4;
	}
	qp = (tavor_qphdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))

	/*
	 * Calculate the QP number from QPC index.  This routine handles
	 * all of the operations necessary to keep track of used, unused,
	 * and released QP numbers.
	 */
	status = tavor_qp_create_qpn(state, qp, qpc);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail5;
	}

	/*
	 * If this will be a user-mappable QP, then allocate an entry for
	 * the "userland resources database".  This will later be added to
	 * the database (after all further QP operations are successful).
	 * If we fail here, we must undo the reference counts and the
	 * previous resource allocation.
	 */
	if (qp_is_umap) {
		umapdb = tavor_umap_db_alloc(state->ts_instance, qp->qp_qpnum,
		    MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			goto qpalloc_fail6;
		}
	}

	/*
	 * If this is an RC QP, then pre-allocate the maximum number of RDB
	 * entries.  This allows us to ensure that we can later cover all
	 * the resources needed by hardware for handling multiple incoming
	 * RDMA Reads.  Note:  These resources are obviously not always
	 * necessary.  They are allocated here anyway.  Someday maybe this
	 * can be modified to allocate these on-the-fly (i.e. only if RDMA
	 * Read or Atomic operations are enabled) XXX
	 * If we fail here, we have a bunch of resource and reference count
	 * cleanup to do.
	 */
	if (type == IBT_RC_RQP) {
		max_rdb = state->ts_cfg_profile->cp_hca_max_rdma_in_qp;
		status = tavor_rsrc_alloc(state, TAVOR_RDB, max_rdb,
		    sleepflag, &rdb);
		if (status != DDI_SUCCESS) {
			goto qpalloc_fail7;
		}
		qp->qp_rdbrsrcp = rdb;
		/* Calculate offset (into DDR memory) of RDB entries */
		rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB];
		qp->qp_rdb_ddraddr = (uintptr_t)rsrc_pool->rsrc_ddr_offset +
		    (rdb->tr_indx << TAVOR_RDB_SIZE_SHIFT);
	}

	/*
	 * Calculate the appropriate size for the work queues.
	 * Note:  All Tavor QP work queues must be a power-of-2 in size.  Also
	 * they may not be any smaller than TAVOR_QP_MIN_SIZE.  This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
	attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
	log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
	if (ISP2(attr_p->qp_sizes.cs_sq)) {
		log_qp_sq_size = log_qp_sq_size - 1;
	}
	log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
	if (ISP2(attr_p->qp_sizes.cs_rq)) {
		log_qp_rq_size = log_qp_rq_size - 1;
	}
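	/*
	 * For illustration:  a requested size of 100 gives highbit(100) = 7,
	 * i.e. a log size of 7 (128 WQEs), while an exact power-of-2 request
	 * such as 64 gives highbit(64) = 7 and, since ISP2() is true, is
	 * adjusted back down to a log size of 6 (64 WQEs).
	 */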

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits).  If not,
	 * then obviously we have a lot of cleanup to do before returning.
	 */
	if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
	    (!qp_srq_en && (log_qp_rq_size >
	    state->ts_cfg_profile->cp_log_max_qp_sz))) {
		goto qpalloc_fail8;
	}

	/*
	 * Next we verify that the requested number of SGL is valid (i.e.
	 * consistent with the device limits and/or software-configured
	 * limits).  If not, then obviously the same cleanup needs to be done.
	 */
	max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
	if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
	    (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_sgl))) {
		goto qpalloc_fail8;
	}

	/*
	 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
	 * This will depend on the requested number of SGLs.  Note: this
	 * has the side-effect of also calculating the real number of SGLs
	 * (for the calculated WQE size).
	 *
	 * For QPs on an SRQ, we set these to 0.
	 */
	if (qp_srq_en) {
		qp->qp_rq_log_wqesz = 0;
		qp->qp_rq_sgl = 0;
	} else {
		tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
		    TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz,
		    &qp->qp_rq_sgl);
	}
	tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
	    TAVOR_QP_WQ_TYPE_SENDQ, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);

	/*
	 * Allocate the memory for QP work queues.  Note:  The location from
	 * which we will allocate these work queues has been passed in
	 * through the tavor_qp_options_t structure.  Since Tavor work queues
	 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
	 * the work queue memory is very important.  We used to allocate
	 * work queues (the combined receive and send queues) so that they
	 * would be aligned on their combined size.  That alignment guaranteed
	 * that they would never cross the 4GB boundary (Tavor work queues
	 * are on the order of MBs at maximum).  Now we are able to relax
	 * this alignment constraint by ensuring that the IB address assigned
	 * to the queue memory (as a result of the tavor_mr_register() call)
	 * is offset from zero.
	 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
	 * guarantee the alignment, but when attempting to use IOMMU bypass
	 * mode we found that we were not allowed to specify any alignment
	 * that was more restrictive than the system page size.
	 * So we avoided this constraint by passing two alignment values,
	 * one for the memory allocation itself and the other for the DMA
	 * handle (for later bind).  This used to cause more memory than
	 * necessary to be allocated (in order to guarantee the more
	 * restrictive alignment constraint).  But by guaranteeing the
	 * zero-based IB virtual address for the queue, we are able to
	 * conserve this memory.
	 * Note:  If QP is not user-mappable, then it may come from either
	 * kernel system memory or from HCA-attached local DDR memory.
	 */
	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
	sq_size = (1 << log_qp_sq_size) * sq_wqe_size;

	/* QP on SRQ sets these to 0 */
	if (qp_srq_en) {
		rq_wqe_size = 0;
		rq_size = 0;
	} else {
		rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
		rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
	}

	qp->qp_wqinfo.qa_size = sq_size + rq_size;
	qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
	qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size);
	if (qp_is_umap) {
		qp->qp_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
	} else {
		qp->qp_wqinfo.qa_location = wq_location;
	}
	status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail8;
	}
	if (sq_wqe_size > rq_wqe_size) {
		sq_buf = qp->qp_wqinfo.qa_buf_aligned;

		/*
		 * If the QP is on an SRQ, we set rq_buf to NULL
		 */
		if (qp_srq_en)
			rq_buf = NULL;
		else
			rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
	} else {
		rq_buf = qp->qp_wqinfo.qa_buf_aligned;
		sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
	}
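	/*
	 * Note:  the queue with the larger WQE size is placed first in the
	 * combined buffer.  Since both WQE sizes are powers-of-2 and the
	 * buffer is aligned on the larger of the two, this keeps each work
	 * queue naturally aligned on its own WQE size.
	 */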

	/*
	 * Register the memory for the QP work queues.  The memory for the
	 * QP must be registered in the Tavor TPT tables.  This gives us the
	 * LKey to specify in the QP context later.  Note:  The memory for
	 * Tavor work queues (both Send and Recv) must be contiguous and
	 * registered as a single memory region.  Note also:  If the work
	 * queue is to be allocated from DDR memory, then only a "bypass"
	 * mapping is appropriate.  And if the QP memory is user-mappable,
	 * then we force DDI_DMA_CONSISTENT mapping.
	 * Also, in order to meet the alignment restriction, we pass the
	 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
	 * This guarantees that the resulting IB vaddr will be zero-based
	 * (modulo the offset into the first page).
	 * If we fail here, we still have the bunch of resource and reference
	 * count cleanup to do.
	 */
	flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
	    IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
	mr_attr.mr_len = qp->qp_wqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag;
	if (qp_is_umap) {
		mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
	} else {
		if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
			mr_op.mro_bind_type =
			    state->ts_cfg_profile->cp_iommu_bypass;
			dma_xfer_mode =
			    state->ts_cfg_profile->cp_streaming_consistent;
			if (dma_xfer_mode == DDI_DMA_STREAMING) {
				mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
			}
		} else {
			mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
		}
	}
	mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
	mr_op.mro_bind_override_addr = 1;
	status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
	if (status != DDI_SUCCESS) {
		goto qpalloc_fail9;
	}

	/*
	 * Calculate the offset between the kernel virtual address space
	 * and the IB virtual address space.  This will be used when
	 * posting work requests to properly initialize each WQE.
	 */
	qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
	    (uint64_t)mr->mr_bindinfo.bi_addr;
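	/*
	 * That is, for any WQE in the queue memory, the IB virtual address
	 * that hardware expects can be derived by subtracting qp_desc_off
	 * from the WQE's kernel virtual address.
	 */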

	/*
	 * Fill in all the return arguments (if necessary).  This includes
	 * real work queue sizes, real SGLs, and QP number
	 */
	if (queuesz_p != NULL) {
		queuesz_p->cs_sq = (1 << log_qp_sq_size);
		queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;

		/* QP on an SRQ sets these to 0 */
		if (qp_srq_en) {
			queuesz_p->cs_rq = 0;
			queuesz_p->cs_rq_sgl = 0;
		} else {
			queuesz_p->cs_rq = (1 << log_qp_rq_size);
			queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
		}
	}
	if (qpn != NULL) {
		*qpn = (ib_qpn_t)qp->qp_qpnum;
	}

	/*
	 * Fill in the rest of the Tavor Queue Pair handle.  We can update
	 * the following fields for use in further operations on the QP.
	 */
	qp->qp_qpcrsrcp = qpc;
	qp->qp_rsrcp = rsrc;
	qp->qp_state = TAVOR_QP_RESET;
	qp->qp_pdhdl = pd;
	qp->qp_mrhdl = mr;
	qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
	    TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
	qp->qp_is_special = 0;
	qp->qp_is_umap = qp_is_umap;
	qp->qp_uarpg = (qp->qp_is_umap) ? uarpg : 0;
	qp->qp_umap_dhp = (devmap_cookie_t)NULL;
	qp->qp_sq_cqhdl = sq_cq;
	qp->qp_sq_lastwqeaddr = NULL;
	qp->qp_sq_bufsz = (1 << log_qp_sq_size);
	qp->qp_sq_buf = sq_buf;
	qp->qp_desc_off = qp_desc_off;
	qp->qp_rq_cqhdl = rq_cq;
	qp->qp_rq_lastwqeaddr = NULL;
	qp->qp_rq_buf = rq_buf;

	/* QP on an SRQ sets this to 0 */
	if (qp_srq_en) {
		qp->qp_rq_bufsz = 0;
	} else {
		qp->qp_rq_bufsz = (1 << log_qp_rq_size);
	}

	qp->qp_forward_sqd_event = 0;
	qp->qp_sqd_still_draining = 0;
	qp->qp_hdlrarg = (void *)ibt_qphdl;
	qp->qp_mcg_refcnt = 0;

	/*
	 * If this QP is to be associated with an SRQ, then set the SRQ handle
	 * appropriately.
	 */
	if (qp_srq_en) {
		qp->qp_srqhdl = srq;
		qp->qp_srq_en = TAVOR_QP_SRQ_ENABLED;
		tavor_srq_refcnt_inc(qp->qp_srqhdl);
	} else {
		qp->qp_srqhdl = NULL;
		qp->qp_srq_en = TAVOR_QP_SRQ_DISABLED;
	}

	/* Determine if later ddi_dma_sync will be necessary */
	qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);

	/* Determine the QP service type */
	if (type == IBT_RC_RQP) {
		qp->qp_serv_type = TAVOR_QP_RC;
	} else if (type == IBT_UD_RQP) {
		qp->qp_serv_type = TAVOR_QP_UD;
	} else {
		qp->qp_serv_type = TAVOR_QP_UC;
	}

	/* Zero out the QP context */
	bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));

	/*
	 * Put QP handle in Tavor QPNum-to-QPHdl list.  Then fill in the
	 * "qphdl" and return success
	 */
	ASSERT(state->ts_qphdl[qpc->tr_indx] == NULL);
	state->ts_qphdl[qpc->tr_indx] = qp;

	/*
	 * If this is a user-mappable QP, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later lookup during devmap() (i.e. mmap()) calls.
	 */
	if (qp_is_umap) {
		tavor_umap_db_add(umapdb);
	}

	*qphdl = qp;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Note that these labels intentionally fall through:  each entry point also
 * executes all of the cleanup steps that follow it.
 */
qpalloc_fail9:
	tavor_queue_free(state, &qp->qp_wqinfo);
qpalloc_fail8:
	if (type == IBT_RC_RQP) {
		tavor_rsrc_free(state, &rdb);
	}
qpalloc_fail7:
	if (qp_is_umap) {
		tavor_umap_db_free(umapdb);
	}
qpalloc_fail6:
	/*
	 * Releasing the QPN will also free up the QPC context.  Update
	 * the QPC context pointer to indicate this.
	 */
	tavor_qp_release_qpn(state, qp->qp_qpn_hdl, TAVOR_QPN_RELEASE);
	qpc = NULL;
qpalloc_fail5:
	tavor_rsrc_free(state, &rsrc);
qpalloc_fail4:
	if (qpc) {
		tavor_rsrc_free(state, &qpc);
	}
qpalloc_fail3:
	tavor_cq_refcnt_dec(rq_cq);
qpalloc_fail2:
	tavor_cq_refcnt_dec(sq_cq);
qpalloc_fail1:
	tavor_pd_refcnt_dec(pd);
qpalloc_fail:
	return (status);
}


/*
 * tavor_special_qp_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
tavor_special_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
    uint_t sleepflag, tavor_qp_options_t *op)
{
	tavor_rsrc_t *qpc, *rsrc;
	tavor_qphdl_t qp;
	ibt_qp_alloc_attr_t *attr_p;
	ibt_sqp_type_t type;
	uint8_t port;
	ibtl_qp_hdl_t ibt_qphdl;
	ibt_chan_sizes_t *queuesz_p;
	tavor_qphdl_t *qphdl;
	ibt_mr_attr_t mr_attr;
	tavor_mr_options_t mr_op;
	tavor_pdhdl_t pd;
	tavor_cqhdl_t sq_cq, rq_cq;
	tavor_mrhdl_t mr;
	uint64_t qp_desc_off;
	uint32_t *sq_buf, *rq_buf;
	uint32_t log_qp_sq_size, log_qp_rq_size;
	uint32_t sq_size, rq_size, max_sgl;
	uint32_t sq_wqe_size, rq_wqe_size;
	uint_t wq_location, dma_xfer_mode;
	int status, flag;

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether the QP's work queues should come from normal system
	 * memory or whether they should be allocated from DDR memory.
	 */
	if (op == NULL) {
		wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
	} else {
		wq_location = op->qpo_wq_loc;
	}

	/*
	 * Extract the necessary info from the tavor_qp_info_t structure
	 */
	attr_p = qpinfo->qpi_attrp;
	type = qpinfo->qpi_type;
	port = qpinfo->qpi_port;
	ibt_qphdl = qpinfo->qpi_ibt_qphdl;
	queuesz_p = qpinfo->qpi_queueszp;
	qphdl = &qpinfo->qpi_qphdl;

	/*
	 * Check for valid special QP type (only SMI & GSI supported)
	 */
	if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
		goto spec_qpalloc_fail;
	}

	/*
	 * Check for valid port number
	 */
	if (!tavor_portnum_is_valid(state, port)) {
		goto spec_qpalloc_fail;
	}
	port = port - 1;

	/*
	 * Check for valid PD handle pointer
	 */
	if (attr_p->qp_pd_hdl == NULL) {
		goto spec_qpalloc_fail;
	}
	pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;

	/* Increment the reference count on the PD */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Check for valid CQ handle pointers
	 */
	if ((attr_p->qp_ibc_scq_hdl == NULL) ||
	    (attr_p->qp_ibc_rcq_hdl == NULL)) {
		goto spec_qpalloc_fail1;
	}
	sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
	rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;

	/*
	 * Increment the reference count on the CQs.  One or both of these
	 * could return error if we determine that the given CQ is already
	 * being used with a non-special QP (i.e. a normal QP).
	 */
	status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_SPECIAL);
	if (status != DDI_SUCCESS) {
		goto spec_qpalloc_fail1;
	}
	status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_SPECIAL);
	if (status != DDI_SUCCESS) {
		goto spec_qpalloc_fail2;
	}

	/*
	 * Allocate the special QP resources.  Essentially, this allocation
	 * amounts to checking if the requested special QP has already been
	 * allocated.  If successful, the QP context returned is an actual
	 * QP context that has been "aliased" to act as a special QP of the
	 * appropriate type (and for the appropriate port).  Just as in
	 * tavor_qp_alloc() above, ownership for this QP context is not
	 * immediately given to hardware in the final step here.  Instead, we
	 * wait until the QP is later transitioned to the "Init" state before
	 * passing the QP to hardware.  If we fail here, we must undo all
	 * the reference counts (CQ and PD).
	 */
	status = tavor_special_qp_rsrc_alloc(state, type, port, &qpc);
	if (status != DDI_SUCCESS) {
		goto spec_qpalloc_fail3;
	}

	/*
	 * Allocate the software structure for tracking the special queue
	 * pair (i.e. the Tavor Queue Pair handle).  If we fail here, we
	 * must undo the reference counts and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		goto spec_qpalloc_fail4;
	}
	qp = (tavor_qphdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))

	/*
	 * Actual QP number is a combination of the index of the QPC and
	 * the port number.  This is because the special QP contexts must
	 * be allocated two-at-a-time.
	 */
	qp->qp_qpnum = qpc->tr_indx + port;

	/*
	 * Calculate the appropriate size for the work queues.
	 * Note:  All Tavor QP work queues must be a power-of-2 in size.  Also
	 * they may not be any smaller than TAVOR_QP_MIN_SIZE.  This step is
	 * to round the requested size up to the next highest power-of-2
	 */
	attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
	attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
	log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
	if (ISP2(attr_p->qp_sizes.cs_sq)) {
		log_qp_sq_size = log_qp_sq_size - 1;
	}
	log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
	if (ISP2(attr_p->qp_sizes.cs_rq)) {
		log_qp_rq_size = log_qp_rq_size - 1;
	}
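	/*
	 * (This is the same highbit()/ISP2() round-up used in
	 * tavor_qp_alloc() above; see the worked example there.)
	 */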

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits).  If not,
	 * then obviously we have a bit of cleanup to do before returning.
	 */
	if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
	    (log_qp_rq_size > state->ts_cfg_profile->cp_log_max_qp_sz)) {
		goto spec_qpalloc_fail5;
	}

	/*
	 * Next we verify that the requested number of SGL is valid (i.e.
	 * consistent with the device limits and/or software-configured
	 * limits).  If not, then obviously the same cleanup needs to be done.
	 */
	max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
	if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
	    (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
		goto spec_qpalloc_fail5;
	}

	/*
	 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
	 * This will depend on the requested number of SGLs.  Note: this
	 * has the side-effect of also calculating the real number of SGLs
	 * (for the calculated WQE size).
	 */
	tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
	    TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
	if (type == IBT_SMI_SQP) {
		tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
		    TAVOR_QP_WQ_TYPE_SENDMLX_QP0, &qp->qp_sq_log_wqesz,
		    &qp->qp_sq_sgl);
	} else {
		tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
		    TAVOR_QP_WQ_TYPE_SENDMLX_QP1, &qp->qp_sq_log_wqesz,
		    &qp->qp_sq_sgl);
	}

	/*
	 * Allocate the memory for QP work queues.  Note:  The location from
	 * which we will allocate these work queues has been passed in
	 * through the tavor_qp_options_t structure.  Since Tavor work queues
	 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
	 * the work queue memory is very important.  We used to allocate
	 * work queues (the combined receive and send queues) so that they
	 * would be aligned on their combined size.  That alignment guaranteed
	 * that they would never cross the 4GB boundary (Tavor work queues
	 * are on the order of MBs at maximum).  Now we are able to relax
	 * this alignment constraint by ensuring that the IB address assigned
	 * to the queue memory (as a result of the tavor_mr_register() call)
	 * is offset from zero.
	 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
	 * guarantee the alignment, but when attempting to use IOMMU bypass
	 * mode we found that we were not allowed to specify any alignment
	 * that was more restrictive than the system page size.
	 * So we avoided this constraint by passing two alignment values,
	 * one for the memory allocation itself and the other for the DMA
	 * handle (for later bind).  This used to cause more memory than
	 * necessary to be allocated (in order to guarantee the more
	 * restrictive alignment constraint).  But by guaranteeing the
	 * zero-based IB virtual address for the queue, we are able to
	 * conserve this memory.
	 */
	sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
	rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
	sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
	rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
	qp->qp_wqinfo.qa_size = sq_size + rq_size;
	qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
	qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size);
	qp->qp_wqinfo.qa_location = wq_location;
	status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
	if (status != 0) {
		goto spec_qpalloc_fail5;
	}
	if (sq_wqe_size > rq_wqe_size) {
		sq_buf = qp->qp_wqinfo.qa_buf_aligned;
		rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
	} else {
		rq_buf = qp->qp_wqinfo.qa_buf_aligned;
		sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
	}

	/*
	 * Register the memory for the special QP work queues.  The memory for
	 * the special QP must be registered in the Tavor TPT tables.  This
	 * gives us the LKey to specify in the QP context later.  Note:  The
	 * memory for Tavor work queues (both Send and Recv) must be contiguous
	 * and registered as a single memory region.  Note also:  If the work
	 * queue is to be allocated from DDR memory, then only a "bypass"
	 * mapping is appropriate.
	 * Also, in order to meet the alignment restriction, we pass the
	 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
	 * This guarantees that the resulting IB vaddr will be zero-based
	 * (modulo the offset into the first page).
	 * If we fail here, we have a bunch of resource and reference count
	 * cleanup to do.
	 */
	flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
	    IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
	mr_attr.mr_len = qp->qp_wqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag;
	if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
		mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;

		dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
		if (dma_xfer_mode == DDI_DMA_STREAMING) {
			mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
		}
	} else {
		mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
	}
	mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
	mr_op.mro_bind_override_addr = 1;
	status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
	if (status != DDI_SUCCESS) {
		goto spec_qpalloc_fail6;
	}

	/*
	 * Calculate the offset between the kernel virtual address space
	 * and the IB virtual address space.  This will be used when
	 * posting work requests to properly initialize each WQE.
	 */
	qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
	    (uint64_t)mr->mr_bindinfo.bi_addr;

	/*
	 * Fill in all the return arguments (if necessary).  This includes
	 * real work queue sizes, real SGLs, and QP number (which will be
	 * either zero or one, depending on the special QP type)
	 */
	if (queuesz_p != NULL) {
		queuesz_p->cs_sq = (1 << log_qp_sq_size);
		queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
		queuesz_p->cs_rq = (1 << log_qp_rq_size);
		queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
	}

	/*
	 * Fill in the rest of the Tavor Queue Pair handle.  We can update
	 * the following fields for use in further operations on the QP.
	 */
	qp->qp_qpcrsrcp = qpc;
	qp->qp_rsrcp = rsrc;
	qp->qp_state = TAVOR_QP_RESET;
	qp->qp_pdhdl = pd;
	qp->qp_mrhdl = mr;
	qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
	    TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
	qp->qp_is_special = (type == IBT_SMI_SQP) ?
	    TAVOR_QP_SMI : TAVOR_QP_GSI;
	qp->qp_is_umap = 0;
	qp->qp_uarpg = 0;
	qp->qp_sq_cqhdl = sq_cq;
	qp->qp_sq_lastwqeaddr = NULL;
	qp->qp_sq_bufsz = (1 << log_qp_sq_size);
	qp->qp_sq_buf = sq_buf;
	qp->qp_desc_off = qp_desc_off;
	qp->qp_rq_cqhdl = rq_cq;
	qp->qp_rq_lastwqeaddr = NULL;
	qp->qp_rq_bufsz = (1 << log_qp_rq_size);
	qp->qp_rq_buf = rq_buf;
	qp->qp_portnum = port;
	qp->qp_pkeyindx = 0;
	qp->qp_hdlrarg = (void *)ibt_qphdl;
	qp->qp_mcg_refcnt = 0;
	qp->qp_srq_en = 0;
	qp->qp_srqhdl = NULL;

	/* Determine if later ddi_dma_sync will be necessary */
	qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);

	/* All special QPs are UD QP service type */
	qp->qp_serv_type = TAVOR_QP_UD;

	/* Zero out the QP context */
	bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));

	/*
	 * Put QP handle in Tavor QPNum-to-QPHdl list.  Then fill in the
	 * "qphdl" and return success
	 */
	ASSERT(state->ts_qphdl[qpc->tr_indx + port] == NULL);
	state->ts_qphdl[qpc->tr_indx + port] = qp;

	*qphdl = qp;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
spec_qpalloc_fail6:
	tavor_queue_free(state, &qp->qp_wqinfo);
spec_qpalloc_fail5:
	tavor_rsrc_free(state, &rsrc);
spec_qpalloc_fail4:
	if (tavor_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to free special QP rsrc");
	}
spec_qpalloc_fail3:
	tavor_cq_refcnt_dec(rq_cq);
spec_qpalloc_fail2:
	tavor_cq_refcnt_dec(sq_cq);
spec_qpalloc_fail1:
	tavor_pd_refcnt_dec(pd);
spec_qpalloc_fail:
	return (status);
}


/*
 * tavor_qp_free()
 *    This function frees up the QP resources.  Depending on the value
 *    of the "free_qp_flags", the QP number may not be released until
 *    a subsequent call to tavor_qp_release_qpn().
 *
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
tavor_qp_free(tavor_state_t *state, tavor_qphdl_t *qphdl,
    ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
    uint_t sleepflag)
{
	tavor_rsrc_t *qpc, *rdb, *rsrc;
	tavor_umap_db_entry_t *umapdb;
	tavor_qpn_entry_t *entry;
	tavor_pdhdl_t pd;
	tavor_mrhdl_t mr;
	tavor_cqhdl_t sq_cq, rq_cq;
	tavor_srqhdl_t srq;
	tavor_qphdl_t qp;
	uint64_t value;
	uint_t type, port;
	uint_t maxprot;
	uint_t qp_srq_en;
	int status;

	/*
	 * Pull all the necessary information from the Tavor Queue Pair
	 * handle.  This is necessary here because the resource for the
	 * QP handle is going to be freed up as part of this operation.
	 */
	qp = *qphdl;
	mutex_enter(&qp->qp_lock);
	qpc = qp->qp_qpcrsrcp;
	rsrc = qp->qp_rsrcp;
	pd = qp->qp_pdhdl;
	srq = qp->qp_srqhdl;
	mr = qp->qp_mrhdl;
	rq_cq = qp->qp_rq_cqhdl;
	sq_cq = qp->qp_sq_cqhdl;
	rdb = qp->qp_rdbrsrcp;
	port = qp->qp_portnum;
	qp_srq_en = qp->qp_srq_en;

	/*
	 * If the QP is part of an MCG, then we fail the qp_free
	 */
	if (qp->qp_mcg_refcnt != 0) {
		mutex_exit(&qp->qp_lock);
		goto qpfree_fail;
	}

	/*
	 * If the QP is not already in "Reset" state, then transition to
	 * "Reset".  This is necessary because software does not reclaim
	 * ownership of the QP context until the QP is in the "Reset" state.
	 * If the ownership transfer fails for any reason, then it is an
	 * indication that something (either in HW or SW) has gone seriously
	 * wrong.  So we print a warning message and return.
	 */
	if (qp->qp_state != TAVOR_QP_RESET) {
		if (tavor_qp_to_reset(state, qp) != DDI_SUCCESS) {
			mutex_exit(&qp->qp_lock);
			TAVOR_WARNING(state, "failed to reset QP context");
			goto qpfree_fail;
		}
		qp->qp_state = TAVOR_QP_RESET;

		/*
		 * Do any additional handling necessary for the transition
		 * to the "Reset" state (e.g. update the WRID lists)
		 */
		tavor_wrid_to_reset_handling(state, qp);
	}

	/*
	 * If this was a user-mappable QP, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
	 * We also need to invalidate the QP tracking information for the
	 * user mapping.
	 */
	if (qp->qp_is_umap) {
		status = tavor_umap_db_find(state->ts_instance, qp->qp_qpnum,
		    MLNX_UMAP_QPMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&qp->qp_lock);
			TAVOR_WARNING(state, "failed to find in database");
			return (ibc_get_ci_failure(0));
		}
		tavor_umap_db_free(umapdb);
		if (qp->qp_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(qp->qp_umap_dhp,
			    state->ts_dip, 0, 0, qp->qp_wqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&qp->qp_lock);
				TAVOR_WARNING(state, "failed in QP memory "
				    "devmap_devmem_remap()");
				return (ibc_get_ci_failure(0));
			}
			qp->qp_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Tavor QPNum-to-QPHdl list.  This will allow any
	 * in-progress events to detect that the QP corresponding to this
	 * number has been freed.  Note:  it does depend on whether we are
	 * freeing a special QP or not.
	 */
	if (qp->qp_is_special) {
		state->ts_qphdl[qpc->tr_indx + port] = NULL;
	} else {
		state->ts_qphdl[qpc->tr_indx] = NULL;
	}

	/*
	 * Drop the QP lock
	 *    At this point the lock is no longer necessary.  We cannot
	 *    protect from multiple simultaneous calls to free the same QP.
	 *    In addition, since the QP lock is contained in the QP "software
	 *    handle" resource, which we will free (see below), it is
	 *    important that we have no further references to that memory.
	 */
	mutex_exit(&qp->qp_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))

	/*
	 * Free the QP resources
	 *    Start by deregistering and freeing the memory for work queues.
	 *    Next free any previously allocated context information
	 *    (depending on QP type).
	 *    Finally, decrement the necessary reference counts.
	 * If this fails for any reason, then it is an indication that
	 * something (either in HW or SW) has gone seriously wrong.  So we
	 * print a warning message and return.
	 */
	status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to deregister QP memory");
		goto qpfree_fail;
	}

	/* Free the memory for the QP */
	tavor_queue_free(state, &qp->qp_wqinfo);

	/*
	 * Free up the remainder of the QP resources.  Note: we have a few
	 * different resources to free up depending on whether the QP is a
	 * special QP or not.  As described above, if any of these fail for
	 * any reason it is an indication that something (either in HW or SW)
	 * has gone seriously wrong.  So we print a warning message and
	 * return.
	 */
	if (qp->qp_is_special) {
		type = (qp->qp_is_special == TAVOR_QP_SMI) ?
		    IBT_SMI_SQP : IBT_GSI_SQP;

		/* Free up resources for the special QP */
		status = tavor_special_qp_rsrc_free(state, type, port);
		if (status != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to free special QP rsrc");
			goto qpfree_fail;
		}

	} else {
		type = qp->qp_serv_type;

		/* Free up the RDB entries resource */
		if (type == TAVOR_QP_RC) {
			tavor_rsrc_free(state, &rdb);
		}

		/*
		 * Check the flags and determine whether to release the
		 * QPN or not, based on their value.
		 */
		if (free_qp_flags == IBC_FREE_QP_ONLY) {
			entry = qp->qp_qpn_hdl;
			tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
			    TAVOR_QPN_FREE_ONLY);
			*qpnh = (ibc_qpn_hdl_t)entry;
		} else {
			tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
			    TAVOR_QPN_RELEASE);
		}
	}

	/* Free the Tavor Queue Pair handle */
	tavor_rsrc_free(state, &rsrc);

	/* Decrement the reference counts on CQs, PD and SRQ (if needed) */
	tavor_cq_refcnt_dec(rq_cq);
	tavor_cq_refcnt_dec(sq_cq);
	tavor_pd_refcnt_dec(pd);
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		tavor_srq_refcnt_dec(srq);
	}

	/* Set the qphdl pointer to NULL and return success */
	*qphdl = NULL;

	return (DDI_SUCCESS);

qpfree_fail:
	return (status);
}


/*
 * tavor_qp_query()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_qp_query(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_qp_query_attr_t *attr_p)
{
	ibt_cep_state_t qp_state;
	ibt_qp_ud_attr_t *ud;
	ibt_qp_rc_attr_t *rc;
	ibt_qp_uc_attr_t *uc;
	ibt_cep_flags_t enable_flags;
	tavor_hw_addr_path_t *qpc_path, *qpc_alt_path;
	ibt_cep_path_t *path_ptr, *alt_path_ptr;
	tavor_hw_qpc_t *qpc;
	int status;

	mutex_enter(&qp->qp_lock);

	/*
	 * Grab the temporary QPC entry from QP software state
	 */
	qpc = &qp->qpc;

	/* Convert the current Tavor QP state to IBTF QP state */
	switch (qp->qp_state) {
	case TAVOR_QP_RESET:
		qp_state = IBT_STATE_RESET;		/* "Reset" */
		break;
	case TAVOR_QP_INIT:
		qp_state = IBT_STATE_INIT;		/* Initialized */
		break;
	case TAVOR_QP_RTR:
		qp_state = IBT_STATE_RTR;		/* Ready to Receive */
		break;
	case TAVOR_QP_RTS:
		qp_state = IBT_STATE_RTS;		/* Ready to Send */
		break;
	case TAVOR_QP_SQERR:
		qp_state = IBT_STATE_SQE;		/* Send Queue Error */
		break;
	case TAVOR_QP_SQD:
		if (qp->qp_sqd_still_draining) {
			qp_state = IBT_STATE_SQDRAIN;	/* SQ Draining */
		} else {
			qp_state = IBT_STATE_SQD;	/* SQ Drained */
		}
		break;
	case TAVOR_QP_ERR:
		qp_state = IBT_STATE_ERROR;		/* Error */
		break;
	default:
		mutex_exit(&qp->qp_lock);
		return (ibc_get_ci_failure(0));
	}
	attr_p->qp_info.qp_state = qp_state;

	/* SRQ Hook. */
	attr_p->qp_srq = NULL;

	/*
	 * The following QP information is always returned, regardless of
	 * the current QP state.  Note:  Some special handling is necessary
	 * for calculating the QP number on special QPs (QP0 and QP1).
	 */
	attr_p->qp_sq_cq = qp->qp_sq_cqhdl->cq_hdlrarg;
	attr_p->qp_rq_cq = qp->qp_rq_cqhdl->cq_hdlrarg;
	if (qp->qp_is_special) {
		attr_p->qp_qpn = (qp->qp_is_special == TAVOR_QP_SMI) ? 0 : 1;
	} else {
		attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
	}
	attr_p->qp_sq_sgl = qp->qp_sq_sgl;
	attr_p->qp_rq_sgl = qp->qp_rq_sgl;
	attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz;
	attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;

	/*
	 * If QP is currently in the "Reset" state, then only the above are
	 * returned
	 */
	if (qp_state == IBT_STATE_RESET) {
		mutex_exit(&qp->qp_lock);
		return (DDI_SUCCESS);
	}

	/*
	 * Post QUERY_QP command to firmware
	 *
	 * We do a TAVOR_NOSLEEP here because we are holding the "qp_lock".
	 * Since we may be in the interrupt context (or subsequently raised
	 * to interrupt level by priority inversion), we do not want to block
	 * in this routine waiting for success.
	 */
	status = tavor_cmn_query_cmd_post(state, QUERY_QP, qp->qp_qpnum,
	    qpc, sizeof (tavor_hw_qpc_t), TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&qp->qp_lock);
		cmn_err(CE_CONT, "Tavor: QUERY_QP command failed: %08x\n",
		    status);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Fill in the additional QP info based on the QP's transport type.
	 */
	if (qp->qp_serv_type == TAVOR_QP_UD) {

		/* Fill in the UD-specific info */
		ud = &attr_p->qp_info.qp_transport.ud;
		ud->ud_qkey = (ib_qkey_t)qpc->qkey;
		ud->ud_sq_psn = qpc->next_snd_psn;
		ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx;
		ud->ud_port = qpc->pri_addr_path.portnum;

		attr_p->qp_info.qp_trans = IBT_UD_SRV;

	} else if (qp->qp_serv_type == TAVOR_QP_RC) {

		/* Fill in the RC-specific info */
		rc = &attr_p->qp_info.qp_transport.rc;
		rc->rc_sq_psn = qpc->next_snd_psn;
		rc->rc_rq_psn = qpc->next_rcv_psn;
		rc->rc_dst_qpn = qpc->rem_qpn;

		/* Grab the path migration state information */
		if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
			rc->rc_mig_state = IBT_STATE_MIGRATED;
		} else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
			rc->rc_mig_state = IBT_STATE_REARMED;
		} else {
			rc->rc_mig_state = IBT_STATE_ARMED;
		}
		rc->rc_rdma_ra_out = (1 << qpc->sra_max);
		rc->rc_rdma_ra_in = (1 << qpc->rra_max);
		rc->rc_min_rnr_nak = qpc->min_rnr_nak;
		rc->rc_path_mtu = qpc->mtu;
		rc->rc_retry_cnt = qpc->retry_cnt;

		/* Get the common primary address path fields */
		qpc_path = &qpc->pri_addr_path;
		path_ptr = &rc->rc_path;
		tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
		    TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional primary address path fields */
		path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
		path_ptr->cep_hca_port_num = qpc_path->portnum;
		path_ptr->cep_timeout = qpc_path->ack_timeout;

		/* Get the common alternate address path fields */
		qpc_alt_path = &qpc->alt_addr_path;
		alt_path_ptr = &rc->rc_alt_path;
		tavor_get_addr_path(state, qpc_alt_path,
		    &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional alternate address path fields */
		alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
		alt_path_ptr->cep_hca_port_num = qpc_alt_path->portnum;
		alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout;

		/* Get the RNR retry time from primary path */
		rc->rc_rnr_retry_cnt = qpc_path->rnr_retry;

		/* Set the enable flags based on RDMA/Atomic enable bits */
		enable_flags = IBT_CEP_NO_FLAGS;
		enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
		enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
		enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
		attr_p->qp_info.qp_flags = enable_flags;

		attr_p->qp_info.qp_trans = IBT_RC_SRV;

	} else if (qp->qp_serv_type == TAVOR_QP_UC) {

		/* Fill in the UC-specific info */
		uc = &attr_p->qp_info.qp_transport.uc;
		uc->uc_sq_psn = qpc->next_snd_psn;
		uc->uc_rq_psn = qpc->next_rcv_psn;
		uc->uc_dst_qpn = qpc->rem_qpn;

		/* Grab the path migration state information */
		if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
			uc->uc_mig_state = IBT_STATE_MIGRATED;
		} else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
			uc->uc_mig_state = IBT_STATE_REARMED;
		} else {
			uc->uc_mig_state = IBT_STATE_ARMED;
		}
		uc->uc_path_mtu = qpc->mtu;

		/* Get the common primary address path fields */
		qpc_path = &qpc->pri_addr_path;
		path_ptr = &uc->uc_path;
		tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
		    TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional primary address path fields */
		path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
		path_ptr->cep_hca_port_num = qpc_path->portnum;

		/* Get the common alternate address path fields */
		qpc_alt_path = &qpc->alt_addr_path;
		alt_path_ptr = &uc->uc_alt_path;
		tavor_get_addr_path(state, qpc_alt_path,
		    &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional alternate address path fields */
		alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
		alt_path_ptr->cep_hca_port_num = qpc_alt_path->portnum;

		/*
		 * Set the enable flags based on RDMA enable bits (by
		 * definition UC doesn't support Atomic or RDMA Read)
		 */
		enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
		attr_p->qp_info.qp_flags = enable_flags;

		attr_p->qp_info.qp_trans = IBT_UC_SRV;

	} else {
		TAVOR_WARNING(state, "unexpected QP transport type");
		mutex_exit(&qp->qp_lock);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Under certain circumstances it is possible for the Tavor hardware
	 * to transition to one of the error states without software directly
	 * knowing about it.  The QueryQP() call is the one place where we
	 * have an opportunity to sample and update our view of the QP state.
	 */
	if (qpc->state == TAVOR_QP_SQERR) {
		attr_p->qp_info.qp_state = IBT_STATE_SQE;
		qp->qp_state = TAVOR_QP_SQERR;
	}
	if (qpc->state == TAVOR_QP_ERR) {
		attr_p->qp_info.qp_state = IBT_STATE_ERROR;
		qp->qp_state = TAVOR_QP_ERR;
	}
	mutex_exit(&qp->qp_lock);

	return (DDI_SUCCESS);
}


/*
 * tavor_qp_create_qpn()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp, tavor_rsrc_t *qpc)
{
	tavor_qpn_entry_t query;
	tavor_qpn_entry_t *entry;
	avl_index_t where;

	/*
	 * Build a query (for the AVL tree lookup) and attempt to find
	 * a previously added entry that has a matching QPC index.  If
	 * no matching entry is found, then allocate, initialize, and
	 * add an entry to the AVL tree.
	 * If a matching entry is found, then increment its QPN counter
	 * and reference counter.
	 */
	query.qpn_indx = qpc->tr_indx;
	mutex_enter(&state->ts_qpn_avl_lock);
	entry = (tavor_qpn_entry_t *)avl_find(&state->ts_qpn_avl,
	    &query, &where);
	if (entry == NULL) {
		/*
		 * Allocate and initialize a QPN entry, then insert
		 * it into the AVL tree.
		 */
		entry = (tavor_qpn_entry_t *)kmem_zalloc(
		    sizeof (tavor_qpn_entry_t), KM_NOSLEEP);
		if (entry == NULL) {
			mutex_exit(&state->ts_qpn_avl_lock);
			return (DDI_FAILURE);
		}
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))

		entry->qpn_indx = qpc->tr_indx;
		entry->qpn_refcnt = 0;
		entry->qpn_counter = 0;

		avl_insert(&state->ts_qpn_avl, entry, where);
	}

	/*
	 * Make the AVL tree entry point to the QP context resource that
	 * it will be responsible for tracking
	 */
	entry->qpn_qpc = qpc;

	/*
	 * Setup the QP handle to point to the AVL tree entry.  Then
	 * generate the new QP number from the entry's QPN counter value
	 * and the hardware's QP context table index.
	 */
	qp->qp_qpn_hdl = entry;
	qp->qp_qpnum = ((entry->qpn_counter <<
	    state->ts_cfg_profile->cp_log_num_qp) | qpc->tr_indx) &
	    TAVOR_QP_MAXNUMBER_MSK;
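	/*
	 * For example, assuming cp_log_num_qp = 16:  the low 16 bits of the
	 * QP number are the QPC table index, and the bits above them come
	 * from the per-entry counter.  Successive reuses of the same QPC
	 * index thus yield distinct (24-bit masked) QP numbers.
	 */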

	/*
	 * Increment the reference counter and QPN counter.  The QPN
	 * counter always indicates the next available number for use.
	 */
	entry->qpn_counter++;
	entry->qpn_refcnt++;

	mutex_exit(&state->ts_qpn_avl_lock);
	return (DDI_SUCCESS);
}


/*
 * tavor_qp_release_qpn()
 *    Context: Can be called only from user or kernel context.
 */
void
tavor_qp_release_qpn(tavor_state_t *state, tavor_qpn_entry_t *entry, int flags)
{
	ASSERT(entry != NULL);

	mutex_enter(&state->ts_qpn_avl_lock);

	/*
	 * If we are releasing the QP number here, then we decrement the
	 * reference count and check for zero references.  If there are
	 * zero references, then we free the QPC context (if it hadn't
	 * already been freed during a TAVOR_QPN_FREE_ONLY free, i.e. for
	 * reuse with another similar QP number) and remove the tracking
	 * structure from the QP number AVL tree and free the structure.
	 * If we are not releasing the QP number here, then, as long as we
	 * have not exhausted the usefulness of the QPC context (that is,
	 * re-used it too many times without the reference count having
	 * gone to zero), we free up the QPC context for use by another
	 * thread (which will use it to construct a different QP number
	 * from the same QPC table index).
	 */
	if (flags == TAVOR_QPN_RELEASE) {
		entry->qpn_refcnt--;

		/*
		 * If the reference count is zero, then we free the QPC
		 * context (if it hadn't already been freed in an earlier
		 * step, e.g. TAVOR_QPN_FREE_ONLY) and remove/free the
		 * tracking structure from the QP number AVL tree.
		 */
		if (entry->qpn_refcnt == 0) {
			if (entry->qpn_qpc != NULL) {
				tavor_rsrc_free(state, &entry->qpn_qpc);
			}

			/*
			 * If the current entry has served its useful
			 * purpose (i.e. been reused the maximum allowable
			 * number of times, exhausting the counter values
			 * available in the unconstrained bits of the 24-bit
			 * QP number), then remove it from the QP number
			 * AVL tree and free it up.
			 */
			if (entry->qpn_counter >= (1 <<
			    (24 - state->ts_cfg_profile->cp_log_num_qp))) {
				avl_remove(&state->ts_qpn_avl, entry);
				kmem_free(entry, sizeof (tavor_qpn_entry_t));
			}
		}

	} else if (flags == TAVOR_QPN_FREE_ONLY) {
		/*
		 * Even if we are not freeing the QP number, that will not
		 * always prevent us from releasing the QPC context.  In fact,
		 * since the QPC context only forms part of the whole QPN,
		 * we want to free it up for use by other consumers.  But
		 * if the reference count is non-zero (which it will always
		 * be when we are doing TAVOR_QPN_FREE_ONLY) and the counter
		 * has reached its maximum value, then we cannot reuse the
		 * QPC context until the reference count eventually reaches
		 * zero (in TAVOR_QPN_RELEASE, above).
		 */
		if (entry->qpn_counter < (1 <<
		    (24 - state->ts_cfg_profile->cp_log_num_qp))) {
			tavor_rsrc_free(state, &entry->qpn_qpc);
		}
	}
	mutex_exit(&state->ts_qpn_avl_lock);
}


/*
 * tavor_qpn_avl_compare()
 *    Context: Can be called from user or kernel context.
 */
static int
tavor_qpn_avl_compare(const void *q, const void *e)
{
	tavor_qpn_entry_t *entry, *query;

	entry = (tavor_qpn_entry_t *)e;
	query = (tavor_qpn_entry_t *)q;

	if (query->qpn_indx < entry->qpn_indx) {
		return (-1);
	} else if (query->qpn_indx > entry->qpn_indx) {
		return (+1);
	} else {
		return (0);
	}
}


/*
 * tavor_qpn_avl_init()
 *    Context: Only called from attach() path context
 */
void
tavor_qpn_avl_init(tavor_state_t *state)
{
	/* Initialize the lock used for QP number (QPN) AVL tree access */
	mutex_init(&state->ts_qpn_avl_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->ts_intrmsi_pri));

	/* Initialize the AVL tree for the QP number (QPN) storage */
	avl_create(&state->ts_qpn_avl, tavor_qpn_avl_compare,
	    sizeof (tavor_qpn_entry_t),
	    offsetof(tavor_qpn_entry_t, qpn_avlnode));
}


/*
 * tavor_qpn_avl_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 */
void
tavor_qpn_avl_fini(tavor_state_t *state)
{
	tavor_qpn_entry_t *entry;
	void *cookie;

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree
	 * that was used for QP number (QPN) tracking.
	 */
	cookie = NULL;
	while ((entry = (tavor_qpn_entry_t *)avl_destroy_nodes(
	    &state->ts_qpn_avl, &cookie)) != NULL) {
		kmem_free(entry, sizeof (tavor_qpn_entry_t));
	}
	avl_destroy(&state->ts_qpn_avl);

	/* Destroy the lock used for QP number (QPN) AVL tree access */
	mutex_destroy(&state->ts_qpn_avl_lock);
}


/*
 * tavor_qphdl_from_qpnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the QP number is critical to the detection of a
 *    potential race condition in the QP event handler code (i.e. the case
 *    where a QP is freed and alloc'd again before an event for the
 *    "old" QP can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new QP owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported QPs grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
tavor_qphdl_t
1659 tavor_qphdl_from_qpnum(tavor_state_t *state, uint_t qpnum)
1660 {
1661 uint_t qpindx, qpmask;
1662
1663 /* Calculate the QP table index from the qpnum */
1664 qpmask = (1 << state->ts_cfg_profile->cp_log_num_qp) - 1;
1665 qpindx = qpnum & qpmask;
1666 return (state->ts_qphdl[qpindx]);
1667 }
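/*
 * A worked example of the masking above, again assuming (for
 * illustration) cp_log_num_qp = 16:
 *
 *	qpmask = (1 << 16) - 1 = 0xFFFF
 *	qpnum  = 0x12ABCD  ==>  qpindx = 0xABCD
 *
 * Two QPNs that differ only in their high (unconstrained) bits map to
 * the same table slot; that is what makes a stale event detectable,
 * since the full 24-bit QPN carried in the event will no longer match
 * the QPN stored in the handle found here.
 */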
1668
1669
1670 /*
1671 * tavor_special_qp_rsrc_alloc
1672 * Context: Can be called from interrupt or base context.
1673 */
1674 static int
1675 tavor_special_qp_rsrc_alloc(tavor_state_t *state, ibt_sqp_type_t type,
1676 uint_t port, tavor_rsrc_t **qp_rsrc)
1677 {
1678 uint_t mask, flags;
1679 int status;
1680
1681 mutex_enter(&state->ts_spec_qplock);
1682 flags = state->ts_spec_qpflags;
1683 if (type == IBT_SMI_SQP) {
1684 /*
1685 * Check here to see if the driver has been configured
1686 * to instruct the Tavor firmware to handle all incoming
1687 * SMP messages (i.e. messages sent to SMA). If so,
1688 * then we will treat QP0 as if it has already been
1689 * allocated (for internal use). Otherwise, if we allow
1690 * the allocation to happen, it will cause unexpected
1691 * behaviors (e.g. Tavor SMA becomes unresponsive).
1692 */
1693 if (state->ts_cfg_profile->cp_qp0_agents_in_fw != 0) {
1694 mutex_exit(&state->ts_spec_qplock);
1695 return (IBT_QP_IN_USE);
1696 }
1697
1698 /*
1699 * If this is the first QP0 allocation, then post
1700 * a CONF_SPECIAL_QP firmware command
1701 */
1702 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
1703 status = tavor_conf_special_qp_cmd_post(state,
1704 state->ts_spec_qp0->tr_indx, TAVOR_CMD_QP_SMI,
1705 TAVOR_CMD_NOSLEEP_SPIN);
1706 if (status != TAVOR_CMD_SUCCESS) {
1707 mutex_exit(&state->ts_spec_qplock);
1708 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1709 "command failed: %08x\n", status);
1710 return (IBT_INSUFF_RESOURCE);
1711 }
1712 }
1713
1714 /*
1715 * Now check (and, if necessary, modify) the flags to indicate
1716 * whether the allocation was successful
1717 */
1718 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
1719 if (flags & mask) {
1720 mutex_exit(&state->ts_spec_qplock);
1721 return (IBT_QP_IN_USE);
1722 }
1723 state->ts_spec_qpflags |= mask;
1724 *qp_rsrc = state->ts_spec_qp0;
1725
1726 } else {
1727 /*
1728 * If this is the first QP1 allocation, then post
1729 * a CONF_SPECIAL_QP firmware command
1730 */
1731 if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
1732 status = tavor_conf_special_qp_cmd_post(state,
1733 state->ts_spec_qp1->tr_indx, TAVOR_CMD_QP_GSI,
1734 TAVOR_CMD_NOSLEEP_SPIN);
1735 if (status != TAVOR_CMD_SUCCESS) {
1736 mutex_exit(&state->ts_spec_qplock);
1737 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1738 "command failed: %08x\n", status);
1739 return (IBT_INSUFF_RESOURCE);
1740 }
1741 }
1742
1743 /*
1744 * Now check (and, if necessary, modify) the flags to indicate
1745 * whether the allocation was successful
1746 */
1747 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
1748 if (flags & mask) {
1749 mutex_exit(&state->ts_spec_qplock);
1750 return (IBT_QP_IN_USE);
1751 }
1752 state->ts_spec_qpflags |= mask;
1753 *qp_rsrc = state->ts_spec_qp1;
1754 }
1755
1756 mutex_exit(&state->ts_spec_qplock);
1757 return (DDI_SUCCESS);
1758 }
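/*
 * A sketch of the ts_spec_qpflags bookkeeping above, assuming (for
 * illustration) TAVOR_SPECIAL_QP0_RSRC = 0, TAVOR_SPECIAL_QP1_RSRC = 2,
 * and a two-port HCA.  Each (special QP type, port) pair then owns one
 * bit:
 *
 *	QP0/port0 --> bit 0	QP0/port1 --> bit 1
 *	QP1/port0 --> bit 2	QP1/port1 --> bit 3
 *
 * Since TAVOR_SPECIAL_QP0_RSRC_MASK covers all of the QP0 bits, the
 * CONF_SPECIAL_QP command is posted only on the transition from "no
 * port allocated" to "first port allocated"; the free path below
 * mirrors this, deconfiguring the firmware only when the last port
 * releases its special QP.
 */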
1759
1760
1761 /*
1762 * tavor_special_qp_rsrc_free
1763 * Context: Can be called from interrupt or base context.
1764 */
1765 static int
1766 tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
1767 uint_t port)
1768 {
1769 uint_t mask, flags;
1770 int status;
1771
1772 mutex_enter(&state->ts_spec_qplock);
1773 if (type == IBT_SMI_SQP) {
1774 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
1775 state->ts_spec_qpflags &= ~mask;
1776 flags = state->ts_spec_qpflags;
1777
1778 /*
1779 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
1780 * firmware command
1781 */
1782 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
1783 status = tavor_conf_special_qp_cmd_post(state, 0,
1784 TAVOR_CMD_QP_SMI, TAVOR_CMD_NOSLEEP_SPIN);
1785 if (status != TAVOR_CMD_SUCCESS) {
1786 mutex_exit(&state->ts_spec_qplock);
1787 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1788 "command failed: %08x\n", status);
1789 return (ibc_get_ci_failure(0));
1790 }
1791 }
1792 } else {
1793 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
1794 state->ts_spec_qpflags &= ~mask;
1795 flags = state->ts_spec_qpflags;
1796
1797 /*
1798 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
1799 * firmware command
1800 */
1801 if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
1802 status = tavor_conf_special_qp_cmd_post(state, 0,
1803 TAVOR_CMD_QP_GSI, TAVOR_CMD_NOSLEEP_SPIN);
1804 if (status != TAVOR_CMD_SUCCESS) {
1805 mutex_exit(&state->ts_spec_qplock);
1806 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1807 "command failed: %08x\n", status);
1808 return (ibc_get_ci_failure(0));
1809 }
1810 }
1811 }
1812
1813 mutex_exit(&state->ts_spec_qplock);
1814 return (DDI_SUCCESS);
1815 }
1816
1817
1818 /*
1819 * tavor_qp_sgl_to_logwqesz()
1820 * Context: Can be called from interrupt or base context.
1821 */
1822 static void
1823 tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
1824 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
1825 {
1826 uint_t max_size, log2, actual_sgl;
1827
1828 switch (wq_type) {
1829 case TAVOR_QP_WQ_TYPE_SENDQ:
1830 /*
1831 * Use requested maximum SGL to calculate max descriptor size
1832 * (while guaranteeing that the descriptor size is a
1833 * power-of-2 cachelines).
1834 */
1835 max_size = (TAVOR_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
1836 log2 = highbit(max_size);
1837 if (ISP2(max_size)) {
1838 log2 = log2 - 1;
1839 }
1840
1841 /* Make sure descriptor is at least the minimum size */
1842 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
1843
1844 /* Calculate actual number of SGL (given WQE size) */
1845 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_SND_HDRS) >> 4;
1846 break;
1847
1848 case TAVOR_QP_WQ_TYPE_RECVQ:
1849 /*
1850 * Same as above (except for Recv WQEs)
1851 */
1852 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
1853 log2 = highbit(max_size);
1854 if (ISP2(max_size)) {
1855 log2 = log2 - 1;
1856 }
1857
1858 /* Make sure descriptor is at least the minimum size */
1859 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
1860
1861 /* Calculate actual number of SGL (given WQE size) */
1862 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
1863 break;
1864
1865 case TAVOR_QP_WQ_TYPE_SENDMLX_QP0:
1866 /*
1867 * Same as above (except for MLX transport WQEs). For these
1868 * WQEs we have to account for the space consumed by the
1869 * "inline" packet headers. (This is smaller than for QP1
1870 		 * below because QP0 is not allowed to send packets with a GRH.)
1871 */
1872 max_size = (TAVOR_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
1873 log2 = highbit(max_size);
1874 if (ISP2(max_size)) {
1875 log2 = log2 - 1;
1876 }
1877
1878 /* Make sure descriptor is at least the minimum size */
1879 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
1880
1881 /* Calculate actual number of SGL (given WQE size) */
1882 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP0_HDRS) >> 4;
1883 break;
1884
1885 case TAVOR_QP_WQ_TYPE_SENDMLX_QP1:
1886 /*
1887 * Same as above. For these WQEs we again have to account for
1888 * the space consumed by the "inline" packet headers. (This
1889 * is larger than for QP0 above because we have to account for
1890 * the possibility of a GRH in each packet - and this
1891 * introduces an alignment issue that causes us to consume
1892 * an additional 8 bytes).
1893 */
1894 max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
1895 log2 = highbit(max_size);
1896 if (ISP2(max_size)) {
1897 log2 = log2 - 1;
1898 }
1899
1900 /* Make sure descriptor is at least the minimum size */
1901 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
1902
1903 /* Calculate actual number of SGL (given WQE size) */
1904 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
1905 break;
1906
1907 	default:
1908 		TAVOR_WARNING(state, "unexpected work queue type");
		/* Keep the outputs initialized even for an unexpected type */
		log2 = TAVOR_QP_WQE_LOG_MINIMUM;
		actual_sgl = 0;
1909 		break;
1910 }
1911
1912 /* Fill in the return values */
1913 *logwqesz = log2;
1914 *max_sgl = min(state->ts_cfg_profile->cp_wqe_real_max_sgl, actual_sgl);
1915 }
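/*
 * A worked example of the send-queue case above, assuming (for
 * illustration only) TAVOR_QP_WQE_MLX_SND_HDRS = 64 bytes and a request
 * for num_sgl = 8 scatter-gather entries (16 bytes each):
 *
 *	max_size   = 64 + (8 << 4)   = 192
 *	log2       = highbit(192)    = 8	(192 is not a power of 2)
 *	WQE size   = 1 << 8          = 256 bytes
 *	actual_sgl = (256 - 64) >> 4 = 12
 *
 * Rounding the descriptor up to a power-of-2 number of cachelines
 * leaves room for four more SGL entries than were requested; the
 * returned *max_sgl is still clamped to cp_wqe_real_max_sgl.
 */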
1916