1 /*
2 * Copyright (c) 2024, Broadcom. All rights reserved. The term
3 * Broadcom refers to Broadcom Limited and/or its subsidiaries.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 */
29
30 #include <sys/mman.h>
31
32 #include <netinet/in.h>
33
34 #include <assert.h>
35 #include <errno.h>
36 #include <malloc.h>
37 #include <pthread.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43
44 #include "main.h"
45 #include "verbs.h"
46
47 static int ibv_to_bnxt_re_wr_opcd[11] = {
48 BNXT_RE_WR_OPCD_RDMA_WRITE,
49 BNXT_RE_WR_OPCD_RDMA_WRITE_IMM,
50 BNXT_RE_WR_OPCD_SEND,
51 BNXT_RE_WR_OPCD_SEND_IMM,
52 BNXT_RE_WR_OPCD_RDMA_READ,
53 BNXT_RE_WR_OPCD_ATOMIC_CS,
54 BNXT_RE_WR_OPCD_ATOMIC_FA,
55 BNXT_RE_WR_OPCD_INVAL,
56 BNXT_RE_WR_OPCD_INVAL,
57 BNXT_RE_WR_OPCD_INVAL,
58 BNXT_RE_WR_OPCD_INVAL
59 };
60
61 static int ibv_wr_to_wc_opcd[11] = {
62 IBV_WC_RDMA_WRITE,
63 IBV_WC_RDMA_WRITE,
64 IBV_WC_SEND,
65 IBV_WC_SEND,
66 IBV_WC_RDMA_READ,
67 IBV_WC_COMP_SWAP,
68 IBV_WC_FETCH_ADD,
69 0xFF,
70 0xFF,
71 0xFF,
72 0xFF
73 };
74
75 static int bnxt_re_req_to_ibv_status [12] = {
76 IBV_WC_SUCCESS,
77 IBV_WC_BAD_RESP_ERR,
78 IBV_WC_LOC_LEN_ERR,
79 IBV_WC_LOC_QP_OP_ERR,
80 IBV_WC_LOC_PROT_ERR,
81 IBV_WC_MW_BIND_ERR,
82 IBV_WC_REM_INV_REQ_ERR,
83 IBV_WC_REM_ACCESS_ERR,
84 IBV_WC_REM_OP_ERR,
85 IBV_WC_RNR_RETRY_EXC_ERR,
86 IBV_WC_RETRY_EXC_ERR,
87 IBV_WC_WR_FLUSH_ERR
88 };
89
90 static int bnxt_re_res_to_ibv_status [9] = {
91 IBV_WC_SUCCESS,
92 IBV_WC_LOC_ACCESS_ERR,
93 IBV_WC_LOC_LEN_ERR,
94 IBV_WC_LOC_PROT_ERR,
95 IBV_WC_LOC_QP_OP_ERR,
96 IBV_WC_MW_BIND_ERR,
97 IBV_WC_REM_INV_REQ_ERR,
98 IBV_WC_WR_FLUSH_ERR,
99 IBV_WC_FATAL_ERR
100 };
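
/*
 * Note: the two opcode tables above are indexed by enum ibv_wr_opcode, and the
 * two status tables by the hardware status nibble pulled out of the CQE with
 * BNXT_RE_BCQE_STATUS_SHIFT/MASK (see the bnxt_re_poll_*cqe() helpers below).
 * Entries set to BNXT_RE_WR_OPCD_INVAL / 0xFF mark ibv_wr_opcode values that
 * have no hardware mapping here.
 */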
101
102 static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc,
103 uint32_t *resize);
104
105 int bnxt_single_threaded;
106 int bnxt_dyn_debug;
int bnxt_re_query_device(struct ibv_context *ibvctx,
			 struct ibv_device_attr *dev_attr)
109 {
110 struct ibv_query_device cmd = {};
111 uint8_t fw_ver[8];
112 int status;
113
114 memset(dev_attr, 0, sizeof(struct ibv_device_attr));
115 status = ibv_cmd_query_device(ibvctx, dev_attr, (uint64_t *)&fw_ver,
116 &cmd, sizeof(cmd));
117 snprintf(dev_attr->fw_ver, 64, "%d.%d.%d.%d",
118 fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3]);
119
120 return status;
121 }
122
int bnxt_re_query_device_compat(struct ibv_context *ibvctx,
				struct ibv_device_attr *dev_attr)
125
126 {
127 int rc = 0;
128
129 rc = bnxt_re_query_device(ibvctx, dev_attr);
130
131 return rc;
132 }
133
int bnxt_re_query_port(struct ibv_context *ibvctx, uint8_t port,
		       struct ibv_port_attr *port_attr)
136 {
137 struct ibv_query_port cmd = {};
138
139 return ibv_cmd_query_port(ibvctx, port, port_attr, &cmd, sizeof(cmd));
140 }
141
static inline bool bnxt_re_is_wcdpi_enabled(struct bnxt_re_context *cntx)
143 {
144 return cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED;
145 }
146
static int bnxt_re_map_db_page(struct ibv_context *ibvctx,
			       uint64_t dbr, uint32_t dpi, uint32_t wcdpi)
149 {
150 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
151 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
152
153 cntx->udpi.dpindx = dpi;
154 cntx->udpi.dbpage = mmap(NULL, dev->pg_size, PROT_WRITE,
155 MAP_SHARED, ibvctx->cmd_fd, dbr);
156 if (cntx->udpi.dbpage == MAP_FAILED)
157 return -ENOMEM;
158 if (wcdpi) {
159 cntx->udpi.wcdbpg = mmap(NULL, dev->pg_size, PROT_WRITE,
160 MAP_SHARED, ibvctx->cmd_fd,
161 BNXT_RE_MAP_WC);
162 if (cntx->udpi.wcdbpg == MAP_FAILED)
163 return -ENOMEM;
164 cntx->udpi.wcdpi = wcdpi;
165 }
166
167 return 0;
168 }
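
/*
 * The regular doorbell page is mapped write-only at the offset supplied by the
 * kernel in dbr; the optional write-combining doorbell page is mapped at the
 * BNXT_RE_MAP_WC offset only when a WC DPI was handed out. Both mappings are
 * released during uninit_ucontext, not here (see the note in bnxt_re_free_pd()).
 */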
169
struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *ibvctx)
171 {
172 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
173 struct bnxt_re_pd_resp resp = {};
174 struct ibv_alloc_pd cmd = {};
175 struct bnxt_re_pd *pd;
176 uint64_t dbr_map;
177
178 pd = calloc(1, sizeof(*pd));
179 if (!pd)
180 return NULL;
181
182 if (ibv_cmd_alloc_pd(ibvctx, &pd->ibvpd, &cmd, sizeof(cmd),
183 &resp.resp, sizeof(resp)))
184 goto out;
185
186 pd->pdid = resp.pdid;
187 /* Map DB page now. */
188 if (!cntx->udpi.dbpage) {
189 uint32_t wcdpi = 0;
190
191 if (bnxt_re_is_wcdpi_enabled(cntx) &&
192 resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_WC_DPI)
193 wcdpi = resp.wcdpi;
194 if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi))
195 goto fail;
196 if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi)
197 bnxt_re_init_pbuf_list(cntx);
198 }
199 if (resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_DBR_BAR_ADDR) {
200 dbr_map = resp.dbr_bar_map & 0xFFFFFFFFFFFFF000;
201 cntx->bar_map = mmap(NULL, 4096, PROT_READ,
202 MAP_SHARED, ibvctx->cmd_fd, dbr_map);
203 if (cntx->bar_map == MAP_FAILED)
204 goto fail;
205 }
206
207 return &pd->ibvpd;
208 fail:
209 ibv_cmd_dealloc_pd(&pd->ibvpd);
210 out:
211 free(pd);
212 return NULL;
213 }
214
int bnxt_re_free_pd(struct ibv_pd *ibvpd)
216 {
217 struct bnxt_re_pd *pd = to_bnxt_re_pd(ibvpd);
218 int status;
219
220 status = ibv_cmd_dealloc_pd(ibvpd);
221 if (status)
222 return status;
223 /* DPI un-mapping will be done during uninit_ucontext */
224 free(pd);
225
226 return 0;
227 }
228
struct ibv_mr *get_ibv_mr_from_bnxt_re_mr(struct bnxt_re_mr *mr)
230 {
231 return &mr->vmr;
232 }
233
struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
			      int access)
236 {
237 struct bnxt_re_mr_resp resp = {};
238 struct ibv_reg_mr cmd = {};
239 struct bnxt_re_mr *mr;
	uint64_t hw_va;

	hw_va = (uint64_t)sva;
242
243 mr = calloc(1, sizeof(*mr));
244 if (!mr)
245 return NULL;
246
247 if (ibv_cmd_reg_mr(ibvpd, sva, len, hw_va, access, &mr->vmr,
248 &cmd, sizeof(cmd), &resp.resp, sizeof(resp))) {
249 free(mr);
250 return NULL;
251 }
252
253 return get_ibv_mr_from_bnxt_re_mr(mr);
254 }
255
int bnxt_re_dereg_mr(VERBS_MR *ibvmr)
257 {
258 struct bnxt_re_mr *mr = (struct bnxt_re_mr *)ibvmr;
259 int status;
260
261 status = ibv_cmd_dereg_mr(ibvmr);
262 if (status)
263 return status;
264 free(mr);
265
266 return 0;
267 }
268
void *bnxt_re_alloc_cqslab(struct bnxt_re_context *cntx,
			   uint32_t ncqe, uint32_t cur)
271 {
272 struct bnxt_re_mem *mem;
273 uint32_t depth, sz;
274
275 depth = bnxt_re_init_depth(ncqe + 1, cntx->comp_mask);
276 if (depth > cntx->rdev->max_cq_depth + 1)
277 depth = cntx->rdev->max_cq_depth + 1;
278 if (depth == cur)
279 return NULL;
280 sz = get_aligned((depth * cntx->rdev->cqe_size), cntx->rdev->pg_size);
281 mem = bnxt_re_alloc_mem(sz, cntx->rdev->pg_size);
282 if (mem)
283 mem->pad = depth;
284 return mem;
285 }
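
/*
 * Sizing sketch (illustrative numbers only): with a hypothetical 32-byte CQE
 * and 4 KiB pages, a depth of 512 entries needs get_aligned(512 * 32, 4096)
 * = 16 KiB of CQ slab; mem->pad carries the final depth back to the caller.
 */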
286
struct ibv_cq *_bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe,
				  struct ibv_comp_channel *channel, int vec,
				  bool soft_cq)
290 {
291 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
292 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
293 struct bnxt_re_cq_resp resp = {};
294 struct bnxt_re_cq_req cmd = {};
295 struct bnxt_re_cq *cq;
296 bool has_dpi;
297
298 if (ncqe > dev->max_cq_depth)
299 return NULL;
300
301 cq = calloc(1, (sizeof(*cq) + sizeof(struct bnxt_re_queue)));
302 if (!cq)
303 return NULL;
304 cq->cqq = (void *)((char *)cq + sizeof(*cq));
305 if (!cq->cqq)
306 goto mem;
307
308 cq->mem = bnxt_re_alloc_cqslab(cntx, ncqe, 0);
309 if (!cq->mem)
310 goto mem;
311 cq->cqq->depth = cq->mem->pad;
312 cq->cqq->stride = dev->cqe_size;
	/* As an exception, there is no need to call the get_ring API;
	 * we know this is the only consumer.
	 */
316 cq->cqq->va = cq->mem->va_head;
317 if (!cq->cqq->va)
318 goto fail;
319
320 cmd.cq_va = (uint64_t)cq->cqq->va;
321 cmd.cq_handle = (uint64_t)cq;
322 if (soft_cq) {
323 cmd.comp_mask |= BNXT_RE_COMP_MASK_CQ_REQ_HAS_CAP_MASK;
324 cmd.cq_capab |= BNXT_RE_COMP_MASK_CQ_REQ_CAP_DBR_RECOVERY;
325 }
326 if (ibv_cmd_create_cq(ibvctx, ncqe, channel, vec,
327 &cq->ibvcq, &cmd.cmd, sizeof(cmd),
328 &resp.resp, sizeof(resp)))
329 goto fail;
330
331 has_dpi = resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_DB_INFO;
332 if (!cntx->udpi.dbpage && has_dpi) {
333 uint32_t wcdpi = 0;
334
335 if (bnxt_re_is_wcdpi_enabled(cntx) &&
336 resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_WC_DPI)
337 wcdpi = resp.wcdpi;
338 if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi))
339 goto fail;
340 if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi)
341 bnxt_re_init_pbuf_list(cntx);
342 }
343
344 if (resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_CQ_PAGE) {
345 cq->cq_page = mmap(NULL, dev->pg_size, PROT_READ | PROT_WRITE, MAP_SHARED,
346 ibvctx->cmd_fd, resp.cq_page);
347 if (cq->cq_page == MAP_FAILED)
348 fprintf(stderr, DEV "Valid cq_page not mapped\n");
349 }
350
351 cq->cqid = resp.cqid;
352 cq->phase = resp.phase;
353 cq->cqq->tail = resp.tail;
354 cq->udpi = &cntx->udpi;
355 cq->first_arm = true;
356 cq->cntx = cntx;
357 cq->rand.seed = cq->cqid;
358 cq->shadow_db_key = BNXT_RE_DB_KEY_INVALID;
359 bnxt_re_dp_spin_init(&cq->cqq->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
360 INIT_DBLY_LIST_HEAD(&cq->sfhead);
361 INIT_DBLY_LIST_HEAD(&cq->rfhead);
362 INIT_DBLY_LIST_HEAD(&cq->prev_cq_head);
363 if (_is_db_drop_recovery_enable(cntx) && !soft_cq) {
364 INIT_DBLY_LIST_NODE(&cq->dbnode);
365 pthread_spin_lock(&cntx->cq_dbr_res.lock);
366 bnxt_re_list_add_node(&cq->dbnode, &cntx->cq_dbr_res.head);
367 pthread_spin_unlock(&cntx->cq_dbr_res.lock);
368 }
369
370 return &cq->ibvcq;
371 fail:
372 bnxt_re_free_mem(cq->mem);
373 mem:
374 free(cq);
375 return NULL;
376 }
377
struct ibv_cq *bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe,
				 struct ibv_comp_channel *channel, int vec)
380 {
381 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
382 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
383 sigset_t block_sig_set, old_sig_set;
384 int ret;
385
386 if (_is_db_drop_recovery_enable(cntx) && !cntx->dbr_cq) {
387 cntx->dbr_ev_chan =
388 ibv_create_comp_channel(ibvctx);
389 if (!cntx->dbr_ev_chan) {
390 fprintf(stderr,
391 DEV "Failed to create completion channel\n");
392 goto free;
393 }
394 cntx->dbr_cq = _bnxt_re_create_cq(ibvctx, 1, cntx->dbr_ev_chan, vec, 1);
395 if (!cntx->dbr_cq) {
396 fprintf(stderr, DEV "Couldn't create CQ\n");
397 goto free;
398 }
399 cntx->db_recovery_page = mmap(NULL, dev->pg_size, PROT_READ |
400 PROT_WRITE, MAP_SHARED,
401 ibvctx->cmd_fd, BNXT_RE_DB_RECOVERY_PAGE);
402 if (cntx->db_recovery_page == MAP_FAILED) {
403 fprintf(stderr, DEV "Couldn't map DB recovery page\n");
404 goto free;
405 }
		/* Create a pthread to handle doorbell drop events. This
		 * thread must not handle any signals, so block all signals
		 * before creating it and restore the old signal mask
		 * afterwards.
		 */
410 sigfillset(&block_sig_set);
411 pthread_sigmask(SIG_BLOCK, &block_sig_set, &old_sig_set);
412 ret = pthread_create(&cntx->dbr_thread, NULL, bnxt_re_dbr_thread, cntx);
413 if (ret) {
414 fprintf(stderr, DEV "Couldn't create pthread\n");
415 pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL);
416 goto free;
417 }
418 pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL);
419 INIT_DBLY_LIST_HEAD(&cntx->qp_dbr_res.head);
420 pthread_spin_init(&cntx->qp_dbr_res.lock, PTHREAD_PROCESS_PRIVATE);
421 INIT_DBLY_LIST_HEAD(&cntx->cq_dbr_res.head);
422 pthread_spin_init(&cntx->cq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE);
423 INIT_DBLY_LIST_HEAD(&cntx->srq_dbr_res.head);
424 pthread_spin_init(&cntx->srq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE);
425 }
426 return(_bnxt_re_create_cq(ibvctx, ncqe, channel, vec, 0));
427 free:
428 if (cntx->dbr_ev_chan) {
429 ret = ibv_destroy_comp_channel(cntx->dbr_ev_chan);
430 if (ret)
431 fprintf(stderr, DEV "ibv_destroy_comp_channel error\n");
432 }
433
434 if (cntx->dbr_cq) {
435 if (cntx->db_recovery_page)
436 munmap(cntx->db_recovery_page, dev->pg_size);
437 ret = ibv_destroy_cq(cntx->dbr_cq);
438 if (ret)
439 fprintf(stderr, DEV "ibv_destroy_cq error\n");
440 }
441 return NULL;
442 }
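
/*
 * Consumer-side sketch (illustration only, not part of this provider):
 *
 *	struct ibv_cq *cq = ibv_create_cq(ctx, 256, NULL, NULL, 0);
 *
 * libibverbs dispatches such a call to bnxt_re_create_cq() above; on the
 * first CQ created with doorbell drop recovery enabled it also sets up the
 * recovery CQ, its event channel and the handler thread.
 */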
443
int bnxt_re_poll_kernel_cq(struct bnxt_re_cq *cq)
445 {
446 struct ibv_wc tmp_wc;
447 int rc;
448
449 rc = ibv_cmd_poll_cq(&cq->ibvcq, 1, &tmp_wc);
450 if (unlikely(rc))
451 fprintf(stderr, "ibv_cmd_poll_cq failed: %d\n", rc);
452 return rc;
453 }
454
455 #define BNXT_RE_QUEUE_START_PHASE 0x01
456
/*
 * Complete the last steps of a CQ resize. Invoke the poll function in the
 * kernel driver; this serves as a signal to the driver to finish the
 * remaining CQ resize steps. Free the memory mapped for the original CQ and
 * switch over to the memory mapped for the resized CQ. Finally, ack the
 * cut-off CQE. This function must be called with cq->cqq.lock held.
 */
void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
465 {
466 struct bnxt_re_context *cntx = to_bnxt_re_context(cq->ibvcq.context);
467
468 bnxt_re_poll_kernel_cq(cq);
469 bnxt_re_free_mem(cq->mem);
470
471 cq->mem = cq->resize_mem;
472 cq->resize_mem = NULL;
	/* As an exception, there is no need to call the get_ring API;
	 * we know this is the only consumer.
	 */
476 cq->cqq->va = cq->mem->va_head;
	/*
	 * We don't want to memcpy() the entire cqq structure below; otherwise
	 * we'd end up overwriting cq->cqq.lock, which is held by the caller.
	 * So we copy the members piecemeal. cqq->head and cqq->tail are
	 * implicitly set to 0 before the cutoff_ack DB.
	 */
483 cq->cqq->depth = cq->mem->pad;
484 cq->cqq->stride = cntx->rdev->cqe_size;
485 cq->cqq->head = 0;
486 cq->cqq->tail = 0;
487 cq->phase = BNXT_RE_QUEUE_START_PHASE;
488 /* Reset epoch portion of the flags */
489 cq->cqq->flags &= ~(BNXT_RE_FLAG_EPOCH_TAIL_MASK |
490 BNXT_RE_FLAG_EPOCH_HEAD_MASK);
491 bnxt_re_ring_cq_arm_db(cq, BNXT_RE_QUE_TYPE_CQ_CUT_ACK);
492 }
493
int bnxt_re_resize_cq(struct ibv_cq *ibvcq, int ncqe)
495 {
496 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvcq->context);
497 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvcq->context->device);
498 struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
499 struct bnxt_re_resize_cq_req req = {};
500 uint32_t exit_cnt = 20;
501
502 struct ibv_resize_cq_resp resp = {};
503 int rc = 0;
504
505 if (ncqe > dev->max_cq_depth)
506 return -EINVAL;
507
508 bnxt_re_dp_spin_lock(&cq->cqq->qlock);
509 cq->resize_mem = bnxt_re_alloc_cqslab(cntx, ncqe, cq->cqq->depth);
510 if (unlikely(!cq->resize_mem)) {
511 rc = -ENOMEM;
512 goto done;
513 }
	/* As an exception, there is no need to call the get_ring API;
	 * we know this is the only consumer.
	 */
517 req.cq_va = (uint64_t)cq->resize_mem->va_head;
518 rc = ibv_cmd_resize_cq(ibvcq, ncqe, &req.cmd,
519 sizeof(req), &resp, sizeof(resp));
520 if (unlikely(rc)) {
521 bnxt_re_free_mem(cq->resize_mem);
522 goto done;
523 }
524
	while (true) {
526 struct ibv_wc tmp_wc = {0};
527 uint32_t resize = 0;
528 int dqed = 0;
529
530 struct bnxt_re_work_compl *compl = NULL;
531 dqed = bnxt_re_poll_one(cq, 1, &tmp_wc, &resize);
532 if (resize) {
533 break;
534 }
535 if (dqed) {
536 compl = calloc(1, sizeof(*compl));
537 if (unlikely(!compl)) {
538 fprintf(stderr, "%s: No Memory.. Continue\n", __func__);
539 break;
540 }
541 memcpy(&compl->wc, &tmp_wc, sizeof(tmp_wc));
542 bnxt_re_list_add_node(&compl->cnode, &cq->prev_cq_head);
543 compl = NULL;
544 memset(&tmp_wc, 0, sizeof(tmp_wc));
545 } else {
546 exit_cnt--;
547 if (unlikely(!exit_cnt)) {
548 rc = -EIO;
549 break;
550 } else {
				/* wait for 100 milliseconds */
552 bnxt_re_sub_sec_busy_wait(100 * 1000000);
553 }
554 }
555 }
556 done:
557 bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
558 return rc;
559 }
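
/*
 * Resize handshake summary: after ibv_cmd_resize_cq() the provider keeps
 * polling the old CQ ring; genuine completions are stashed on
 * cq->prev_cq_head and replayed later by bnxt_re_poll_resize_cq_list(), and
 * the switch to the new ring happens once the hardware posts the cut-off
 * (BNXT_RE_WC_TYPE_COFF) entry handled in bnxt_re_poll_one().
 */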
560
static void bnxt_re_destroy_resize_cq_list(struct bnxt_re_cq *cq)
562 {
563 struct bnxt_re_list_node *cur, *tmp;
564 struct bnxt_re_work_compl *compl;
565
566 if (bnxt_re_list_empty(&cq->prev_cq_head))
567 return;
568
569 list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) {
570 compl = list_node(cur, struct bnxt_re_work_compl, cnode);
571 bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head);
572 free(compl);
573 }
574
575 }
576
int bnxt_re_destroy_cq(struct ibv_cq *ibvcq)
578 {
579 struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
580 int status;
581
582 if (_is_db_drop_recovery_enable(cq->cntx) &&
583 ibvcq != cq->cntx->dbr_cq) {
584 pthread_spin_lock(&cq->cntx->cq_dbr_res.lock);
585 bnxt_re_list_del_node(&cq->dbnode,
586 &cq->cntx->cq_dbr_res.head);
587 pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock);
588 }
589 status = ibv_cmd_destroy_cq(ibvcq);
590 if (status) {
591 if (_is_db_drop_recovery_enable(cq->cntx) &&
592 ibvcq != cq->cntx->dbr_cq) {
593 pthread_spin_lock(&cq->cntx->cq_dbr_res.lock);
594 bnxt_re_list_add_node(&cq->dbnode,
595 &cq->cntx->cq_dbr_res.head);
596 pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock);
597 }
598 return status;
599 }
600 bnxt_re_destroy_resize_cq_list(cq);
601 bnxt_re_free_mem(cq->mem);
602 free(cq);
603 return 0;
604 }
605
static uint8_t bnxt_re_poll_err_scqe(struct bnxt_re_qp *qp,
				     struct ibv_wc *ibvwc,
				     struct bnxt_re_req_cqe *scqe,
				     uint32_t flg_val, int *cnt)
610 {
611 struct bnxt_re_queue *sq = qp->jsqq->hwque;
612 struct bnxt_re_wrid *swrid;
613 struct bnxt_re_cq *scq;
614 uint8_t status;
615 uint32_t head;
616
617 scq = to_bnxt_re_cq(qp->ibvqp.send_cq);
618
619 head = qp->jsqq->last_idx;
620 swrid = &qp->jsqq->swque[head];
621
622 *cnt = 1;
623 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
624 BNXT_RE_BCQE_STATUS_MASK;
625 ibvwc->status = bnxt_re_req_to_ibv_status[status];
626 ibvwc->wc_flags = 0;
627 ibvwc->wr_id = swrid->wrid;
628 ibvwc->qp_num = qp->qpid;
629 ibvwc->opcode = swrid->wc_opcd;
630 ibvwc->byte_len = 0;
631
632 bnxt_re_incr_head(sq, swrid->slots);
633 bnxt_re_jqq_mod_last(qp->jsqq, head);
634
635 if (qp->qpst != IBV_QPS_ERR)
636 qp->qpst = IBV_QPS_ERR;
637 bnxt_re_list_add_node(&qp->snode, &scq->sfhead);
638 bnxt_re_trace("%s: qp_num = 0x%x status = %d\n",
639 __func__, ibvwc->qp_num, ibvwc->status)
640
641 return false;
642 }
643
static uint8_t bnxt_re_poll_success_scqe(struct bnxt_re_qp *qp,
					 struct ibv_wc *ibvwc,
					 struct bnxt_re_req_cqe *scqe, int *cnt)
647 {
648 struct bnxt_re_queue *sq = qp->jsqq->hwque;
649 struct bnxt_re_wrid *swrid;
650 uint8_t pcqe = false;
651 uint32_t cindx, head;
652
653 head = qp->jsqq->last_idx;
654 swrid = &qp->jsqq->swque[head];
655 cindx = le32toh(scqe->con_indx) % qp->cap.max_swr;
656
657 if (!(swrid->sig & IBV_SEND_SIGNALED)) {
658 *cnt = 0;
659 } else {
660 ibvwc->status = IBV_WC_SUCCESS;
661 ibvwc->wc_flags = 0;
662 ibvwc->qp_num = qp->qpid;
663 ibvwc->wr_id = swrid->wrid;
664 ibvwc->opcode = swrid->wc_opcd;
665 if (ibvwc->opcode == IBV_WC_RDMA_READ ||
666 ibvwc->opcode == IBV_WC_COMP_SWAP ||
667 ibvwc->opcode == IBV_WC_FETCH_ADD)
668 ibvwc->byte_len = swrid->bytes;
669 *cnt = 1;
670 }
671 bnxt_re_incr_head(sq, swrid->slots);
672 bnxt_re_jqq_mod_last(qp->jsqq, head);
673 if (qp->jsqq->last_idx != cindx)
674 pcqe = true;
675
676 return pcqe;
677 }
678
static uint8_t bnxt_re_poll_scqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
				 void *cqe, uint32_t flg_val, int *cnt)
681 {
682 uint8_t status, pcqe = false;
683
684 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
685 BNXT_RE_BCQE_STATUS_MASK;
686 if (status == BNXT_RE_REQ_ST_OK)
687 pcqe = bnxt_re_poll_success_scqe(qp, ibvwc, cqe, cnt);
688 else
689 pcqe = bnxt_re_poll_err_scqe(qp, ibvwc, cqe, flg_val, cnt);
690
691 return pcqe;
692 }
693
static void bnxt_re_release_srqe(struct bnxt_re_srq *srq, int tag)
695 {
696 bnxt_re_dp_spin_lock(&srq->srqq->qlock);
697 srq->srwrid[srq->last_idx].next_idx = tag;
698 srq->last_idx = tag;
699 srq->srwrid[srq->last_idx].next_idx = -1;
700 bnxt_re_dp_spin_unlock(&srq->srqq->qlock);
701 }
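
/*
 * The SRQ shadow WRID entries form a free list threaded through next_idx;
 * releasing a tag appends it at srq->last_idx so the slot can be reused by a
 * subsequent SRQ receive posting.
 */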
702
static int bnxt_re_poll_err_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
				 struct bnxt_re_bcqe *hdr,
				 uint32_t flg_val, void *cqe)
706 {
707 struct bnxt_re_wrid *swque;
708 struct bnxt_re_queue *rq;
709 struct bnxt_re_cq *rcq;
710 uint8_t status, cnt;
711 uint32_t head = 0;
712
713 rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq);
714
715 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
716 BNXT_RE_BCQE_STATUS_MASK;
717 /* skip h/w flush errors */
718 if (status == BNXT_RE_RSP_ST_HW_FLUSH)
719 return 0;
720
721 if (!qp->srq) {
722 rq = qp->jrqq->hwque;
723 head = qp->jrqq->last_idx;
724 swque = &qp->jrqq->swque[head];
725 ibvwc->wr_id = swque->wrid;
726 cnt = swque->slots;
727 } else {
728 struct bnxt_re_srq *srq;
729 int tag;
730
731 srq = qp->srq;
732 rq = srq->srqq;
733 cnt = 1;
734 tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK;
735 ibvwc->wr_id = srq->srwrid[tag].wrid;
736 bnxt_re_release_srqe(srq, tag);
737 }
738
739 ibvwc->status = bnxt_re_res_to_ibv_status[status];
740 ibvwc->qp_num = qp->qpid;
741 ibvwc->opcode = IBV_WC_RECV;
742 ibvwc->byte_len = 0;
743 ibvwc->wc_flags = 0;
744 if (qp->qptyp == IBV_QPT_UD)
745 ibvwc->src_qp = 0;
746
747 if (!qp->srq)
748 bnxt_re_jqq_mod_last(qp->jrqq, head);
749 bnxt_re_incr_head(rq, cnt);
750
751 if (!qp->srq)
752 bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead);
753
754 bnxt_re_trace("%s: qp_num = 0x%x status = %d\n",
755 __func__, ibvwc->qp_num, ibvwc->status)
756 return 1;
757 }
758
static void bnxt_re_fill_ud_cqe(struct ibv_wc *ibvwc,
				struct bnxt_re_bcqe *hdr, void *cqe,
				uint8_t flags)
762 {
763 struct bnxt_re_ud_cqe *ucqe = cqe;
764 uint32_t qpid;
765
766 qpid = ((le32toh(hdr->qphi_rwrid) >> BNXT_RE_BCQE_SRCQP_SHIFT) &
767 BNXT_RE_BCQE_SRCQP_SHIFT) << 0x10; /* higher 8 bits of 24 */
768 qpid |= (le64toh(ucqe->qplo_mac) >> BNXT_RE_UD_CQE_SRCQPLO_SHIFT) &
769 BNXT_RE_UD_CQE_SRCQPLO_MASK; /*lower 16 of 24 */
770 ibvwc->src_qp = qpid;
771 ibvwc->wc_flags |= IBV_WC_GRH;
772 ibvwc->sl = (flags & BNXT_RE_UD_FLAGS_IP_VER_MASK) >>
773 BNXT_RE_UD_FLAGS_IP_VER_SFT;
	/* The user-space IB-stack ABI does not ask for the MAC to be reported. */
775 }
776
static void bnxt_re_poll_success_rcqe(struct bnxt_re_qp *qp,
				      struct ibv_wc *ibvwc,
				      struct bnxt_re_bcqe *hdr,
				      uint32_t flg_val, void *cqe)
781 {
782 uint8_t flags, is_imm, is_rdma;
783 struct bnxt_re_rc_cqe *rcqe;
784 struct bnxt_re_wrid *swque;
785 struct bnxt_re_queue *rq;
786 uint32_t head = 0;
787 uint32_t rcqe_len;
788 uint8_t cnt;
789
790 rcqe = cqe;
791 if (!qp->srq) {
792 rq = qp->jrqq->hwque;
793 head = qp->jrqq->last_idx;
794 swque = &qp->jrqq->swque[head];
795 cnt = swque->slots;
796 ibvwc->wr_id = swque->wrid;
797 } else {
798 struct bnxt_re_srq *srq;
799 int tag;
800
801 srq = qp->srq;
802 rq = srq->srqq;
803 cnt = 1;
804 tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK;
805 ibvwc->wr_id = srq->srwrid[tag].wrid;
806 bnxt_re_release_srqe(srq, tag);
807 }
808
809 ibvwc->status = IBV_WC_SUCCESS;
810 ibvwc->qp_num = qp->qpid;
811 rcqe_len = le32toh(rcqe->length);
812 ibvwc->byte_len = (qp->qptyp == IBV_QPT_UD) ?
813 rcqe_len & BNXT_RE_UD_CQE_LEN_MASK : rcqe_len;
814 ibvwc->opcode = IBV_WC_RECV;
815
816 flags = (flg_val >> BNXT_RE_BCQE_FLAGS_SHIFT) &
817 BNXT_RE_BCQE_FLAGS_MASK;
818 is_imm = (flags & BNXT_RE_RC_FLAGS_IMM_MASK) >>
819 BNXT_RE_RC_FLAGS_IMM_SHIFT;
820 is_rdma = (flags & BNXT_RE_RC_FLAGS_RDMA_MASK) >>
821 BNXT_RE_RC_FLAGS_RDMA_SHIFT;
822 ibvwc->wc_flags = 0;
823 if (is_imm) {
824 ibvwc->wc_flags |= IBV_WC_WITH_IMM;
		/* The HW returns imm_data in little-endian format;
		 * swap it to big endian as expected by the application.
		 */
828 ibvwc->imm_data = htobe32(le32toh(rcqe->imm_key));
829 if (is_rdma)
830 ibvwc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
831 }
832
833 if (qp->qptyp == IBV_QPT_UD) {
834 bnxt_re_fill_ud_cqe(ibvwc, hdr, cqe, flags);
835 }
836
837 if (!qp->srq)
838 bnxt_re_jqq_mod_last(qp->jrqq, head);
839 bnxt_re_incr_head(rq, cnt);
840 }
841
static uint8_t bnxt_re_poll_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
				 void *cqe, uint32_t flg_val, int *cnt)
844 {
845 struct bnxt_re_bcqe *hdr;
846 uint8_t status, pcqe = false;
847
848 hdr = cqe + sizeof(struct bnxt_re_rc_cqe);
849
850 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
851 BNXT_RE_BCQE_STATUS_MASK;
852 *cnt = 1;
853 if (status == BNXT_RE_RSP_ST_OK)
854 bnxt_re_poll_success_rcqe(qp, ibvwc, hdr, flg_val, cqe);
855 else
856 *cnt = bnxt_re_poll_err_rcqe(qp, ibvwc, hdr, flg_val, cqe);
857
858 return pcqe;
859 }
860
static void bnxt_re_qp_move_flush_err(struct bnxt_re_qp *qp)
862 {
863 struct bnxt_re_cq *scq, *rcq;
864
865 scq = to_bnxt_re_cq(qp->ibvqp.send_cq);
866 rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq);
867
868 if (qp->qpst != IBV_QPS_ERR)
869 qp->qpst = IBV_QPS_ERR;
870 bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead);
871 bnxt_re_list_add_node(&qp->snode, &scq->sfhead);
872 }
873
874 /* Always return false */
static uint8_t bnxt_re_poll_term_cqe(struct bnxt_re_qp *qp, int *cnt)
876 {
	/* For now just add the QP to the flush list without
	 * considering the index reported in the CQE.
	 * Continue reporting flush completions until the
	 * SQ and RQ are empty.
	 */
882 *cnt = 0;
883 if (qp->qpst != IBV_QPS_RESET)
884 bnxt_re_qp_move_flush_err(qp);
885
886 return false;
887 }
888
static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc,
			    uint32_t *resize)
891 {
892 int type, cnt = 0, dqed = 0, hw_polled = 0;
893 struct bnxt_re_queue *cqq = cq->cqq;
894 struct bnxt_re_req_cqe *scqe;
895 struct bnxt_re_ud_cqe *rcqe;
896 uint64_t *qp_handle = NULL;
897 struct bnxt_re_bcqe *hdr;
898 struct bnxt_re_qp *qp;
899 uint8_t pcqe = false;
900 uint32_t flg_val;
901 void *cqe;
902
903 while (nwc) {
904 cqe = cqq->va + cqq->head * bnxt_re_get_cqe_sz();
905 hdr = cqe + sizeof(struct bnxt_re_req_cqe);
906 flg_val = le32toh(hdr->flg_st_typ_ph);
907 if (unlikely(!bnxt_re_is_cqe_valid(flg_val, cq->phase)))
908 break;
909 type = (flg_val >> BNXT_RE_BCQE_TYPE_SHIFT) &
910 BNXT_RE_BCQE_TYPE_MASK;
911 switch (type) {
912 case BNXT_RE_WC_TYPE_SEND:
913 scqe = cqe;
914 qp_handle = (uint64_t *)&scqe->qp_handle;
915 qp = (struct bnxt_re_qp *)
916 (uintptr_t)le64toh(scqe->qp_handle);
917 if (!qp)
918 break; /*stale cqe. should be rung.*/
919 pcqe = bnxt_re_poll_scqe(qp, wc, cqe, flg_val, &cnt);
920 break;
921 case BNXT_RE_WC_TYPE_RECV_RC:
922 case BNXT_RE_WC_TYPE_RECV_UD:
923 rcqe = cqe;
924 qp_handle = (uint64_t *)&rcqe->qp_handle;
925 qp = (struct bnxt_re_qp *)
926 (uintptr_t)le64toh(rcqe->qp_handle);
927 if (!qp)
928 break; /*stale cqe. should be rung.*/
929 pcqe = bnxt_re_poll_rcqe(qp, wc, cqe, flg_val, &cnt);
930 break;
931 case BNXT_RE_WC_TYPE_RECV_RAW:
932 break;
933 case BNXT_RE_WC_TYPE_TERM:
934 scqe = cqe;
935 qp_handle = (uint64_t *)&scqe->qp_handle;
936 qp = (struct bnxt_re_qp *)
937 (uintptr_t)le64toh(scqe->qp_handle);
938 if (!qp)
939 break;
940 pcqe = bnxt_re_poll_term_cqe(qp, &cnt);
941 break;
942 case BNXT_RE_WC_TYPE_COFF:
943 /* Stop further processing and return */
944 bnxt_re_resize_cq_complete(cq);
945 if (unlikely(resize))
946 *resize = 1;
947 return dqed;
948 default:
949 break;
950 };
951
952 if (pcqe)
953 goto skipp_real;
954
955 hw_polled++;
956 if (qp_handle) {
957 *qp_handle = 0x0ULL; /* mark cqe as read */
958 qp_handle = NULL;
959 }
960 bnxt_re_incr_head(cq->cqq, 1);
961 bnxt_re_change_cq_phase(cq);
962 skipp_real:
963 if (cnt) {
964 cnt = 0;
965 dqed++;
966 nwc--;
967 wc++;
968 }
969 }
970
971 if (likely(hw_polled))
972 bnxt_re_ring_cq_db(cq);
973
974 return dqed;
975 }
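
/*
 * bnxt_re_poll_one() walks valid CQEs in ring order: the type field selects
 * send, receive, terminal or cut-off handling, and all hardware entries
 * consumed in one pass are acknowledged with a single CQ doorbell at the end.
 * "pcqe" skips the head increment when a single hardware send CQE completes
 * more than one posted WQE, so the same CQE is revisited on the next
 * iteration to report the remaining work requests.
 */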
976
static int bnxt_re_poll_flush_wcs(struct bnxt_re_joint_queue *jqq,
				  struct ibv_wc *ibvwc, uint32_t qpid,
				  int nwc)
980 {
981 struct bnxt_re_queue *que;
982 struct bnxt_re_wrid *wrid;
983 uint32_t cnt = 0;
984
985 que = jqq->hwque;
	while (nwc) {
987 if (bnxt_re_is_que_empty(que))
988 break;
989 wrid = &jqq->swque[jqq->last_idx];
990 ibvwc->status = IBV_WC_WR_FLUSH_ERR;
991 ibvwc->opcode = wrid->wc_opcd;
992 ibvwc->wr_id = wrid->wrid;
993 ibvwc->qp_num = qpid;
994 ibvwc->byte_len = 0;
995 ibvwc->wc_flags = 0;
996
997 bnxt_re_jqq_mod_last(jqq, jqq->last_idx);
998 bnxt_re_incr_head(que, wrid->slots);
999 nwc--;
1000 cnt++;
1001 ibvwc++;
1002 }
1003
1004 return cnt;
1005 }
1006
static int bnxt_re_poll_flush_wqes(struct bnxt_re_cq *cq,
				   struct bnxt_re_list_head *lhead,
				   struct ibv_wc *ibvwc,
				   uint32_t nwc)
1011 {
1012 struct bnxt_re_list_node *cur, *tmp;
1013 struct bnxt_re_joint_queue *jqq;
1014 struct bnxt_re_qp *qp;
1015 bool sq_list = false;
1016 uint32_t polled = 0;
1017
1018 sq_list = (lhead == &cq->sfhead) ? true : false;
1019 if (!bnxt_re_list_empty(lhead)) {
1020 list_for_each_node_safe(cur, tmp, lhead) {
1021 if (sq_list) {
1022 qp = list_node(cur, struct bnxt_re_qp, snode);
1023 jqq = qp->jsqq;
1024 } else {
1025 qp = list_node(cur, struct bnxt_re_qp, rnode);
1026 jqq = qp->jrqq;
1027 if (!jqq) /* Using srq no need to flush */
1028 goto done;
1029 }
1030
1031 if (bnxt_re_is_que_empty(jqq->hwque))
1032 continue;
1033 polled += bnxt_re_poll_flush_wcs(jqq, ibvwc + polled,
1034 qp->qpid, nwc - polled);
1035 if (!(nwc - polled))
1036 break;
1037 }
1038 }
1039 done:
1040 return polled;
1041 }
1042
static int bnxt_re_poll_flush_lists(struct bnxt_re_cq *cq, uint32_t nwc,
				    struct ibv_wc *ibvwc)
1045 {
1046 int left, polled = 0;
1047
1048 polled = bnxt_re_poll_flush_wqes(cq, &cq->sfhead, ibvwc, nwc);
1049 left = nwc - polled;
1050
1051 if (!left)
1052 return polled;
1053
1054 polled += bnxt_re_poll_flush_wqes(cq, &cq->rfhead,
1055 ibvwc + polled, left);
1056 return polled;
1057 }
1058
static int bnxt_re_poll_resize_cq_list(struct bnxt_re_cq *cq, uint32_t nwc,
				       struct ibv_wc *ibvwc)
1061 {
1062 struct bnxt_re_list_node *cur, *tmp;
1063 struct bnxt_re_work_compl *compl;
1064 int left;
1065
1066 left = nwc;
1067 list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) {
1068 compl = list_node(cur, struct bnxt_re_work_compl, cnode);
1069 if (!left)
1070 break;
1071 memcpy(ibvwc, &compl->wc, sizeof(*ibvwc));
1072 ibvwc++;
1073 left--;
1074 bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head);
1075 free(compl);
1076 }
1077
1078 return nwc - left;
1079 }
1080
1081
int bnxt_re_poll_cq(struct ibv_cq *ibvcq, int nwc, struct ibv_wc *wc)
1083 {
1084 int dqed = 0, left = 0;
1085 struct bnxt_re_cq *cq;
1086 uint32_t resize = 0;
1087
1088 cq = container_of(ibvcq, struct bnxt_re_cq, ibvcq);
1089 bnxt_re_dp_spin_lock(&cq->cqq->qlock);
1090
1091 left = nwc;
1092 /* Check whether we have anything to be completed from prev cq context */
1093 if (unlikely(!bnxt_re_list_empty(&cq->prev_cq_head))) {
1094 dqed = bnxt_re_poll_resize_cq_list(cq, nwc, wc);
1095 left = nwc - dqed;
1096 if (!left) {
1097 bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
1098 return dqed;
1099 }
1100 }
1101
1102 dqed += bnxt_re_poll_one(cq, left, wc + dqed, &resize);
1103 /* Check if anything is there to flush. */
1104 left = nwc - dqed;
1105 if (left && (!bnxt_re_list_empty(&cq->sfhead) ||
1106 !bnxt_re_list_empty(&cq->rfhead)))
1107 dqed += bnxt_re_poll_flush_lists(cq, left, (wc + dqed));
1108 bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
1109
1110 return dqed;
1111 }
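
/*
 * Consumer-side sketch (illustration only):
 *
 *	struct ibv_wc wc[16];
 *	int n = ibv_poll_cq(cq, 16, wc);
 *
 * The call lands in bnxt_re_poll_cq() above, which drains, in order: stale
 * completions saved across a resize, the live CQ ring, and finally the
 * software flush lists for QPs that have moved to the error state.
 */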
1112
void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
1114 {
1115 struct bnxt_re_queue *que = cq->cqq;
1116 struct bnxt_re_req_cqe *scqe;
1117 struct bnxt_re_rc_cqe *rcqe;
1118 struct bnxt_re_bcqe *hdr;
1119 int indx, type;
1120 void *cqe;
1121
1122
1123 bnxt_re_dp_spin_lock(&que->qlock);
	for (indx = 0; indx < que->depth; indx++) {
1125 cqe = que->va + indx * bnxt_re_get_cqe_sz();
1126 hdr = cqe + sizeof(struct bnxt_re_req_cqe);
1127 type = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_TYPE_SHIFT) &
1128 BNXT_RE_BCQE_TYPE_MASK;
1129
1130 if (type == BNXT_RE_WC_TYPE_COFF)
1131 continue;
1132 if (type == BNXT_RE_WC_TYPE_SEND ||
1133 type == BNXT_RE_WC_TYPE_TERM) {
1134 scqe = cqe;
1135 if (scqe->qp_handle == (uint64_t)qp)
1136 scqe->qp_handle = 0ULL;
1137 } else {
1138 rcqe = cqe;
1139 if (rcqe->qp_handle == (uint64_t)qp)
1140 rcqe->qp_handle = 0ULL;
1141 }
1142
1143 }
1144
1145 if (_is_db_drop_recovery_enable(cq->cntx)) {
1146 pthread_spin_lock(&cq->cntx->cq_dbr_res.lock);
1147 bnxt_re_list_del_node(&cq->dbnode, &cq->cntx->cq_dbr_res.head);
1148 pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock);
1149 }
1150 bnxt_re_list_del_node(&qp->snode, &cq->sfhead);
1151 bnxt_re_list_del_node(&qp->rnode, &cq->rfhead);
1152 bnxt_re_dp_spin_unlock(&que->qlock);
1153 }
1154
void bnxt_re_cq_event(struct ibv_cq *ibvcq)
1156 {
1157
1158 }
1159
int bnxt_re_arm_cq(struct ibv_cq *ibvcq, int flags)
1161 {
1162 struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
1163
1164 bnxt_re_dp_spin_lock(&cq->cqq->qlock);
1165 flags = !flags ? BNXT_RE_QUE_TYPE_CQ_ARMALL :
1166 BNXT_RE_QUE_TYPE_CQ_ARMSE;
1167
1168 bnxt_re_ring_cq_arm_db(cq, flags);
1169 bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
1170
1171 return 0;
1172 }
1173
static int bnxt_re_check_qp_limits(struct bnxt_re_context *cntx,
				   struct ibv_qp_init_attr *attr)
1176 {
1177 struct ibv_device_attr *devattr;
1178 struct bnxt_re_dev *rdev;
1179
1180 rdev = cntx->rdev;
1181 devattr = &rdev->devattr;
1182 if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD)
1183 return EINVAL;
1184 if (attr->cap.max_send_sge > devattr->max_sge)
1185 return EINVAL;
1186 if (attr->cap.max_recv_sge > devattr->max_sge)
1187 return EINVAL;
1188 if (cntx->modes & BNXT_RE_WQE_MODE_VARIABLE) {
1189 if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE_VAR_WQE)
1190 return -EINVAL;
1191 } else if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE) {
1192 return EINVAL;
1193 }
1194 if (attr->cap.max_send_wr > devattr->max_qp_wr)
1195 attr->cap.max_send_wr = devattr->max_qp_wr;
1196 if (attr->cap.max_recv_wr > devattr->max_qp_wr)
1197 attr->cap.max_recv_wr = devattr->max_qp_wr;
1198
1199 return 0;
1200 }
1201
static int bnxt_re_get_rq_slots(struct bnxt_re_dev *rdev, uint8_t qpmode,
				uint32_t nrwr, uint32_t nsge, uint32_t *esz)
1204 {
1205 uint32_t max_wqesz;
1206 uint32_t wqe_size;
1207 uint32_t stride;
1208 uint32_t slots;
1209
1210 stride = sizeof(struct bnxt_re_sge);
1211 max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge);
1212
1213 wqe_size = bnxt_re_calc_wqe_sz(nsge);
1214 if (wqe_size > max_wqesz)
1215 return -EINVAL;
1216
1217 if (qpmode == BNXT_RE_WQE_MODE_STATIC)
1218 wqe_size = bnxt_re_calc_wqe_sz(6);
1219
1220 if (esz)
1221 *esz = wqe_size;
1222
1223 slots = (nrwr * wqe_size) / stride;
1224 return slots;
1225 }
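
/*
 * Slot math example (illustrative, assuming a 16-byte struct bnxt_re_sge):
 * a 128-byte RQ WQE and nrwr = 64 give slots = (64 * 128) / 16 = 512.
 */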
1226
1227 #define BNXT_VAR_MAX_SLOT_ALIGN 256
1228
static int bnxt_re_get_sq_slots(struct bnxt_re_dev *rdev,
				uint8_t qpmode, uint32_t nswr,
				uint32_t nsge, uint32_t ils, uint32_t *esize)
1232 {
1233 uint32_t max_wqesz;
1234 uint32_t wqe_size;
1235 uint32_t cal_ils;
1236 uint32_t stride;
1237 uint32_t ilsize;
1238 uint32_t hdr_sz;
1239 uint32_t slots;
1240 uint32_t align;
1241
1242 hdr_sz = bnxt_re_get_sqe_hdr_sz();
1243 stride = sizeof(struct bnxt_re_sge);
1244 align = hdr_sz;
1245 if (qpmode == BNXT_RE_WQE_MODE_VARIABLE)
1246 align = stride;
1247 max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge);
1248 ilsize = get_aligned(ils, align);
1249
1250 wqe_size = bnxt_re_calc_wqe_sz(nsge);
1251 if (ilsize) {
1252 cal_ils = hdr_sz + ilsize;
1253 wqe_size = MAX(cal_ils, wqe_size);
1254 wqe_size = get_aligned(wqe_size, align);
1255 }
1256 if (wqe_size > max_wqesz)
1257 return -EINVAL;
1258
1259 if (qpmode == BNXT_RE_WQE_MODE_STATIC)
1260 wqe_size = bnxt_re_calc_wqe_sz(6);
1261
1262 if (esize)
1263 *esize = wqe_size;
1264 slots = (nswr * wqe_size) / stride;
1265 if (qpmode == BNXT_RE_WQE_MODE_VARIABLE)
1266 slots = get_aligned(slots, BNXT_VAR_MAX_SLOT_ALIGN);
1267 return slots;
1268 }
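
/*
 * In variable WQE mode the slot count is additionally rounded up to a
 * multiple of BNXT_VAR_MAX_SLOT_ALIGN (256 slots); the alignment constant
 * comes from the definition above, while the presumed reason is to keep the
 * ring size friendly to the hardware's producer-index arithmetic.
 */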
1269
static int bnxt_re_get_sqmem_size(struct bnxt_re_context *cntx,
				  struct ibv_qp_init_attr *attr,
				  struct bnxt_re_qattr *qattr)
1273 {
1274 uint32_t nsge, nswr, diff = 0;
1275 size_t bytes = 0;
1276 uint32_t npsn;
1277 uint32_t ils;
1278 uint8_t mode;
1279 uint32_t esz;
1280 int nslots;
1281
1282 mode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE;
1283 nsge = attr->cap.max_send_sge;
1284 diff = bnxt_re_get_diff(cntx->comp_mask);
1285 nswr = attr->cap.max_send_wr + 1 + diff;
1286 nswr = bnxt_re_init_depth(nswr, cntx->comp_mask);
1287 ils = attr->cap.max_inline_data;
1288 nslots = bnxt_re_get_sq_slots(cntx->rdev, mode, nswr,
1289 nsge, ils, &esz);
1290 if (nslots < 0)
1291 return nslots;
1292 npsn = bnxt_re_get_npsn(mode, nswr, nslots);
1293 if (BNXT_RE_HW_RETX(cntx))
1294 npsn = roundup_pow_of_two(npsn);
1295
1296 qattr->nwr = nswr;
1297 qattr->slots = nslots;
1298 qattr->esize = esz;
1299 if (mode)
1300 qattr->sw_nwr = nslots;
1301 else
1302 qattr->sw_nwr = nswr;
1303
1304 bytes = nslots * sizeof(struct bnxt_re_sge); /* ring */
1305 bytes += npsn * bnxt_re_get_psne_size(cntx); /* psn */
1306 qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size);
1307 qattr->sz_shad = qattr->sw_nwr * sizeof(struct bnxt_re_wrid); /* shadow */
1308 return 0;
1309 }
1310
static int bnxt_re_get_rqmem_size(struct bnxt_re_context *cntx,
				  struct ibv_qp_init_attr *attr,
				  struct bnxt_re_qattr *qattr)
1314 {
1315 uint32_t nrwr, nsge;
1316 size_t bytes = 0;
1317 uint32_t esz;
1318 int nslots;
1319
1320 nsge = attr->cap.max_recv_sge;
1321 nrwr = attr->cap.max_recv_wr + 1;
1322 nrwr = bnxt_re_init_depth(nrwr, cntx->comp_mask);
1323 nslots = bnxt_re_get_rq_slots(cntx->rdev, cntx->modes,
1324 nrwr, nsge, &esz);
1325 if (nslots < 0)
1326 return nslots;
1327 qattr->nwr = nrwr;
1328 qattr->slots = nslots;
1329 qattr->esize = esz;
1330 qattr->sw_nwr = nrwr;
1331
1332 bytes = nslots * sizeof(struct bnxt_re_sge);
1333 qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size);
1334 qattr->sz_shad = nrwr * sizeof(struct bnxt_re_wrid);
1335 return 0;
1336 }
1337
static int bnxt_re_get_qpmem_size(struct bnxt_re_context *cntx,
				  struct ibv_qp_init_attr *attr,
				  struct bnxt_re_qattr *qattr)
1341 {
1342 int size = 0;
1343 int tmp;
1344 int rc;
1345
1346 size = sizeof(struct bnxt_re_qp);
1347 tmp = sizeof(struct bnxt_re_joint_queue);
1348 tmp += sizeof(struct bnxt_re_queue);
1349 size += tmp;
1350
1351 rc = bnxt_re_get_sqmem_size(cntx, attr, &qattr[BNXT_RE_QATTR_SQ_INDX]);
1352 if (rc < 0)
1353 return -EINVAL;
1354 size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_ring;
1355 size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_shad;
1356
1357 if (!attr->srq) {
1358 tmp = sizeof(struct bnxt_re_joint_queue);
1359 tmp += sizeof(struct bnxt_re_queue);
1360 size += tmp;
1361 rc = bnxt_re_get_rqmem_size(cntx, attr,
1362 &qattr[BNXT_RE_QATTR_RQ_INDX]);
1363 if (rc < 0)
1364 return -EINVAL;
1365 size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_ring;
1366 size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_shad;
1367 }
1368 return size;
1369 }
1370
static void *bnxt_re_alloc_qpslab(struct bnxt_re_context *cntx,
				  struct ibv_qp_init_attr *attr,
				  struct bnxt_re_qattr *qattr)
1374 {
1375 int bytes;
1376
1377 bytes = bnxt_re_get_qpmem_size(cntx, attr, qattr);
1378 if (bytes < 0)
1379 return NULL;
1380 return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size);
1381 }
1382
static int bnxt_re_alloc_queue_ptr(struct bnxt_re_qp *qp,
				   struct ibv_qp_init_attr *attr)
1385 {
1386 int rc = -ENOMEM;
1387 int jqsz, qsz;
1388
1389 jqsz = sizeof(struct bnxt_re_joint_queue);
1390 qsz = sizeof(struct bnxt_re_queue);
1391 qp->jsqq = bnxt_re_get_obj(qp->mem, jqsz);
1392 if (!qp->jsqq)
1393 return rc;
1394 qp->jsqq->hwque = bnxt_re_get_obj(qp->mem, qsz);
1395 if (!qp->jsqq->hwque)
1396 goto fail;
1397
1398 if (!attr->srq) {
1399 qp->jrqq = bnxt_re_get_obj(qp->mem, jqsz);
1400 if (!qp->jrqq)
1401 goto fail;
1402 qp->jrqq->hwque = bnxt_re_get_obj(qp->mem, qsz);
1403 if (!qp->jrqq->hwque)
1404 goto fail;
1405 }
1406
1407 return 0;
1408 fail:
1409 return rc;
1410 }
1411
static int bnxt_re_alloc_init_swque(struct bnxt_re_joint_queue *jqq,
				    struct bnxt_re_mem *mem,
				    struct bnxt_re_qattr *qattr)
1415 {
1416 int indx;
1417
1418 jqq->swque = bnxt_re_get_obj(mem, qattr->sz_shad);
1419 if (!jqq->swque)
1420 return -ENOMEM;
1421 jqq->start_idx = 0;
1422 jqq->last_idx = qattr->sw_nwr - 1;
1423 for (indx = 0; indx < qattr->sw_nwr; indx++)
1424 jqq->swque[indx].next_idx = indx + 1;
1425 jqq->swque[jqq->last_idx].next_idx = 0;
1426 jqq->last_idx = 0;
1427
1428 return 0;
1429 }
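
/*
 * The shadow queue entries are linked into a ring through next_idx
 * (0 -> 1 -> ... -> sw_nwr-1 -> 0); start_idx and last_idx then track the
 * next entry to post and the next entry to complete, as used by the posting
 * and poll paths in this file.
 */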
1430
static inline int bnxt_log2(int n)
1432 {
1433 int t;
1434
1435 if (n <= 0)
1436 return -1;
1437
1438 t = 0;
1439 while ((1 << t) < n)
1440 ++t;
1441
1442 return t;
1443 }
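
/*
 * bnxt_log2() returns the ceiling of log2(n) for n > 0, e.g.
 * bnxt_log2(16) == 4 and bnxt_log2(17) == 5; it is used in
 * bnxt_re_alloc_queues() below to derive que->pad_stride_log2.
 */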
1444
static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp,
				struct ibv_qp_init_attr *attr,
				struct bnxt_re_qattr *qattr)
1448 {
1449 struct bnxt_re_context *cntx;
1450 struct bnxt_re_queue *que;
1451 uint32_t psn_size;
1452 uint8_t indx;
1453 int ret;
1454
1455 cntx = qp->cntx;
1456
1457 indx = BNXT_RE_QATTR_SQ_INDX;
1458 que = qp->jsqq->hwque;
1459 que->stride = sizeof(struct bnxt_re_sge);
1460 que->depth = qattr[indx].slots;
1461 que->diff = (bnxt_re_get_diff(cntx->comp_mask) * qattr[indx].esize) /
1462 que->stride;
1463 que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring);
1464 if (!que->va)
1465 return -ENOMEM;
	/* PSN-search memory is allocated without checking the QP type.
	 * The kernel driver does not map this memory for UD QPs; UD QPs
	 * use it to maintain the WC opcode instead.
	 * See the definition of bnxt_re_fill_psns() for the use case.
	 */
1471 que->pad = (que->va + que->depth * que->stride);
1472 psn_size = bnxt_re_get_psne_size(qp->cntx);
1473 que->pad_stride_log2 = (uint32_t)bnxt_log2((double)(psn_size - 1));
1474
1475 ret = bnxt_re_alloc_init_swque(qp->jsqq, qp->mem, &qattr[indx]);
1476 if (ret)
1477 goto fail;
1478
1479 qp->cap.max_swr = qattr[indx].sw_nwr;
1480 qp->jsqq->cntx = qp->cntx;
1481 que->dbtail = (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE) ?
1482 &que->tail : &qp->jsqq->start_idx;
1483
1484 /* Init and adjust MSN table size according to qp mode */
1485 if (!BNXT_RE_HW_RETX(qp->cntx))
1486 goto skip_msn;
1487 que->msn = 0;
1488 que->msn_tbl_sz = 0;
1489 if (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE)
1490 que->msn_tbl_sz = roundup_pow_of_two(qattr->slots) / 2;
1491 else
1492 que->msn_tbl_sz = roundup_pow_of_two(qattr->nwr);
1493 skip_msn:
1494 bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
1495
1496 if (qp->jrqq) {
1497 indx = BNXT_RE_QATTR_RQ_INDX;
1498 que = qp->jrqq->hwque;
1499 que->stride = sizeof(struct bnxt_re_sge);
1500 que->depth = qattr[indx].slots;
1501 que->max_slots = qattr[indx].esize / que->stride;
1502 que->dbtail = &qp->jrqq->start_idx;
1503 que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring);
1504 if (!que->va)
1505 return -ENOMEM;
		/* For the RQ, only bnxt_re_wrid.wrid is used. */
1507 ret = bnxt_re_alloc_init_swque(qp->jrqq, qp->mem, &qattr[indx]);
1508 if (ret)
1509 goto fail;
1510
1511 bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
1512 qp->cap.max_rwr = qattr[indx].nwr;
1513 qp->jrqq->cntx = qp->cntx;
1514 }
1515
1516 return 0;
1517 fail:
1518 return ret;
1519 }
1520
void bnxt_re_async_event(struct ibv_async_event *event)
1522 {
1523 struct ibv_qp *ibvqp;
1524 struct bnxt_re_qp *qp;
1525
1526 switch (event->event_type) {
1527 case IBV_EVENT_CQ_ERR:
1528 break;
1529 case IBV_EVENT_SRQ_ERR:
1530 case IBV_EVENT_QP_FATAL:
1531 case IBV_EVENT_QP_REQ_ERR:
1532 case IBV_EVENT_QP_ACCESS_ERR:
1533 case IBV_EVENT_PATH_MIG_ERR: {
1534 ibvqp = event->element.qp;
1535 qp = to_bnxt_re_qp(ibvqp);
1536 bnxt_re_qp_move_flush_err(qp);
1537 break;
1538 }
1539 case IBV_EVENT_SQ_DRAINED:
1540 case IBV_EVENT_PATH_MIG:
1541 case IBV_EVENT_COMM_EST:
1542 case IBV_EVENT_QP_LAST_WQE_REACHED:
1543 case IBV_EVENT_SRQ_LIMIT_REACHED:
1544 case IBV_EVENT_PORT_ACTIVE:
1545 case IBV_EVENT_PORT_ERR:
1546 default:
1547 break;
1548 }
1549 }
1550
struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *ibvpd,
				 struct ibv_qp_init_attr *attr)
1553 {
1554 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvpd->context);
1555 struct bnxt_re_qp_resp resp = {};
1556 struct ibv_device_attr *devattr;
1557 struct bnxt_re_qp_req req = {};
1558 struct bnxt_re_qattr qattr[2];
1559 struct bnxt_re_qpcap *cap;
1560 struct bnxt_re_dev *rdev;
1561 struct bnxt_re_qp *qp;
1562 void *mem;
1563
1564 if (bnxt_re_check_qp_limits(cntx, attr))
1565 return NULL;
1566
1567 memset(qattr, 0, (2 * sizeof(*qattr)));
1568 mem = bnxt_re_alloc_qpslab(cntx, attr, qattr);
1569 if (!mem)
1570 return NULL;
1571 qp = bnxt_re_get_obj(mem, sizeof(*qp));
1572 if (!qp)
1573 goto fail;
1574 qp->mem = mem;
1575
1576 qp->cctx = cntx->cctx;
1577
1578 qp->cntx = cntx;
1579 qp->qpmode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE;
1580 /* alloc queue pointers */
1581 if (bnxt_re_alloc_queue_ptr(qp, attr))
1582 goto fail;
1583 /* alloc queues */
1584 if (bnxt_re_alloc_queues(qp, attr, qattr))
1585 goto fail;
1586 /* Fill ibv_cmd */
1587 cap = &qp->cap;
1588 req.qpsva = (uint64_t)qp->jsqq->hwque->va;
1589 req.qprva = qp->jrqq ? (uint64_t)qp->jrqq->hwque->va : 0;
1590 req.qp_handle = (uint64_t)qp;
1591 if (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE)
1592 req.sq_slots = qattr[BNXT_RE_QATTR_SQ_INDX].slots;
1593
1594 if (ibv_cmd_create_qp(ibvpd, &qp->ibvqp, attr, &req.cmd, sizeof(req),
1595 &resp.resp, sizeof(resp)))
1596 goto fail;
1597
1598 qp->qpid = resp.qpid;
1599 qp->qptyp = attr->qp_type;
1600 qp->qpst = IBV_QPS_RESET;
1601 qp->scq = to_bnxt_re_cq(attr->send_cq);
1602 qp->rcq = to_bnxt_re_cq(attr->recv_cq);
1603 if (attr->srq)
1604 qp->srq = to_bnxt_re_srq(attr->srq);
1605 qp->udpi = &cntx->udpi;
1606 qp->rand.seed = qp->qpid;
1607 qp->sq_shadow_db_key = BNXT_RE_DB_KEY_INVALID;
1608 qp->rq_shadow_db_key = BNXT_RE_DB_KEY_INVALID;
1609 qp->sq_msn = 0;
1610
1611 rdev = cntx->rdev;
1612 devattr = &rdev->devattr;
1613 cap->max_ssge = attr->cap.max_send_sge;
1614 cap->max_rsge = attr->cap.max_recv_sge;
1615 cap->max_inline = attr->cap.max_inline_data;
1616 cap->sqsig = attr->sq_sig_all;
1617 cap->is_atomic_cap = devattr->atomic_cap;
1618 INIT_DBLY_LIST_NODE(&qp->snode);
1619 INIT_DBLY_LIST_NODE(&qp->rnode);
1620 INIT_DBLY_LIST_NODE(&qp->dbnode);
1621
1622 /* For SR2, push will be negotiated at modify qp */
1623 if (_is_chip_gen_p5(qp->cctx) && cntx->udpi.wcdpi) {
1624 qp->push_st_en = 1;
1625 qp->max_push_sz = BNXT_RE_MAX_INLINE_SIZE;
1626 }
1627
1628 if (_is_db_drop_recovery_enable(cntx)) {
1629 pthread_spin_lock(&cntx->qp_dbr_res.lock);
1630 bnxt_re_list_add_node(&qp->dbnode, &cntx->qp_dbr_res.head);
1631 pthread_spin_unlock(&cntx->qp_dbr_res.lock);
1632 }
1633 return &qp->ibvqp;
1634 fail:
1635 bnxt_re_free_mem(mem);
1636 return NULL;
1637 }
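
/*
 * Consumer-side sketch (illustration only):
 *
 *	struct ibv_qp_init_attr attr = {
 *		.send_cq = scq, .recv_cq = rcq,
 *		.cap = { .max_send_wr = 64, .max_recv_wr = 64,
 *			 .max_send_sge = 2, .max_recv_sge = 2 },
 *		.qp_type = IBV_QPT_RC,
 *	};
 *	struct ibv_qp *qp = ibv_create_qp(pd, &attr);
 *
 * libibverbs routes the call to bnxt_re_create_qp() above, which carves the
 * QP, joint queues, rings and shadow queues out of a single slab allocation.
 */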
1638
int bnxt_re_modify_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr,
		      int attr_mask)
1641 {
1642 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
1643 int rc;
1644
1645 struct bnxt_re_modify_ex_resp resp = {};
1646 struct bnxt_re_modify_ex_req req = {};
1647 bool can_issue_mqp_ex = false;
1648
1649 if (bnxt_re_is_mqp_ex_supported(qp->cntx)) {
1650 can_issue_mqp_ex = true;
1651 /* Request for PPP */
1652 if (can_request_ppp(qp, attr, attr_mask)) {
1653 req.comp_mask |= BNXT_RE_MQP_PPP_REQ_EN;
1654 req.dpi = qp->udpi->wcdpi;
1655 }
1656 if (attr_mask & IBV_QP_PATH_MTU)
1657 req.comp_mask |= BNXT_RE_MQP_PATH_MTU_MASK;
1658 }
1659 rc = ibv_cmd_modify_qp_compat(ibvqp, attr, attr_mask,
1660 can_issue_mqp_ex, &req, &resp);
1661 if (!rc) {
1662 if (attr_mask & IBV_QP_STATE) {
1663 qp->qpst = attr->qp_state;
1664 /* transition to reset */
1665 if (qp->qpst == IBV_QPS_RESET) {
1666 qp->jsqq->hwque->head = 0;
1667 qp->jsqq->hwque->tail = 0;
1668 *qp->jsqq->hwque->dbtail = 0;
1669 qp->jsqq->start_idx = 0;
1670 qp->jsqq->last_idx = 0;
1671 bnxt_re_cleanup_cq(qp, qp->scq);
1672 if (qp->jrqq) {
1673 qp->jrqq->hwque->head = 0;
1674 qp->jrqq->hwque->tail = 0;
1675 *qp->jrqq->hwque->dbtail = 0;
1676 qp->jrqq->start_idx = 0;
1677 qp->jrqq->last_idx = 0;
1678 bnxt_re_cleanup_cq(qp, qp->rcq);
1679 }
1680 }
1681 /* Copy if PUSH was enabled */
1682 if (resp.comp_mask & BNXT_RE_MQP_PPP_REQ_EN_MASK) {
1683 qp->push_st_en = BNXT_RE_MQP_PPP_REQ_EN;
1684 /* Set the next posting state
1685 * based on current h/w state
1686 */
1687 qp->push_st_en |=
1688 !(!!(resp.ppp_st_idx &
1689 BNXT_RE_MQP_PPP_STATE)) <<
1690 BNXT_RE_PPP_ST_SHIFT;
1691 qp->ppp_idx =
1692 (resp.ppp_st_idx &
1693 BNXT_RE_MQP_PPP_IDX_MASK);
1694 if (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE)
1695 qp->max_push_sz =
1696 BNXT_RE_MAX_PUSH_SIZE_VAR_WQE;
1697 else
1698 qp->max_push_sz =
1699 BNXT_RE_MAX_INLINE_SIZE;
1700 }
1701 }
1702
1703 if (attr_mask & IBV_QP_SQ_PSN)
1704 qp->sq_psn = attr->sq_psn;
1705
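		/* 0x80 << path_mtu maps IBV_MTU_256..IBV_MTU_4096 (enum
		 * values 1..5) to 256..4096 bytes.
		 */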
1706 if (resp.comp_mask & BNXT_RE_MQP_PATH_MTU_MASK)
1707 qp->mtu = resp.path_mtu;
1708 else if (attr_mask & IBV_QP_PATH_MTU)
1709 qp->mtu = (0x80 << attr->path_mtu);
1710 }
1711
1712 return rc;
1713 }
1714
int bnxt_re_query_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr,
		     int attr_mask, struct ibv_qp_init_attr *init_attr)
1717 {
1718 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
1719 struct ibv_query_qp cmd = {};
1720 int rc;
1721
1722 rc = ibv_cmd_query_qp(ibvqp, attr, attr_mask, init_attr,
1723 &cmd, sizeof(cmd));
1724 if (!rc)
1725 qp->qpst = ibvqp->state;
1726
1727 return rc;
1728 }
1729
int bnxt_re_destroy_qp(struct ibv_qp *ibvqp)
1731 {
1732 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
1733 struct bnxt_re_mem *mem;
1734 int status;
1735
1736 qp->qpst = IBV_QPS_RESET;
1737 if (_is_db_drop_recovery_enable(qp->cntx)) {
1738 pthread_spin_lock(&qp->cntx->qp_dbr_res.lock);
1739 bnxt_re_list_del_node(&qp->dbnode, &qp->cntx->qp_dbr_res.head);
1740 pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock);
1741 }
1742 status = ibv_cmd_destroy_qp(ibvqp);
1743 if (status) {
1744 if (_is_db_drop_recovery_enable(qp->cntx)) {
1745 pthread_spin_lock(&qp->cntx->qp_dbr_res.lock);
1746 bnxt_re_list_add_node(&qp->dbnode,
1747 &qp->cntx->qp_dbr_res.head);
1748 pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock);
1749 }
1750 return status;
1751 }
1752 bnxt_re_cleanup_cq(qp, qp->rcq);
1753 bnxt_re_cleanup_cq(qp, qp->scq);
1754 mem = qp->mem;
1755 bnxt_re_free_mem(mem);
1756 return 0;
1757 }
1758
static void bnxt_re_put_rx_sge(struct bnxt_re_queue *que, uint32_t *idx,
			       struct ibv_sge *sgl, int nsg)
1761 {
1762 struct bnxt_re_sge *sge;
1763 int indx;
1764
1765 for (indx = 0; indx < nsg; indx++) {
1766 sge = bnxt_re_get_hwqe(que, (*idx)++);
1767 sge->pa = htole64(sgl[indx].addr);
1768 sge->lkey = htole32(sgl[indx].lkey);
1769 sge->length = htole32(sgl[indx].length);
1770 }
1771 }
1772
static int bnxt_re_put_tx_sge(struct bnxt_re_queue *que, uint32_t *idx,
			      struct ibv_sge *sgl, int nsg)
1775 {
1776 struct bnxt_re_sge *sge;
1777 int indx;
1778 int len;
1779
1780 len = 0;
1781 for (indx = 0; indx < nsg; indx++) {
1782 sge = bnxt_re_get_hwqe(que, (*idx)++);
1783 sge->pa = htole64(sgl[indx].addr);
1784 sge->lkey = htole32(sgl[indx].lkey);
1785 sge->length = htole32(sgl[indx].length);
1786 len += sgl[indx].length;
1787 }
1788 return len;
1789 }
1790
static inline int bnxt_re_calc_inline_len(struct ibv_send_wr *swr)
1792 {
1793 int illen, indx;
1794
1795 illen = 0;
1796 for (indx = 0; indx < swr->num_sge; indx++)
1797 illen += swr->sg_list[indx].length;
1798 return get_aligned(illen, sizeof(struct bnxt_re_sge));
1799 }
1800
static int bnxt_re_put_inline(struct bnxt_re_queue *que, uint32_t *idx,
			      struct bnxt_re_push_buffer *pbuf,
			      struct ibv_sge *sgl, uint32_t nsg,
			      uint16_t max_ils)
1805 {
1806 int len, t_len, offt = 0;
1807 int t_cplen = 0, cplen;
1808 bool pull_dst = true;
1809 void *il_dst = NULL;
1810 void *il_src = NULL;
1811 int alsize;
1812 int indx;
1813
1814 alsize = sizeof(struct bnxt_re_sge);
1815
1816 t_len = 0;
1817 for (indx = 0; indx < nsg; indx++) {
1818 len = sgl[indx].length;
1819 il_src = (void *)sgl[indx].addr;
1820 t_len += len;
1821 if (t_len > max_ils)
1822 goto bad;
1823 while (len) {
1824 if (pull_dst) {
1825 pull_dst = false;
1826 il_dst = bnxt_re_get_hwqe(que, (*idx)++);
1827 if (pbuf)
1828 pbuf->wqe[*idx - 1] =
1829 (__u64)il_dst;
1830 t_cplen = 0;
1831 offt = 0;
1832 }
1833 cplen = MIN(len, alsize);
1834 			cplen = MIN(cplen, (alsize - offt));
1835 memcpy(il_dst, il_src, cplen);
1836 t_cplen += cplen;
1837 il_src += cplen;
1838 il_dst += cplen;
1839 offt += cplen;
1840 len -= cplen;
1841 if (t_cplen == alsize)
1842 pull_dst = true;
1843 }
1844 }
1845
1846 return t_len;
1847 bad:
1848 return -ENOMEM;
1849 }
1850
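/* Work out how many 16-byte slots this WR needs in the SQ. For inline
 * sends this may also claim a push buffer when push is enabled and the
 * payload fits. In static WQE mode every WQE occupies a fixed 8 slots.
 */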
1851 static int bnxt_re_required_slots(struct bnxt_re_qp *qp, struct ibv_send_wr *wr,
1852 uint32_t *wqe_sz, void **pbuf)
1853 {
1854 uint32_t wqe_byte;
1855 int ilsize;
1856
1857 if (wr->send_flags & IBV_SEND_INLINE) {
1858 ilsize = bnxt_re_calc_inline_len(wr);
1859 if (ilsize > qp->cap.max_inline)
1860 return -EINVAL;
1861 if (qp->push_st_en && ilsize <= qp->max_push_sz)
1862 *pbuf = bnxt_re_get_pbuf(&qp->push_st_en, qp->ppp_idx, qp->cntx);
1863 wqe_byte = (ilsize + bnxt_re_get_sqe_hdr_sz());
1864 } else {
1865 wqe_byte = bnxt_re_calc_wqe_sz(wr->num_sge);
1866 }
1867
1868 	/* que->stride is always 2^4 = 16 bytes, so hard-code the shift */
1869 *wqe_sz = wqe_byte >> 4;
1870 if (qp->qpmode == BNXT_RE_WQE_MODE_STATIC)
1871 return 8;
1872 return *wqe_sz;
1873 }
1874
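/* Build the rsv_ws_fl_wt word of the base SQE header: completion,
 * fence, solicited-event and inline flags, the WQE size in slots and
 * the hardware opcode.
 */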
1875 static inline void bnxt_re_set_hdr_flags(struct bnxt_re_bsqe *hdr,
1876 struct ibv_send_wr *wr,
1877 uint32_t slots, uint8_t sqsig)
1878 {
1879 uint32_t send_flags;
1880 uint32_t hdrval = 0;
1881 uint8_t opcd;
1882
1883 send_flags = wr->send_flags;
1884 	if ((send_flags & IBV_SEND_SIGNALED) || sqsig)
1885 hdrval |= ((BNXT_RE_WR_FLAGS_SIGNALED & BNXT_RE_HDR_FLAGS_MASK)
1886 << BNXT_RE_HDR_FLAGS_SHIFT);
1887 if (send_flags & IBV_SEND_FENCE)
1888 hdrval |= ((BNXT_RE_WR_FLAGS_UC_FENCE & BNXT_RE_HDR_FLAGS_MASK)
1889 << BNXT_RE_HDR_FLAGS_SHIFT);
1890 if (send_flags & IBV_SEND_SOLICITED)
1891 hdrval |= ((BNXT_RE_WR_FLAGS_SE & BNXT_RE_HDR_FLAGS_MASK)
1892 << BNXT_RE_HDR_FLAGS_SHIFT);
1893 if (send_flags & IBV_SEND_INLINE)
1894 hdrval |= ((BNXT_RE_WR_FLAGS_INLINE & BNXT_RE_HDR_FLAGS_MASK)
1895 << BNXT_RE_HDR_FLAGS_SHIFT);
1896 hdrval |= (slots & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT;
1897
1898 /* Fill opcode */
1899 opcd = ibv_to_bnxt_re_wr_opcd[wr->opcode];
1900 hdrval |= (opcd & BNXT_RE_HDR_WT_MASK);
1901 hdr->rsv_ws_fl_wt = htole32(hdrval);
1902 }
1903
1904 static int bnxt_re_build_tx_sge(struct bnxt_re_queue *que, uint32_t *idx,
1905 struct bnxt_re_push_buffer *pbuf,
1906 struct ibv_send_wr *wr,
1907 uint16_t max_il)
1908 {
1909 if (wr->send_flags & IBV_SEND_INLINE)
1910 return bnxt_re_put_inline(que, idx, pbuf, wr->sg_list, wr->num_sge, max_il);
1911
1912 return bnxt_re_put_tx_sge(que, idx, wr->sg_list, wr->num_sge);
1913 }
1914
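/* Return a pointer into the PSN/MSN shadow area: indexed by the MSN
 * when HW retransmission is enabled, by the queue's dbtail otherwise.
 */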
1915 static void *bnxt_re_pull_psn_buff(struct bnxt_re_queue *que, bool hw_retx)
1916 {
1917 if (hw_retx)
1918 return (void *)(que->pad + ((que->msn) << que->pad_stride_log2));
1919 return (void *)(que->pad + ((*que->dbtail) << que->pad_stride_log2));
1920 }
1921
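/* HW-retransmission variant of the PSN bookkeeping: advance sq_psn by
 * the number of MTU-sized packets in this WR and record the start
 * slot index, start PSN and next PSN in the MSN table entry.
 */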
1922 static void bnxt_re_fill_psns_for_msntbl(struct bnxt_re_qp *qp, uint32_t len,
1923 uint32_t st_idx, uint8_t opcode)
1924 {
1925 uint32_t npsn = 0, start_psn = 0, next_psn = 0;
1926 struct bnxt_re_msns *msns;
1927 uint32_t pkt_cnt = 0;
1928
1929 msns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, true);
1930 msns->start_idx_next_psn_start_psn = 0;
1931
1932 if (qp->qptyp == IBV_QPT_RC) {
1933 start_psn = qp->sq_psn;
1934 pkt_cnt = (len / qp->mtu);
1935 if (len % qp->mtu)
1936 pkt_cnt++;
1937 		/* Increment the PSN even for zero-length packets,
1938 		 * e.g. rdma-write-with-imm-data with a zero-length
1939 		 * payload.
1940 		 */
1941 if (bnxt_re_is_zero_len_pkt(len, opcode))
1942 pkt_cnt = 1;
1943 		/* PSN is a 24-bit field */
1944 next_psn = qp->sq_psn + pkt_cnt;
1945 npsn = next_psn;
1946 qp->sq_psn = next_psn;
1947 msns->start_idx_next_psn_start_psn |=
1948 bnxt_re_update_msn_tbl(st_idx, npsn, start_psn);
1949 qp->jsqq->hwque->msn++;
1950 qp->jsqq->hwque->msn %= qp->jsqq->hwque->msn_tbl_sz;
1951 }
1952 }
1953
1954 static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, uint32_t len,
1955 uint32_t st_idx, uint8_t opcode)
1956 {
1957 uint32_t opc_spsn = 0, flg_npsn = 0;
1958 struct bnxt_re_psns_ext *psns_ext;
1959 uint32_t pkt_cnt = 0, nxt_psn = 0;
1960 struct bnxt_re_psns *psns;
1961
1962 psns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, false);
1963 psns_ext = (struct bnxt_re_psns_ext *)psns;
1964
1965 if (qp->qptyp == IBV_QPT_RC) {
1966 opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK;
1967 pkt_cnt = (len / qp->mtu);
1968 if (len % qp->mtu)
1969 pkt_cnt++;
1970 		/* Increment the PSN even for zero-length packets,
1971 		 * e.g. rdma-write-with-imm-data with a zero-length
1972 		 * payload.
1973 		 */
1974 if (bnxt_re_is_zero_len_pkt(len, opcode))
1975 pkt_cnt = 1;
1976 nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK);
1977 flg_npsn = nxt_psn;
1978 qp->sq_psn = nxt_psn;
1979 }
1980 psns->opc_spsn = htole32(opc_spsn);
1981 psns->flg_npsn = htole32(flg_npsn);
1982 	/* Valid only on gen P5/Thor2 chips when HW retransmission is off */
1983 	if (!BNXT_RE_HW_RETX(qp->cntx) && qp->cctx->chip_is_gen_p5_thor2)
1984 psns_ext->st_slot_idx = st_idx;
1985 }
1986
1987 static int bnxt_re_build_ud_sqe(struct ibv_send_wr *wr,
1988 struct bnxt_re_bsqe *hdr,
1989 struct bnxt_re_send *sqe)
1990 {
1991 struct bnxt_re_ah *ah;
1992 uint64_t qkey;
1993
1994 	if (!wr->wr.ud.ah)
1995 		return -EINVAL;
1996 	ah = to_bnxt_re_ah(wr->wr.ud.ah);
1997 qkey = wr->wr.ud.remote_qkey;
1998 hdr->lhdr.qkey_len |= htole64(qkey << 32);
1999 sqe->dst_qp = htole32(wr->wr.ud.remote_qpn);
2000 sqe->avid = htole32(ah->avid & 0xFFFFF);
2001
2002 return 0;
2003 }
2004
2005 static void bnxt_re_build_cns_sqe(struct ibv_send_wr *wr,
2006 struct bnxt_re_bsqe *hdr,
2007 void *hdr2)
2008 {
2009 struct bnxt_re_atomic *sqe = hdr2;
2010
2011 hdr->key_immd = htole32(wr->wr.atomic.rkey);
2012 hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr);
2013 sqe->cmp_dt = htole64(wr->wr.atomic.compare_add);
2014 sqe->swp_dt = htole64(wr->wr.atomic.swap);
2015 }
2016
2017 static void bnxt_re_build_fna_sqe(struct ibv_send_wr *wr,
2018 struct bnxt_re_bsqe *hdr,
2019 void *hdr2)
2020 {
2021 struct bnxt_re_atomic *sqe = hdr2;
2022
2023 hdr->key_immd = htole32(wr->wr.atomic.rkey);
2024 hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr);
2025 sqe->swp_dt = htole64(wr->wr.atomic.compare_add);
2026 }
2027
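/* Issue an RTS->RTS modify_qp and restart the WQE counter. Used below
 * as a workaround for a UD QP stall seen on chips that are not gen
 * P5/Thor2, once BNXT_RE_UD_QP_STALL WQEs have been posted.
 */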
2028 void bnxt_re_force_rts2rts(struct bnxt_re_qp *qp)
2029 {
2030 	struct ibv_qp_attr attr = {};
2031 	int attr_mask = IBV_QP_STATE;
2032 
2033 	attr.qp_state = IBV_QPS_RTS;
2034 bnxt_re_modify_qp(&qp->ibvqp, &attr, attr_mask);
2035 qp->wqe_cnt = 0;
2036 }
2037
2038 int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
2039 struct ibv_send_wr **bad)
2040 {
2041 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
2042 struct bnxt_re_queue *sq = qp->jsqq->hwque;
2043 struct bnxt_re_push_buffer *pbuf = NULL;
2044 bool chip_is_not_gen_p5_thor2;
2045 int slots, ret = 0, len = 0;
2046 uint32_t swq_idx, wqe_size;
2047 struct bnxt_re_wrid *wrid;
2048 struct bnxt_re_rdma *rsqe;
2049 struct bnxt_re_bsqe *hdr;
2050 struct bnxt_re_send *sqe;
2051 bool ring_db = false;
2052 uint32_t idx;
2053
2054 bnxt_re_dp_spin_lock(&sq->qlock);
2055 chip_is_not_gen_p5_thor2 = !qp->cctx->chip_is_gen_p5_thor2;
2056 while (wr) {
2057 slots = bnxt_re_required_slots(qp, wr, &wqe_size, (void **)&pbuf);
2058 if (unlikely(slots < 0 || bnxt_re_is_que_full(sq, slots)) ||
2059 wr->num_sge > qp->cap.max_ssge) {
2060 *bad = wr;
2061 ret = ENOMEM;
2062 goto bad_wr;
2063 }
2064 if ((wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP ||
2065 wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD) &&
2066 !qp->cap.is_atomic_cap) {
2067 *bad = wr;
2068 ret = EINVAL;
2069 goto bad_wr;
2070 }
2071 idx = 0;
2072 len = 0;
2073 hdr = bnxt_re_get_hwqe(sq, idx++);
2074 sqe = bnxt_re_get_hwqe(sq, idx++);
2075 /* populate push buffer */
2076 if (pbuf) {
2077 pbuf->qpid = qp->qpid;
2078 pbuf->wqe[0] = (__u64)hdr;
2079 pbuf->wqe[1] = (__u64)sqe;
2080 pbuf->st_idx = *sq->dbtail;
2081 }
2082 if (wr->num_sge) {
2083 len = bnxt_re_build_tx_sge(sq, &idx, pbuf, wr, qp->cap.max_inline);
2084 if (unlikely(len < 0)) {
2085 ret = ENOMEM;
2086 *bad = wr;
2087 goto bad_wr;
2088 }
2089 }
2090 hdr->lhdr.qkey_len = htole32(len);
2091 bnxt_re_set_hdr_flags(hdr, wr, wqe_size, qp->cap.sqsig);
2092 switch (wr->opcode) {
2093 case IBV_WR_SEND_WITH_IMM:
2094 			/* HW swaps the immediate data before sending
2095 			 * it out on the wire. To work around this,
2096 			 * swap the imm_data value supplied by the
2097 			 * application so that the value going out on
2098 			 * the wire is in big-endian format.
2099 			 */
2100 hdr->key_immd = htole32(be32toh(wr->imm_data));
2101 if (qp->qptyp == IBV_QPT_UD) {
2102 if (chip_is_not_gen_p5_thor2 &&
2103 qp->wqe_cnt == BNXT_RE_UD_QP_STALL)
2104 bnxt_re_force_rts2rts(qp);
2105
2106 len = bnxt_re_build_ud_sqe(wr, hdr, sqe);
2107 }
2108 break;
2109 case IBV_WR_SEND:
2110 if (qp->qptyp == IBV_QPT_UD) {
2111 if (chip_is_not_gen_p5_thor2 &&
2112 qp->wqe_cnt == BNXT_RE_UD_QP_STALL)
2113 bnxt_re_force_rts2rts(qp);
2114
2115 len = bnxt_re_build_ud_sqe(wr, hdr, sqe);
2116 }
2117 break;
2118 case IBV_WR_RDMA_WRITE_WITH_IMM:
2119 hdr->key_immd = htole32(be32toh(wr->imm_data));
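			/* fall through: write-with-imm also needs rva/rkey */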
2120 case IBV_WR_RDMA_WRITE:
2121 case IBV_WR_RDMA_READ:
2122 rsqe = (struct bnxt_re_rdma *)sqe;
2123 rsqe->rva = htole64(wr->wr.rdma.remote_addr);
2124 rsqe->rkey = htole32(wr->wr.rdma.rkey);
2125 break;
2126 case IBV_WR_ATOMIC_CMP_AND_SWP:
2127 bnxt_re_build_cns_sqe(wr, hdr, sqe);
2128 break;
2129 case IBV_WR_ATOMIC_FETCH_AND_ADD:
2130 bnxt_re_build_fna_sqe(wr, hdr, sqe);
2131 break;
2132 		default:
2133 len = -EINVAL;
2134 break;
2135 }
2136
2137 if (unlikely(len < 0)) {
2138 ret = (len == -EINVAL) ? EINVAL : ENOMEM;
2139 *bad = wr;
2140 break;
2141 }
2142 if (BNXT_RE_HW_RETX(qp->cntx))
2143 bnxt_re_fill_psns_for_msntbl(qp, len, *sq->dbtail, wr->opcode);
2144 else
2145 bnxt_re_fill_psns(qp, len, *sq->dbtail, wr->opcode);
2146
2147 wrid = bnxt_re_get_swqe(qp->jsqq, &swq_idx);
2148 wrid->wrid = wr->wr_id;
2149 wrid->bytes = len;
2150 wrid->slots = slots;
2151 wrid->sig = (wr->send_flags & IBV_SEND_SIGNALED || qp->cap.sqsig) ?
2152 IBV_SEND_SIGNALED : 0;
2153 wrid->wc_opcd = ibv_wr_to_wc_opcd[wr->opcode];
2154
2155 bnxt_re_incr_tail(sq, slots);
2156 bnxt_re_jqq_mod_start(qp->jsqq, swq_idx);
2157 ring_db = true;
2158 if (pbuf) {
2159 ring_db = false;
2160 pbuf->tail = *sq->dbtail;
2161 if (_is_chip_thor2(qp->cctx)) {
2162 				/* Workaround for SR2 A0: ring an additional doorbell */
2163 ring_db |= _is_chip_a0(qp->cctx);
2164 bnxt_re_fill_ppp(pbuf, qp, len, idx);
2165 } else {
2166 bnxt_re_fill_push_wcb(qp, pbuf, idx);
2167 }
2168
2169 bnxt_re_put_pbuf(qp->cntx, pbuf);
2170 pbuf = NULL;
2171 }
2172 qp->wqe_cnt++;
2173 qp->sq_msn++;
2174 wr = wr->next;
2175 }
2176
2177 bad_wr:
2178 if (ring_db)
2179 bnxt_re_ring_sq_db(qp);
2180
2181 if (pbuf)
2182 bnxt_re_put_pbuf(qp->cntx, pbuf);
2183
2184 bnxt_re_dp_spin_unlock(&sq->qlock);
2185 return ret;
2186 }
2187
2188 int bnxt_re_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
2189 struct ibv_recv_wr **bad)
2190 {
2191 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
2192 struct bnxt_re_queue *rq = qp->jrqq->hwque;
2193 struct bnxt_re_wrid *swque;
2194 struct bnxt_re_brqe *hdr;
2195 struct bnxt_re_sge *sge;
2196 bool ring_db = false;
2197 uint32_t swq_idx;
2198 uint32_t hdrval;
2199 uint32_t idx;
2200 int rc = 0;
2201
2202 bnxt_re_dp_spin_lock(&rq->qlock);
2203 while (wr) {
2204 if (unlikely(bnxt_re_is_que_full(rq, rq->max_slots) ||
2205 wr->num_sge > qp->cap.max_rsge)) {
2206 *bad = wr;
2207 rc = ENOMEM;
2208 break;
2209 }
2210 swque = bnxt_re_get_swqe(qp->jrqq, &swq_idx);
2211
2212 /*
2213 * Initialize idx to 2 since the length of header wqe is 32 bytes
2214 * i.e. sizeof(struct bnxt_re_brqe) + sizeof(struct bnxt_re_send)
2215 */
2216 idx = 2;
2217 hdr = bnxt_re_get_hwqe_hdr(rq);
2218
2219 if (!wr->num_sge) {
2220 /*
2221 * HW needs at least one SGE for RQ Entries.
2222 * Create an entry if num_sge = 0,
2223 * update the idx and set length of sge to 0.
2224 */
2225 sge = bnxt_re_get_hwqe(rq, idx++);
2226 sge->length = 0;
2227 } else {
2228 /* Fill SGEs */
2229 bnxt_re_put_rx_sge(rq, &idx, wr->sg_list, wr->num_sge);
2230 }
2231 hdrval = BNXT_RE_WR_OPCD_RECV;
2232 hdrval |= ((idx & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT);
2233 hdr->rsv_ws_fl_wt = htole32(hdrval);
2234 hdr->wrid = htole32(swq_idx);
2235
2236 swque->wrid = wr->wr_id;
2237 swque->slots = rq->max_slots;
2238 swque->wc_opcd = BNXT_RE_WC_OPCD_RECV;
2239
2240 bnxt_re_jqq_mod_start(qp->jrqq, swq_idx);
2241 bnxt_re_incr_tail(rq, rq->max_slots);
2242 ring_db = true;
2243 wr = wr->next;
2244 }
2245 if (ring_db)
2246 bnxt_re_ring_rq_db(qp);
2247 bnxt_re_dp_spin_unlock(&rq->qlock);
2248
2249 return rc;
2250 }
2251
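/* Size of the single allocation that backs an SRQ: the srq and queue
 * objects, the page-aligned ring and the shadow wrid array.
 */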
2252 static size_t bnxt_re_get_srqmem_size(struct bnxt_re_context *cntx,
2253 struct ibv_srq_init_attr *attr,
2254 struct bnxt_re_qattr *qattr)
2255 {
2256 uint32_t stride, nswr;
2257 size_t size = 0;
2258
2259 size = sizeof(struct bnxt_re_srq);
2260 size += sizeof(struct bnxt_re_queue);
2261 	/* allocate 1 extra to determine the queue-full condition */
2262 nswr = attr->attr.max_wr + 1;
2263 nswr = bnxt_re_init_depth(nswr, cntx->comp_mask);
2264 stride = bnxt_re_get_srqe_sz();
2265
2266 qattr->nwr = nswr;
2267 qattr->slots = nswr;
2268 qattr->esize = stride;
2269
2270 qattr->sz_ring = get_aligned((nswr * stride), cntx->rdev->pg_size);
2271 qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */
2272
2273 size += qattr->sz_ring;
2274 size += qattr->sz_shad;
2275 return size;
2276 }
2277
2278 static void *bnxt_re_alloc_srqslab(struct bnxt_re_context *cntx,
2279 struct ibv_srq_init_attr *attr,
2280 struct bnxt_re_qattr *qattr)
2281 {
2282 size_t bytes;
2283
2284 bytes = bnxt_re_get_srqmem_size(cntx, attr, qattr);
2285 return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size);
2286 }
2287
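/* Carve the srq object and its queue structure out of the slab
 * allocated by bnxt_re_alloc_srqslab().
 */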
2288 static struct bnxt_re_srq *bnxt_re_srq_alloc_queue_ptr(struct bnxt_re_mem *mem)
2289 {
2290 struct bnxt_re_srq *srq;
2291
2292 srq = bnxt_re_get_obj(mem, sizeof(*srq));
2293 if (!srq)
2294 return NULL;
2295 srq->srqq = bnxt_re_get_obj(mem, sizeof(struct bnxt_re_queue));
2296 if (!srq->srqq)
2297 return NULL;
2298 return srq;
2299 }
2300
2301 static int bnxt_re_srq_alloc_queue(struct bnxt_re_srq *srq,
2302 struct ibv_srq_init_attr *attr,
2303 struct bnxt_re_qattr *qattr)
2304 {
2305 struct bnxt_re_queue *que;
2306 int ret = -ENOMEM;
2307 int idx;
2308
2309 que = srq->srqq;
2310 que->depth = qattr->slots;
2311 que->stride = qattr->esize;
2312 que->va = bnxt_re_get_ring(srq->mem, qattr->sz_ring);
2313 if (!que->va)
2314 goto bail;
2315 bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
2316 	/* For SRQ, only bnxt_re_wrid.wrid is used. */
2317 srq->srwrid = bnxt_re_get_obj(srq->mem, qattr->sz_shad);
2318 if (!srq->srwrid)
2319 goto bail;
2320
2321 srq->start_idx = 0;
2322 srq->last_idx = que->depth - 1;
2323 for (idx = 0; idx < que->depth; idx++)
2324 srq->srwrid[idx].next_idx = idx + 1;
2325 srq->srwrid[srq->last_idx].next_idx = -1;
2326 return 0;
2327 bail:
2328 bnxt_re_dp_spin_destroy(&srq->srqq->qlock);
2329 return ret;
2330 }
2331
2332 struct ibv_srq *bnxt_re_create_srq(struct ibv_pd *ibvpd,
2333 struct ibv_srq_init_attr *attr)
2334 {
2335 struct bnxt_re_srq_resp resp = {};
2336 struct bnxt_re_srq_req cmd = {};
2337 struct bnxt_re_qattr qattr = {};
2338 struct bnxt_re_context *uctx;
2339 struct bnxt_re_srq *srq;
2340 void *mem;
2341 int ret;
2342
2343 uctx = to_bnxt_re_context(ibvpd->context);
2344 mem = bnxt_re_alloc_srqslab(uctx, attr, &qattr);
2345 if (!mem)
2346 return NULL;
2347
2348 srq = bnxt_re_srq_alloc_queue_ptr(mem);
2349 if (!srq)
2350 goto fail;
2351 srq->uctx = uctx;
2352 srq->mem = mem;
2353 if (bnxt_re_srq_alloc_queue(srq, attr, &qattr))
2354 goto fail;
2355
2356 cmd.srqva = (uint64_t)srq->srqq->va;
2357 cmd.srq_handle = (uint64_t)srq;
2358 ret = ibv_cmd_create_srq(ibvpd, &srq->ibvsrq, attr,
2359 &cmd.cmd, sizeof(cmd),
2360 &resp.resp, sizeof(resp));
2361 if (ret)
2362 goto fail;
2363
2364 srq->srqid = resp.srqid;
2365 if (resp.srq_page) {
2366 srq->srq_page = mmap(NULL, uctx->rdev->pg_size, PROT_READ, MAP_SHARED,
2367 ibvpd->context->cmd_fd, resp.srq_page);
2368 if (srq->srq_page == MAP_FAILED)
2369 srq->srq_page = NULL;
2370 }
2371
2372 srq->udpi = &uctx->udpi;
2373 srq->cap.max_wr = srq->srqq->depth;
2374 srq->cap.max_sge = attr->attr.max_sge;
2375 srq->cap.srq_limit = attr->attr.srq_limit;
2376 srq->arm_req = false;
2377 srq->rand.seed = srq->srqid;
2378 srq->shadow_db_key = BNXT_RE_DB_KEY_INVALID;
2379
2380 INIT_DBLY_LIST_NODE(&srq->dbnode);
2381 if (_is_db_drop_recovery_enable(uctx)) {
2382 pthread_spin_lock(&uctx->srq_dbr_res.lock);
2383 bnxt_re_list_add_node(&srq->dbnode, &uctx->srq_dbr_res.head);
2384 pthread_spin_unlock(&uctx->srq_dbr_res.lock);
2385 }
2386 return &srq->ibvsrq;
2387 fail:
2388 bnxt_re_free_mem(mem);
2389 return NULL;
2390 }
2391
2392 int bnxt_re_modify_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr,
2393 int attr_mask)
2394 {
2395 struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq);
2396 struct ibv_modify_srq cmd = {};
2397 int status = 0;
2398
2399 status = ibv_cmd_modify_srq(ibvsrq, attr, attr_mask,
2400 &cmd, sizeof(cmd));
2401 if (!status && ((attr_mask & IBV_SRQ_LIMIT) &&
2402 (srq->cap.srq_limit != attr->srq_limit))) {
2403 srq->cap.srq_limit = attr->srq_limit;
2404 }
2405 srq->arm_req = true;
2406 return status;
2407 }
2408
2409 int bnxt_re_destroy_srq(struct ibv_srq *ibvsrq)
2410 {
2411 struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq);
2412 struct bnxt_re_mem *mem;
2413 int ret;
2414
2415 if (_is_db_drop_recovery_enable(srq->uctx)) {
2416 pthread_spin_lock(&srq->uctx->srq_dbr_res.lock);
2417 bnxt_re_list_del_node(&srq->dbnode, &srq->uctx->srq_dbr_res.head);
2418 pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock);
2419 }
2420 if (srq->srq_page)
2421 munmap(srq->srq_page, srq->uctx->rdev->pg_size);
2422 ret = ibv_cmd_destroy_srq(ibvsrq);
2423 if (ret) {
2424 if (_is_db_drop_recovery_enable(srq->uctx)) {
2425 pthread_spin_lock(&srq->uctx->srq_dbr_res.lock);
2426 bnxt_re_list_add_node(&srq->dbnode,
2427 &srq->uctx->srq_dbr_res.head);
2428 pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock);
2429 }
2430 return ret;
2431 }
2432 bnxt_re_dp_spin_destroy(&srq->srqq->qlock);
2433 mem = srq->mem;
2434 bnxt_re_free_mem(mem);
2435 return 0;
2436 }
2437
2438 int bnxt_re_query_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr)
2439 {
2440 struct ibv_query_srq cmd = {};
2441
2442 	return ibv_cmd_query_srq(ibvsrq, attr, &cmd, sizeof(cmd));
2443 }
2444
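/* Fill one SRQ entry from a recv WR: header word, SGEs and the shadow
 * wrid slot taken from the free list at srq->start_idx. Returns the
 * total buffer length.
 */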
2445 static int bnxt_re_build_srqe(struct bnxt_re_srq *srq,
2446 struct ibv_recv_wr *wr, void *srqe)
2447 {
2448 struct bnxt_re_brqe *hdr = srqe;
2449 struct bnxt_re_wrid *wrid;
2450 struct bnxt_re_sge *sge;
2451 int wqe_sz, len, next;
2452 uint32_t hdrval = 0;
2453 int indx;
2454
2455 sge = (srqe + bnxt_re_get_srqe_hdr_sz());
2456 next = srq->start_idx;
2457 wrid = &srq->srwrid[next];
2458
2459 len = 0;
2460 for (indx = 0; indx < wr->num_sge; indx++, sge++) {
2461 sge->pa = htole64(wr->sg_list[indx].addr);
2462 sge->lkey = htole32(wr->sg_list[indx].lkey);
2463 sge->length = htole32(wr->sg_list[indx].length);
2464 len += wr->sg_list[indx].length;
2465 }
2466
2467 hdrval = BNXT_RE_WR_OPCD_RECV;
2468 wqe_sz = wr->num_sge + (bnxt_re_get_srqe_hdr_sz() >> 4); /* 16B align */
2469 /* HW needs at least one SGE for SRQ Entries.
2470 * Increment SRQ WQE size if num_sge = 0 to
2471 * include the extra SGE. Set the sge length to
2472 * zero.
2473 */
2474 if (!wr->num_sge) {
2475 wqe_sz++;
2476 sge->length = 0;
2477 }
2478 hdrval |= ((wqe_sz & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT);
2479 hdr->rsv_ws_fl_wt = htole32(hdrval);
2480 hdr->wrid = htole32((uint32_t)next);
2481
2482 /* Fill wrid */
2483 wrid->wrid = wr->wr_id;
2484 wrid->bytes = len; /* N.A. for RQE */
2485 wrid->sig = 0; /* N.A. for RQE */
2486
2487 return len;
2488 }
2489
2490 int bnxt_re_post_srq_recv(struct ibv_srq *ibvsrq, struct ibv_recv_wr *wr,
2491 struct ibv_recv_wr **bad)
2492 {
2493 struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq);
2494 struct bnxt_re_queue *rq = srq->srqq;
2495 int ret, count = 0;
2496 void *srqe;
2497
2498 bnxt_re_dp_spin_lock(&rq->qlock);
2499 count = rq->tail > rq->head ? rq->tail - rq->head :
2500 rq->depth - rq->head + rq->tail;
2501 while (wr) {
2502 if (srq->start_idx == srq->last_idx ||
2503 wr->num_sge > srq->cap.max_sge) {
2504 *bad = wr;
2505 bnxt_re_dp_spin_unlock(&rq->qlock);
2506 return ENOMEM;
2507 }
2508
2509 srqe = (void *) (rq->va + (rq->tail * rq->stride));
2510 memset(srqe, 0, bnxt_re_get_srqe_sz());
2511 ret = bnxt_re_build_srqe(srq, wr, srqe);
2512 if (ret < 0) {
2513 bnxt_re_dp_spin_unlock(&rq->qlock);
2514 *bad = wr;
2515 return ENOMEM;
2516 }
2517
2518 srq->start_idx = srq->srwrid[srq->start_idx].next_idx;
2519 bnxt_re_incr_tail(rq, 1);
2520 wr = wr->next;
2521 bnxt_re_ring_srq_db(srq);
2522 count++;
2523 		if (srq->arm_req && count > srq->cap.srq_limit) {
2524 srq->arm_req = false;
2525 bnxt_re_ring_srq_arm(srq);
2526 }
2527 }
2528 bnxt_re_dp_spin_unlock(&rq->qlock);
2529
2530 return 0;
2531 }
2532
2533 struct ibv_ah *bnxt_re_create_ah(struct ibv_pd *ibvpd, struct ibv_ah_attr *attr)
2534 {
2535 struct bnxt_re_context *uctx;
2536 struct bnxt_re_pd *pd;
2537 struct bnxt_re_ah *ah;
2538 int status;
2539 struct ibv_create_ah_resp resp = {};
2540
2541 pd = to_bnxt_re_pd(ibvpd);
2542 uctx = to_bnxt_re_context(ibvpd->context);
2543
2544 	ah = calloc(1, sizeof(struct bnxt_re_ah));
2545 	if (!ah)
2546 		goto failed;
2548
2549 ah->pd = pd;
2550 pthread_mutex_lock(&uctx->shlock);
2551 status = ibv_cmd_create_ah(ibvpd, &ah->ibvah, attr,
2552 &resp, sizeof(resp));
2553
2554 	if (status) {
2556 pthread_mutex_unlock(&uctx->shlock);
2557 free(ah);
2558 goto failed;
2559 }
2560 /* read AV ID now. */
2561 ah->avid = *(uint32_t *)(uctx->shpg + BNXT_RE_SHPG_AVID_OFFT);
2562 pthread_mutex_unlock(&uctx->shlock);
2563
2564 return &ah->ibvah;
2565 failed:
2566 return NULL;
2567 }
2568
2569 int bnxt_re_destroy_ah(struct ibv_ah *ibvah)
2570 {
2571 struct bnxt_re_ah *ah;
2572 int status;
2573
2574 ah = to_bnxt_re_ah(ibvah);
2575 status = ibv_cmd_destroy_ah(ibvah);
2576 if (status)
2577 return status;
2578 free(ah);
2579
2580 return 0;
2581 }
2582