xref: /freebsd/contrib/ofed/libbnxtre/verbs.c (revision 7937bfbc0ca53fe7cdd0d54414f9296e273a518e)
1 /*
2  * Copyright (c) 2024, Broadcom. All rights reserved.  The term
3  * Broadcom refers to Broadcom Limited and/or its subsidiaries.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
18  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
25  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
26  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/mman.h>
31 
32 #include <netinet/in.h>
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <malloc.h>
37 #include <pthread.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "main.h"
45 #include "verbs.h"
46 
47 static int ibv_to_bnxt_re_wr_opcd[11] = {
48 	BNXT_RE_WR_OPCD_RDMA_WRITE,
49 	BNXT_RE_WR_OPCD_RDMA_WRITE_IMM,
50 	BNXT_RE_WR_OPCD_SEND,
51 	BNXT_RE_WR_OPCD_SEND_IMM,
52 	BNXT_RE_WR_OPCD_RDMA_READ,
53 	BNXT_RE_WR_OPCD_ATOMIC_CS,
54 	BNXT_RE_WR_OPCD_ATOMIC_FA,
55 	BNXT_RE_WR_OPCD_INVAL,
56 	BNXT_RE_WR_OPCD_INVAL,
57 	BNXT_RE_WR_OPCD_INVAL,
58 	BNXT_RE_WR_OPCD_INVAL
59 };
60 
61 static int ibv_wr_to_wc_opcd[11] = {
62 	IBV_WC_RDMA_WRITE,
63 	IBV_WC_RDMA_WRITE,
64 	IBV_WC_SEND,
65 	IBV_WC_SEND,
66 	IBV_WC_RDMA_READ,
67 	IBV_WC_COMP_SWAP,
68 	IBV_WC_FETCH_ADD,
69 	0xFF,
70 	0xFF,
71 	0xFF,
72 	0xFF
73 };
74 
75 static int bnxt_re_req_to_ibv_status[12] = {
76 	IBV_WC_SUCCESS,
77 	IBV_WC_BAD_RESP_ERR,
78 	IBV_WC_LOC_LEN_ERR,
79 	IBV_WC_LOC_QP_OP_ERR,
80 	IBV_WC_LOC_PROT_ERR,
81 	IBV_WC_MW_BIND_ERR,
82 	IBV_WC_REM_INV_REQ_ERR,
83 	IBV_WC_REM_ACCESS_ERR,
84 	IBV_WC_REM_OP_ERR,
85 	IBV_WC_RNR_RETRY_EXC_ERR,
86 	IBV_WC_RETRY_EXC_ERR,
87 	IBV_WC_WR_FLUSH_ERR
88 };
89 
90 static int bnxt_re_res_to_ibv_status[9] = {
91 	IBV_WC_SUCCESS,
92 	IBV_WC_LOC_ACCESS_ERR,
93 	IBV_WC_LOC_LEN_ERR,
94 	IBV_WC_LOC_PROT_ERR,
95 	IBV_WC_LOC_QP_OP_ERR,
96 	IBV_WC_MW_BIND_ERR,
97 	IBV_WC_REM_INV_REQ_ERR,
98 	IBV_WC_WR_FLUSH_ERR,
99 	IBV_WC_FATAL_ERR
100 };
101 
102 static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc,
103 			    uint32_t *resize);
104 
105 int bnxt_single_threaded;
106 int bnxt_dyn_debug;
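/*
 * bnxt_re_query_device: wrapper over ibv_cmd_query_device() that also
 * formats the 8-byte raw firmware version reported by the kernel into the
 * "a.b.c.d" string expected in dev_attr->fw_ver.
 */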
107 int bnxt_re_query_device(struct ibv_context *ibvctx,
108 			 struct ibv_device_attr *dev_attr)
109 {
110 	struct ibv_query_device cmd = {};
111 	uint8_t fw_ver[8];
112 	int status;
113 
114 	memset(dev_attr, 0, sizeof(struct ibv_device_attr));
115 	status = ibv_cmd_query_device(ibvctx, dev_attr, (uint64_t *)&fw_ver,
116 				      &cmd, sizeof(cmd));
117 	snprintf(dev_attr->fw_ver, 64, "%d.%d.%d.%d",
118 		 fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3]);
119 
120 	return status;
121 }
122 
123 int bnxt_re_query_device_compat(struct ibv_context *ibvctx,
124 				struct ibv_device_attr *dev_attr)
125 
126 {
127 	int rc = 0;
128 
129 	rc = bnxt_re_query_device(ibvctx, dev_attr);
130 
131 	return rc;
132 }
133 
134 int bnxt_re_query_port(struct ibv_context *ibvctx, uint8_t port,
135 		       struct ibv_port_attr *port_attr)
136 {
137 	struct ibv_query_port cmd = {};
138 
139 	return ibv_cmd_query_port(ibvctx, port, port_attr, &cmd, sizeof(cmd));
140 }
141 
142 static inline bool bnxt_re_is_wcdpi_enabled(struct bnxt_re_context *cntx)
143 {
144 	return cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED;
145 }
146 
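/*
 * Map the context's doorbell page at offset 'dbr'. When a write-combining
 * DPI is also reported (wcdpi != 0), additionally map the WC doorbell page
 * via the BNXT_RE_MAP_WC offset and remember the WC DPI.
 */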
147 static int bnxt_re_map_db_page(struct ibv_context *ibvctx,
148 			       uint64_t dbr, uint32_t dpi, uint32_t wcdpi)
149 {
150 	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
151 	struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
152 
153 	cntx->udpi.dpindx = dpi;
154 	cntx->udpi.dbpage = mmap(NULL, dev->pg_size, PROT_WRITE,
155 				 MAP_SHARED, ibvctx->cmd_fd, dbr);
156 	if (cntx->udpi.dbpage == MAP_FAILED)
157 		return -ENOMEM;
158 	if (wcdpi) {
159 		cntx->udpi.wcdbpg = mmap(NULL, dev->pg_size, PROT_WRITE,
160 					 MAP_SHARED, ibvctx->cmd_fd,
161 					 BNXT_RE_MAP_WC);
162 		if (cntx->udpi.wcdbpg == MAP_FAILED)
163 			return -ENOMEM;
164 		cntx->udpi.wcdpi = wcdpi;
165 	}
166 
167 	return 0;
168 }
169 
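/*
 * Allocate a PD. If the context's doorbell page is not mapped yet (first
 * allocation), map it here along with the WC doorbell page when one is
 * advertised. When the kernel reports a DBR BAR address, a read-only 4 KB
 * window of it is also mapped into cntx->bar_map.
 */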
170 struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *ibvctx)
171 {
172 	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
173 	struct bnxt_re_pd_resp resp = {};
174 	struct ibv_alloc_pd cmd = {};
175 	struct bnxt_re_pd *pd;
176 	uint64_t dbr_map;
177 
178 	pd = calloc(1, sizeof(*pd));
179 	if (!pd)
180 		return NULL;
181 
182 	if (ibv_cmd_alloc_pd(ibvctx, &pd->ibvpd, &cmd, sizeof(cmd),
183 			     &resp.resp, sizeof(resp)))
184 		goto out;
185 
186 	pd->pdid = resp.pdid;
187 	/* Map DB page now. */
188 	if (!cntx->udpi.dbpage) {
189 		uint32_t wcdpi = 0;
190 
191 		if (bnxt_re_is_wcdpi_enabled(cntx) &&
192 		    resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_WC_DPI)
193 			wcdpi = resp.wcdpi;
194 		if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi))
195 			goto fail;
196 		if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi)
197 			bnxt_re_init_pbuf_list(cntx);
198 	}
199 	if (resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_DBR_BAR_ADDR) {
200 		dbr_map = resp.dbr_bar_map & 0xFFFFFFFFFFFFF000;
201 		cntx->bar_map = mmap(NULL, 4096, PROT_READ,
202 				     MAP_SHARED, ibvctx->cmd_fd, dbr_map);
203 		if (cntx->bar_map == MAP_FAILED)
204 			goto fail;
205 	}
206 
207 	return &pd->ibvpd;
208 fail:
209 	ibv_cmd_dealloc_pd(&pd->ibvpd);
210 out:
211 	free(pd);
212 	return NULL;
213 }
214 
215 int bnxt_re_free_pd(struct ibv_pd *ibvpd)
216 {
217 	struct bnxt_re_pd *pd = to_bnxt_re_pd(ibvpd);
218 	int status;
219 
220 	status = ibv_cmd_dealloc_pd(ibvpd);
221 	if (status)
222 		return status;
223 	/* DPI un-mapping will be done during uninit_ucontext */
224 	free(pd);
225 
226 	return 0;
227 }
228 
229 struct ibv_mr *get_ibv_mr_from_bnxt_re_mr(struct bnxt_re_mr *mr)
230 {
231 	return &mr->vmr;
232 }
233 
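/*
 * Register a memory region with the kernel driver. The HW VA programmed is
 * simply the application's start VA. Illustrative caller-side sketch
 * (hypothetical application code, not part of this library):
 *
 *	void *buf = malloc(len);
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
 *				       IBV_ACCESS_LOCAL_WRITE |
 *				       IBV_ACCESS_REMOTE_WRITE);
 */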
234 struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
235 			      int access)
236 {
237 	struct bnxt_re_mr_resp resp = {};
238 	struct ibv_reg_mr cmd = {};
239 	struct bnxt_re_mr *mr;
240 	uint64_t hw_va;
241 	hw_va = (uint64_t) sva;
242 
243 	mr = calloc(1, sizeof(*mr));
244 	if (!mr)
245 		return NULL;
246 
247 	if (ibv_cmd_reg_mr(ibvpd, sva, len, hw_va, access, &mr->vmr,
248 			   &cmd, sizeof(cmd), &resp.resp, sizeof(resp))) {
249 		free(mr);
250 		return NULL;
251 	}
252 
253 	return get_ibv_mr_from_bnxt_re_mr(mr);
254 }
255 
256 int bnxt_re_dereg_mr(VERBS_MR *ibvmr)
257 {
258 	struct bnxt_re_mr *mr = (struct bnxt_re_mr *)ibvmr;
259 	int status;
260 
261 	status = ibv_cmd_dereg_mr(ibvmr);
262 	if (status)
263 		return status;
264 	free(mr);
265 
266 	return 0;
267 }
268 
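/*
 * Allocate the page-aligned slab that backs a CQ ring. The requested depth
 * is rounded by bnxt_re_init_depth() and clamped to the device maximum.
 * Returns NULL if the resulting depth equals 'cur' (no resize needed) or if
 * the allocation fails; mem->pad carries the final depth back to the caller.
 */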
269 void *bnxt_re_alloc_cqslab(struct bnxt_re_context *cntx,
270 			   uint32_t ncqe, uint32_t cur)
271 {
272 	struct bnxt_re_mem *mem;
273 	uint32_t depth, sz;
274 
275 	depth = bnxt_re_init_depth(ncqe + 1, cntx->comp_mask);
276 	if (depth > cntx->rdev->max_cq_depth + 1)
277 		depth = cntx->rdev->max_cq_depth + 1;
278 	if (depth == cur)
279 		return NULL;
280 	sz = get_aligned((depth * cntx->rdev->cqe_size), cntx->rdev->pg_size);
281 	mem = bnxt_re_alloc_mem(sz, cntx->rdev->pg_size);
282 	if (mem)
283 		mem->pad = depth;
284 	return mem;
285 }
286 
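/*
 * Common CQ creation path. 'soft_cq' is set only for the internal CQ used by
 * doorbell-drop recovery; for that CQ the DBR recovery capability is
 * requested in the create command and the CQ is not added to the recovery
 * tracking list.
 */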
287 struct ibv_cq *_bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe,
288 				  struct ibv_comp_channel *channel, int vec,
289 				  bool soft_cq)
290 {
291 	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
292 	struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
293 	struct bnxt_re_cq_resp resp = {};
294 	struct bnxt_re_cq_req cmd = {};
295 	struct bnxt_re_cq *cq;
296 	bool has_dpi;
297 
298 	if (ncqe > dev->max_cq_depth)
299 		return NULL;
300 
301 	cq = calloc(1, (sizeof(*cq) + sizeof(struct bnxt_re_queue)));
302 	if (!cq)
303 		return NULL;
304 	cq->cqq = (void *)((char *)cq + sizeof(*cq));
305 	if (!cq->cqq)
306 		goto mem;
307 
308 	cq->mem = bnxt_re_alloc_cqslab(cntx, ncqe, 0);
309 	if (!cq->mem)
310 		goto mem;
311 	cq->cqq->depth = cq->mem->pad;
312 	cq->cqq->stride = dev->cqe_size;
313 	/* As an exception, there is no need to call the get_ring API here;
314 	 * we know this is the only consumer.
315 	 */
316 	cq->cqq->va = cq->mem->va_head;
317 	if (!cq->cqq->va)
318 		goto fail;
319 
320 	cmd.cq_va = (uint64_t)cq->cqq->va;
321 	cmd.cq_handle = (uint64_t)cq;
322 	if (soft_cq) {
323 		cmd.comp_mask |= BNXT_RE_COMP_MASK_CQ_REQ_HAS_CAP_MASK;
324 		cmd.cq_capab |= BNXT_RE_COMP_MASK_CQ_REQ_CAP_DBR_RECOVERY;
325 	}
326 	if (ibv_cmd_create_cq(ibvctx, ncqe, channel, vec,
327 			      &cq->ibvcq, &cmd.cmd, sizeof(cmd),
328 			      &resp.resp, sizeof(resp)))
329 		goto fail;
330 
331 	has_dpi = resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_DB_INFO;
332 	if (!cntx->udpi.dbpage && has_dpi) {
333 		uint32_t wcdpi = 0;
334 
335 		if (bnxt_re_is_wcdpi_enabled(cntx) &&
336 		    resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_WC_DPI)
337 			wcdpi = resp.wcdpi;
338 		if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi))
339 			goto fail;
340 		if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi)
341 			bnxt_re_init_pbuf_list(cntx);
342 	}
343 
344 	if (resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_CQ_PAGE) {
345 		cq->cq_page = mmap(NULL, dev->pg_size, PROT_WRITE, MAP_SHARED,
346 				   ibvctx->cmd_fd, resp.cq_page);
347 		if (cq->cq_page == MAP_FAILED)
348 			fprintf(stderr, DEV "Valid cq_page not mapped\n");
349 	}
350 
351 	cq->cqid = resp.cqid;
352 	cq->phase = resp.phase;
353 	cq->cqq->tail = resp.tail;
354 	cq->udpi = &cntx->udpi;
355 	cq->first_arm = true;
356 	cq->cntx = cntx;
357 	cq->rand.seed = cq->cqid;
358 	cq->shadow_db_key = BNXT_RE_DB_KEY_INVALID;
359 	bnxt_re_dp_spin_init(&cq->cqq->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
360 	INIT_DBLY_LIST_HEAD(&cq->sfhead);
361 	INIT_DBLY_LIST_HEAD(&cq->rfhead);
362 	INIT_DBLY_LIST_HEAD(&cq->prev_cq_head);
363 	if (_is_db_drop_recovery_enable(cntx) && !soft_cq) {
364 		INIT_DBLY_LIST_NODE(&cq->dbnode);
365 		pthread_spin_lock(&cntx->cq_dbr_res.lock);
366 		bnxt_re_list_add_node(&cq->dbnode, &cntx->cq_dbr_res.head);
367 		pthread_spin_unlock(&cntx->cq_dbr_res.lock);
368 	}
369 
370 	return &cq->ibvcq;
371 fail:
372 	bnxt_re_free_mem(cq->mem);
373 mem:
374 	free(cq);
375 	return NULL;
376 }
377 
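/*
 * Verbs CQ creation entry point. When doorbell-drop recovery is enabled,
 * the first call lazily sets up the recovery infrastructure: a dedicated
 * completion channel and CQ, a mapping of the DB recovery page, the
 * recovery thread (created with all signals blocked), and the per-resource
 * tracking lists.
 */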
378 struct ibv_cq *bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe,
379 				 struct ibv_comp_channel *channel, int vec)
380 {
381 	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx);
382 	struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device);
383 	sigset_t block_sig_set, old_sig_set;
384 	int ret;
385 
386 	if (_is_db_drop_recovery_enable(cntx) && !cntx->dbr_cq) {
387 		cntx->dbr_ev_chan =
388 			ibv_create_comp_channel(ibvctx);
389 		if (!cntx->dbr_ev_chan) {
390 			fprintf(stderr,
391 				DEV "Failed to create completion channel\n");
392 			goto free;
393 		}
394 		cntx->dbr_cq = _bnxt_re_create_cq(ibvctx, 1, cntx->dbr_ev_chan, vec, 1);
395 		if (!cntx->dbr_cq) {
396 			fprintf(stderr, DEV "Couldn't create CQ\n");
397 			goto free;
398 		}
399 		cntx->db_recovery_page = mmap(NULL, dev->pg_size, PROT_READ |
400 					      PROT_WRITE, MAP_SHARED,
401 					      ibvctx->cmd_fd, BNXT_RE_DB_RECOVERY_PAGE);
402 		if (cntx->db_recovery_page == MAP_FAILED) {
403 			fprintf(stderr, DEV "Couldn't map DB recovery page\n");
404 			goto free;
405 		}
406 		/* Create a pthread to handle doorbell drop events. This thread
407 		 * does not handle any signals: block all signals before creation
408 		 * and restore the old signal mask afterwards.
409 		 */
410 		sigfillset(&block_sig_set);
411 		pthread_sigmask(SIG_BLOCK, &block_sig_set, &old_sig_set);
412 		ret = pthread_create(&cntx->dbr_thread, NULL, bnxt_re_dbr_thread, cntx);
413 		if (ret) {
414 			fprintf(stderr, DEV "Couldn't create pthread\n");
415 			pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL);
416 			goto free;
417 		}
418 		pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL);
419 		INIT_DBLY_LIST_HEAD(&cntx->qp_dbr_res.head);
420 		pthread_spin_init(&cntx->qp_dbr_res.lock, PTHREAD_PROCESS_PRIVATE);
421 		INIT_DBLY_LIST_HEAD(&cntx->cq_dbr_res.head);
422 		pthread_spin_init(&cntx->cq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE);
423 		INIT_DBLY_LIST_HEAD(&cntx->srq_dbr_res.head);
424 		pthread_spin_init(&cntx->srq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE);
425 	}
426 	return _bnxt_re_create_cq(ibvctx, ncqe, channel, vec, 0);
427 free:
428 	if (cntx->dbr_ev_chan) {
429 		ret = ibv_destroy_comp_channel(cntx->dbr_ev_chan);
430 		if (ret)
431 			fprintf(stderr, DEV "ibv_destroy_comp_channel error\n");
432 	}
433 
434 	if (cntx->dbr_cq) {
435 		if (cntx->db_recovery_page)
436 			munmap(cntx->db_recovery_page, dev->pg_size);
437 		ret = ibv_destroy_cq(cntx->dbr_cq);
438 		if (ret)
439 			fprintf(stderr, DEV "ibv_destroy_cq error\n");
440 	}
441 	return NULL;
442 }
443 
444 int bnxt_re_poll_kernel_cq(struct bnxt_re_cq *cq)
445 {
446 	struct ibv_wc tmp_wc;
447 	int rc;
448 
449 	rc = ibv_cmd_poll_cq(&cq->ibvcq, 1, &tmp_wc);
450 	if (unlikely(rc))
451 		fprintf(stderr, "ibv_cmd_poll_cq failed: %d\n", rc);
452 	return rc;
453 }
454 
455 #define BNXT_RE_QUEUE_START_PHASE		0x01
456 
457 /*
458  * Complete the last steps of a CQ resize. Invoke the poll function in the
459  * kernel driver; this serves as a signal to the driver to finish the CQ
460  * resize steps it requires. Free the memory mapped for the original CQ and
461  * switch over to the memory mapped for the resized CQ. Finally, ack the
462  * cut-off CQE. This function must be called with cq->cqq.lock held.
463  */
464 void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq)
465 {
466 	struct bnxt_re_context *cntx = to_bnxt_re_context(cq->ibvcq.context);
467 
468 	bnxt_re_poll_kernel_cq(cq);
469 	bnxt_re_free_mem(cq->mem);
470 
471 	cq->mem = cq->resize_mem;
472 	cq->resize_mem = NULL;
473 	/* As an exception, there is no need to call the get_ring API here;
474 	 * we know this is the only consumer.
475 	 */
476 	cq->cqq->va = cq->mem->va_head;
477 	/*
478 	 * We don't want to memcpy() the entire cqq structure below; otherwise
479 	 * we'd end up overwriting cq->cqq.lock that is held by the caller.
480 	 * So we copy the members piecemeal. cqq->head and cqq->tail are
481 	 * reset to 0 before the cutoff_ack DB.
482 	 */
483 	cq->cqq->depth = cq->mem->pad;
484 	cq->cqq->stride = cntx->rdev->cqe_size;
485 	cq->cqq->head = 0;
486 	cq->cqq->tail = 0;
487 	cq->phase = BNXT_RE_QUEUE_START_PHASE;
488 	/* Reset epoch portion of the flags */
489 	cq->cqq->flags &= ~(BNXT_RE_FLAG_EPOCH_TAIL_MASK |
490 			    BNXT_RE_FLAG_EPOCH_HEAD_MASK);
491 	bnxt_re_ring_cq_arm_db(cq, BNXT_RE_QUE_TYPE_CQ_CUT_ACK);
492 }
493 
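/*
 * Resize a CQ. After the kernel command succeeds, completions are polled
 * from the old ring and parked on cq->prev_cq_head until the cut-off CQE is
 * seen (resize == 1); bnxt_re_poll_cq() drains that list before polling the
 * new ring. Gives up with -EIO after roughly 20 x 100 ms without progress.
 */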
494 int bnxt_re_resize_cq(struct ibv_cq *ibvcq, int ncqe)
495 {
496 	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvcq->context);
497 	struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvcq->context->device);
498 	struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
499 	struct bnxt_re_resize_cq_req req = {};
500 	uint32_t exit_cnt = 20;
501 
502 	struct ibv_resize_cq_resp resp = {};
503 	int rc = 0;
504 
505 	if (ncqe > dev->max_cq_depth)
506 		return -EINVAL;
507 
508 	bnxt_re_dp_spin_lock(&cq->cqq->qlock);
509 	cq->resize_mem = bnxt_re_alloc_cqslab(cntx, ncqe, cq->cqq->depth);
510 	if (unlikely(!cq->resize_mem)) {
511 		rc = -ENOMEM;
512 		goto done;
513 	}
514 	/* As an exception, there is no need to call the get_ring API here;
515 	 * we know this is the only consumer.
516 	 */
517 	req.cq_va = (uint64_t)cq->resize_mem->va_head;
518 	rc = ibv_cmd_resize_cq(ibvcq, ncqe, &req.cmd,
519 			       sizeof(req), &resp, sizeof(resp));
520 	if (unlikely(rc)) {
521 		bnxt_re_free_mem(cq->resize_mem);
522 		goto done;
523 	}
524 
525 	while (true) {
526 		struct ibv_wc tmp_wc = {0};
527 		uint32_t resize = 0;
528 		int dqed = 0;
529 
530 		struct bnxt_re_work_compl *compl = NULL;
531 		dqed = bnxt_re_poll_one(cq, 1, &tmp_wc, &resize);
532 		if (resize) {
533 			break;
534 		}
535 		if (dqed) {
536 			compl = calloc(1, sizeof(*compl));
537 			if (unlikely(!compl)) {
538 				fprintf(stderr, "%s: No Memory.. Continue\n", __func__);
539 				break;
540 			}
541 			memcpy(&compl->wc, &tmp_wc, sizeof(tmp_wc));
542 			bnxt_re_list_add_node(&compl->cnode, &cq->prev_cq_head);
543 			compl = NULL;
544 			memset(&tmp_wc, 0, sizeof(tmp_wc));
545 		} else {
546 			exit_cnt--;
547 			if (unlikely(!exit_cnt)) {
548 				rc = -EIO;
549 				break;
550 			} else {
551 				/* wait for 100 milliseconds */
552 				bnxt_re_sub_sec_busy_wait(100 * 1000000);
553 			}
554 		}
555 	}
556 done:
557 	bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
558 	return rc;
559 }
560 
561 static void bnxt_re_destroy_resize_cq_list(struct bnxt_re_cq *cq)
562 {
563 	struct bnxt_re_list_node *cur, *tmp;
564 	struct bnxt_re_work_compl *compl;
565 
566 	if (bnxt_re_list_empty(&cq->prev_cq_head))
567 		return;
568 
569 	list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) {
570 		compl = list_node(cur, struct bnxt_re_work_compl, cnode);
571 		bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head);
572 		free(compl);
573 	}
574 
575 }
576 
577 int bnxt_re_destroy_cq(struct ibv_cq *ibvcq)
578 {
579 	struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
580 	int status;
581 
582 	if (_is_db_drop_recovery_enable(cq->cntx) &&
583 		ibvcq != cq->cntx->dbr_cq) {
584 		pthread_spin_lock(&cq->cntx->cq_dbr_res.lock);
585 		bnxt_re_list_del_node(&cq->dbnode,
586 				      &cq->cntx->cq_dbr_res.head);
587 		pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock);
588 	}
589 	status = ibv_cmd_destroy_cq(ibvcq);
590 	if (status) {
591 		if (_is_db_drop_recovery_enable(cq->cntx) &&
592 			ibvcq != cq->cntx->dbr_cq) {
593 			pthread_spin_lock(&cq->cntx->cq_dbr_res.lock);
594 			bnxt_re_list_add_node(&cq->dbnode,
595 					      &cq->cntx->cq_dbr_res.head);
596 			pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock);
597 		}
598 		return status;
599 	}
600 	bnxt_re_destroy_resize_cq_list(cq);
601 	bnxt_re_free_mem(cq->mem);
602 	free(cq);
603 	return 0;
604 }
605 
606 static uint8_t bnxt_re_poll_err_scqe(struct bnxt_re_qp *qp,
607 				     struct ibv_wc *ibvwc,
608 				     struct bnxt_re_req_cqe *scqe,
609 				     uint32_t flg_val, int *cnt)
610 {
611 	struct bnxt_re_queue *sq = qp->jsqq->hwque;
612 	struct bnxt_re_wrid *swrid;
613 	struct bnxt_re_cq *scq;
614 	uint8_t status;
615 	uint32_t head;
616 
617 	scq = to_bnxt_re_cq(qp->ibvqp.send_cq);
618 
619 	head = qp->jsqq->last_idx;
620 	swrid = &qp->jsqq->swque[head];
621 
622 	*cnt = 1;
623 	status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
624 		  BNXT_RE_BCQE_STATUS_MASK;
625 	ibvwc->status = bnxt_re_req_to_ibv_status[status];
626 	ibvwc->wc_flags = 0;
627 	ibvwc->wr_id = swrid->wrid;
628 	ibvwc->qp_num = qp->qpid;
629 	ibvwc->opcode = swrid->wc_opcd;
630 	ibvwc->byte_len = 0;
631 
632 	bnxt_re_incr_head(sq, swrid->slots);
633 	bnxt_re_jqq_mod_last(qp->jsqq, head);
634 
635 	if (qp->qpst != IBV_QPS_ERR)
636 		qp->qpst = IBV_QPS_ERR;
637 	bnxt_re_list_add_node(&qp->snode, &scq->sfhead);
638 	bnxt_re_trace("%s: qp_num = 0x%x status = %d\n",
639 		      __func__, ibvwc->qp_num, ibvwc->status)
640 
641 	return false;
642 }
643 
644 static uint8_t bnxt_re_poll_success_scqe(struct bnxt_re_qp *qp,
645 				struct ibv_wc *ibvwc,
646 				struct bnxt_re_req_cqe *scqe, int *cnt)
647 {
648 	struct bnxt_re_queue *sq = qp->jsqq->hwque;
649 	struct bnxt_re_wrid *swrid;
650 	uint8_t pcqe = false;
651 	uint32_t cindx, head;
652 
653 	head = qp->jsqq->last_idx;
654 	swrid = &qp->jsqq->swque[head];
655 	cindx = le32toh(scqe->con_indx) % qp->cap.max_swr;
656 
657 	if (!(swrid->sig & IBV_SEND_SIGNALED)) {
658 		*cnt = 0;
659 	} else {
660 		ibvwc->status = IBV_WC_SUCCESS;
661 		ibvwc->wc_flags = 0;
662 		ibvwc->qp_num = qp->qpid;
663 		ibvwc->wr_id = swrid->wrid;
664 		ibvwc->opcode = swrid->wc_opcd;
665 		if (ibvwc->opcode == IBV_WC_RDMA_READ ||
666 		    ibvwc->opcode == IBV_WC_COMP_SWAP ||
667 		    ibvwc->opcode == IBV_WC_FETCH_ADD)
668 			ibvwc->byte_len = swrid->bytes;
669 		*cnt = 1;
670 	}
671 	bnxt_re_incr_head(sq, swrid->slots);
672 	bnxt_re_jqq_mod_last(qp->jsqq, head);
673 	if (qp->jsqq->last_idx != cindx)
674 		pcqe = true;
675 
676 	return pcqe;
677 }
678 
679 static uint8_t bnxt_re_poll_scqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
680 				 void *cqe, uint32_t flg_val, int *cnt)
681 {
682 	uint8_t status, pcqe = false;
683 
684 	status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
685 		  BNXT_RE_BCQE_STATUS_MASK;
686 	if (status == BNXT_RE_REQ_ST_OK)
687 		pcqe = bnxt_re_poll_success_scqe(qp, ibvwc, cqe, cnt);
688 	else
689 		pcqe = bnxt_re_poll_err_scqe(qp, ibvwc, cqe, flg_val, cnt);
690 
691 	return pcqe;
692 }
693 
694 static void bnxt_re_release_srqe(struct bnxt_re_srq *srq, int tag)
695 {
696 	bnxt_re_dp_spin_lock(&srq->srqq->qlock);
697 	srq->srwrid[srq->last_idx].next_idx = tag;
698 	srq->last_idx = tag;
699 	srq->srwrid[srq->last_idx].next_idx = -1;
700 	bnxt_re_dp_spin_unlock(&srq->srqq->qlock);
701 }
702 
703 static int bnxt_re_poll_err_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
704 				 struct bnxt_re_bcqe *hdr,
705 				 uint32_t flg_val, void *cqe)
706 {
707 	struct bnxt_re_wrid *swque;
708 	struct bnxt_re_queue *rq;
709 	struct bnxt_re_cq *rcq;
710 	uint8_t status, cnt;
711 	uint32_t head = 0;
712 
713 	rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq);
714 
715 	status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
716 		  BNXT_RE_BCQE_STATUS_MASK;
717 	/* skip h/w flush errors */
718 	if (status == BNXT_RE_RSP_ST_HW_FLUSH)
719 		return 0;
720 
721 	if (!qp->srq) {
722 		rq = qp->jrqq->hwque;
723 		head = qp->jrqq->last_idx;
724 		swque = &qp->jrqq->swque[head];
725 		ibvwc->wr_id = swque->wrid;
726 		cnt = swque->slots;
727 	} else {
728 		struct bnxt_re_srq *srq;
729 		int tag;
730 
731 		srq = qp->srq;
732 		rq = srq->srqq;
733 		cnt = 1;
734 		tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK;
735 		ibvwc->wr_id = srq->srwrid[tag].wrid;
736 		bnxt_re_release_srqe(srq, tag);
737 	}
738 
739 	ibvwc->status = bnxt_re_res_to_ibv_status[status];
740 	ibvwc->qp_num = qp->qpid;
741 	ibvwc->opcode = IBV_WC_RECV;
742 	ibvwc->byte_len = 0;
743 	ibvwc->wc_flags = 0;
744 	if (qp->qptyp == IBV_QPT_UD)
745 		ibvwc->src_qp = 0;
746 
747 	if (!qp->srq)
748 		bnxt_re_jqq_mod_last(qp->jrqq, head);
749 	bnxt_re_incr_head(rq, cnt);
750 
751 	if (!qp->srq)
752 		bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead);
753 
754 	bnxt_re_trace("%s: qp_num = 0x%x status = %d\n",
755 		      __func__, ibvwc->qp_num, ibvwc->status)
756 	return 1;
757 }
758 
759 static void bnxt_re_fill_ud_cqe(struct ibv_wc *ibvwc,
760 				struct bnxt_re_bcqe *hdr, void *cqe,
761 				uint8_t flags)
762 {
763 	struct bnxt_re_ud_cqe *ucqe = cqe;
764 	uint32_t qpid;
765 
766 	qpid = ((le32toh(hdr->qphi_rwrid) >> BNXT_RE_BCQE_SRCQP_SHIFT) &
767 		 BNXT_RE_BCQE_SRCQP_SHIFT) << 0x10; /* higher 8 bits of 24 */
768 	qpid |= (le64toh(ucqe->qplo_mac) >> BNXT_RE_UD_CQE_SRCQPLO_SHIFT) &
769 		 BNXT_RE_UD_CQE_SRCQPLO_MASK; /* lower 16 bits of 24 */
770 	ibvwc->src_qp = qpid;
771 	ibvwc->wc_flags |= IBV_WC_GRH;
772 	ibvwc->sl = (flags & BNXT_RE_UD_FLAGS_IP_VER_MASK) >>
773 		     BNXT_RE_UD_FLAGS_IP_VER_SFT;
774 	/* The user-space IB-stack ABI does not ask for the MAC to be reported. */
775 }
776 
777 static void bnxt_re_poll_success_rcqe(struct bnxt_re_qp *qp,
778 				      struct ibv_wc *ibvwc,
779 				      struct bnxt_re_bcqe *hdr,
780 				      uint32_t flg_val, void *cqe)
781 {
782 	uint8_t flags, is_imm, is_rdma;
783 	struct bnxt_re_rc_cqe *rcqe;
784 	struct bnxt_re_wrid *swque;
785 	struct bnxt_re_queue *rq;
786 	uint32_t head = 0;
787 	uint32_t rcqe_len;
788 	uint8_t cnt;
789 
790 	rcqe = cqe;
791 	if (!qp->srq) {
792 		rq = qp->jrqq->hwque;
793 		head = qp->jrqq->last_idx;
794 		swque = &qp->jrqq->swque[head];
795 		cnt = swque->slots;
796 		ibvwc->wr_id = swque->wrid;
797 	} else {
798 		struct bnxt_re_srq *srq;
799 		int tag;
800 
801 		srq = qp->srq;
802 		rq = srq->srqq;
803 		cnt = 1;
804 		tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK;
805 		ibvwc->wr_id = srq->srwrid[tag].wrid;
806 		bnxt_re_release_srqe(srq, tag);
807 	}
808 
809 	ibvwc->status = IBV_WC_SUCCESS;
810 	ibvwc->qp_num = qp->qpid;
811 	rcqe_len = le32toh(rcqe->length);
812 	ibvwc->byte_len = (qp->qptyp == IBV_QPT_UD) ?
813 			  rcqe_len & BNXT_RE_UD_CQE_LEN_MASK : rcqe_len;
814 	ibvwc->opcode = IBV_WC_RECV;
815 
816 	flags = (flg_val >> BNXT_RE_BCQE_FLAGS_SHIFT) &
817 		 BNXT_RE_BCQE_FLAGS_MASK;
818 	is_imm = (flags & BNXT_RE_RC_FLAGS_IMM_MASK) >>
819 		     BNXT_RE_RC_FLAGS_IMM_SHIFT;
820 	is_rdma = (flags & BNXT_RE_RC_FLAGS_RDMA_MASK) >>
821 		   BNXT_RE_RC_FLAGS_RDMA_SHIFT;
822 	ibvwc->wc_flags = 0;
823 	if (is_imm) {
824 		ibvwc->wc_flags |= IBV_WC_WITH_IMM;
825 		/* The HW returns imm_data in little-endian format; swap it to
826 		 * big endian as expected by the application.
827 		 */
828 		ibvwc->imm_data = htobe32(le32toh(rcqe->imm_key));
829 		if (is_rdma)
830 			ibvwc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
831 	}
832 
833 	if (qp->qptyp == IBV_QPT_UD) {
834 		bnxt_re_fill_ud_cqe(ibvwc, hdr, cqe, flags);
835 	}
836 
837 	if (!qp->srq)
838 		bnxt_re_jqq_mod_last(qp->jrqq, head);
839 	bnxt_re_incr_head(rq, cnt);
840 }
841 
842 static uint8_t bnxt_re_poll_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc,
843 				 void *cqe, uint32_t flg_val, int *cnt)
844 {
845 	struct bnxt_re_bcqe *hdr;
846 	uint8_t status, pcqe = false;
847 
848 	hdr = cqe + sizeof(struct bnxt_re_rc_cqe);
849 
850 	status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) &
851 		  BNXT_RE_BCQE_STATUS_MASK;
852 	*cnt = 1;
853 	if (status == BNXT_RE_RSP_ST_OK)
854 		bnxt_re_poll_success_rcqe(qp, ibvwc, hdr, flg_val, cqe);
855 	else
856 		*cnt = bnxt_re_poll_err_rcqe(qp, ibvwc, hdr, flg_val, cqe);
857 
858 	return pcqe;
859 }
860 
861 static void bnxt_re_qp_move_flush_err(struct bnxt_re_qp *qp)
862 {
863 	struct bnxt_re_cq *scq, *rcq;
864 
865 	scq = to_bnxt_re_cq(qp->ibvqp.send_cq);
866 	rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq);
867 
868 	if (qp->qpst != IBV_QPS_ERR)
869 		qp->qpst = IBV_QPS_ERR;
870 	bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead);
871 	bnxt_re_list_add_node(&qp->snode, &scq->sfhead);
872 }
873 
874 /* Always return false */
875 static uint8_t bnxt_re_poll_term_cqe(struct bnxt_re_qp *qp, int *cnt)
876 {
877 	/* For now just add the QP to the flush list without
878 	 * considering the index reported in the CQE.
879 	 * Continue reporting flush completions until the
880 	 * SQ and RQ are empty.
881 	 */
882 	*cnt = 0;
883 	if (qp->qpst != IBV_QPS_RESET)
884 		bnxt_re_qp_move_flush_err(qp);
885 
886 	return false;
887 }
888 
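/*
 * Core poll loop: walks valid CQEs starting at cqq->head, dispatches on the
 * CQE type (send, RC/UD receive, terminal, cut-off), zeroes qp_handle to
 * mark hardware CQEs as consumed, and rings the CQ doorbell once if any
 * hardware CQE was processed. Returns the number of work completions
 * written to 'wc'.
 */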
889 static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc,
890 			    uint32_t *resize)
891 {
892 	int type, cnt = 0, dqed = 0, hw_polled = 0;
893 	struct bnxt_re_queue *cqq = cq->cqq;
894 	struct bnxt_re_req_cqe *scqe;
895 	struct bnxt_re_ud_cqe *rcqe;
896 	uint64_t *qp_handle = NULL;
897 	struct bnxt_re_bcqe *hdr;
898 	struct bnxt_re_qp *qp;
899 	uint8_t pcqe = false;
900 	uint32_t flg_val;
901 	void *cqe;
902 
903 	while (nwc) {
904 		cqe = cqq->va + cqq->head * bnxt_re_get_cqe_sz();
905 		hdr = cqe + sizeof(struct bnxt_re_req_cqe);
906 		flg_val = le32toh(hdr->flg_st_typ_ph);
907 		if (unlikely(!bnxt_re_is_cqe_valid(flg_val, cq->phase)))
908 			break;
909 		type = (flg_val >> BNXT_RE_BCQE_TYPE_SHIFT) &
910 			BNXT_RE_BCQE_TYPE_MASK;
911 		switch (type) {
912 		case BNXT_RE_WC_TYPE_SEND:
913 			scqe = cqe;
914 			qp_handle = (uint64_t *)&scqe->qp_handle;
915 			qp = (struct bnxt_re_qp *)
916 			     (uintptr_t)le64toh(scqe->qp_handle);
917 			if (!qp)
918 				break; /* stale cqe. should be rung. */
919 			pcqe = bnxt_re_poll_scqe(qp, wc, cqe, flg_val, &cnt);
920 			break;
921 		case BNXT_RE_WC_TYPE_RECV_RC:
922 		case BNXT_RE_WC_TYPE_RECV_UD:
923 			rcqe = cqe;
924 			qp_handle = (uint64_t *)&rcqe->qp_handle;
925 			qp = (struct bnxt_re_qp *)
926 			     (uintptr_t)le64toh(rcqe->qp_handle);
927 			if (!qp)
928 				break; /* stale cqe. should be rung. */
929 			pcqe = bnxt_re_poll_rcqe(qp, wc, cqe, flg_val, &cnt);
930 			break;
931 		case BNXT_RE_WC_TYPE_RECV_RAW:
932 			break;
933 		case BNXT_RE_WC_TYPE_TERM:
934 			scqe = cqe;
935 			qp_handle = (uint64_t *)&scqe->qp_handle;
936 			qp = (struct bnxt_re_qp *)
937 			     (uintptr_t)le64toh(scqe->qp_handle);
938 			if (!qp)
939 				break;
940 			pcqe = bnxt_re_poll_term_cqe(qp, &cnt);
941 			break;
942 		case BNXT_RE_WC_TYPE_COFF:
943 			/* Stop further processing and return */
944 			bnxt_re_resize_cq_complete(cq);
945 			if (unlikely(resize))
946 				*resize = 1;
947 			return dqed;
948 		default:
949 			break;
950 		}
951 
952 		if (pcqe)
953 			goto skipp_real;
954 
955 		hw_polled++;
956 		if (qp_handle) {
957 			*qp_handle = 0x0ULL; /* mark cqe as read */
958 			qp_handle = NULL;
959 		}
960 		bnxt_re_incr_head(cq->cqq, 1);
961 		bnxt_re_change_cq_phase(cq);
962 skipp_real:
963 		if (cnt) {
964 			cnt = 0;
965 			dqed++;
966 			nwc--;
967 			wc++;
968 		}
969 	}
970 
971 	if (likely(hw_polled))
972 		bnxt_re_ring_cq_db(cq);
973 
974 	return dqed;
975 }
976 
977 static int bnxt_re_poll_flush_wcs(struct bnxt_re_joint_queue *jqq,
978 				  struct ibv_wc *ibvwc, uint32_t qpid,
979 				  int nwc)
980 {
981 	struct bnxt_re_queue *que;
982 	struct bnxt_re_wrid *wrid;
983 	uint32_t cnt = 0;
984 
985 	que = jqq->hwque;
986 	while (nwc) {
987 		if (bnxt_re_is_que_empty(que))
988 			break;
989 		wrid = &jqq->swque[jqq->last_idx];
990 		ibvwc->status = IBV_WC_WR_FLUSH_ERR;
991 		ibvwc->opcode = wrid->wc_opcd;
992 		ibvwc->wr_id = wrid->wrid;
993 		ibvwc->qp_num = qpid;
994 		ibvwc->byte_len = 0;
995 		ibvwc->wc_flags = 0;
996 
997 		bnxt_re_jqq_mod_last(jqq, jqq->last_idx);
998 		bnxt_re_incr_head(que, wrid->slots);
999 		nwc--;
1000 		cnt++;
1001 		ibvwc++;
1002 	}
1003 
1004 	return cnt;
1005 }
1006 
1007 static int bnxt_re_poll_flush_wqes(struct bnxt_re_cq *cq,
1008 				   struct bnxt_re_list_head *lhead,
1009 				   struct ibv_wc *ibvwc,
1010 				   uint32_t nwc)
1011 {
1012 	struct bnxt_re_list_node *cur, *tmp;
1013 	struct bnxt_re_joint_queue *jqq;
1014 	struct bnxt_re_qp *qp;
1015 	bool sq_list = false;
1016 	uint32_t polled = 0;
1017 
1018 	sq_list = (lhead == &cq->sfhead) ? true : false;
1019 	if (!bnxt_re_list_empty(lhead)) {
1020 		list_for_each_node_safe(cur, tmp, lhead) {
1021 			if (sq_list) {
1022 				qp = list_node(cur, struct bnxt_re_qp, snode);
1023 				jqq = qp->jsqq;
1024 			} else {
1025 				qp = list_node(cur, struct bnxt_re_qp, rnode);
1026 				jqq = qp->jrqq;
1027 				if (!jqq) /* Using srq no need to flush */
1028 					goto done;
1029 			}
1030 
1031 			if (bnxt_re_is_que_empty(jqq->hwque))
1032 				continue;
1033 			polled += bnxt_re_poll_flush_wcs(jqq, ibvwc + polled,
1034 							 qp->qpid, nwc - polled);
1035 			if (!(nwc - polled))
1036 				break;
1037 		}
1038 	}
1039 done:
1040 	return polled;
1041 }
1042 
1043 static int bnxt_re_poll_flush_lists(struct bnxt_re_cq *cq, uint32_t nwc,
1044 				    struct ibv_wc *ibvwc)
1045 {
1046 	int left, polled = 0;
1047 
1048 	polled  = bnxt_re_poll_flush_wqes(cq, &cq->sfhead, ibvwc, nwc);
1049 	left = nwc - polled;
1050 
1051 	if (!left)
1052 		return polled;
1053 
1054 	polled  += bnxt_re_poll_flush_wqes(cq, &cq->rfhead,
1055 					   ibvwc + polled, left);
1056 	return polled;
1057 }
1058 
1059 static int bnxt_re_poll_resize_cq_list(struct bnxt_re_cq *cq, uint32_t nwc,
1060 				       struct ibv_wc *ibvwc)
1061 {
1062 	struct bnxt_re_list_node *cur, *tmp;
1063 	struct bnxt_re_work_compl *compl;
1064 	int left;
1065 
1066 	left = nwc;
1067 	list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) {
1068 		compl = list_node(cur, struct bnxt_re_work_compl, cnode);
1069 		if (!left)
1070 			break;
1071 		memcpy(ibvwc, &compl->wc, sizeof(*ibvwc));
1072 		ibvwc++;
1073 		left--;
1074 		bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head);
1075 		free(compl);
1076 	}
1077 
1078 	return nwc - left;
1079 }
1080 
1081 
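/*
 * ibv_poll_cq() hook. Draining order: completions parked during a CQ resize
 * first, then the hardware ring, then software flush completions for QPs
 * that moved to the error state. Illustrative caller-side sketch
 * (hypothetical application code rather than this library):
 *
 *	struct ibv_wc wc[16];
 *	int n = ibv_poll_cq(cq, 16, wc);
 *	for (int i = 0; i < n; i++)
 *		if (wc[i].status != IBV_WC_SUCCESS)
 *			handle_error(&wc[i]);
 */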
1082 int bnxt_re_poll_cq(struct ibv_cq *ibvcq, int nwc, struct ibv_wc *wc)
1083 {
1084 	int dqed = 0, left = 0;
1085 	struct bnxt_re_cq *cq;
1086 	uint32_t resize = 0;
1087 
1088 	cq = container_of(ibvcq, struct bnxt_re_cq, ibvcq);
1089 	bnxt_re_dp_spin_lock(&cq->cqq->qlock);
1090 
1091 	left = nwc;
1092 	/* Check whether we have anything to be completed from the prev CQ context */
1093 	if (unlikely(!bnxt_re_list_empty(&cq->prev_cq_head))) {
1094 		dqed = bnxt_re_poll_resize_cq_list(cq, nwc, wc);
1095 		left = nwc - dqed;
1096 		if (!left) {
1097 			bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
1098 			return dqed;
1099 		}
1100 	}
1101 
1102 	dqed += bnxt_re_poll_one(cq, left, wc + dqed, &resize);
1103 	/* Check if anything is there to flush. */
1104 	left = nwc - dqed;
1105 	if (left && (!bnxt_re_list_empty(&cq->sfhead) ||
1106 		     !bnxt_re_list_empty(&cq->rfhead)))
1107 		dqed += bnxt_re_poll_flush_lists(cq, left, (wc + dqed));
1108 	bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
1109 
1110 	return dqed;
1111 }
1112 
1113 void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq)
1114 {
1115 	struct bnxt_re_queue *que = cq->cqq;
1116 	struct bnxt_re_req_cqe *scqe;
1117 	struct bnxt_re_rc_cqe *rcqe;
1118 	struct bnxt_re_bcqe *hdr;
1119 	int indx, type;
1120 	void *cqe;
1121 
1122 
1123 	bnxt_re_dp_spin_lock(&que->qlock);
1124 	for (indx = 0; indx < que->depth; indx++) {
1125 		cqe = que->va + indx * bnxt_re_get_cqe_sz();
1126 		hdr = cqe + sizeof(struct bnxt_re_req_cqe);
1127 		type = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_TYPE_SHIFT) &
1128 			BNXT_RE_BCQE_TYPE_MASK;
1129 
1130 		if (type == BNXT_RE_WC_TYPE_COFF)
1131 			continue;
1132 		if (type == BNXT_RE_WC_TYPE_SEND ||
1133 		    type == BNXT_RE_WC_TYPE_TERM) {
1134 			scqe = cqe;
1135 			if (scqe->qp_handle == (uint64_t)qp)
1136 				scqe->qp_handle = 0ULL;
1137 		} else {
1138 			rcqe = cqe;
1139 			if (rcqe->qp_handle == (uint64_t)qp)
1140 				rcqe->qp_handle = 0ULL;
1141 		}
1142 
1143 	}
1144 
1145 	if (_is_db_drop_recovery_enable(cq->cntx)) {
1146 		pthread_spin_lock(&cq->cntx->cq_dbr_res.lock);
1147 		bnxt_re_list_del_node(&cq->dbnode, &cq->cntx->cq_dbr_res.head);
1148 		pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock);
1149 	}
1150 	bnxt_re_list_del_node(&qp->snode, &cq->sfhead);
1151 	bnxt_re_list_del_node(&qp->rnode, &cq->rfhead);
1152 	bnxt_re_dp_spin_unlock(&que->qlock);
1153 }
1154 
1155 void bnxt_re_cq_event(struct ibv_cq *ibvcq)
1156 {
1157 
1158 }
1159 
1160 int bnxt_re_arm_cq(struct ibv_cq *ibvcq, int flags)
1161 {
1162 	struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq);
1163 
1164 	bnxt_re_dp_spin_lock(&cq->cqq->qlock);
1165 	flags = !flags ? BNXT_RE_QUE_TYPE_CQ_ARMALL :
1166 			 BNXT_RE_QUE_TYPE_CQ_ARMSE;
1167 
1168 	bnxt_re_ring_cq_arm_db(cq, flags);
1169 	bnxt_re_dp_spin_unlock(&cq->cqq->qlock);
1170 
1171 	return 0;
1172 }
1173 
1174 static int bnxt_re_check_qp_limits(struct bnxt_re_context *cntx,
1175 				   struct ibv_qp_init_attr *attr)
1176 {
1177 	struct ibv_device_attr *devattr;
1178 	struct bnxt_re_dev *rdev;
1179 
1180 	rdev = cntx->rdev;
1181 	devattr = &rdev->devattr;
1182 	if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD)
1183 		return EINVAL;
1184 	if (attr->cap.max_send_sge > devattr->max_sge)
1185 		return EINVAL;
1186 	if (attr->cap.max_recv_sge > devattr->max_sge)
1187 		return EINVAL;
1188 	if (cntx->modes & BNXT_RE_WQE_MODE_VARIABLE) {
1189 		if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE_VAR_WQE)
1190 			return -EINVAL;
1191 	} else if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE) {
1192 		return EINVAL;
1193 	}
1194 	if (attr->cap.max_send_wr > devattr->max_qp_wr)
1195 		attr->cap.max_send_wr = devattr->max_qp_wr;
1196 	if (attr->cap.max_recv_wr > devattr->max_qp_wr)
1197 		attr->cap.max_recv_wr = devattr->max_qp_wr;
1198 
1199 	return 0;
1200 }
1201 
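/*
 * Compute the number of 16-byte (struct bnxt_re_sge) slots needed for an RQ
 * of 'nrwr' WQEs with 'nsge' SGEs each. In static WQE mode every WQE is
 * sized for 6 SGEs so that all WQEs occupy a fixed number of slots. The
 * per-WQE size is returned through 'esz'.
 */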
1202 static int bnxt_re_get_rq_slots(struct bnxt_re_dev *rdev, uint8_t qpmode,
1203 				uint32_t nrwr, uint32_t nsge, uint32_t *esz)
1204 {
1205 	uint32_t max_wqesz;
1206 	uint32_t wqe_size;
1207 	uint32_t stride;
1208 	uint32_t slots;
1209 
1210 	stride = sizeof(struct bnxt_re_sge);
1211 	max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge);
1212 
1213 	wqe_size = bnxt_re_calc_wqe_sz(nsge);
1214 	if (wqe_size > max_wqesz)
1215 		return -EINVAL;
1216 
1217 	if (qpmode == BNXT_RE_WQE_MODE_STATIC)
1218 		wqe_size = bnxt_re_calc_wqe_sz(6);
1219 
1220 	if (esz)
1221 		*esz = wqe_size;
1222 
1223 	slots = (nrwr * wqe_size) / stride;
1224 	return slots;
1225 }
1226 
1227 static int bnxt_re_get_sq_slots(struct bnxt_re_dev *rdev,
1228 				uint8_t qpmode, uint32_t nswr,
1229 				uint32_t nsge, uint32_t ils, uint32_t *esize)
1230 {
1231 	uint32_t max_wqesz;
1232 	uint32_t wqe_size;
1233 	uint32_t cal_ils;
1234 	uint32_t stride;
1235 	uint32_t ilsize;
1236 	uint32_t hdr_sz;
1237 	uint32_t slots;
1238 
1239 	hdr_sz = bnxt_re_get_sqe_hdr_sz();
1240 	stride = sizeof(struct bnxt_re_sge);
1241 	max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge);
1242 	ilsize = get_aligned(ils, hdr_sz);
1243 
1244 	wqe_size = bnxt_re_calc_wqe_sz(nsge);
1245 	if (ilsize) {
1246 		cal_ils = hdr_sz + ilsize;
1247 		wqe_size = MAX(cal_ils, wqe_size);
1248 		wqe_size = get_aligned(wqe_size, hdr_sz);
1249 	}
1250 	if (wqe_size > max_wqesz)
1251 		return -EINVAL;
1252 
1253 	if (qpmode == BNXT_RE_WQE_MODE_STATIC)
1254 		wqe_size = bnxt_re_calc_wqe_sz(6);
1255 
1256 	if (esize)
1257 		*esize = wqe_size;
1258 	slots = (nswr * wqe_size) / stride;
1259 	return slots;
1260 }
1261 
1262 static int bnxt_re_get_sqmem_size(struct bnxt_re_context *cntx,
1263 				  struct ibv_qp_init_attr *attr,
1264 				  struct bnxt_re_qattr *qattr)
1265 {
1266 	uint32_t nsge, nswr, diff = 0;
1267 	size_t bytes = 0;
1268 	uint32_t npsn;
1269 	uint32_t ils;
1270 	uint8_t mode;
1271 	uint32_t esz;
1272 	int nslots;
1273 
1274 	mode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE;
1275 	nsge = attr->cap.max_send_sge;
1276 	diff = bnxt_re_get_diff(cntx->comp_mask);
1277 	nswr = attr->cap.max_send_wr + 1 + diff;
1278 	nswr = bnxt_re_init_depth(nswr, cntx->comp_mask);
1279 	ils = attr->cap.max_inline_data;
1280 	nslots = bnxt_re_get_sq_slots(cntx->rdev, mode, nswr,
1281 				      nsge, ils, &esz);
1282 	if (nslots < 0)
1283 		return nslots;
1284 	npsn = bnxt_re_get_npsn(mode, nswr, nslots);
1285 	if (BNXT_RE_HW_RETX(cntx))
1286 		npsn = roundup_pow_of_two(npsn);
1287 
1288 	qattr->nwr = nswr;
1289 	qattr->slots = nslots;
1290 	qattr->esize = esz;
1291 
1292 	bytes = nslots * sizeof(struct bnxt_re_sge); /* ring */
1293 	bytes += npsn * bnxt_re_get_psne_size(cntx); /* psn */
1294 	qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size);
1295 	qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */
1296 	return 0;
1297 }
1298 
1299 static int bnxt_re_get_rqmem_size(struct bnxt_re_context *cntx,
1300 				  struct ibv_qp_init_attr *attr,
1301 				  struct bnxt_re_qattr *qattr)
1302 {
1303 	uint32_t nrwr, nsge;
1304 	size_t bytes = 0;
1305 	uint32_t esz;
1306 	int nslots;
1307 
1308 	nsge = attr->cap.max_recv_sge;
1309 	nrwr = attr->cap.max_recv_wr + 1;
1310 	nrwr = bnxt_re_init_depth(nrwr, cntx->comp_mask);
1311 	nslots = bnxt_re_get_rq_slots(cntx->rdev, cntx->modes,
1312 				      nrwr, nsge, &esz);
1313 	if (nslots < 0)
1314 		return nslots;
1315 	qattr->nwr = nrwr;
1316 	qattr->slots = nslots;
1317 	qattr->esize = esz;
1318 
1319 	bytes = nslots * sizeof(struct bnxt_re_sge);
1320 	qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size);
1321 	qattr->sz_shad = nrwr * sizeof(struct bnxt_re_wrid);
1322 	return 0;
1323 }
1324 
1325 static int bnxt_re_get_qpmem_size(struct bnxt_re_context *cntx,
1326 				  struct ibv_qp_init_attr *attr,
1327 				  struct bnxt_re_qattr *qattr)
1328 {
1329 	int size = 0;
1330 	int tmp;
1331 	int rc;
1332 
1333 	size = sizeof(struct bnxt_re_qp);
1334 	tmp = sizeof(struct bnxt_re_joint_queue);
1335 	tmp += sizeof(struct bnxt_re_queue);
1336 	size += tmp;
1337 
1338 	rc = bnxt_re_get_sqmem_size(cntx, attr, &qattr[BNXT_RE_QATTR_SQ_INDX]);
1339 	if (rc < 0)
1340 		return -EINVAL;
1341 	size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_ring;
1342 	size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_shad;
1343 
1344 	if (!attr->srq) {
1345 		tmp = sizeof(struct bnxt_re_joint_queue);
1346 		tmp += sizeof(struct bnxt_re_queue);
1347 		size += tmp;
1348 		rc = bnxt_re_get_rqmem_size(cntx, attr,
1349 					    &qattr[BNXT_RE_QATTR_RQ_INDX]);
1350 		if (rc < 0)
1351 			return -EINVAL;
1352 		size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_ring;
1353 		size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_shad;
1354 	}
1355 	return size;
1356 }
1357 
1358 static void *bnxt_re_alloc_qpslab(struct bnxt_re_context *cntx,
1359 				  struct ibv_qp_init_attr *attr,
1360 				  struct bnxt_re_qattr *qattr)
1361 {
1362 	int bytes;
1363 
1364 	bytes = bnxt_re_get_qpmem_size(cntx, attr, qattr);
1365 	if (bytes < 0)
1366 		return NULL;
1367 	return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size);
1368 }
1369 
1370 static int bnxt_re_alloc_queue_ptr(struct bnxt_re_qp *qp,
1371 				   struct ibv_qp_init_attr *attr)
1372 {
1373 	int rc = -ENOMEM;
1374 	int jqsz, qsz;
1375 
1376 	jqsz = sizeof(struct bnxt_re_joint_queue);
1377 	qsz = sizeof(struct bnxt_re_queue);
1378 	qp->jsqq = bnxt_re_get_obj(qp->mem, jqsz);
1379 	if (!qp->jsqq)
1380 		return rc;
1381 	qp->jsqq->hwque = bnxt_re_get_obj(qp->mem, qsz);
1382 	if (!qp->jsqq->hwque)
1383 		goto fail;
1384 
1385 	if (!attr->srq) {
1386 		qp->jrqq = bnxt_re_get_obj(qp->mem, jqsz);
1387 		if (!qp->jrqq)
1388 			goto fail;
1389 		qp->jrqq->hwque = bnxt_re_get_obj(qp->mem, qsz);
1390 		if (!qp->jrqq->hwque)
1391 			goto fail;
1392 	}
1393 
1394 	return 0;
1395 fail:
1396 	return rc;
1397 }
1398 
1399 static int bnxt_re_alloc_init_swque(struct bnxt_re_joint_queue *jqq,
1400 				    struct bnxt_re_mem *mem,
1401 				    struct bnxt_re_qattr *qattr)
1402 {
1403 	int indx;
1404 
1405 	jqq->swque = bnxt_re_get_obj(mem, qattr->sz_shad);
1406 	if (!jqq->swque)
1407 		return -ENOMEM;
1408 	jqq->start_idx = 0;
1409 	jqq->last_idx = qattr->nwr - 1;
1410 	for (indx = 0; indx < qattr->nwr; indx++)
1411 		jqq->swque[indx].next_idx = indx + 1;
1412 	jqq->swque[jqq->last_idx].next_idx = 0;
1413 	jqq->last_idx = 0;
1414 
1415 	return 0;
1416 }
1417 
1418 static inline int bnxt_log2(int n)
1419 {
1420 	int t;
1421 
1422 	if (n <= 0)
1423 		return -1;
1424 
1425 	t = 0;
1426 	while ((1 << t) < n)
1427 		++t;
1428 
1429 	return t;
1430 }
1431 
1432 static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp,
1433 				struct ibv_qp_init_attr *attr,
1434 				struct bnxt_re_qattr *qattr)
1435 {
1436 	struct bnxt_re_context *cntx;
1437 	struct bnxt_re_queue *que;
1438 	uint32_t psn_size;
1439 	uint8_t indx;
1440 	int ret;
1441 
1442 	cntx = qp->cntx;
1443 
1444 	indx = BNXT_RE_QATTR_SQ_INDX;
1445 	que = qp->jsqq->hwque;
1446 	que->stride = sizeof(struct bnxt_re_sge);
1447 	que->depth = qattr[indx].slots;
1448 	que->diff = (bnxt_re_get_diff(cntx->comp_mask) * qattr[indx].esize) /
1449 		     que->stride;
1450 	que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring);
1451 	if (!que->va)
1452 		return -ENOMEM;
1453 	/* PSN-search memory is allocated without checking the QP type.
1454 	 * The kernel driver does not map this memory for a UD QP;
1455 	 * UD QPs use this memory to maintain the WC opcode.
1456 	 * See definition of bnxt_re_fill_psns() for the use case.
1457 	 */
1458 	que->pad = (que->va + que->depth * que->stride);
1459 	psn_size = bnxt_re_get_psne_size(qp->cntx);
1460 	que->pad_stride_log2 = (uint32_t)bnxt_log2((double)psn_size);
1461 
1462 	ret = bnxt_re_alloc_init_swque(qp->jsqq, qp->mem, &qattr[indx]);
1463 	if (ret)
1464 		goto fail;
1465 
1466 	qp->cap.max_swr = qattr[indx].nwr;
1467 	qp->jsqq->cntx = qp->cntx;
1468 	que->dbtail = (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE) ?
1469 		       &que->tail : &qp->jsqq->start_idx;
1470 
1471 	/* Init and adjust MSN table size according to qp mode */
1472 	if (!BNXT_RE_HW_RETX(qp->cntx))
1473 		goto skip_msn;
1474 	que->msn = 0;
1475 	que->msn_tbl_sz = 0;
1476 	if (qp->qpmode & BNXT_RE_WQE_MODE_VARIABLE)
1477 		que->msn_tbl_sz = roundup_pow_of_two(qattr->slots) / 2;
1478 	else
1479 		que->msn_tbl_sz = roundup_pow_of_two(qattr->nwr);
1480 skip_msn:
1481 	bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
1482 
1483 	if (qp->jrqq) {
1484 		indx = BNXT_RE_QATTR_RQ_INDX;
1485 		que = qp->jrqq->hwque;
1486 		que->stride = sizeof(struct bnxt_re_sge);
1487 		que->depth = qattr[indx].slots;
1488 		que->max_slots = qattr[indx].esize / que->stride;
1489 		que->dbtail = &qp->jrqq->start_idx;
1490 		que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring);
1491 		if (!que->va)
1492 			return -ENOMEM;
1493 		/* For the RQ, only bnxt_re_wrid.wrid is used. */
1494 		ret = bnxt_re_alloc_init_swque(qp->jrqq, qp->mem, &qattr[indx]);
1495 		if (ret)
1496 			goto fail;
1497 
1498 		bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
1499 		qp->cap.max_rwr = qattr[indx].nwr;
1500 		qp->jrqq->cntx = qp->cntx;
1501 	}
1502 
1503 	return 0;
1504 fail:
1505 	return ret;
1506 }
1507 
1508 void bnxt_re_async_event(struct ibv_async_event *event)
1509 {
1510 	struct ibv_qp *ibvqp;
1511 	struct bnxt_re_qp *qp;
1512 
1513 	switch (event->event_type) {
1514 	case IBV_EVENT_CQ_ERR:
1515 		break;
1516 	case IBV_EVENT_SRQ_ERR:
1517 	case IBV_EVENT_QP_FATAL:
1518 	case IBV_EVENT_QP_REQ_ERR:
1519 	case IBV_EVENT_QP_ACCESS_ERR:
1520 	case IBV_EVENT_PATH_MIG_ERR: {
1521 		ibvqp = event->element.qp;
1522 		qp = to_bnxt_re_qp(ibvqp);
1523 		bnxt_re_qp_move_flush_err(qp);
1524 		break;
1525 	}
1526 	case IBV_EVENT_SQ_DRAINED:
1527 	case IBV_EVENT_PATH_MIG:
1528 	case IBV_EVENT_COMM_EST:
1529 	case IBV_EVENT_QP_LAST_WQE_REACHED:
1530 	case IBV_EVENT_SRQ_LIMIT_REACHED:
1531 	case IBV_EVENT_PORT_ACTIVE:
1532 	case IBV_EVENT_PORT_ERR:
1533 	default:
1534 		break;
1535 	}
1536 }
1537 
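/*
 * Create a QP. All queue metadata, the SQ/RQ rings, the PSN/MSN area and
 * the shadow WRID queues are carved out of a single page-aligned slab sized
 * by bnxt_re_get_qpmem_size(), so teardown is a single bnxt_re_free_mem()
 * of qp->mem.
 */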
1538 struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *ibvpd,
1539 				 struct ibv_qp_init_attr *attr)
1540 {
1541 	struct bnxt_re_context *cntx = to_bnxt_re_context(ibvpd->context);
1542 	struct bnxt_re_qp_resp resp = {};
1543 	struct ibv_device_attr *devattr;
1544 	struct bnxt_re_qp_req req = {};
1545 	struct bnxt_re_qattr qattr[2];
1546 	struct bnxt_re_qpcap *cap;
1547 	struct bnxt_re_dev *rdev;
1548 	struct bnxt_re_qp *qp;
1549 	void *mem;
1550 
1551 	if (bnxt_re_check_qp_limits(cntx, attr))
1552 		return NULL;
1553 
1554 	memset(qattr, 0, (2 * sizeof(*qattr)));
1555 	mem = bnxt_re_alloc_qpslab(cntx, attr, qattr);
1556 	if (!mem)
1557 		return NULL;
1558 	qp = bnxt_re_get_obj(mem, sizeof(*qp));
1559 	if (!qp)
1560 		goto fail;
1561 	qp->mem = mem;
1562 
1563 	qp->cctx = cntx->cctx;
1564 
1565 	qp->cntx = cntx;
1566 	qp->qpmode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE;
1567 	/* alloc queue pointers */
1568 	if (bnxt_re_alloc_queue_ptr(qp, attr))
1569 		goto fail;
1570 	/* alloc queues */
1571 	if (bnxt_re_alloc_queues(qp, attr, qattr))
1572 		goto fail;
1573 	/* Fill ibv_cmd */
1574 	cap = &qp->cap;
1575 	req.qpsva = (uint64_t)qp->jsqq->hwque->va;
1576 	req.qprva = qp->jrqq ? (uint64_t)qp->jrqq->hwque->va : 0;
1577 	req.qp_handle = (uint64_t)qp;
1578 
1579 	if (ibv_cmd_create_qp(ibvpd, &qp->ibvqp, attr, &req.cmd, sizeof(req),
1580 			      &resp.resp, sizeof(resp)))
1581 		goto fail;
1582 
1583 	qp->qpid = resp.qpid;
1584 	qp->qptyp = attr->qp_type;
1585 	qp->qpst = IBV_QPS_RESET;
1586 	qp->scq = to_bnxt_re_cq(attr->send_cq);
1587 	qp->rcq = to_bnxt_re_cq(attr->recv_cq);
1588 	if (attr->srq)
1589 		qp->srq = to_bnxt_re_srq(attr->srq);
1590 	qp->udpi = &cntx->udpi;
1591 	qp->rand.seed = qp->qpid;
1592 	qp->sq_shadow_db_key = BNXT_RE_DB_KEY_INVALID;
1593 	qp->rq_shadow_db_key = BNXT_RE_DB_KEY_INVALID;
1594 	qp->sq_msn = 0;
1595 
1596 	rdev = cntx->rdev;
1597 	devattr = &rdev->devattr;
1598 	cap->max_ssge = attr->cap.max_send_sge;
1599 	cap->max_rsge = attr->cap.max_recv_sge;
1600 	cap->max_inline = attr->cap.max_inline_data;
1601 	cap->sqsig = attr->sq_sig_all;
1602 	cap->is_atomic_cap = devattr->atomic_cap;
1603 	INIT_DBLY_LIST_NODE(&qp->snode);
1604 	INIT_DBLY_LIST_NODE(&qp->rnode);
1605 	INIT_DBLY_LIST_NODE(&qp->dbnode);
1606 
1607 	/* For SR2, push will be negotiated at modify qp */
1608 	if (_is_chip_gen_p5(qp->cctx) && cntx->udpi.wcdpi) {
1609 		qp->push_st_en = 1;
1610 		qp->max_push_sz = BNXT_RE_MAX_INLINE_SIZE;
1611 	}
1612 
1613 	if (_is_db_drop_recovery_enable(cntx)) {
1614 		pthread_spin_lock(&cntx->qp_dbr_res.lock);
1615 		bnxt_re_list_add_node(&qp->dbnode, &cntx->qp_dbr_res.head);
1616 		pthread_spin_unlock(&cntx->qp_dbr_res.lock);
1617 	}
1618 	return &qp->ibvqp;
1619 fail:
1620 	bnxt_re_free_mem(mem);
1621 	return NULL;
1622 }
1623 
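/*
 * Modify QP. When the extended modify command is supported, this may also
 * negotiate push (PPP) doorbells and retrieve the path MTU from the kernel.
 * A transition to RESET rewinds both queues and scrubs this QP's CQEs from
 * its send and receive CQs.
 */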
1624 int bnxt_re_modify_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr,
1625 		      int attr_mask)
1626 {
1627 	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
1628 	int rc;
1629 
1630 	struct bnxt_re_modify_ex_resp resp = {};
1631 	struct bnxt_re_modify_ex_req req = {};
1632 	bool can_issue_mqp_ex = false;
1633 
1634 	if (bnxt_re_is_mqp_ex_supported(qp->cntx)) {
1635 		can_issue_mqp_ex = true;
1636 		/* Request for PPP */
1637 		if (can_request_ppp(qp, attr, attr_mask)) {
1638 			req.comp_mask |= BNXT_RE_MQP_PPP_REQ_EN;
1639 			req.dpi = qp->udpi->wcdpi;
1640 		}
1641 		if (attr_mask & IBV_QP_PATH_MTU)
1642 			req.comp_mask |= BNXT_RE_MQP_PATH_MTU_MASK;
1643 	}
1644 	rc = ibv_cmd_modify_qp_compat(ibvqp, attr, attr_mask,
1645 				      can_issue_mqp_ex, &req, &resp);
1646 	if (!rc) {
1647 		if (attr_mask & IBV_QP_STATE) {
1648 			qp->qpst = attr->qp_state;
1649 			/* transition to reset */
1650 			if (qp->qpst == IBV_QPS_RESET) {
1651 				qp->jsqq->hwque->head = 0;
1652 				qp->jsqq->hwque->tail = 0;
1653 				*qp->jsqq->hwque->dbtail = 0;
1654 				qp->jsqq->start_idx = 0;
1655 				qp->jsqq->last_idx = 0;
1656 				bnxt_re_cleanup_cq(qp, qp->scq);
1657 				if (qp->jrqq) {
1658 					qp->jrqq->hwque->head = 0;
1659 					qp->jrqq->hwque->tail = 0;
1660 					*qp->jrqq->hwque->dbtail = 0;
1661 					qp->jrqq->start_idx = 0;
1662 					qp->jrqq->last_idx = 0;
1663 					bnxt_re_cleanup_cq(qp, qp->rcq);
1664 				}
1665 			}
1666 			/* Copy if PUSH was enabled */
1667 			if (resp.comp_mask & BNXT_RE_MQP_PPP_REQ_EN_MASK) {
1668 				qp->push_st_en = BNXT_RE_MQP_PPP_REQ_EN;
1669 				/* Set the next posting state
1670 				 * based on current h/w state
1671 				 */
1672 				qp->push_st_en |=
1673 					!(!!(resp.ppp_st_idx &
1674 					     BNXT_RE_MQP_PPP_STATE)) <<
1675 					 BNXT_RE_PPP_ST_SHIFT;
1676 				qp->ppp_idx =
1677 					(resp.ppp_st_idx &
1678 					 BNXT_RE_MQP_PPP_IDX_MASK);
1679 				if (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE)
1680 					qp->max_push_sz =
1681 						BNXT_RE_MAX_PUSH_SIZE_VAR_WQE;
1682 				else
1683 					qp->max_push_sz =
1684 						BNXT_RE_MAX_INLINE_SIZE;
1685 			}
1686 		}
1687 
1688 		if (attr_mask & IBV_QP_SQ_PSN)
1689 			qp->sq_psn = attr->sq_psn;
1690 
1691 		if (resp.comp_mask & BNXT_RE_MQP_PATH_MTU_MASK)
1692 			qp->mtu = resp.path_mtu;
1693 		else if (attr_mask & IBV_QP_PATH_MTU)
1694 			qp->mtu = (0x80 << attr->path_mtu);
1695 	}
1696 
1697 	return rc;
1698 }
1699 
1700 int bnxt_re_query_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr,
1701 		     int attr_mask, struct ibv_qp_init_attr *init_attr)
1702 {
1703 	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
1704 	struct ibv_query_qp cmd = {};
1705 	int rc;
1706 
1707 	rc = ibv_cmd_query_qp(ibvqp, attr, attr_mask, init_attr,
1708 			      &cmd, sizeof(cmd));
1709 	if (!rc)
1710 		qp->qpst = ibvqp->state;
1711 
1712 	return rc;
1713 }
1714 
1715 int bnxt_re_destroy_qp(struct ibv_qp *ibvqp)
1716 {
1717 	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
1718 	struct bnxt_re_mem *mem;
1719 	int status;
1720 
1721 	qp->qpst = IBV_QPS_RESET;
1722 	if (_is_db_drop_recovery_enable(qp->cntx)) {
1723 		pthread_spin_lock(&qp->cntx->qp_dbr_res.lock);
1724 		bnxt_re_list_del_node(&qp->dbnode, &qp->cntx->qp_dbr_res.head);
1725 		pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock);
1726 	}
1727 	status = ibv_cmd_destroy_qp(ibvqp);
1728 	if (status) {
1729 		if (_is_db_drop_recovery_enable(qp->cntx)) {
1730 			pthread_spin_lock(&qp->cntx->qp_dbr_res.lock);
1731 			bnxt_re_list_add_node(&qp->dbnode,
1732 					      &qp->cntx->qp_dbr_res.head);
1733 			pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock);
1734 		}
1735 		return status;
1736 	}
1737 	bnxt_re_cleanup_cq(qp, qp->rcq);
1738 	bnxt_re_cleanup_cq(qp, qp->scq);
1739 	mem = qp->mem;
1740 	bnxt_re_free_mem(mem);
1741 	return 0;
1742 }
1743 
1744 static void bnxt_re_put_rx_sge(struct bnxt_re_queue *que, uint32_t *idx,
1745 			       struct ibv_sge *sgl, int nsg)
1746 {
1747 	struct bnxt_re_sge *sge;
1748 	int indx;
1749 
1750 	for (indx = 0; indx < nsg; indx++) {
1751 		sge = bnxt_re_get_hwqe(que, (*idx)++);
1752 		sge->pa = htole64(sgl[indx].addr);
1753 		sge->lkey = htole32(sgl[indx].lkey);
1754 		sge->length = htole32(sgl[indx].length);
1755 	}
1756 }
1757 
1758 static int bnxt_re_put_tx_sge(struct bnxt_re_queue *que, uint32_t *idx,
1759 			      struct ibv_sge *sgl, int nsg)
1760 {
1761 	struct bnxt_re_sge *sge;
1762 	int indx;
1763 	int len;
1764 
1765 	len = 0;
1766 	for (indx = 0; indx < nsg; indx++) {
1767 		sge = bnxt_re_get_hwqe(que, (*idx)++);
1768 		sge->pa = htole64(sgl[indx].addr);
1769 		sge->lkey = htole32(sgl[indx].lkey);
1770 		sge->length = htole32(sgl[indx].length);
1771 		len += sgl[indx].length;
1772 	}
1773 	return len;
1774 }
1775 
1776 static inline int bnxt_re_calc_inline_len(struct ibv_send_wr *swr)
1777 {
1778 	int illen, indx;
1779 
1780 	illen = 0;
1781 	for (indx = 0; indx < swr->num_sge; indx++)
1782 		illen += swr->sg_list[indx].length;
1783 	return get_aligned(illen, sizeof(struct bnxt_re_sge));
1784 }
1785 
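/*
 * Copy inline data from the SGE list directly into successive 16-byte
 * hardware slots, pulling a fresh slot whenever the current one fills.
 * When a push buffer is supplied, the slot addresses are also recorded in
 * pbuf->wqe[] for use by the push path. Returns the total inline length,
 * or -ENOMEM if it exceeds max_ils.
 */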
1786 static int bnxt_re_put_inline(struct bnxt_re_queue *que, uint32_t *idx,
1787 			      struct bnxt_re_push_buffer *pbuf,
1788 			      struct ibv_sge *sgl, uint32_t nsg,
1789 			      uint16_t max_ils)
1790 {
1791 	int len, t_len, offt = 0;
1792 	int t_cplen = 0, cplen;
1793 	bool pull_dst = true;
1794 	void *il_dst = NULL;
1795 	void *il_src = NULL;
1796 	int alsize;
1797 	int indx;
1798 
1799 	alsize = sizeof(struct bnxt_re_sge);
1800 
1801 	t_len = 0;
1802 	for (indx = 0; indx < nsg; indx++) {
1803 		len = sgl[indx].length;
1804 		il_src = (void *)sgl[indx].addr;
1805 		t_len += len;
1806 		if (t_len > max_ils)
1807 			goto bad;
1808 		while (len) {
1809 			if (pull_dst) {
1810 				pull_dst = false;
1811 				il_dst = bnxt_re_get_hwqe(que, (*idx)++);
1812 				if (pbuf)
1813 					pbuf->wqe[*idx - 1] =
1814 					(__u64)il_dst;
1815 				t_cplen = 0;
1816 				offt = 0;
1817 			}
1818 			cplen = MIN(len, alsize);
1819 			cplen = MIN(cplen,(alsize - offt));
1820 			memcpy(il_dst, il_src, cplen);
1821 			t_cplen += cplen;
1822 			il_src += cplen;
1823 			il_dst += cplen;
1824 			offt += cplen;
1825 			len -= cplen;
1826 			if (t_cplen == alsize)
1827 				pull_dst = true;
1828 		}
1829 	}
1830 
1831 	return t_len;
1832 bad:
1833 	return -ENOMEM;
1834 }
1835 
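/*
 * Work out how many 16-byte slots the WQE for 'wr' needs and, for inline
 * sends that fit within the push threshold, try to grab a push buffer.
 * In static WQE mode every WQE consumes a fixed 8 slots (128 bytes).
 */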
1836 static int bnxt_re_required_slots(struct bnxt_re_qp *qp, struct ibv_send_wr *wr,
1837 				  uint32_t *wqe_sz, void **pbuf)
1838 {
1839 	uint32_t wqe_byte;
1840 	int ilsize;
1841 
1842 	if (wr->send_flags & IBV_SEND_INLINE) {
1843 		ilsize = bnxt_re_calc_inline_len(wr);
1844 		if (ilsize > qp->cap.max_inline)
1845 			return -EINVAL;
1846 		if (qp->push_st_en && ilsize <= qp->max_push_sz)
1847 			*pbuf = bnxt_re_get_pbuf(&qp->push_st_en, qp->ppp_idx, qp->cntx);
1848 		wqe_byte = (ilsize + bnxt_re_get_sqe_hdr_sz());
1849 	} else {
1850 		wqe_byte = bnxt_re_calc_wqe_sz(wr->num_sge);
1851 	}
1852 
1853 	/* que->stride is always 2^4 = 16, so the shift count is hard-coded */
1854 	*wqe_sz = wqe_byte >> 4;
1855 	if (qp->qpmode == BNXT_RE_WQE_MODE_STATIC)
1856 		return 8;
1857 	return *wqe_sz;
1858 }
1859 
1860 static inline void bnxt_re_set_hdr_flags(struct bnxt_re_bsqe *hdr,
1861 					 struct ibv_send_wr *wr,
1862 					 uint32_t slots, uint8_t sqsig)
1863 {
1864 	uint32_t send_flags;
1865 	uint32_t hdrval = 0;
1866 	uint8_t opcd;
1867 
1868 	send_flags = wr->send_flags;
1869 	if (send_flags & IBV_SEND_SIGNALED || sqsig)
1870 		hdrval |= ((BNXT_RE_WR_FLAGS_SIGNALED & BNXT_RE_HDR_FLAGS_MASK)
1871 			    << BNXT_RE_HDR_FLAGS_SHIFT);
1872 	if (send_flags & IBV_SEND_FENCE)
1873 		hdrval |= ((BNXT_RE_WR_FLAGS_UC_FENCE & BNXT_RE_HDR_FLAGS_MASK)
1874 			    << BNXT_RE_HDR_FLAGS_SHIFT);
1875 	if (send_flags & IBV_SEND_SOLICITED)
1876 		hdrval |= ((BNXT_RE_WR_FLAGS_SE & BNXT_RE_HDR_FLAGS_MASK)
1877 			    << BNXT_RE_HDR_FLAGS_SHIFT);
1878 	if (send_flags & IBV_SEND_INLINE)
1879 		hdrval |= ((BNXT_RE_WR_FLAGS_INLINE & BNXT_RE_HDR_FLAGS_MASK)
1880 			    << BNXT_RE_HDR_FLAGS_SHIFT);
1881 	hdrval |= (slots & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT;
1882 
1883 	/* Fill opcode */
1884 	opcd = ibv_to_bnxt_re_wr_opcd[wr->opcode];
1885 	hdrval |= (opcd & BNXT_RE_HDR_WT_MASK);
1886 	hdr->rsv_ws_fl_wt = htole32(hdrval);
1887 }
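
/*
 * Layout sketch of the header word built above (symbolic only; field
 * positions are the driver's BNXT_RE_HDR_* masks and shifts):
 * rsv_ws_fl_wt packs the opcode in the WT field, the
 * signaled/fence/solicited/inline flags in the FLAGS field and the
 * WQE size in 16-byte slots in the WS field, then stores the word
 * little-endian.
 */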
1888 
1889 static int bnxt_re_build_tx_sge(struct bnxt_re_queue *que, uint32_t *idx,
1890 				struct bnxt_re_push_buffer *pbuf,
1891 				struct ibv_send_wr *wr,
1892 				uint16_t max_il)
1893 {
1894 	if (wr->send_flags & IBV_SEND_INLINE)
1895 		return bnxt_re_put_inline(que, idx, pbuf, wr->sg_list, wr->num_sge, max_il);
1896 
1897 	return bnxt_re_put_tx_sge(que, idx, wr->sg_list, wr->num_sge);
1898 }
1899 
1900 static void *bnxt_re_pull_psn_buff(struct bnxt_re_queue *que, bool hw_retx)
1901 {
1902 	if (hw_retx)
1903 		return (void *)(que->pad + ((que->msn) << que->pad_stride_log2));
1904 	return (void *)(que->pad + ((*que->dbtail) << que->pad_stride_log2));
1905 }
1906 
1907 static void bnxt_re_fill_psns_for_msntbl(struct bnxt_re_qp *qp, uint32_t len,
1908 					 uint32_t st_idx, uint8_t opcode)
1909 {
1910 	uint32_t npsn = 0, start_psn = 0, next_psn = 0;
1911 	struct bnxt_re_msns *msns;
1912 	uint32_t pkt_cnt = 0;
1913 
1914 	msns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, true);
1915 	msns->start_idx_next_psn_start_psn = 0;
1916 
1917 	if (qp->qptyp == IBV_QPT_RC) {
1918 		start_psn = qp->sq_psn;
1919 		pkt_cnt = (len / qp->mtu);
1920 		if (len % qp->mtu)
1921 			pkt_cnt++;
1922 		/* Increment the psn even for 0 len packets
1923 		 * e.g. for opcode rdma-write-with-imm-data
1924 		 * with length field = 0
1925 		 */
1926 		if (bnxt_re_is_zero_len_pkt(len, opcode))
1927 			pkt_cnt = 1;
1928 		/* make it 24 bit */
1929 		next_psn = qp->sq_psn + pkt_cnt;
1930 		npsn = next_psn;
1931 		qp->sq_psn = next_psn;
1932 		msns->start_idx_next_psn_start_psn |=
1933 			bnxt_re_update_msn_tbl(st_idx, npsn, start_psn);
1934 		qp->jsqq->hwque->msn++;
1935 		qp->jsqq->hwque->msn %= qp->jsqq->hwque->msn_tbl_sz;
1936 	}
1937 }
1938 
1939 static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, uint32_t len,
1940 			      uint32_t st_idx, uint8_t opcode)
1941 {
1942 	uint32_t opc_spsn = 0, flg_npsn = 0;
1943 	struct bnxt_re_psns_ext *psns_ext;
1944 	uint32_t pkt_cnt = 0, nxt_psn = 0;
1945 	struct bnxt_re_psns *psns;
1946 
1947 	psns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, false);
1948 	psns_ext = (struct bnxt_re_psns_ext *)psns;
1949 
1950 	if (qp->qptyp == IBV_QPT_RC) {
1951 		opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK;
1952 		pkt_cnt = (len / qp->mtu);
1953 		if (len % qp->mtu)
1954 			pkt_cnt++;
1955 		/* Increment the psn even for 0 len packets
1956 		 * e.g. for opcode rdma-write-with-imm-data
1957 		 * with length field = 0
1958 		 */
1959 		if (bnxt_re_is_zero_len_pkt(len, opcode))
1960 			pkt_cnt = 1;
1961 		nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK);
1962 		flg_npsn = nxt_psn;
1963 		qp->sq_psn = nxt_psn;
1964 	}
1965 	psns->opc_spsn = htole32(opc_spsn);
1966 	psns->flg_npsn = htole32(flg_npsn);
1967 	/* Update for Thor P5, not Thor2 */
1968 	if (!BNXT_RE_HW_RETX(qp->cntx) && qp->cctx->chip_is_gen_p5_thor2)
1969 		psns_ext->st_slot_idx = st_idx;
1970 }
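
/*
 * Worked PSN example for the two fill routines above (illustrative
 * values): with mtu = 4096, a 10000-byte RC WR spans
 * pkt_cnt = 10000 / 4096 + 1 = 3 packets, so sq_psn advances by 3.
 * A zero-length WR (e.g. RDMA write with immediate and no payload)
 * still consumes one PSN because pkt_cnt is forced to 1.
 */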
1971 
1972 static int bnxt_re_build_ud_sqe(struct ibv_send_wr *wr,
1973 				struct bnxt_re_bsqe *hdr,
1974 				struct bnxt_re_send *sqe)
1975 {
1976 	struct bnxt_re_ah *ah;
1977 	uint64_t qkey;
1978 
1979 	if (!wr->wr.ud.ah)
1980 		return -EINVAL;
1981 	ah = to_bnxt_re_ah(wr->wr.ud.ah);
1982 	qkey = wr->wr.ud.remote_qkey;
1983 	hdr->lhdr.qkey_len |= htole64(qkey << 32);
1984 	sqe->dst_qp = htole32(wr->wr.ud.remote_qpn);
1985 	sqe->avid = htole32(ah->avid & 0xFFFFF);
1986 
1987 	return 0;
1988 }
1989 
1990 static void bnxt_re_build_cns_sqe(struct ibv_send_wr *wr,
1991 				  struct bnxt_re_bsqe *hdr,
1992 				  void *hdr2)
1993 {
1994 	struct bnxt_re_atomic *sqe = hdr2;
1995 
1996 	hdr->key_immd = htole32(wr->wr.atomic.rkey);
1997 	hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr);
1998 	sqe->cmp_dt = htole64(wr->wr.atomic.compare_add);
1999 	sqe->swp_dt = htole64(wr->wr.atomic.swap);
2000 }
2001 
2002 static void bnxt_re_build_fna_sqe(struct ibv_send_wr *wr,
2003 				  struct bnxt_re_bsqe *hdr,
2004 				  void *hdr2)
2005 {
2006 	struct bnxt_re_atomic *sqe = hdr2;
2007 
2008 	hdr->key_immd = htole32(wr->wr.atomic.rkey);
2009 	hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr);
2010 	sqe->swp_dt = htole64(wr->wr.atomic.compare_add);
2011 }
2012 
2013 void bnxt_re_force_rts2rts(struct bnxt_re_qp *qp)
2014 {
2015 	struct ibv_qp_attr attr = {};
2016 	int attr_mask;
2017 	int attr_mask;

2018 	attr_mask = IBV_QP_STATE;
2019 	bnxt_re_modify_qp(&qp->ibvqp, &attr, attr_mask);
2020 	qp->wqe_cnt = 0;
2021 }
2022 
2023 int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
2024 		      struct ibv_send_wr **bad)
2025 {
2026 	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
2027 	struct bnxt_re_queue *sq = qp->jsqq->hwque;
2028 	struct bnxt_re_push_buffer *pbuf = NULL;
2029 	bool chip_is_not_gen_p5_thor2;
2030 	int slots, ret = 0, len = 0;
2031 	uint32_t swq_idx, wqe_size;
2032 	struct bnxt_re_wrid *wrid;
2033 	struct bnxt_re_rdma *rsqe;
2034 	struct bnxt_re_bsqe *hdr;
2035 	struct bnxt_re_send *sqe;
2036 	bool ring_db = false;
2037 	uint32_t idx;
2038 
2039 	bnxt_re_dp_spin_lock(&sq->qlock);
2040 	chip_is_not_gen_p5_thor2 = !qp->cctx->chip_is_gen_p5_thor2;
2041 	while (wr) {
2042 		slots = bnxt_re_required_slots(qp, wr, &wqe_size, (void **)&pbuf);
2043 		if (unlikely(slots < 0 || bnxt_re_is_que_full(sq, slots) ||
2044 			     wr->num_sge > qp->cap.max_ssge)) {
2045 			*bad = wr;
2046 			ret = ENOMEM;
2047 			goto bad_wr;
2048 		}
2049 		if ((wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP ||
2050 		     wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD) &&
2051 		     !qp->cap.is_atomic_cap) {
2052 			*bad = wr;
2053 			ret = EINVAL;
2054 			goto bad_wr;
2055 		}
2056 		idx = 0;
2057 		len = 0;
2058 		hdr = bnxt_re_get_hwqe(sq, idx++);
2059 		sqe = bnxt_re_get_hwqe(sq, idx++);
2060 		/* populate push buffer */
2061 		if (pbuf) {
2062 			pbuf->qpid = qp->qpid;
2063 			pbuf->wqe[0] = (__u64)hdr;
2064 			pbuf->wqe[1] = (__u64)sqe;
2065 			pbuf->st_idx = *sq->dbtail;
2066 		}
2067 		if (wr->num_sge) {
2068 			len = bnxt_re_build_tx_sge(sq, &idx, pbuf, wr, qp->cap.max_inline);
2069 			if (unlikely(len < 0)) {
2070 				ret = ENOMEM;
2071 				*bad = wr;
2072 				goto bad_wr;
2073 			}
2074 		}
2075 		hdr->lhdr.qkey_len = htole32(len);
2076 		bnxt_re_set_hdr_flags(hdr, wr, wqe_size, qp->cap.sqsig);
2077 		switch (wr->opcode) {
2078 		case IBV_WR_SEND_WITH_IMM:
2079 			/* HW swaps the immediate data before
2080 			 * sending it out on the wire.  To work around
2081 			 * this, swap the imm_data value as sent by
2082 			 * the application so that the value going out
2083 			 * on the wire is in big-endian format.
2084 			 */
2085 			hdr->key_immd = htole32(be32toh(wr->imm_data));
2086 			if (qp->qptyp == IBV_QPT_UD) {
2087 				if (chip_is_not_gen_p5_thor2 &&
2088 				    qp->wqe_cnt == BNXT_RE_UD_QP_STALL)
2089 					bnxt_re_force_rts2rts(qp);
2090 
2091 				len = bnxt_re_build_ud_sqe(wr, hdr, sqe);
2092 			}
2093 			break;
2094 		case IBV_WR_SEND:
2095 			if (qp->qptyp == IBV_QPT_UD) {
2096 				if (chip_is_not_gen_p5_thor2 &&
2097 				    qp->wqe_cnt == BNXT_RE_UD_QP_STALL)
2098 					bnxt_re_force_rts2rts(qp);
2099 
2100 				len = bnxt_re_build_ud_sqe(wr, hdr, sqe);
2101 			}
2102 			break;
2103 		case IBV_WR_RDMA_WRITE_WITH_IMM:
2104 			hdr->key_immd = htole32(be32toh(wr->imm_data));
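			/* FALLTHROUGH */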
2105 		case IBV_WR_RDMA_WRITE:
2106 		case IBV_WR_RDMA_READ:
2107 			rsqe = (struct bnxt_re_rdma *)sqe;
2108 			rsqe->rva = htole64(wr->wr.rdma.remote_addr);
2109 			rsqe->rkey = htole32(wr->wr.rdma.rkey);
2110 			break;
2111 		case IBV_WR_ATOMIC_CMP_AND_SWP:
2112 			bnxt_re_build_cns_sqe(wr, hdr, sqe);
2113 			break;
2114 		case IBV_WR_ATOMIC_FETCH_AND_ADD:
2115 			bnxt_re_build_fna_sqe(wr, hdr, sqe);
2116 			break;
2117 		default:
2118 			len = -EINVAL;
2119 			break;
2120 		}
2121 
2122 		if (unlikely(len < 0)) {
2123 			ret = (len == -EINVAL) ? EINVAL : ENOMEM;
2124 			*bad = wr;
2125 			break;
2126 		}
2127 		if (BNXT_RE_HW_RETX(qp->cntx))
2128 			bnxt_re_fill_psns_for_msntbl(qp, len, *sq->dbtail, wr->opcode);
2129 		else
2130 			bnxt_re_fill_psns(qp, len, *sq->dbtail, wr->opcode);
2131 
2132 		wrid = bnxt_re_get_swqe(qp->jsqq, &swq_idx);
2133 		wrid->wrid = wr->wr_id;
2134 		wrid->bytes = len;
2135 		wrid->slots = slots;
2136 		wrid->sig = (wr->send_flags & IBV_SEND_SIGNALED || qp->cap.sqsig) ?
2137 			     IBV_SEND_SIGNALED : 0;
2138 		wrid->wc_opcd = ibv_wr_to_wc_opcd[wr->opcode];
2139 
2140 		bnxt_re_incr_tail(sq, slots);
2141 		bnxt_re_jqq_mod_start(qp->jsqq, swq_idx);
2142 		ring_db = true;
2143 		if (pbuf) {
2144 			ring_db = false;
2145 			pbuf->tail = *sq->dbtail;
2146 			if (_is_chip_thor2(qp->cctx)) {
2147 				/* SR2 A0 workaround: ring an extra doorbell */
2148 				ring_db |= _is_chip_a0(qp->cctx);
2149 				bnxt_re_fill_ppp(pbuf, qp, len, idx);
2150 			} else {
2151 				bnxt_re_fill_push_wcb(qp, pbuf, idx);
2152 			}
2153 
2154 			bnxt_re_put_pbuf(qp->cntx, pbuf);
2155 			pbuf = NULL;
2156 		}
2157 		qp->wqe_cnt++;
2158 		qp->sq_msn++;
2159 		wr = wr->next;
2160 	}
2161 
2162 bad_wr:
2163 	if (ring_db)
2164 		bnxt_re_ring_sq_db(qp);
2165 
2166 	if (pbuf)
2167 		bnxt_re_put_pbuf(qp->cntx, pbuf);
2168 
2169 	bnxt_re_dp_spin_unlock(&sq->qlock);
2170 	return ret;
2171 }
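
/*
 * Caller-side sketch (illustrative; assumes an established RC QP "qp",
 * a registered MR "mr" over buffer "buf", error handling trimmed).
 * libibverbs dispatches ibv_post_send() to bnxt_re_post_send() above.
 *
 *	struct ibv_sge sge = {
 *		.addr   = (uint64_t)(uintptr_t)buf,
 *		.length = 64,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ibv_send_wr swr = {
 *		.wr_id      = 0x1234,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IBV_WR_SEND,
 *		.send_flags = IBV_SEND_SIGNALED,
 *	};
 *	struct ibv_send_wr *bad_swr;
 *
 *	if (ibv_post_send(qp, &swr, &bad_swr))
 *		fprintf(stderr, "post_send failed at wr_id %llu\n",
 *			(unsigned long long)bad_swr->wr_id);
 */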
2172 
2173 int bnxt_re_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
2174 		      struct ibv_recv_wr **bad)
2175 {
2176 	struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp);
2177 	struct bnxt_re_queue *rq = qp->jrqq->hwque;
2178 	struct bnxt_re_wrid *swque;
2179 	struct bnxt_re_brqe *hdr;
2180 	struct bnxt_re_sge *sge;
2181 	bool ring_db = false;
2182 	uint32_t swq_idx;
2183 	uint32_t hdrval;
2184 	uint32_t idx;
2185 	int rc = 0;
2186 
2187 	bnxt_re_dp_spin_lock(&rq->qlock);
2188 	while (wr) {
2189 		if (unlikely(bnxt_re_is_que_full(rq, rq->max_slots) ||
2190 			     wr->num_sge > qp->cap.max_rsge)) {
2191 			*bad = wr;
2192 			rc = ENOMEM;
2193 			break;
2194 		}
2195 		swque = bnxt_re_get_swqe(qp->jrqq, &swq_idx);
2196 
2197 		/*
2198 		 * Initialize idx to 2 since the length of header wqe is 32 bytes
2199 		 * i.e. sizeof(struct bnxt_re_brqe) + sizeof(struct bnxt_re_send)
2200 		 */
2201 		idx = 2;
2202 		hdr = bnxt_re_get_hwqe_hdr(rq);
2203 
2204 		if (!wr->num_sge) {
2205 			/*
2206 			 * HW needs at least one SGE for RQ Entries.
2207 			 * Create an entry if num_sge = 0,
2208 			 * update the idx and set length of sge to 0.
2209 			 */
2210 			sge = bnxt_re_get_hwqe(rq, idx++);
2211 			sge->length = 0;
2212 		} else {
2213 			/* Fill SGEs */
2214 			bnxt_re_put_rx_sge(rq, &idx, wr->sg_list, wr->num_sge);
2215 		}
2216 		hdrval = BNXT_RE_WR_OPCD_RECV;
2217 		hdrval |= ((idx & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT);
2218 		hdr->rsv_ws_fl_wt = htole32(hdrval);
2219 		hdr->wrid = htole32(swq_idx);
2220 
2221 		swque->wrid = wr->wr_id;
2222 		swque->slots = rq->max_slots;
2223 		swque->wc_opcd = BNXT_RE_WC_OPCD_RECV;
2224 
2225 		bnxt_re_jqq_mod_start(qp->jrqq, swq_idx);
2226 		bnxt_re_incr_tail(rq, rq->max_slots);
2227 		ring_db = true;
2228 		wr = wr->next;
2229 	}
2230 	if (ring_db)
2231 		bnxt_re_ring_rq_db(qp);
2232 	bnxt_re_dp_spin_unlock(&rq->qlock);
2233 
2234 	return rc;
2235 }
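
/*
 * Caller-side sketch (illustrative; same assumptions as the send
 * example above: QP "qp", MR "mr", buffer "buf").  ibv_post_recv()
 * lands in bnxt_re_post_recv() above.
 *
 *	struct ibv_sge sge = {
 *		.addr   = (uint64_t)(uintptr_t)buf,
 *		.length = 4096,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ibv_recv_wr rwr = {
 *		.wr_id   = 0x5678,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *	};
 *	struct ibv_recv_wr *bad_rwr;
 *
 *	if (ibv_post_recv(qp, &rwr, &bad_rwr))
 *		fprintf(stderr, "post_recv failed\n");
 */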
2236 
2237 static size_t bnxt_re_get_srqmem_size(struct bnxt_re_context *cntx,
2238 				      struct ibv_srq_init_attr *attr,
2239 				      struct bnxt_re_qattr *qattr)
2240 {
2241 	uint32_t stride, nswr;
2242 	size_t size = 0;
2243 
2244 	size = sizeof(struct bnxt_re_srq);
2245 	size += sizeof(struct bnxt_re_queue);
2246 	/* allocate 1 extra to determine the full condition */
2247 	nswr = attr->attr.max_wr + 1;
2248 	nswr = bnxt_re_init_depth(nswr, cntx->comp_mask);
2249 	stride = bnxt_re_get_srqe_sz();
2250 
2251 	qattr->nwr = nswr;
2252 	qattr->slots = nswr;
2253 	qattr->esize = stride;
2254 
2255 	qattr->sz_ring = get_aligned((nswr * stride), cntx->rdev->pg_size);
2256 	qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */
2257 
2258 	size += qattr->sz_ring;
2259 	size += qattr->sz_shad;
2260 	return size;
2261 }
2262 
2263 static void *bnxt_re_alloc_srqslab(struct bnxt_re_context *cntx,
2264 				   struct ibv_srq_init_attr *attr,
2265 				   struct bnxt_re_qattr *qattr)
2266 {
2267 	size_t bytes;
2268 
2269 	bytes = bnxt_re_get_srqmem_size(cntx, attr, qattr);
2270 	return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size);
2271 }
2272 
2273 static struct bnxt_re_srq *bnxt_re_srq_alloc_queue_ptr(struct bnxt_re_mem *mem)
2274 {
2275 	struct bnxt_re_srq *srq;
2276 
2277 	srq = bnxt_re_get_obj(mem, sizeof(*srq));
2278 	if (!srq)
2279 		return NULL;
2280 	srq->srqq = bnxt_re_get_obj(mem, sizeof(struct bnxt_re_queue));
2281 	if (!srq->srqq)
2282 		return NULL;
2283 	return srq;
2284 }
2285 
2286 static int bnxt_re_srq_alloc_queue(struct bnxt_re_srq *srq,
2287 				   struct ibv_srq_init_attr *attr,
2288 				   struct bnxt_re_qattr *qattr)
2289 {
2290 	struct bnxt_re_queue *que;
2291 	int ret = -ENOMEM;
2292 	int idx;
2293 
2294 	que = srq->srqq;
2295 	que->depth = qattr->slots;
2296 	que->stride = qattr->esize;
2297 	que->va = bnxt_re_get_ring(srq->mem, qattr->sz_ring);
2298 	if (!que->va)
2299 		return ret;	/* qlock is not initialized yet */
2300 	bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded);
2301 	/* For SRQ only bnxt_re_wrid.wrid is used. */
2302 	srq->srwrid = bnxt_re_get_obj(srq->mem, qattr->sz_shad);
2303 	if (!srq->srwrid)
2304 		goto bail;
2305 
2306 	srq->start_idx = 0;
2307 	srq->last_idx = que->depth - 1;
2308 	for (idx = 0; idx < que->depth; idx++)
2309 		srq->srwrid[idx].next_idx = idx + 1;
2310 	srq->srwrid[srq->last_idx].next_idx = -1;
2311 	return 0;
2312 bail:
2313 	bnxt_re_dp_spin_destroy(&srq->srqq->qlock);
2314 	return ret;
2315 }
2316 
2317 struct ibv_srq *bnxt_re_create_srq(struct ibv_pd *ibvpd,
2318 				   struct ibv_srq_init_attr *attr)
2319 {
2320 	struct bnxt_re_srq_resp resp = {};
2321 	struct bnxt_re_srq_req cmd = {};
2322 	struct bnxt_re_qattr qattr = {};
2323 	struct bnxt_re_context *uctx;
2324 	struct bnxt_re_srq *srq;
2325 	void *mem;
2326 	int ret;
2327 
2328 	uctx = to_bnxt_re_context(ibvpd->context);
2329 	mem = bnxt_re_alloc_srqslab(uctx, attr, &qattr);
2330 	if (!mem)
2331 		return NULL;
2332 
2333 	srq = bnxt_re_srq_alloc_queue_ptr(mem);
2334 	if (!srq)
2335 		goto fail;
2336 	srq->uctx = uctx;
2337 	srq->mem = mem;
2338 	if (bnxt_re_srq_alloc_queue(srq, attr, &qattr))
2339 		goto fail;
2340 
2341 	cmd.srqva = (uint64_t)srq->srqq->va;
2342 	cmd.srq_handle = (uint64_t)srq;
2343 	ret = ibv_cmd_create_srq(ibvpd, &srq->ibvsrq, attr,
2344 				 &cmd.cmd, sizeof(cmd),
2345 				 &resp.resp, sizeof(resp));
2346 	if (ret)
2347 		goto fail;
2348 
2349 	srq->srqid = resp.srqid;
2350 	srq->udpi = &uctx->udpi;
2351 	srq->cap.max_wr = srq->srqq->depth;
2352 	srq->cap.max_sge = attr->attr.max_sge;
2353 	srq->cap.srq_limit = attr->attr.srq_limit;
2354 	srq->arm_req = false;
2355 	srq->rand.seed = srq->srqid;
2356 	srq->shadow_db_key = BNXT_RE_DB_KEY_INVALID;
2357 
2358 	INIT_DBLY_LIST_NODE(&srq->dbnode);
2359 	if (_is_db_drop_recovery_enable(uctx)) {
2360 		pthread_spin_lock(&uctx->srq_dbr_res.lock);
2361 		bnxt_re_list_add_node(&srq->dbnode, &uctx->srq_dbr_res.head);
2362 		pthread_spin_unlock(&uctx->srq_dbr_res.lock);
2363 	}
2364 	return &srq->ibvsrq;
2365 fail:
2366 	bnxt_re_free_mem(mem);
2367 	return NULL;
2368 }
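
/*
 * Caller-side sketch (illustrative; assumes a PD "pd", attribute
 * values are arbitrary).  Note that bnxt_re_create_srq() sizes the
 * ring with one extra entry beyond max_wr to detect the full
 * condition, and srq_limit sets the arm threshold used below.
 *
 *	struct ibv_srq_init_attr sia = {
 *		.attr = {
 *			.max_wr    = 256,
 *			.max_sge   = 2,
 *			.srq_limit = 16,
 *		},
 *	};
 *	struct ibv_srq *srq = ibv_create_srq(pd, &sia);
 */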
2369 
2370 int bnxt_re_modify_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr,
2371 		       int attr_mask)
2372 {
2373 	struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq);
2374 	struct ibv_modify_srq cmd = {};
2375 	int status = 0;
2376 
2377 	status = ibv_cmd_modify_srq(ibvsrq, attr, attr_mask,
2378 				    &cmd, sizeof(cmd));
2379 	if (!status && ((attr_mask & IBV_SRQ_LIMIT) &&
2380 			(srq->cap.srq_limit != attr->srq_limit))) {
2381 		srq->cap.srq_limit = attr->srq_limit;
2382 	}
2383 	srq->arm_req = true;
2384 	return status;
2385 }
2386 
2387 int bnxt_re_destroy_srq(struct ibv_srq *ibvsrq)
2388 {
2389 	struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq);
2390 	struct bnxt_re_mem *mem;
2391 	int ret;
2392 
2393 	if (_is_db_drop_recovery_enable(srq->uctx)) {
2394 		pthread_spin_lock(&srq->uctx->srq_dbr_res.lock);
2395 		bnxt_re_list_del_node(&srq->dbnode, &srq->uctx->srq_dbr_res.head);
2396 		pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock);
2397 	}
2398 	ret = ibv_cmd_destroy_srq(ibvsrq);
2399 	if (ret) {
2400 		if (_is_db_drop_recovery_enable(srq->uctx)) {
2401 			pthread_spin_lock(&srq->uctx->srq_dbr_res.lock);
2402 			bnxt_re_list_add_node(&srq->dbnode,
2403 					      &srq->uctx->srq_dbr_res.head);
2404 			pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock);
2405 		}
2406 		return ret;
2407 	}
2408 	bnxt_re_dp_spin_destroy(&srq->srqq->qlock);
2409 	mem = srq->mem;
2410 	bnxt_re_free_mem(mem);
2411 	return 0;
2412 }
2413 
2414 int bnxt_re_query_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr)
2415 {
2416 	struct ibv_query_srq cmd = {};
2417 
2418 	return ibv_cmd_query_srq(ibvsrq, attr, &cmd, sizeof(cmd));
2419 }
2420 
2421 static int bnxt_re_build_srqe(struct bnxt_re_srq *srq,
2422 			      struct ibv_recv_wr *wr, void *srqe)
2423 {
2424 	struct bnxt_re_brqe *hdr = srqe;
2425 	struct bnxt_re_wrid *wrid;
2426 	struct bnxt_re_sge *sge;
2427 	int wqe_sz, len, next;
2428 	uint32_t hdrval = 0;
2429 	int indx;
2430 
2431 	sge = (srqe + bnxt_re_get_srqe_hdr_sz());
2432 	next = srq->start_idx;
2433 	wrid = &srq->srwrid[next];
2434 
2435 	len = 0;
2436 	for (indx = 0; indx < wr->num_sge; indx++, sge++) {
2437 		sge->pa = htole64(wr->sg_list[indx].addr);
2438 		sge->lkey = htole32(wr->sg_list[indx].lkey);
2439 		sge->length = htole32(wr->sg_list[indx].length);
2440 		len += wr->sg_list[indx].length;
2441 	}
2442 
2443 	hdrval = BNXT_RE_WR_OPCD_RECV;
2444 	wqe_sz = wr->num_sge + (bnxt_re_get_srqe_hdr_sz() >> 4); /* 16B align */
2445 	/* HW needs at least one SGE for SRQ Entries.
2446 	 * Increment SRQ WQE size if num_sge = 0 to
2447 	 * include the extra SGE. Set the sge length to
2448 	 * zero.
2449 	 */
2450 	if (!wr->num_sge) {
2451 		wqe_sz++;
2452 		sge->length = 0;
2453 	}
2454 	hdrval |= ((wqe_sz & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT);
2455 	hdr->rsv_ws_fl_wt = htole32(hdrval);
2456 	hdr->wrid = htole32((uint32_t)next);
2457 
2458 	/* Fill wrid */
2459 	wrid->wrid = wr->wr_id;
2460 	wrid->bytes = len; /* N.A. for RQE */
2461 	wrid->sig = 0; /* N.A. for RQE */
2462 
2463 	return len;
2464 }
2465 
2466 int bnxt_re_post_srq_recv(struct ibv_srq *ibvsrq, struct ibv_recv_wr *wr,
2467 			  struct ibv_recv_wr **bad)
2468 {
2469 	struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq);
2470 	struct bnxt_re_queue *rq = srq->srqq;
2471 	int ret, count = 0;
2472 	void *srqe;
2473 
2474 	bnxt_re_dp_spin_lock(&rq->qlock);
2475 	count = rq->tail > rq->head ? rq->tail - rq->head :
2476 			   rq->depth - rq->head + rq->tail;
2477 	while (wr) {
2478 		if (srq->start_idx == srq->last_idx ||
2479 		    wr->num_sge > srq->cap.max_sge) {
2480 			*bad = wr;
2481 			bnxt_re_dp_spin_unlock(&rq->qlock);
2482 			return ENOMEM;
2483 		}
2484 
2485 		srqe = (void *) (rq->va + (rq->tail * rq->stride));
2486 		memset(srqe, 0, bnxt_re_get_srqe_sz());
2487 		ret = bnxt_re_build_srqe(srq, wr, srqe);
2488 		if (ret < 0) {
2489 			bnxt_re_dp_spin_unlock(&rq->qlock);
2490 			*bad = wr;
2491 			return ENOMEM;
2492 		}
2493 
2494 		srq->start_idx = srq->srwrid[srq->start_idx].next_idx;
2495 		bnxt_re_incr_tail(rq, 1);
2496 		wr = wr->next;
2497 		bnxt_re_ring_srq_db(srq);
2498 		count++;
2499 		if (srq->arm_req && count > srq->cap.srq_limit) {
2500 			srq->arm_req = false;
2501 			bnxt_re_ring_srq_arm(srq);
2502 		}
2503 	}
2504 	bnxt_re_dp_spin_unlock(&rq->qlock);
2505 
2506 	return 0;
2507 }
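
/*
 * Caller-side sketch (illustrative; "srq", "mr" and "buf" assumed to
 * exist, error handling trimmed).  ibv_post_srq_recv() dispatches to
 * bnxt_re_post_srq_recv() above.
 *
 *	struct ibv_sge sge = {
 *		.addr   = (uint64_t)(uintptr_t)buf,
 *		.length = 4096,
 *		.lkey   = mr->lkey,
 *	};
 *	struct ibv_recv_wr rwr = {
 *		.wr_id   = 0x9abc,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *	};
 *	struct ibv_recv_wr *bad_rwr;
 *
 *	if (ibv_post_srq_recv(srq, &rwr, &bad_rwr))
 *		fprintf(stderr, "post_srq_recv failed\n");
 */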
2508 
2509 struct ibv_ah *bnxt_re_create_ah(struct ibv_pd *ibvpd, struct ibv_ah_attr *attr)
2510 {
2511 	struct bnxt_re_context *uctx;
2512 	struct bnxt_re_pd *pd;
2513 	struct bnxt_re_ah *ah;
2514 	int status;
2515 	struct ibv_create_ah_resp resp = {};
2516 
2517 	pd = to_bnxt_re_pd(ibvpd);
2518 	uctx = to_bnxt_re_context(ibvpd->context);
2519 
2520 	ah = calloc(1, sizeof(struct bnxt_re_ah));
2521 	if (!ah)
2522 		goto failed;
2524 
2525 	ah->pd = pd;
2526 	pthread_mutex_lock(&uctx->shlock);
2527 	status = ibv_cmd_create_ah(ibvpd, &ah->ibvah, attr,
2528 				   &resp, sizeof(resp));
2529 
2530 	if (status) {
2532 		pthread_mutex_unlock(&uctx->shlock);
2533 		free(ah);
2534 		goto failed;
2535 	}
2536 	/* read AV ID now. */
2537 	ah->avid = *(uint32_t *)(uctx->shpg + BNXT_RE_SHPG_AVID_OFFT);
2538 	pthread_mutex_unlock(&uctx->shlock);
2539 
2540 	return &ah->ibvah;
2541 failed:
2542 	return NULL;
2543 }
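
/*
 * Caller-side sketch (illustrative; assumes a PD "pd" and a remote GID
 * "dgid" resolved elsewhere).  For RoCE the GRH is mandatory, so
 * is_global is set; the sgid_index of 0 is a placeholder.
 *
 *	struct ibv_ah_attr aa = {
 *		.is_global = 1,
 *		.port_num  = 1,
 *		.grh = {
 *			.dgid       = dgid,
 *			.sgid_index = 0,
 *			.hop_limit  = 64,
 *		},
 *	};
 *	struct ibv_ah *ah = ibv_create_ah(pd, &aa);
 */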
2544 
2545 int bnxt_re_destroy_ah(struct ibv_ah *ibvah)
2546 {
2547 	struct bnxt_re_ah *ah;
2548 	int status;
2549 
2550 	ah = to_bnxt_re_ah(ibvah);
2551 	status = ibv_cmd_destroy_ah(ibvah);
2552 	if (status)
2553 		return status;
2554 	free(ah);
2555 
2556 	return 0;
2557 }
2558