xref: /linux/drivers/infiniband/hw/qedr/verbs.c (revision 17cfcb68af3bc7d5e8ae08779b1853310a2949f3)
1 /* QLogic qedr NIC Driver
2  * Copyright (c) 2015-2016  QLogic Corporation
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and /or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/dma-mapping.h>
33 #include <linux/crc32.h>
34 #include <net/ip.h>
35 #include <net/ipv6.h>
36 #include <net/udp.h>
37 #include <linux/iommu.h>
38 
39 #include <rdma/ib_verbs.h>
40 #include <rdma/ib_user_verbs.h>
41 #include <rdma/iw_cm.h>
42 #include <rdma/ib_umem.h>
43 #include <rdma/ib_addr.h>
44 #include <rdma/ib_cache.h>
45 #include <rdma/uverbs_ioctl.h>
46 
47 #include <linux/qed/common_hsi.h>
48 #include "qedr_hsi_rdma.h"
49 #include <linux/qed/qed_if.h>
50 #include "qedr.h"
51 #include "verbs.h"
52 #include <rdma/qedr-abi.h>
53 #include "qedr_roce_cm.h"
54 
/* Size in bytes of one SRQ element (union rdma_srq_elm). */
#define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
/* SGE count per SRQ WQE, and that count plus one. */
#define RDMA_MAX_SGE_PER_SRQ	(4)
#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)

/* Left-shift a doorbell offset by DB_PWM_ADDR_OFFSET_SHIFT. */
#define DB_ADDR_SHIFT(addr)	((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
60 
61 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
62 					size_t len)
63 {
64 	size_t min_len = min_t(size_t, len, udata->outlen);
65 
66 	return ib_copy_to_udata(udata, src, min_len);
67 }
68 
69 int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
70 {
71 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
72 		return -EINVAL;
73 
74 	*pkey = QEDR_ROCE_PKEY_DEFAULT;
75 	return 0;
76 }
77 
78 int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
79 		      int index, union ib_gid *sgid)
80 {
81 	struct qedr_dev *dev = get_qedr_dev(ibdev);
82 
83 	memset(sgid->raw, 0, sizeof(sgid->raw));
84 	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);
85 
86 	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
87 		 sgid->global.interface_id, sgid->global.subnet_prefix);
88 
89 	return 0;
90 }
91 
92 int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
93 {
94 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
95 	struct qedr_device_attr *qattr = &dev->attr;
96 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
97 
98 	srq_attr->srq_limit = srq->srq_limit;
99 	srq_attr->max_wr = qattr->max_srq_wr;
100 	srq_attr->max_sge = qattr->max_sge;
101 
102 	return 0;
103 }
104 
105 int qedr_query_device(struct ib_device *ibdev,
106 		      struct ib_device_attr *attr, struct ib_udata *udata)
107 {
108 	struct qedr_dev *dev = get_qedr_dev(ibdev);
109 	struct qedr_device_attr *qattr = &dev->attr;
110 
111 	if (!dev->rdma_ctx) {
112 		DP_ERR(dev,
113 		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
114 		       dev->rdma_ctx);
115 		return -EINVAL;
116 	}
117 
118 	memset(attr, 0, sizeof(*attr));
119 
120 	attr->fw_ver = qattr->fw_ver;
121 	attr->sys_image_guid = qattr->sys_image_guid;
122 	attr->max_mr_size = qattr->max_mr_size;
123 	attr->page_size_cap = qattr->page_size_caps;
124 	attr->vendor_id = qattr->vendor_id;
125 	attr->vendor_part_id = qattr->vendor_part_id;
126 	attr->hw_ver = qattr->hw_ver;
127 	attr->max_qp = qattr->max_qp;
128 	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
129 	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
130 	    IB_DEVICE_RC_RNR_NAK_GEN |
131 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
132 
133 	attr->max_send_sge = qattr->max_sge;
134 	attr->max_recv_sge = qattr->max_sge;
135 	attr->max_sge_rd = qattr->max_sge;
136 	attr->max_cq = qattr->max_cq;
137 	attr->max_cqe = qattr->max_cqe;
138 	attr->max_mr = qattr->max_mr;
139 	attr->max_mw = qattr->max_mw;
140 	attr->max_pd = qattr->max_pd;
141 	attr->atomic_cap = dev->atomic_cap;
142 	attr->max_fmr = qattr->max_fmr;
143 	attr->max_map_per_fmr = 16;
144 	attr->max_qp_init_rd_atom =
145 	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
146 	attr->max_qp_rd_atom =
147 	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
148 		attr->max_qp_init_rd_atom);
149 
150 	attr->max_srq = qattr->max_srq;
151 	attr->max_srq_sge = qattr->max_srq_sge;
152 	attr->max_srq_wr = qattr->max_srq_wr;
153 
154 	attr->local_ca_ack_delay = qattr->dev_ack_delay;
155 	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
156 	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
157 	attr->max_ah = qattr->max_ah;
158 
159 	return 0;
160 }
161 
162 static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
163 					    u8 *ib_width)
164 {
165 	switch (speed) {
166 	case 1000:
167 		*ib_speed = IB_SPEED_SDR;
168 		*ib_width = IB_WIDTH_1X;
169 		break;
170 	case 10000:
171 		*ib_speed = IB_SPEED_QDR;
172 		*ib_width = IB_WIDTH_1X;
173 		break;
174 
175 	case 20000:
176 		*ib_speed = IB_SPEED_DDR;
177 		*ib_width = IB_WIDTH_4X;
178 		break;
179 
180 	case 25000:
181 		*ib_speed = IB_SPEED_EDR;
182 		*ib_width = IB_WIDTH_1X;
183 		break;
184 
185 	case 40000:
186 		*ib_speed = IB_SPEED_QDR;
187 		*ib_width = IB_WIDTH_4X;
188 		break;
189 
190 	case 50000:
191 		*ib_speed = IB_SPEED_HDR;
192 		*ib_width = IB_WIDTH_1X;
193 		break;
194 
195 	case 100000:
196 		*ib_speed = IB_SPEED_EDR;
197 		*ib_width = IB_WIDTH_4X;
198 		break;
199 
200 	default:
201 		/* Unsupported */
202 		*ib_speed = IB_SPEED_SDR;
203 		*ib_width = IB_WIDTH_1X;
204 	}
205 }
206 
207 int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
208 {
209 	struct qedr_dev *dev;
210 	struct qed_rdma_port *rdma_port;
211 
212 	dev = get_qedr_dev(ibdev);
213 
214 	if (!dev->rdma_ctx) {
215 		DP_ERR(dev, "rdma_ctx is NULL\n");
216 		return -EINVAL;
217 	}
218 
219 	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
220 
221 	/* *attr being zeroed by the caller, avoid zeroing it here */
222 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
223 		attr->state = IB_PORT_ACTIVE;
224 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
225 	} else {
226 		attr->state = IB_PORT_DOWN;
227 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
228 	}
229 	attr->max_mtu = IB_MTU_4096;
230 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
231 	attr->lid = 0;
232 	attr->lmc = 0;
233 	attr->sm_lid = 0;
234 	attr->sm_sl = 0;
235 	attr->ip_gids = true;
236 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
237 		attr->gid_tbl_len = 1;
238 		attr->pkey_tbl_len = 1;
239 	} else {
240 		attr->gid_tbl_len = QEDR_MAX_SGID;
241 		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
242 	}
243 	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
244 	attr->qkey_viol_cntr = 0;
245 	get_link_speed_and_width(rdma_port->link_speed,
246 				 &attr->active_speed, &attr->active_width);
247 	attr->max_msg_sz = rdma_port->max_msg_size;
248 	attr->max_vl_num = 4;
249 
250 	return 0;
251 }
252 
253 int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
254 		     struct ib_port_modify *props)
255 {
256 	return 0;
257 }
258 
259 static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
260 			 unsigned long len)
261 {
262 	struct qedr_mm *mm;
263 
264 	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
265 	if (!mm)
266 		return -ENOMEM;
267 
268 	mm->key.phy_addr = phy_addr;
269 	/* This function might be called with a length which is not a multiple
270 	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
271 	 * forces this granularity by increasing the requested size if needed.
272 	 * When qedr_mmap is called, it will search the list with the updated
273 	 * length as a key. To prevent search failures, the length is rounded up
274 	 * in advance to PAGE_SIZE.
275 	 */
276 	mm->key.len = roundup(len, PAGE_SIZE);
277 	INIT_LIST_HEAD(&mm->entry);
278 
279 	mutex_lock(&uctx->mm_list_lock);
280 	list_add(&mm->entry, &uctx->mm_head);
281 	mutex_unlock(&uctx->mm_list_lock);
282 
283 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
284 		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
285 		 (unsigned long long)mm->key.phy_addr,
286 		 (unsigned long)mm->key.len, uctx);
287 
288 	return 0;
289 }
290 
291 static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
292 			     unsigned long len)
293 {
294 	bool found = false;
295 	struct qedr_mm *mm;
296 
297 	mutex_lock(&uctx->mm_list_lock);
298 	list_for_each_entry(mm, &uctx->mm_head, entry) {
299 		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
300 			continue;
301 
302 		found = true;
303 		break;
304 	}
305 	mutex_unlock(&uctx->mm_list_lock);
306 	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
307 		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
308 		 mm->key.phy_addr, mm->key.len, uctx, found);
309 
310 	return found;
311 }
312 
313 int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
314 {
315 	struct ib_device *ibdev = uctx->device;
316 	int rc;
317 	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
318 	struct qedr_alloc_ucontext_resp uresp = {};
319 	struct qedr_dev *dev = get_qedr_dev(ibdev);
320 	struct qed_rdma_add_user_out_params oparams;
321 
322 	if (!udata)
323 		return -EFAULT;
324 
325 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
326 	if (rc) {
327 		DP_ERR(dev,
328 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
329 		       rc);
330 		return rc;
331 	}
332 
333 	ctx->dpi = oparams.dpi;
334 	ctx->dpi_addr = oparams.dpi_addr;
335 	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
336 	ctx->dpi_size = oparams.dpi_size;
337 	INIT_LIST_HEAD(&ctx->mm_head);
338 	mutex_init(&ctx->mm_list_lock);
339 
340 	uresp.dpm_enabled = dev->user_dpm_enabled;
341 	uresp.wids_enabled = 1;
342 	uresp.wid_count = oparams.wid_count;
343 	uresp.db_pa = ctx->dpi_phys_addr;
344 	uresp.db_size = ctx->dpi_size;
345 	uresp.max_send_wr = dev->attr.max_sqe;
346 	uresp.max_recv_wr = dev->attr.max_rqe;
347 	uresp.max_srq_wr = dev->attr.max_srq_wr;
348 	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
349 	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
350 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
351 	uresp.max_cqes = QEDR_MAX_CQES;
352 
353 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
354 	if (rc)
355 		return rc;
356 
357 	ctx->dev = dev;
358 
359 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
360 	if (rc)
361 		return rc;
362 
363 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
364 		 &ctx->ibucontext);
365 	return 0;
366 }
367 
368 void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
369 {
370 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
371 	struct qedr_mm *mm, *tmp;
372 
373 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
374 		 uctx);
375 	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
376 
377 	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
378 		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
379 			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
380 			 mm->key.phy_addr, mm->key.len, uctx);
381 		list_del(&mm->entry);
382 		kfree(mm);
383 	}
384 }
385 
386 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
387 {
388 	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
389 	struct qedr_dev *dev = get_qedr_dev(context->device);
390 	unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
391 	unsigned long len = (vma->vm_end - vma->vm_start);
392 	unsigned long dpi_start;
393 
394 	dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);
395 
396 	DP_DEBUG(dev, QEDR_MSG_INIT,
397 		 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK,vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
398 		 (void *)vma->vm_start, (void *)vma->vm_end,
399 		 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);
400 
401 	if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
402 		DP_ERR(dev,
403 		       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
404 		       (void *)vma->vm_start, (void *)vma->vm_end);
405 		return -EINVAL;
406 	}
407 
408 	if (!qedr_search_mmap(ucontext, phys_addr, len)) {
409 		DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
410 		       vma->vm_pgoff);
411 		return -EINVAL;
412 	}
413 
414 	if (phys_addr < dpi_start ||
415 	    ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
416 		DP_ERR(dev,
417 		       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
418 		       (void *)phys_addr, (void *)dpi_start,
419 		       ucontext->dpi_size);
420 		return -EINVAL;
421 	}
422 
423 	if (vma->vm_flags & VM_READ) {
424 		DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
425 		return -EINVAL;
426 	}
427 
428 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
429 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
430 				  vma->vm_page_prot);
431 }
432 
433 int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
434 {
435 	struct ib_device *ibdev = ibpd->device;
436 	struct qedr_dev *dev = get_qedr_dev(ibdev);
437 	struct qedr_pd *pd = get_qedr_pd(ibpd);
438 	u16 pd_id;
439 	int rc;
440 
441 	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
442 		 udata ? "User Lib" : "Kernel");
443 
444 	if (!dev->rdma_ctx) {
445 		DP_ERR(dev, "invalid RDMA context\n");
446 		return -EINVAL;
447 	}
448 
449 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
450 	if (rc)
451 		return rc;
452 
453 	pd->pd_id = pd_id;
454 
455 	if (udata) {
456 		struct qedr_alloc_pd_uresp uresp = {
457 			.pd_id = pd_id,
458 		};
459 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
460 			udata, struct qedr_ucontext, ibucontext);
461 
462 		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
463 		if (rc) {
464 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
465 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
466 			return rc;
467 		}
468 
469 		pd->uctx = context;
470 		pd->uctx->pd = pd;
471 	}
472 
473 	return 0;
474 }
475 
476 void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
477 {
478 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
479 	struct qedr_pd *pd = get_qedr_pd(ibpd);
480 
481 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
482 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
483 }
484 
485 static void qedr_free_pbl(struct qedr_dev *dev,
486 			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
487 {
488 	struct pci_dev *pdev = dev->pdev;
489 	int i;
490 
491 	for (i = 0; i < pbl_info->num_pbls; i++) {
492 		if (!pbl[i].va)
493 			continue;
494 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
495 				  pbl[i].va, pbl[i].pa);
496 	}
497 
498 	kfree(pbl);
499 }
500 
/* Lower and upper bound, in bytes, for the size of a single PBL page. */
#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

/* Number of u64-sized entries that fit in a page of _page_size bytes.
 * The argument is parenthesised so that compound expressions expand safely.
 */
#define NUM_PBES_ON_PAGE(_page_size) ((_page_size) / sizeof(u64))
/* Entries on one maximum-size page, and that count squared (two layers). */
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
507 
508 static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
509 					   struct qedr_pbl_info *pbl_info,
510 					   gfp_t flags)
511 {
512 	struct pci_dev *pdev = dev->pdev;
513 	struct qedr_pbl *pbl_table;
514 	dma_addr_t *pbl_main_tbl;
515 	dma_addr_t pa;
516 	void *va;
517 	int i;
518 
519 	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
520 	if (!pbl_table)
521 		return ERR_PTR(-ENOMEM);
522 
523 	for (i = 0; i < pbl_info->num_pbls; i++) {
524 		va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, &pa,
525 					flags);
526 		if (!va)
527 			goto err;
528 
529 		pbl_table[i].va = va;
530 		pbl_table[i].pa = pa;
531 	}
532 
533 	/* Two-Layer PBLs, if we have more than one pbl we need to initialize
534 	 * the first one with physical pointers to all of the rest
535 	 */
536 	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
537 	for (i = 0; i < pbl_info->num_pbls - 1; i++)
538 		pbl_main_tbl[i] = pbl_table[i + 1].pa;
539 
540 	return pbl_table;
541 
542 err:
543 	for (i--; i >= 0; i--)
544 		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
545 				  pbl_table[i].va, pbl_table[i].pa);
546 
547 	qedr_free_pbl(dev, pbl_info, pbl_table);
548 
549 	return ERR_PTR(-ENOMEM);
550 }
551 
552 static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
553 				struct qedr_pbl_info *pbl_info,
554 				u32 num_pbes, int two_layer_capable)
555 {
556 	u32 pbl_capacity;
557 	u32 pbl_size;
558 	u32 num_pbls;
559 
560 	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
561 		if (num_pbes > MAX_PBES_TWO_LAYER) {
562 			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
563 			       num_pbes);
564 			return -EINVAL;
565 		}
566 
567 		/* calculate required pbl page size */
568 		pbl_size = MIN_FW_PBL_PAGE_SIZE;
569 		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
570 			       NUM_PBES_ON_PAGE(pbl_size);
571 
572 		while (pbl_capacity < num_pbes) {
573 			pbl_size *= 2;
574 			pbl_capacity = pbl_size / sizeof(u64);
575 			pbl_capacity = pbl_capacity * pbl_capacity;
576 		}
577 
578 		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
579 		num_pbls++;	/* One for the layer0 ( points to the pbls) */
580 		pbl_info->two_layered = true;
581 	} else {
582 		/* One layered PBL */
583 		num_pbls = 1;
584 		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
585 				 roundup_pow_of_two((num_pbes * sizeof(u64))));
586 		pbl_info->two_layered = false;
587 	}
588 
589 	pbl_info->num_pbls = num_pbls;
590 	pbl_info->pbl_size = pbl_size;
591 	pbl_info->num_pbes = num_pbes;
592 
593 	DP_DEBUG(dev, QEDR_MSG_MR,
594 		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
595 		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
596 
597 	return 0;
598 }
599 
600 static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
601 			       struct qedr_pbl *pbl,
602 			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
603 {
604 	int pbe_cnt, total_num_pbes = 0;
605 	u32 fw_pg_cnt, fw_pg_per_umem_pg;
606 	struct qedr_pbl *pbl_tbl;
607 	struct sg_dma_page_iter sg_iter;
608 	struct regpair *pbe;
609 	u64 pg_addr;
610 
611 	if (!pbl_info->num_pbes)
612 		return;
613 
614 	/* If we have a two layered pbl, the first pbl points to the rest
615 	 * of the pbls and the first entry lays on the second pbl in the table
616 	 */
617 	if (pbl_info->two_layered)
618 		pbl_tbl = &pbl[1];
619 	else
620 		pbl_tbl = pbl;
621 
622 	pbe = (struct regpair *)pbl_tbl->va;
623 	if (!pbe) {
624 		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
625 		return;
626 	}
627 
628 	pbe_cnt = 0;
629 
630 	fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift);
631 
632 	for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
633 		pg_addr = sg_page_iter_dma_address(&sg_iter);
634 		for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
635 			pbe->lo = cpu_to_le32(pg_addr);
636 			pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));
637 
638 			pg_addr += BIT(pg_shift);
639 			pbe_cnt++;
640 			total_num_pbes++;
641 			pbe++;
642 
643 			if (total_num_pbes == pbl_info->num_pbes)
644 				return;
645 
646 			/* If the given pbl is full storing the pbes,
647 			 * move to next pbl.
648 			 */
649 			if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
650 				pbl_tbl++;
651 				pbe = (struct regpair *)pbl_tbl->va;
652 				pbe_cnt = 0;
653 			}
654 
655 			fw_pg_cnt++;
656 		}
657 	}
658 }
659 
660 static int qedr_copy_cq_uresp(struct qedr_dev *dev,
661 			      struct qedr_cq *cq, struct ib_udata *udata)
662 {
663 	struct qedr_create_cq_uresp uresp;
664 	int rc;
665 
666 	memset(&uresp, 0, sizeof(uresp));
667 
668 	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
669 	uresp.icid = cq->icid;
670 
671 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
672 	if (rc)
673 		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
674 
675 	return rc;
676 }
677 
678 static void consume_cqe(struct qedr_cq *cq)
679 {
680 	if (cq->latest_cqe == cq->toggle_cqe)
681 		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
682 
683 	cq->latest_cqe = qed_chain_consume(&cq->pbl);
684 }
685 
686 static inline int qedr_align_cq_entries(int entries)
687 {
688 	u64 size, aligned_size;
689 
690 	/* We allocate an extra entry that we don't report to the FW. */
691 	size = (entries + 1) * QEDR_CQE_SIZE;
692 	aligned_size = ALIGN(size, PAGE_SIZE);
693 
694 	return aligned_size / QEDR_CQE_SIZE;
695 }
696 
697 static inline int qedr_init_user_queue(struct ib_udata *udata,
698 				       struct qedr_dev *dev,
699 				       struct qedr_userq *q, u64 buf_addr,
700 				       size_t buf_len, int access, int dmasync,
701 				       int alloc_and_init)
702 {
703 	u32 fw_pages;
704 	int rc;
705 
706 	q->buf_addr = buf_addr;
707 	q->buf_len = buf_len;
708 	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
709 	if (IS_ERR(q->umem)) {
710 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
711 		       PTR_ERR(q->umem));
712 		return PTR_ERR(q->umem);
713 	}
714 
715 	fw_pages = ib_umem_page_count(q->umem) <<
716 	    (PAGE_SHIFT - FW_PAGE_SHIFT);
717 
718 	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
719 	if (rc)
720 		goto err0;
721 
722 	if (alloc_and_init) {
723 		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
724 		if (IS_ERR(q->pbl_tbl)) {
725 			rc = PTR_ERR(q->pbl_tbl);
726 			goto err0;
727 		}
728 		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
729 				   FW_PAGE_SHIFT);
730 	} else {
731 		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
732 		if (!q->pbl_tbl) {
733 			rc = -ENOMEM;
734 			goto err0;
735 		}
736 	}
737 
738 	return 0;
739 
740 err0:
741 	ib_umem_release(q->umem);
742 	q->umem = NULL;
743 
744 	return rc;
745 }
746 
747 static inline void qedr_init_cq_params(struct qedr_cq *cq,
748 				       struct qedr_ucontext *ctx,
749 				       struct qedr_dev *dev, int vector,
750 				       int chain_entries, int page_cnt,
751 				       u64 pbl_ptr,
752 				       struct qed_rdma_create_cq_in_params
753 				       *params)
754 {
755 	memset(params, 0, sizeof(*params));
756 	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
757 	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
758 	params->cnq_id = vector;
759 	params->cq_size = chain_entries - 1;
760 	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
761 	params->pbl_num_pages = page_cnt;
762 	params->pbl_ptr = pbl_ptr;
763 	params->pbl_two_level = 0;
764 }
765 
766 static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
767 {
768 	cq->db.data.agg_flags = flags;
769 	cq->db.data.value = cpu_to_le32(cons);
770 	writeq(cq->db.raw, cq->db_addr);
771 }
772 
773 int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
774 {
775 	struct qedr_cq *cq = get_qedr_cq(ibcq);
776 	unsigned long sflags;
777 	struct qedr_dev *dev;
778 
779 	dev = get_qedr_dev(ibcq->device);
780 
781 	if (cq->destroyed) {
782 		DP_ERR(dev,
783 		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
784 		       cq, cq->icid);
785 		return -EINVAL;
786 	}
787 
788 
789 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
790 		return 0;
791 
792 	spin_lock_irqsave(&cq->cq_lock, sflags);
793 
794 	cq->arm_flags = 0;
795 
796 	if (flags & IB_CQ_SOLICITED)
797 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
798 
799 	if (flags & IB_CQ_NEXT_COMP)
800 		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
801 
802 	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
803 
804 	spin_unlock_irqrestore(&cq->cq_lock, sflags);
805 
806 	return 0;
807 }
808 
809 int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
810 		   struct ib_udata *udata)
811 {
812 	struct ib_device *ibdev = ibcq->device;
813 	struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
814 		udata, struct qedr_ucontext, ibucontext);
815 	struct qed_rdma_destroy_cq_out_params destroy_oparams;
816 	struct qed_rdma_destroy_cq_in_params destroy_iparams;
817 	struct qedr_dev *dev = get_qedr_dev(ibdev);
818 	struct qed_rdma_create_cq_in_params params;
819 	struct qedr_create_cq_ureq ureq = {};
820 	int vector = attr->comp_vector;
821 	int entries = attr->cqe;
822 	struct qedr_cq *cq = get_qedr_cq(ibcq);
823 	int chain_entries;
824 	int page_cnt;
825 	u64 pbl_ptr;
826 	u16 icid;
827 	int rc;
828 
829 	DP_DEBUG(dev, QEDR_MSG_INIT,
830 		 "create_cq: called from %s. entries=%d, vector=%d\n",
831 		 udata ? "User Lib" : "Kernel", entries, vector);
832 
833 	if (entries > QEDR_MAX_CQES) {
834 		DP_ERR(dev,
835 		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
836 		       entries, QEDR_MAX_CQES);
837 		return -EINVAL;
838 	}
839 
840 	chain_entries = qedr_align_cq_entries(entries);
841 	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
842 
843 	if (udata) {
844 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
845 			DP_ERR(dev,
846 			       "create cq: problem copying data from user space\n");
847 			goto err0;
848 		}
849 
850 		if (!ureq.len) {
851 			DP_ERR(dev,
852 			       "create cq: cannot create a cq with 0 entries\n");
853 			goto err0;
854 		}
855 
856 		cq->cq_type = QEDR_CQ_TYPE_USER;
857 
858 		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
859 					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
860 					  1);
861 		if (rc)
862 			goto err0;
863 
864 		pbl_ptr = cq->q.pbl_tbl->pa;
865 		page_cnt = cq->q.pbl_info.num_pbes;
866 
867 		cq->ibcq.cqe = chain_entries;
868 	} else {
869 		cq->cq_type = QEDR_CQ_TYPE_KERNEL;
870 
871 		rc = dev->ops->common->chain_alloc(dev->cdev,
872 						   QED_CHAIN_USE_TO_CONSUME,
873 						   QED_CHAIN_MODE_PBL,
874 						   QED_CHAIN_CNT_TYPE_U32,
875 						   chain_entries,
876 						   sizeof(union rdma_cqe),
877 						   &cq->pbl, NULL);
878 		if (rc)
879 			goto err1;
880 
881 		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
882 		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
883 		cq->ibcq.cqe = cq->pbl.capacity;
884 	}
885 
886 	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
887 			    pbl_ptr, &params);
888 
889 	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
890 	if (rc)
891 		goto err2;
892 
893 	cq->icid = icid;
894 	cq->sig = QEDR_CQ_MAGIC_NUMBER;
895 	spin_lock_init(&cq->cq_lock);
896 
897 	if (udata) {
898 		rc = qedr_copy_cq_uresp(dev, cq, udata);
899 		if (rc)
900 			goto err3;
901 	} else {
902 		/* Generate doorbell address. */
903 		cq->db_addr = dev->db_addr +
904 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
905 		cq->db.data.icid = cq->icid;
906 		cq->db.data.params = DB_AGG_CMD_SET <<
907 		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
908 
909 		/* point to the very last element, passing it we will toggle */
910 		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
911 		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
912 		cq->latest_cqe = NULL;
913 		consume_cqe(cq);
914 		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
915 	}
916 
917 	DP_DEBUG(dev, QEDR_MSG_CQ,
918 		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
919 		 cq->icid, cq, params.cq_size);
920 
921 	return 0;
922 
923 err3:
924 	destroy_iparams.icid = cq->icid;
925 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
926 				  &destroy_oparams);
927 err2:
928 	if (udata)
929 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
930 	else
931 		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
932 err1:
933 	if (udata)
934 		ib_umem_release(cq->q.umem);
935 err0:
936 	return -EINVAL;
937 }
938 
939 int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
940 {
941 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
942 	struct qedr_cq *cq = get_qedr_cq(ibcq);
943 
944 	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
945 
946 	return 0;
947 }
948 
949 #define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
950 #define QEDR_DESTROY_CQ_ITER_DURATION		(10)
951 
952 void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
953 {
954 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
955 	struct qed_rdma_destroy_cq_out_params oparams;
956 	struct qed_rdma_destroy_cq_in_params iparams;
957 	struct qedr_cq *cq = get_qedr_cq(ibcq);
958 	int iter;
959 
960 	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);
961 
962 	cq->destroyed = 1;
963 
964 	/* GSIs CQs are handled by driver, so they don't exist in the FW */
965 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
966 		return;
967 
968 	iparams.icid = cq->icid;
969 	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
970 	dev->ops->common->chain_free(dev->cdev, &cq->pbl);
971 
972 	if (udata) {
973 		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
974 		ib_umem_release(cq->q.umem);
975 	}
976 
977 	/* We don't want the IRQ handler to handle a non-existing CQ so we
978 	 * wait until all CNQ interrupts, if any, are received. This will always
979 	 * happen and will always happen very fast. If not, then a serious error
980 	 * has occured. That is why we can use a long delay.
981 	 * We spin for a short time so we don’t lose time on context switching
982 	 * in case all the completions are handled in that span. Otherwise
983 	 * we sleep for a while and check again. Since the CNQ may be
984 	 * associated with (only) the current CPU we use msleep to allow the
985 	 * current CPU to be freed.
986 	 * The CNQ notification is increased in qedr_irq_handler().
987 	 */
988 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
989 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
990 		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
991 		iter--;
992 	}
993 
994 	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
995 	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
996 		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
997 		iter--;
998 	}
999 
1000 	/* Note that we don't need to have explicit code to wait for the
1001 	 * completion of the event handler because it is invoked from the EQ.
1002 	 * Since the destroy CQ ramrod has also been received on the EQ we can
1003 	 * be certain that there's no event handler in process.
1004 	 */
1005 }
1006 
1007 static inline int get_gid_info_from_table(struct ib_qp *ibqp,
1008 					  struct ib_qp_attr *attr,
1009 					  int attr_mask,
1010 					  struct qed_rdma_modify_qp_in_params
1011 					  *qp_params)
1012 {
1013 	const struct ib_gid_attr *gid_attr;
1014 	enum rdma_network_type nw_type;
1015 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1016 	u32 ipv4_addr;
1017 	int ret;
1018 	int i;
1019 
1020 	gid_attr = grh->sgid_attr;
1021 	ret = rdma_read_gid_l2_fields(gid_attr, &qp_params->vlan_id, NULL);
1022 	if (ret)
1023 		return ret;
1024 
1025 	nw_type = rdma_gid_attr_network_type(gid_attr);
1026 	switch (nw_type) {
1027 	case RDMA_NETWORK_IPV6:
1028 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1029 		       sizeof(qp_params->sgid));
1030 		memcpy(&qp_params->dgid.bytes[0],
1031 		       &grh->dgid,
1032 		       sizeof(qp_params->dgid));
1033 		qp_params->roce_mode = ROCE_V2_IPV6;
1034 		SET_FIELD(qp_params->modify_flags,
1035 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1036 		break;
1037 	case RDMA_NETWORK_IB:
1038 		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
1039 		       sizeof(qp_params->sgid));
1040 		memcpy(&qp_params->dgid.bytes[0],
1041 		       &grh->dgid,
1042 		       sizeof(qp_params->dgid));
1043 		qp_params->roce_mode = ROCE_V1;
1044 		break;
1045 	case RDMA_NETWORK_IPV4:
1046 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
1047 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
1048 		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
1049 		qp_params->sgid.ipv4_addr = ipv4_addr;
1050 		ipv4_addr =
1051 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
1052 		qp_params->dgid.ipv4_addr = ipv4_addr;
1053 		SET_FIELD(qp_params->modify_flags,
1054 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
1055 		qp_params->roce_mode = ROCE_V2_IPV4;
1056 		break;
1057 	}
1058 
1059 	for (i = 0; i < 4; i++) {
1060 		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
1061 		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
1062 	}
1063 
1064 	if (qp_params->vlan_id >= VLAN_CFI_MASK)
1065 		qp_params->vlan_id = 0;
1066 
1067 	return 0;
1068 }
1069 
1070 static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
1071 			       struct ib_qp_init_attr *attrs,
1072 			       struct ib_udata *udata)
1073 {
1074 	struct qedr_device_attr *qattr = &dev->attr;
1075 
1076 	/* QP0... attrs->qp_type == IB_QPT_GSI */
1077 	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
1078 		DP_DEBUG(dev, QEDR_MSG_QP,
1079 			 "create qp: unsupported qp type=0x%x requested\n",
1080 			 attrs->qp_type);
1081 		return -EINVAL;
1082 	}
1083 
1084 	if (attrs->cap.max_send_wr > qattr->max_sqe) {
1085 		DP_ERR(dev,
1086 		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
1087 		       attrs->cap.max_send_wr, qattr->max_sqe);
1088 		return -EINVAL;
1089 	}
1090 
1091 	if (attrs->cap.max_inline_data > qattr->max_inline) {
1092 		DP_ERR(dev,
1093 		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
1094 		       attrs->cap.max_inline_data, qattr->max_inline);
1095 		return -EINVAL;
1096 	}
1097 
1098 	if (attrs->cap.max_send_sge > qattr->max_sge) {
1099 		DP_ERR(dev,
1100 		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
1101 		       attrs->cap.max_send_sge, qattr->max_sge);
1102 		return -EINVAL;
1103 	}
1104 
1105 	if (attrs->cap.max_recv_sge > qattr->max_sge) {
1106 		DP_ERR(dev,
1107 		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
1108 		       attrs->cap.max_recv_sge, qattr->max_sge);
1109 		return -EINVAL;
1110 	}
1111 
1112 	/* Unprivileged user space cannot create special QP */
1113 	if (udata && attrs->qp_type == IB_QPT_GSI) {
1114 		DP_ERR(dev,
1115 		       "create qp: userspace can't create special QPs of type=0x%x\n",
1116 		       attrs->qp_type);
1117 		return -EINVAL;
1118 	}
1119 
1120 	return 0;
1121 }
1122 
1123 static int qedr_copy_srq_uresp(struct qedr_dev *dev,
1124 			       struct qedr_srq *srq, struct ib_udata *udata)
1125 {
1126 	struct qedr_create_srq_uresp uresp = {};
1127 	int rc;
1128 
1129 	uresp.srq_id = srq->srq_id;
1130 
1131 	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1132 	if (rc)
1133 		DP_ERR(dev, "create srq: problem copying data to user space\n");
1134 
1135 	return rc;
1136 }
1137 
1138 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
1139 			       struct qedr_create_qp_uresp *uresp,
1140 			       struct qedr_qp *qp)
1141 {
1142 	/* iWARP requires two doorbells per RQ. */
1143 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1144 		uresp->rq_db_offset =
1145 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1146 		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1147 	} else {
1148 		uresp->rq_db_offset =
1149 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1150 	}
1151 
1152 	uresp->rq_icid = qp->icid;
1153 }
1154 
1155 static void qedr_copy_sq_uresp(struct qedr_dev *dev,
1156 			       struct qedr_create_qp_uresp *uresp,
1157 			       struct qedr_qp *qp)
1158 {
1159 	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1160 
1161 	/* iWARP uses the same cid for rq and sq */
1162 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1163 		uresp->sq_icid = qp->icid;
1164 	else
1165 		uresp->sq_icid = qp->icid + 1;
1166 }
1167 
1168 static int qedr_copy_qp_uresp(struct qedr_dev *dev,
1169 			      struct qedr_qp *qp, struct ib_udata *udata)
1170 {
1171 	struct qedr_create_qp_uresp uresp;
1172 	int rc;
1173 
1174 	memset(&uresp, 0, sizeof(uresp));
1175 	qedr_copy_sq_uresp(dev, &uresp, qp);
1176 	qedr_copy_rq_uresp(dev, &uresp, qp);
1177 
1178 	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
1179 	uresp.qp_id = qp->qp_id;
1180 
1181 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1182 	if (rc)
1183 		DP_ERR(dev,
1184 		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
1185 		       qp->icid);
1186 
1187 	return rc;
1188 }
1189 
1190 static void qedr_set_common_qp_params(struct qedr_dev *dev,
1191 				      struct qedr_qp *qp,
1192 				      struct qedr_pd *pd,
1193 				      struct ib_qp_init_attr *attrs)
1194 {
1195 	spin_lock_init(&qp->q_lock);
1196 	atomic_set(&qp->refcnt, 1);
1197 	qp->pd = pd;
1198 	qp->qp_type = attrs->qp_type;
1199 	qp->max_inline_data = attrs->cap.max_inline_data;
1200 	qp->sq.max_sges = attrs->cap.max_send_sge;
1201 	qp->state = QED_ROCE_QP_STATE_RESET;
1202 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1203 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
1204 	qp->dev = dev;
1205 
1206 	if (attrs->srq) {
1207 		qp->srq = get_qedr_srq(attrs->srq);
1208 	} else {
1209 		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
1210 		qp->rq.max_sges = attrs->cap.max_recv_sge;
1211 		DP_DEBUG(dev, QEDR_MSG_QP,
1212 			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
1213 			 qp->rq.max_sges, qp->rq_cq->icid);
1214 	}
1215 
1216 	DP_DEBUG(dev, QEDR_MSG_QP,
1217 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
1218 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
1219 		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
1220 	DP_DEBUG(dev, QEDR_MSG_QP,
1221 		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
1222 		 qp->sq.max_sges, qp->sq_cq->icid);
1223 }
1224 
1225 static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1226 {
1227 	qp->sq.db = dev->db_addr +
1228 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1229 	qp->sq.db_data.data.icid = qp->icid + 1;
1230 	if (!qp->srq) {
1231 		qp->rq.db = dev->db_addr +
1232 			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
1233 		qp->rq.db_data.data.icid = qp->icid;
1234 	}
1235 }
1236 
1237 static int qedr_check_srq_params(struct qedr_dev *dev,
1238 				 struct ib_srq_init_attr *attrs,
1239 				 struct ib_udata *udata)
1240 {
1241 	struct qedr_device_attr *qattr = &dev->attr;
1242 
1243 	if (attrs->attr.max_wr > qattr->max_srq_wr) {
1244 		DP_ERR(dev,
1245 		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
1246 		       attrs->attr.max_wr, qattr->max_srq_wr);
1247 		return -EINVAL;
1248 	}
1249 
1250 	if (attrs->attr.max_sge > qattr->max_sge) {
1251 		DP_ERR(dev,
1252 		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
1253 		       attrs->attr.max_sge, qattr->max_sge);
1254 		return -EINVAL;
1255 	}
1256 
1257 	return 0;
1258 }
1259 
1260 static void qedr_free_srq_user_params(struct qedr_srq *srq)
1261 {
1262 	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1263 	ib_umem_release(srq->usrq.umem);
1264 	ib_umem_release(srq->prod_umem);
1265 }
1266 
1267 static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
1268 {
1269 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1270 	struct qedr_dev *dev = srq->dev;
1271 
1272 	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
1273 
1274 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1275 			  hw_srq->virt_prod_pair_addr,
1276 			  hw_srq->phy_prod_pair_addr);
1277 }
1278 
1279 static int qedr_init_srq_user_params(struct ib_udata *udata,
1280 				     struct qedr_srq *srq,
1281 				     struct qedr_create_srq_ureq *ureq,
1282 				     int access, int dmasync)
1283 {
1284 	struct scatterlist *sg;
1285 	int rc;
1286 
1287 	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
1288 				  ureq->srq_len, access, dmasync, 1);
1289 	if (rc)
1290 		return rc;
1291 
1292 	srq->prod_umem =
1293 		ib_umem_get(udata, ureq->prod_pair_addr,
1294 			    sizeof(struct rdma_srq_producers), access, dmasync);
1295 	if (IS_ERR(srq->prod_umem)) {
1296 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
1297 		ib_umem_release(srq->usrq.umem);
1298 		DP_ERR(srq->dev,
1299 		       "create srq: failed ib_umem_get for producer, got %ld\n",
1300 		       PTR_ERR(srq->prod_umem));
1301 		return PTR_ERR(srq->prod_umem);
1302 	}
1303 
1304 	sg = srq->prod_umem->sg_head.sgl;
1305 	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
1306 
1307 	return 0;
1308 }
1309 
1310 static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
1311 					struct qedr_dev *dev,
1312 					struct ib_srq_init_attr *init_attr)
1313 {
1314 	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
1315 	dma_addr_t phy_prod_pair_addr;
1316 	u32 num_elems;
1317 	void *va;
1318 	int rc;
1319 
1320 	va = dma_alloc_coherent(&dev->pdev->dev,
1321 				sizeof(struct rdma_srq_producers),
1322 				&phy_prod_pair_addr, GFP_KERNEL);
1323 	if (!va) {
1324 		DP_ERR(dev,
1325 		       "create srq: failed to allocate dma memory for producer\n");
1326 		return -ENOMEM;
1327 	}
1328 
1329 	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
1330 	hw_srq->virt_prod_pair_addr = va;
1331 
1332 	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
1333 	rc = dev->ops->common->chain_alloc(dev->cdev,
1334 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1335 					   QED_CHAIN_MODE_PBL,
1336 					   QED_CHAIN_CNT_TYPE_U32,
1337 					   num_elems,
1338 					   QEDR_SRQ_WQE_ELEM_SIZE,
1339 					   &hw_srq->pbl, NULL);
1340 	if (rc)
1341 		goto err0;
1342 
1343 	hw_srq->num_elems = num_elems;
1344 
1345 	return 0;
1346 
1347 err0:
1348 	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
1349 			  va, phy_prod_pair_addr);
1350 	return rc;
1351 }
1352 
1353 int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1354 		    struct ib_udata *udata)
1355 {
1356 	struct qed_rdma_destroy_srq_in_params destroy_in_params;
1357 	struct qed_rdma_create_srq_in_params in_params = {};
1358 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1359 	struct qed_rdma_create_srq_out_params out_params;
1360 	struct qedr_pd *pd = get_qedr_pd(ibsrq->pd);
1361 	struct qedr_create_srq_ureq ureq = {};
1362 	u64 pbl_base_addr, phy_prod_pair_addr;
1363 	struct qedr_srq_hwq_info *hw_srq;
1364 	u32 page_cnt, page_size;
1365 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1366 	int rc = 0;
1367 
1368 	DP_DEBUG(dev, QEDR_MSG_QP,
1369 		 "create SRQ called from %s (pd %p)\n",
1370 		 (udata) ? "User lib" : "kernel", pd);
1371 
1372 	rc = qedr_check_srq_params(dev, init_attr, udata);
1373 	if (rc)
1374 		return -EINVAL;
1375 
1376 	srq->dev = dev;
1377 	hw_srq = &srq->hw_srq;
1378 	spin_lock_init(&srq->lock);
1379 
1380 	hw_srq->max_wr = init_attr->attr.max_wr;
1381 	hw_srq->max_sges = init_attr->attr.max_sge;
1382 
1383 	if (udata) {
1384 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
1385 			DP_ERR(dev,
1386 			       "create srq: problem copying data from user space\n");
1387 			goto err0;
1388 		}
1389 
1390 		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
1391 		if (rc)
1392 			goto err0;
1393 
1394 		page_cnt = srq->usrq.pbl_info.num_pbes;
1395 		pbl_base_addr = srq->usrq.pbl_tbl->pa;
1396 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1397 		page_size = PAGE_SIZE;
1398 	} else {
1399 		struct qed_chain *pbl;
1400 
1401 		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
1402 		if (rc)
1403 			goto err0;
1404 
1405 		pbl = &hw_srq->pbl;
1406 		page_cnt = qed_chain_get_page_cnt(pbl);
1407 		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
1408 		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
1409 		page_size = QED_CHAIN_PAGE_SIZE;
1410 	}
1411 
1412 	in_params.pd_id = pd->pd_id;
1413 	in_params.pbl_base_addr = pbl_base_addr;
1414 	in_params.prod_pair_addr = phy_prod_pair_addr;
1415 	in_params.num_pages = page_cnt;
1416 	in_params.page_size = page_size;
1417 
1418 	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
1419 	if (rc)
1420 		goto err1;
1421 
1422 	srq->srq_id = out_params.srq_id;
1423 
1424 	if (udata) {
1425 		rc = qedr_copy_srq_uresp(dev, srq, udata);
1426 		if (rc)
1427 			goto err2;
1428 	}
1429 
1430 	rc = xa_insert_irq(&dev->srqs, srq->srq_id, srq, GFP_KERNEL);
1431 	if (rc)
1432 		goto err2;
1433 
1434 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1435 		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
1436 	return 0;
1437 
1438 err2:
1439 	destroy_in_params.srq_id = srq->srq_id;
1440 
1441 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
1442 err1:
1443 	if (udata)
1444 		qedr_free_srq_user_params(srq);
1445 	else
1446 		qedr_free_srq_kernel_params(srq);
1447 err0:
1448 	return -EFAULT;
1449 }
1450 
1451 void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1452 {
1453 	struct qed_rdma_destroy_srq_in_params in_params = {};
1454 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1455 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1456 
1457 	xa_erase_irq(&dev->srqs, srq->srq_id);
1458 	in_params.srq_id = srq->srq_id;
1459 	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
1460 
1461 	if (ibsrq->uobject)
1462 		qedr_free_srq_user_params(srq);
1463 	else
1464 		qedr_free_srq_kernel_params(srq);
1465 
1466 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1467 		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
1468 		 srq->srq_id);
1469 }
1470 
1471 int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
1472 		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
1473 {
1474 	struct qed_rdma_modify_srq_in_params in_params = {};
1475 	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
1476 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
1477 	int rc;
1478 
1479 	if (attr_mask & IB_SRQ_MAX_WR) {
1480 		DP_ERR(dev,
1481 		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
1482 		       attr_mask, srq);
1483 		return -EINVAL;
1484 	}
1485 
1486 	if (attr_mask & IB_SRQ_LIMIT) {
1487 		if (attr->srq_limit >= srq->hw_srq.max_wr) {
1488 			DP_ERR(dev,
1489 			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
1490 			       attr->srq_limit, srq->hw_srq.max_wr);
1491 			return -EINVAL;
1492 		}
1493 
1494 		in_params.srq_id = srq->srq_id;
1495 		in_params.wqe_limit = attr->srq_limit;
1496 		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
1497 		if (rc)
1498 			return rc;
1499 	}
1500 
1501 	srq->srq_limit = attr->srq_limit;
1502 
1503 	DP_DEBUG(dev, QEDR_MSG_SRQ,
1504 		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
1505 
1506 	return 0;
1507 }
1508 
1509 static inline void
1510 qedr_init_common_qp_in_params(struct qedr_dev *dev,
1511 			      struct qedr_pd *pd,
1512 			      struct qedr_qp *qp,
1513 			      struct ib_qp_init_attr *attrs,
1514 			      bool fmr_and_reserved_lkey,
1515 			      struct qed_rdma_create_qp_in_params *params)
1516 {
1517 	/* QP handle to be written in an async event */
1518 	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
1519 	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
1520 
1521 	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
1522 	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
1523 	params->pd = pd->pd_id;
1524 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
1525 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
1526 	params->stats_queue = 0;
1527 	params->srq_id = 0;
1528 	params->use_srq = false;
1529 
1530 	if (!qp->srq) {
1531 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1532 
1533 	} else {
1534 		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
1535 		params->srq_id = qp->srq->srq_id;
1536 		params->use_srq = true;
1537 	}
1538 }
1539 
1540 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
1541 {
1542 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
1543 		 "qp=%p. "
1544 		 "sq_addr=0x%llx, "
1545 		 "sq_len=%zd, "
1546 		 "rq_addr=0x%llx, "
1547 		 "rq_len=%zd"
1548 		 "\n",
1549 		 qp,
1550 		 qp->usq.buf_addr,
1551 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
1552 }
1553 
1554 static inline void
1555 qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
1556 			    struct qedr_qp *qp,
1557 			    struct qed_rdma_create_qp_out_params *out_params)
1558 {
1559 	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
1560 	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;
1561 
1562 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
1563 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
1564 	if (!qp->srq) {
1565 		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
1566 		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
1567 	}
1568 
1569 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
1570 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
1571 }
1572 
1573 static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
1574 {
1575 	ib_umem_release(qp->usq.umem);
1576 	qp->usq.umem = NULL;
1577 
1578 	ib_umem_release(qp->urq.umem);
1579 	qp->urq.umem = NULL;
1580 }
1581 
1582 static int qedr_create_user_qp(struct qedr_dev *dev,
1583 			       struct qedr_qp *qp,
1584 			       struct ib_pd *ibpd,
1585 			       struct ib_udata *udata,
1586 			       struct ib_qp_init_attr *attrs)
1587 {
1588 	struct qed_rdma_create_qp_in_params in_params;
1589 	struct qed_rdma_create_qp_out_params out_params;
1590 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1591 	struct qedr_create_qp_ureq ureq;
1592 	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
1593 	int rc = -EINVAL;
1594 
1595 	memset(&ureq, 0, sizeof(ureq));
1596 	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
1597 	if (rc) {
1598 		DP_ERR(dev, "Problem copying data from user space\n");
1599 		return rc;
1600 	}
1601 
1602 	/* SQ - read access only (0), dma sync not required (0) */
1603 	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
1604 				  ureq.sq_len, 0, 0, alloc_and_init);
1605 	if (rc)
1606 		return rc;
1607 
1608 	if (!qp->srq) {
1609 		/* RQ - read access only (0), dma sync not required (0) */
1610 		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
1611 					  ureq.rq_len, 0, 0, alloc_and_init);
1612 		if (rc)
1613 			return rc;
1614 	}
1615 
1616 	memset(&in_params, 0, sizeof(in_params));
1617 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
1618 	in_params.qp_handle_lo = ureq.qp_handle_lo;
1619 	in_params.qp_handle_hi = ureq.qp_handle_hi;
1620 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
1621 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
1622 	if (!qp->srq) {
1623 		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
1624 		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
1625 	}
1626 
1627 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1628 					      &in_params, &out_params);
1629 
1630 	if (!qp->qed_qp) {
1631 		rc = -ENOMEM;
1632 		goto err1;
1633 	}
1634 
1635 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1636 		qedr_iwarp_populate_user_qp(dev, qp, &out_params);
1637 
1638 	qp->qp_id = out_params.qp_id;
1639 	qp->icid = out_params.icid;
1640 
1641 	rc = qedr_copy_qp_uresp(dev, qp, udata);
1642 	if (rc)
1643 		goto err;
1644 
1645 	qedr_qp_user_print(dev, qp);
1646 
1647 	return 0;
1648 err:
1649 	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1650 	if (rc)
1651 		DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
1652 
1653 err1:
1654 	qedr_cleanup_user(dev, qp);
1655 	return rc;
1656 }
1657 
1658 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1659 {
1660 	qp->sq.db = dev->db_addr +
1661 	    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1662 	qp->sq.db_data.data.icid = qp->icid;
1663 
1664 	qp->rq.db = dev->db_addr +
1665 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1666 	qp->rq.db_data.data.icid = qp->icid;
1667 	qp->rq.iwarp_db2 = dev->db_addr +
1668 			   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1669 	qp->rq.iwarp_db2_data.data.icid = qp->icid;
1670 	qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1671 }
1672 
1673 static int
1674 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1675 			   struct qedr_qp *qp,
1676 			   struct qed_rdma_create_qp_in_params *in_params,
1677 			   u32 n_sq_elems, u32 n_rq_elems)
1678 {
1679 	struct qed_rdma_create_qp_out_params out_params;
1680 	int rc;
1681 
1682 	rc = dev->ops->common->chain_alloc(dev->cdev,
1683 					   QED_CHAIN_USE_TO_PRODUCE,
1684 					   QED_CHAIN_MODE_PBL,
1685 					   QED_CHAIN_CNT_TYPE_U32,
1686 					   n_sq_elems,
1687 					   QEDR_SQE_ELEMENT_SIZE,
1688 					   &qp->sq.pbl, NULL);
1689 
1690 	if (rc)
1691 		return rc;
1692 
1693 	in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1694 	in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1695 
1696 	rc = dev->ops->common->chain_alloc(dev->cdev,
1697 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1698 					   QED_CHAIN_MODE_PBL,
1699 					   QED_CHAIN_CNT_TYPE_U32,
1700 					   n_rq_elems,
1701 					   QEDR_RQE_ELEMENT_SIZE,
1702 					   &qp->rq.pbl, NULL);
1703 	if (rc)
1704 		return rc;
1705 
1706 	in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1707 	in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1708 
1709 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1710 					      in_params, &out_params);
1711 
1712 	if (!qp->qed_qp)
1713 		return -EINVAL;
1714 
1715 	qp->qp_id = out_params.qp_id;
1716 	qp->icid = out_params.icid;
1717 
1718 	qedr_set_roce_db_info(dev, qp);
1719 	return rc;
1720 }
1721 
1722 static int
1723 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1724 			    struct qedr_qp *qp,
1725 			    struct qed_rdma_create_qp_in_params *in_params,
1726 			    u32 n_sq_elems, u32 n_rq_elems)
1727 {
1728 	struct qed_rdma_create_qp_out_params out_params;
1729 	struct qed_chain_ext_pbl ext_pbl;
1730 	int rc;
1731 
1732 	in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1733 						     QEDR_SQE_ELEMENT_SIZE,
1734 						     QED_CHAIN_MODE_PBL);
1735 	in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1736 						     QEDR_RQE_ELEMENT_SIZE,
1737 						     QED_CHAIN_MODE_PBL);
1738 
1739 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1740 					      in_params, &out_params);
1741 
1742 	if (!qp->qed_qp)
1743 		return -EINVAL;
1744 
1745 	/* Now we allocate the chain */
1746 	ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1747 	ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1748 
1749 	rc = dev->ops->common->chain_alloc(dev->cdev,
1750 					   QED_CHAIN_USE_TO_PRODUCE,
1751 					   QED_CHAIN_MODE_PBL,
1752 					   QED_CHAIN_CNT_TYPE_U32,
1753 					   n_sq_elems,
1754 					   QEDR_SQE_ELEMENT_SIZE,
1755 					   &qp->sq.pbl, &ext_pbl);
1756 
1757 	if (rc)
1758 		goto err;
1759 
1760 	ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1761 	ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1762 
1763 	rc = dev->ops->common->chain_alloc(dev->cdev,
1764 					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1765 					   QED_CHAIN_MODE_PBL,
1766 					   QED_CHAIN_CNT_TYPE_U32,
1767 					   n_rq_elems,
1768 					   QEDR_RQE_ELEMENT_SIZE,
1769 					   &qp->rq.pbl, &ext_pbl);
1770 
1771 	if (rc)
1772 		goto err;
1773 
1774 	qp->qp_id = out_params.qp_id;
1775 	qp->icid = out_params.icid;
1776 
1777 	qedr_set_iwarp_db_info(dev, qp);
1778 	return rc;
1779 
1780 err:
1781 	dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1782 
1783 	return rc;
1784 }
1785 
1786 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1787 {
1788 	dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1789 	kfree(qp->wqe_wr_id);
1790 
1791 	dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1792 	kfree(qp->rqe_wr_id);
1793 }
1794 
1795 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1796 				 struct qedr_qp *qp,
1797 				 struct ib_pd *ibpd,
1798 				 struct ib_qp_init_attr *attrs)
1799 {
1800 	struct qed_rdma_create_qp_in_params in_params;
1801 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1802 	int rc = -EINVAL;
1803 	u32 n_rq_elems;
1804 	u32 n_sq_elems;
1805 	u32 n_sq_entries;
1806 
1807 	memset(&in_params, 0, sizeof(in_params));
1808 
1809 	/* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1810 	 * the ring. The ring should allow at least a single WR, even if the
1811 	 * user requested none, due to allocation issues.
1812 	 * We should add an extra WR since the prod and cons indices of
1813 	 * wqe_wr_id are managed in such a way that the WQ is considered full
1814 	 * when (prod+1)%max_wr==cons. We currently don't do that because we
1815 	 * double the number of entries due an iSER issue that pushes far more
1816 	 * WRs than indicated. If we decline its ib_post_send() then we get
1817 	 * error prints in the dmesg we'd like to avoid.
1818 	 */
1819 	qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1820 			      dev->attr.max_sqe);
1821 
1822 	qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1823 				GFP_KERNEL);
1824 	if (!qp->wqe_wr_id) {
1825 		DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1826 		return -ENOMEM;
1827 	}
1828 
1829 	/* QP handle to be written in CQE */
1830 	in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1831 	in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1832 
1833 	/* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1834 	 * the ring. There ring should allow at least a single WR, even if the
1835 	 * user requested none, due to allocation issues.
1836 	 */
1837 	qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1838 
1839 	/* Allocate driver internal RQ array */
1840 	qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1841 				GFP_KERNEL);
1842 	if (!qp->rqe_wr_id) {
1843 		DP_ERR(dev,
1844 		       "create qp: failed RQ shadow memory allocation\n");
1845 		kfree(qp->wqe_wr_id);
1846 		return -ENOMEM;
1847 	}
1848 
1849 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1850 
1851 	n_sq_entries = attrs->cap.max_send_wr;
1852 	n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1853 	n_sq_entries = max_t(u32, n_sq_entries, 1);
1854 	n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1855 
1856 	n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1857 
1858 	if (rdma_protocol_iwarp(&dev->ibdev, 1))
1859 		rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1860 						 n_sq_elems, n_rq_elems);
1861 	else
1862 		rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1863 						n_sq_elems, n_rq_elems);
1864 	if (rc)
1865 		qedr_cleanup_kernel(dev, qp);
1866 
1867 	return rc;
1868 }
1869 
1870 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1871 			     struct ib_qp_init_attr *attrs,
1872 			     struct ib_udata *udata)
1873 {
1874 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1875 	struct qedr_pd *pd = get_qedr_pd(ibpd);
1876 	struct qedr_qp *qp;
1877 	struct ib_qp *ibqp;
1878 	int rc = 0;
1879 
1880 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1881 		 udata ? "user library" : "kernel", pd);
1882 
1883 	rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
1884 	if (rc)
1885 		return ERR_PTR(rc);
1886 
1887 	DP_DEBUG(dev, QEDR_MSG_QP,
1888 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1889 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
1890 		 get_qedr_cq(attrs->send_cq),
1891 		 get_qedr_cq(attrs->send_cq)->icid,
1892 		 get_qedr_cq(attrs->recv_cq),
1893 		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
1894 
1895 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1896 	if (!qp) {
1897 		DP_ERR(dev, "create qp: failed allocating memory\n");
1898 		return ERR_PTR(-ENOMEM);
1899 	}
1900 
1901 	qedr_set_common_qp_params(dev, qp, pd, attrs);
1902 
1903 	if (attrs->qp_type == IB_QPT_GSI) {
1904 		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1905 		if (IS_ERR(ibqp))
1906 			kfree(qp);
1907 		return ibqp;
1908 	}
1909 
1910 	if (udata)
1911 		rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1912 	else
1913 		rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1914 
1915 	if (rc)
1916 		goto err;
1917 
1918 	qp->ibqp.qp_num = qp->qp_id;
1919 
1920 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
1921 		rc = xa_insert_irq(&dev->qps, qp->qp_id, qp, GFP_KERNEL);
1922 		if (rc)
1923 			goto err;
1924 	}
1925 
1926 	return &qp->ibqp;
1927 
1928 err:
1929 	kfree(qp);
1930 
1931 	return ERR_PTR(-EFAULT);
1932 }
1933 
1934 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1935 {
1936 	switch (qp_state) {
1937 	case QED_ROCE_QP_STATE_RESET:
1938 		return IB_QPS_RESET;
1939 	case QED_ROCE_QP_STATE_INIT:
1940 		return IB_QPS_INIT;
1941 	case QED_ROCE_QP_STATE_RTR:
1942 		return IB_QPS_RTR;
1943 	case QED_ROCE_QP_STATE_RTS:
1944 		return IB_QPS_RTS;
1945 	case QED_ROCE_QP_STATE_SQD:
1946 		return IB_QPS_SQD;
1947 	case QED_ROCE_QP_STATE_ERR:
1948 		return IB_QPS_ERR;
1949 	case QED_ROCE_QP_STATE_SQE:
1950 		return IB_QPS_SQE;
1951 	}
1952 	return IB_QPS_ERR;
1953 }
1954 
1955 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1956 					enum ib_qp_state qp_state)
1957 {
1958 	switch (qp_state) {
1959 	case IB_QPS_RESET:
1960 		return QED_ROCE_QP_STATE_RESET;
1961 	case IB_QPS_INIT:
1962 		return QED_ROCE_QP_STATE_INIT;
1963 	case IB_QPS_RTR:
1964 		return QED_ROCE_QP_STATE_RTR;
1965 	case IB_QPS_RTS:
1966 		return QED_ROCE_QP_STATE_RTS;
1967 	case IB_QPS_SQD:
1968 		return QED_ROCE_QP_STATE_SQD;
1969 	case IB_QPS_ERR:
1970 		return QED_ROCE_QP_STATE_ERR;
1971 	default:
1972 		return QED_ROCE_QP_STATE_ERR;
1973 	}
1974 }
1975 
1976 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1977 {
1978 	qed_chain_reset(&qph->pbl);
1979 	qph->prod = 0;
1980 	qph->cons = 0;
1981 	qph->wqe_cons = 0;
1982 	qph->db_data.data.value = cpu_to_le16(0);
1983 }
1984 
1985 static int qedr_update_qp_state(struct qedr_dev *dev,
1986 				struct qedr_qp *qp,
1987 				enum qed_roce_qp_state cur_state,
1988 				enum qed_roce_qp_state new_state)
1989 {
1990 	int status = 0;
1991 
1992 	if (new_state == cur_state)
1993 		return 0;
1994 
1995 	switch (cur_state) {
1996 	case QED_ROCE_QP_STATE_RESET:
1997 		switch (new_state) {
1998 		case QED_ROCE_QP_STATE_INIT:
1999 			qp->prev_wqe_size = 0;
2000 			qedr_reset_qp_hwq_info(&qp->sq);
2001 			qedr_reset_qp_hwq_info(&qp->rq);
2002 			break;
2003 		default:
2004 			status = -EINVAL;
2005 			break;
2006 		}
2007 		break;
2008 	case QED_ROCE_QP_STATE_INIT:
2009 		switch (new_state) {
2010 		case QED_ROCE_QP_STATE_RTR:
2011 			/* Update doorbell (in case post_recv was
2012 			 * done before move to RTR)
2013 			 */
2014 
2015 			if (rdma_protocol_roce(&dev->ibdev, 1)) {
2016 				writel(qp->rq.db_data.raw, qp->rq.db);
2017 			}
2018 			break;
2019 		case QED_ROCE_QP_STATE_ERR:
2020 			break;
2021 		default:
2022 			/* Invalid state change. */
2023 			status = -EINVAL;
2024 			break;
2025 		}
2026 		break;
2027 	case QED_ROCE_QP_STATE_RTR:
2028 		/* RTR->XXX */
2029 		switch (new_state) {
2030 		case QED_ROCE_QP_STATE_RTS:
2031 			break;
2032 		case QED_ROCE_QP_STATE_ERR:
2033 			break;
2034 		default:
2035 			/* Invalid state change. */
2036 			status = -EINVAL;
2037 			break;
2038 		}
2039 		break;
2040 	case QED_ROCE_QP_STATE_RTS:
2041 		/* RTS->XXX */
2042 		switch (new_state) {
2043 		case QED_ROCE_QP_STATE_SQD:
2044 			break;
2045 		case QED_ROCE_QP_STATE_ERR:
2046 			break;
2047 		default:
2048 			/* Invalid state change. */
2049 			status = -EINVAL;
2050 			break;
2051 		}
2052 		break;
2053 	case QED_ROCE_QP_STATE_SQD:
2054 		/* SQD->XXX */
2055 		switch (new_state) {
2056 		case QED_ROCE_QP_STATE_RTS:
2057 		case QED_ROCE_QP_STATE_ERR:
2058 			break;
2059 		default:
2060 			/* Invalid state change. */
2061 			status = -EINVAL;
2062 			break;
2063 		}
2064 		break;
2065 	case QED_ROCE_QP_STATE_ERR:
2066 		/* ERR->XXX */
2067 		switch (new_state) {
2068 		case QED_ROCE_QP_STATE_RESET:
2069 			if ((qp->rq.prod != qp->rq.cons) ||
2070 			    (qp->sq.prod != qp->sq.cons)) {
2071 				DP_NOTICE(dev,
2072 					  "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
2073 					  qp->rq.prod, qp->rq.cons, qp->sq.prod,
2074 					  qp->sq.cons);
2075 				status = -EINVAL;
2076 			}
2077 			break;
2078 		default:
2079 			status = -EINVAL;
2080 			break;
2081 		}
2082 		break;
2083 	default:
2084 		status = -EINVAL;
2085 		break;
2086 	}
2087 
2088 	return status;
2089 }
2090 
2091 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2092 		   int attr_mask, struct ib_udata *udata)
2093 {
2094 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2095 	struct qed_rdma_modify_qp_in_params qp_params = { 0 };
2096 	struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
2097 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
2098 	enum ib_qp_state old_qp_state, new_qp_state;
2099 	enum qed_roce_qp_state cur_state;
2100 	int rc = 0;
2101 
2102 	DP_DEBUG(dev, QEDR_MSG_QP,
2103 		 "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
2104 		 attr->qp_state);
2105 
2106 	old_qp_state = qedr_get_ibqp_state(qp->state);
2107 	if (attr_mask & IB_QP_STATE)
2108 		new_qp_state = attr->qp_state;
2109 	else
2110 		new_qp_state = old_qp_state;
2111 
2112 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2113 		if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
2114 					ibqp->qp_type, attr_mask)) {
2115 			DP_ERR(dev,
2116 			       "modify qp: invalid attribute mask=0x%x specified for\n"
2117 			       "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
2118 			       attr_mask, qp->qp_id, ibqp->qp_type,
2119 			       old_qp_state, new_qp_state);
2120 			rc = -EINVAL;
2121 			goto err;
2122 		}
2123 	}
2124 
2125 	/* Translate the masks... */
2126 	if (attr_mask & IB_QP_STATE) {
2127 		SET_FIELD(qp_params.modify_flags,
2128 			  QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
2129 		qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
2130 	}
2131 
2132 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
2133 		qp_params.sqd_async = true;
2134 
2135 	if (attr_mask & IB_QP_PKEY_INDEX) {
2136 		SET_FIELD(qp_params.modify_flags,
2137 			  QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
2138 		if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
2139 			rc = -EINVAL;
2140 			goto err;
2141 		}
2142 
2143 		qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
2144 	}
2145 
2146 	if (attr_mask & IB_QP_QKEY)
2147 		qp->qkey = attr->qkey;
2148 
2149 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
2150 		SET_FIELD(qp_params.modify_flags,
2151 			  QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
2152 		qp_params.incoming_rdma_read_en = attr->qp_access_flags &
2153 						  IB_ACCESS_REMOTE_READ;
2154 		qp_params.incoming_rdma_write_en = attr->qp_access_flags &
2155 						   IB_ACCESS_REMOTE_WRITE;
2156 		qp_params.incoming_atomic_en = attr->qp_access_flags &
2157 					       IB_ACCESS_REMOTE_ATOMIC;
2158 	}
2159 
2160 	if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
2161 		if (rdma_protocol_iwarp(&dev->ibdev, 1))
2162 			return -EINVAL;
2163 
2164 		if (attr_mask & IB_QP_PATH_MTU) {
2165 			if (attr->path_mtu < IB_MTU_256 ||
2166 			    attr->path_mtu > IB_MTU_4096) {
2167 				pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
2168 				rc = -EINVAL;
2169 				goto err;
2170 			}
2171 			qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
2172 				      ib_mtu_enum_to_int(iboe_get_mtu
2173 							 (dev->ndev->mtu)));
2174 		}
2175 
2176 		if (!qp->mtu) {
2177 			qp->mtu =
2178 			ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2179 			pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
2180 		}
2181 
2182 		SET_FIELD(qp_params.modify_flags,
2183 			  QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
2184 
2185 		qp_params.traffic_class_tos = grh->traffic_class;
2186 		qp_params.flow_label = grh->flow_label;
2187 		qp_params.hop_limit_ttl = grh->hop_limit;
2188 
2189 		qp->sgid_idx = grh->sgid_index;
2190 
2191 		rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
2192 		if (rc) {
2193 			DP_ERR(dev,
2194 			       "modify qp: problems with GID index %d (rc=%d)\n",
2195 			       grh->sgid_index, rc);
2196 			return rc;
2197 		}
2198 
2199 		rc = qedr_get_dmac(dev, &attr->ah_attr,
2200 				   qp_params.remote_mac_addr);
2201 		if (rc)
2202 			return rc;
2203 
2204 		qp_params.use_local_mac = true;
2205 		ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2206 
2207 		DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2208 			 qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2209 			 qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2210 		DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2211 			 qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2212 			 qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2213 		DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2214 			 qp_params.remote_mac_addr);
2215 
2216 		qp_params.mtu = qp->mtu;
2217 		qp_params.lb_indication = false;
2218 	}
2219 
2220 	if (!qp_params.mtu) {
2221 		/* Stay with current MTU */
2222 		if (qp->mtu)
2223 			qp_params.mtu = qp->mtu;
2224 		else
2225 			qp_params.mtu =
2226 			    ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2227 	}
2228 
2229 	if (attr_mask & IB_QP_TIMEOUT) {
2230 		SET_FIELD(qp_params.modify_flags,
2231 			  QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2232 
2233 		/* The received timeout value is an exponent used like this:
2234 		 *    "12.7.34 LOCAL ACK TIMEOUT
2235 		 *    Value representing the transport (ACK) timeout for use by
2236 		 *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2237 		 * The FW expects timeout in msec so we need to divide the usec
2238 		 * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2239 		 * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2240 		 * The value of zero means infinite so we use a 'max_t' to make
2241 		 * sure that sub 1 msec values will be configured as 1 msec.
2242 		 */
2243 		if (attr->timeout)
2244 			qp_params.ack_timeout =
2245 					1 << max_t(int, attr->timeout - 8, 0);
2246 		else
2247 			qp_params.ack_timeout = 0;
2248 	}
2249 
2250 	if (attr_mask & IB_QP_RETRY_CNT) {
2251 		SET_FIELD(qp_params.modify_flags,
2252 			  QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2253 		qp_params.retry_cnt = attr->retry_cnt;
2254 	}
2255 
2256 	if (attr_mask & IB_QP_RNR_RETRY) {
2257 		SET_FIELD(qp_params.modify_flags,
2258 			  QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2259 		qp_params.rnr_retry_cnt = attr->rnr_retry;
2260 	}
2261 
2262 	if (attr_mask & IB_QP_RQ_PSN) {
2263 		SET_FIELD(qp_params.modify_flags,
2264 			  QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2265 		qp_params.rq_psn = attr->rq_psn;
2266 		qp->rq_psn = attr->rq_psn;
2267 	}
2268 
2269 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2270 		if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2271 			rc = -EINVAL;
2272 			DP_ERR(dev,
2273 			       "unsupported max_rd_atomic=%d, supported=%d\n",
2274 			       attr->max_rd_atomic,
2275 			       dev->attr.max_qp_req_rd_atomic_resc);
2276 			goto err;
2277 		}
2278 
2279 		SET_FIELD(qp_params.modify_flags,
2280 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2281 		qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2282 	}
2283 
2284 	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2285 		SET_FIELD(qp_params.modify_flags,
2286 			  QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2287 		qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2288 	}
2289 
2290 	if (attr_mask & IB_QP_SQ_PSN) {
2291 		SET_FIELD(qp_params.modify_flags,
2292 			  QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2293 		qp_params.sq_psn = attr->sq_psn;
2294 		qp->sq_psn = attr->sq_psn;
2295 	}
2296 
2297 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2298 		if (attr->max_dest_rd_atomic >
2299 		    dev->attr.max_qp_resp_rd_atomic_resc) {
2300 			DP_ERR(dev,
2301 			       "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2302 			       attr->max_dest_rd_atomic,
2303 			       dev->attr.max_qp_resp_rd_atomic_resc);
2304 
2305 			rc = -EINVAL;
2306 			goto err;
2307 		}
2308 
2309 		SET_FIELD(qp_params.modify_flags,
2310 			  QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2311 		qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2312 	}
2313 
2314 	if (attr_mask & IB_QP_DEST_QPN) {
2315 		SET_FIELD(qp_params.modify_flags,
2316 			  QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2317 
2318 		qp_params.dest_qp = attr->dest_qp_num;
2319 		qp->dest_qp_num = attr->dest_qp_num;
2320 	}
2321 
2322 	cur_state = qp->state;
2323 
2324 	/* Update the QP state before the actual ramrod to prevent a race with
2325 	 * fast path. Modifying the QP state to error will cause the device to
2326 	 * flush the CQEs and while polling the flushed CQEs will considered as
2327 	 * a potential issue if the QP isn't in error state.
2328 	 */
2329 	if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2330 	    !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2331 		qp->state = QED_ROCE_QP_STATE_ERR;
2332 
2333 	if (qp->qp_type != IB_QPT_GSI)
2334 		rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2335 					      qp->qed_qp, &qp_params);
2336 
2337 	if (attr_mask & IB_QP_STATE) {
2338 		if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2339 			rc = qedr_update_qp_state(dev, qp, cur_state,
2340 						  qp_params.new_state);
2341 		qp->state = qp_params.new_state;
2342 	}
2343 
2344 err:
2345 	return rc;
2346 }
2347 
2348 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2349 {
2350 	int ib_qp_acc_flags = 0;
2351 
2352 	if (params->incoming_rdma_write_en)
2353 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2354 	if (params->incoming_rdma_read_en)
2355 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2356 	if (params->incoming_atomic_en)
2357 		ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2358 	ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2359 	return ib_qp_acc_flags;
2360 }
2361 
2362 int qedr_query_qp(struct ib_qp *ibqp,
2363 		  struct ib_qp_attr *qp_attr,
2364 		  int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2365 {
2366 	struct qed_rdma_query_qp_out_params params;
2367 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2368 	struct qedr_dev *dev = qp->dev;
2369 	int rc = 0;
2370 
2371 	memset(&params, 0, sizeof(params));
2372 
2373 	rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2374 	if (rc)
2375 		goto err;
2376 
2377 	memset(qp_attr, 0, sizeof(*qp_attr));
2378 	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2379 
2380 	qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2381 	qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2382 	qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2383 	qp_attr->path_mig_state = IB_MIG_MIGRATED;
2384 	qp_attr->rq_psn = params.rq_psn;
2385 	qp_attr->sq_psn = params.sq_psn;
2386 	qp_attr->dest_qp_num = params.dest_qp;
2387 
2388 	qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2389 
2390 	qp_attr->cap.max_send_wr = qp->sq.max_wr;
2391 	qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2392 	qp_attr->cap.max_send_sge = qp->sq.max_sges;
2393 	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2394 	qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2395 	qp_init_attr->cap = qp_attr->cap;
2396 
2397 	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2398 	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2399 			params.flow_label, qp->sgid_idx,
2400 			params.hop_limit_ttl, params.traffic_class_tos);
2401 	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2402 	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2403 	rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2404 	qp_attr->timeout = params.timeout;
2405 	qp_attr->rnr_retry = params.rnr_retry;
2406 	qp_attr->retry_cnt = params.retry_cnt;
2407 	qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2408 	qp_attr->pkey_index = params.pkey_index;
2409 	qp_attr->port_num = 1;
2410 	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2411 	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2412 	qp_attr->alt_pkey_index = 0;
2413 	qp_attr->alt_port_num = 0;
2414 	qp_attr->alt_timeout = 0;
2415 	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2416 
2417 	qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2418 	qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2419 	qp_attr->max_rd_atomic = params.max_rd_atomic;
2420 	qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2421 
2422 	DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2423 		 qp_attr->cap.max_inline_data);
2424 
2425 err:
2426 	return rc;
2427 }
2428 
2429 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp,
2430 				  struct ib_udata *udata)
2431 {
2432 	int rc = 0;
2433 
2434 	if (qp->qp_type != IB_QPT_GSI) {
2435 		rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2436 		if (rc)
2437 			return rc;
2438 	}
2439 
2440 	if (udata)
2441 		qedr_cleanup_user(dev, qp);
2442 	else
2443 		qedr_cleanup_kernel(dev, qp);
2444 
2445 	return 0;
2446 }
2447 
2448 int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
2449 {
2450 	struct qedr_qp *qp = get_qedr_qp(ibqp);
2451 	struct qedr_dev *dev = qp->dev;
2452 	struct ib_qp_attr attr;
2453 	int attr_mask = 0;
2454 
2455 	DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2456 		 qp, qp->qp_type);
2457 
2458 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
2459 		if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2460 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
2461 		    (qp->state != QED_ROCE_QP_STATE_INIT)) {
2462 
2463 			attr.qp_state = IB_QPS_ERR;
2464 			attr_mask |= IB_QP_STATE;
2465 
2466 			/* Change the QP state to ERROR */
2467 			qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2468 		}
2469 	} else {
2470 		/* Wait for the connect/accept to complete */
2471 		if (qp->ep) {
2472 			int wait_count = 1;
2473 
2474 			while (qp->ep->during_connect) {
2475 				DP_DEBUG(dev, QEDR_MSG_QP,
2476 					 "Still in during connect/accept\n");
2477 
2478 				msleep(100);
2479 				if (wait_count++ > 200) {
2480 					DP_NOTICE(dev,
2481 						  "during connect timeout\n");
2482 					break;
2483 				}
2484 			}
2485 		}
2486 	}
2487 
2488 	if (qp->qp_type == IB_QPT_GSI)
2489 		qedr_destroy_gsi_qp(dev);
2490 
2491 	qedr_free_qp_resources(dev, qp, udata);
2492 
2493 	if (atomic_dec_and_test(&qp->refcnt) &&
2494 	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
2495 		xa_erase_irq(&dev->qps, qp->qp_id);
2496 		kfree(qp);
2497 	}
2498 	return 0;
2499 }
2500 
2501 int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags,
2502 		   struct ib_udata *udata)
2503 {
2504 	struct qedr_ah *ah = get_qedr_ah(ibah);
2505 
2506 	rdma_copy_ah_attr(&ah->attr, attr);
2507 
2508 	return 0;
2509 }
2510 
2511 void qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
2512 {
2513 	struct qedr_ah *ah = get_qedr_ah(ibah);
2514 
2515 	rdma_destroy_ah_attr(&ah->attr);
2516 }
2517 
2518 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2519 {
2520 	struct qedr_pbl *pbl, *tmp;
2521 
2522 	if (info->pbl_table)
2523 		list_add_tail(&info->pbl_table->list_entry,
2524 			      &info->free_pbl_list);
2525 
2526 	if (!list_empty(&info->inuse_pbl_list))
2527 		list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2528 
2529 	list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2530 		list_del(&pbl->list_entry);
2531 		qedr_free_pbl(dev, &info->pbl_info, pbl);
2532 	}
2533 }
2534 
2535 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2536 			size_t page_list_len, bool two_layered)
2537 {
2538 	struct qedr_pbl *tmp;
2539 	int rc;
2540 
2541 	INIT_LIST_HEAD(&info->free_pbl_list);
2542 	INIT_LIST_HEAD(&info->inuse_pbl_list);
2543 
2544 	rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2545 				  page_list_len, two_layered);
2546 	if (rc)
2547 		goto done;
2548 
2549 	info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2550 	if (IS_ERR(info->pbl_table)) {
2551 		rc = PTR_ERR(info->pbl_table);
2552 		goto done;
2553 	}
2554 
2555 	DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2556 		 &info->pbl_table->pa);
2557 
2558 	/* in usual case we use 2 PBLs, so we add one to free
2559 	 * list and allocating another one
2560 	 */
2561 	tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2562 	if (IS_ERR(tmp)) {
2563 		DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2564 		goto done;
2565 	}
2566 
2567 	list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2568 
2569 	DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2570 
2571 done:
2572 	if (rc)
2573 		free_mr_info(dev, info);
2574 
2575 	return rc;
2576 }
2577 
2578 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2579 			       u64 usr_addr, int acc, struct ib_udata *udata)
2580 {
2581 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2582 	struct qedr_mr *mr;
2583 	struct qedr_pd *pd;
2584 	int rc = -ENOMEM;
2585 
2586 	pd = get_qedr_pd(ibpd);
2587 	DP_DEBUG(dev, QEDR_MSG_MR,
2588 		 "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2589 		 pd->pd_id, start, len, usr_addr, acc);
2590 
2591 	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2592 		return ERR_PTR(-EINVAL);
2593 
2594 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2595 	if (!mr)
2596 		return ERR_PTR(rc);
2597 
2598 	mr->type = QEDR_MR_USER;
2599 
2600 	mr->umem = ib_umem_get(udata, start, len, acc, 0);
2601 	if (IS_ERR(mr->umem)) {
2602 		rc = -EFAULT;
2603 		goto err0;
2604 	}
2605 
2606 	rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2607 	if (rc)
2608 		goto err1;
2609 
2610 	qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2611 			   &mr->info.pbl_info, PAGE_SHIFT);
2612 
2613 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2614 	if (rc) {
2615 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2616 		goto err1;
2617 	}
2618 
2619 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2620 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2621 	mr->hw_mr.key = 0;
2622 	mr->hw_mr.pd = pd->pd_id;
2623 	mr->hw_mr.local_read = 1;
2624 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2625 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2626 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2627 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2628 	mr->hw_mr.mw_bind = false;
2629 	mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2630 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2631 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2632 	mr->hw_mr.page_size_log = PAGE_SHIFT;
2633 	mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2634 	mr->hw_mr.length = len;
2635 	mr->hw_mr.vaddr = usr_addr;
2636 	mr->hw_mr.zbva = false;
2637 	mr->hw_mr.phy_mr = false;
2638 	mr->hw_mr.dma_mr = false;
2639 
2640 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2641 	if (rc) {
2642 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2643 		goto err2;
2644 	}
2645 
2646 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2647 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2648 	    mr->hw_mr.remote_atomic)
2649 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2650 
2651 	DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2652 		 mr->ibmr.lkey);
2653 	return &mr->ibmr;
2654 
2655 err2:
2656 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2657 err1:
2658 	qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2659 err0:
2660 	kfree(mr);
2661 	return ERR_PTR(rc);
2662 }
2663 
2664 int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
2665 {
2666 	struct qedr_mr *mr = get_qedr_mr(ib_mr);
2667 	struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2668 	int rc = 0;
2669 
2670 	rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2671 	if (rc)
2672 		return rc;
2673 
2674 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2675 
2676 	if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2677 		qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2678 
2679 	/* it could be user registered memory. */
2680 	ib_umem_release(mr->umem);
2681 
2682 	kfree(mr);
2683 
2684 	return rc;
2685 }
2686 
2687 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2688 				       int max_page_list_len)
2689 {
2690 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2691 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2692 	struct qedr_mr *mr;
2693 	int rc = -ENOMEM;
2694 
2695 	DP_DEBUG(dev, QEDR_MSG_MR,
2696 		 "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2697 		 max_page_list_len);
2698 
2699 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2700 	if (!mr)
2701 		return ERR_PTR(rc);
2702 
2703 	mr->dev = dev;
2704 	mr->type = QEDR_MR_FRMR;
2705 
2706 	rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2707 	if (rc)
2708 		goto err0;
2709 
2710 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2711 	if (rc) {
2712 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2713 		goto err0;
2714 	}
2715 
2716 	/* Index only, 18 bit long, lkey = itid << 8 | key */
2717 	mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2718 	mr->hw_mr.key = 0;
2719 	mr->hw_mr.pd = pd->pd_id;
2720 	mr->hw_mr.local_read = 1;
2721 	mr->hw_mr.local_write = 0;
2722 	mr->hw_mr.remote_read = 0;
2723 	mr->hw_mr.remote_write = 0;
2724 	mr->hw_mr.remote_atomic = 0;
2725 	mr->hw_mr.mw_bind = false;
2726 	mr->hw_mr.pbl_ptr = 0;
2727 	mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2728 	mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2729 	mr->hw_mr.fbo = 0;
2730 	mr->hw_mr.length = 0;
2731 	mr->hw_mr.vaddr = 0;
2732 	mr->hw_mr.zbva = false;
2733 	mr->hw_mr.phy_mr = true;
2734 	mr->hw_mr.dma_mr = false;
2735 
2736 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2737 	if (rc) {
2738 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2739 		goto err1;
2740 	}
2741 
2742 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2743 	mr->ibmr.rkey = mr->ibmr.lkey;
2744 
2745 	DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2746 	return mr;
2747 
2748 err1:
2749 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2750 err0:
2751 	kfree(mr);
2752 	return ERR_PTR(rc);
2753 }
2754 
2755 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2756 			    u32 max_num_sg, struct ib_udata *udata)
2757 {
2758 	struct qedr_mr *mr;
2759 
2760 	if (mr_type != IB_MR_TYPE_MEM_REG)
2761 		return ERR_PTR(-EINVAL);
2762 
2763 	mr = __qedr_alloc_mr(ibpd, max_num_sg);
2764 
2765 	if (IS_ERR(mr))
2766 		return ERR_PTR(-EINVAL);
2767 
2768 	return &mr->ibmr;
2769 }
2770 
2771 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2772 {
2773 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2774 	struct qedr_pbl *pbl_table;
2775 	struct regpair *pbe;
2776 	u32 pbes_in_page;
2777 
2778 	if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2779 		DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2780 		return -ENOMEM;
2781 	}
2782 
2783 	DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2784 		 mr->npages, addr);
2785 
2786 	pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2787 	pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2788 	pbe = (struct regpair *)pbl_table->va;
2789 	pbe +=  mr->npages % pbes_in_page;
2790 	pbe->lo = cpu_to_le32((u32)addr);
2791 	pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2792 
2793 	mr->npages++;
2794 
2795 	return 0;
2796 }
2797 
2798 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2799 {
2800 	int work = info->completed - info->completed_handled - 1;
2801 
2802 	DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2803 	while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2804 		struct qedr_pbl *pbl;
2805 
2806 		/* Free all the page list that are possible to be freed
2807 		 * (all the ones that were invalidated), under the assumption
2808 		 * that if an FMR was completed successfully that means that
2809 		 * if there was an invalidate operation before it also ended
2810 		 */
2811 		pbl = list_first_entry(&info->inuse_pbl_list,
2812 				       struct qedr_pbl, list_entry);
2813 		list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2814 		info->completed_handled++;
2815 	}
2816 }
2817 
2818 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2819 		   int sg_nents, unsigned int *sg_offset)
2820 {
2821 	struct qedr_mr *mr = get_qedr_mr(ibmr);
2822 
2823 	mr->npages = 0;
2824 
2825 	handle_completed_mrs(mr->dev, &mr->info);
2826 	return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2827 }
2828 
2829 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2830 {
2831 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2832 	struct qedr_pd *pd = get_qedr_pd(ibpd);
2833 	struct qedr_mr *mr;
2834 	int rc;
2835 
2836 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2837 	if (!mr)
2838 		return ERR_PTR(-ENOMEM);
2839 
2840 	mr->type = QEDR_MR_DMA;
2841 
2842 	rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2843 	if (rc) {
2844 		DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2845 		goto err1;
2846 	}
2847 
2848 	/* index only, 18 bit long, lkey = itid << 8 | key */
2849 	mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2850 	mr->hw_mr.pd = pd->pd_id;
2851 	mr->hw_mr.local_read = 1;
2852 	mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2853 	mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2854 	mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2855 	mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2856 	mr->hw_mr.dma_mr = true;
2857 
2858 	rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2859 	if (rc) {
2860 		DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2861 		goto err2;
2862 	}
2863 
2864 	mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2865 	if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2866 	    mr->hw_mr.remote_atomic)
2867 		mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2868 
2869 	DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2870 	return &mr->ibmr;
2871 
2872 err2:
2873 	dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2874 err1:
2875 	kfree(mr);
2876 	return ERR_PTR(rc);
2877 }
2878 
2879 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2880 {
2881 	return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2882 }
2883 
2884 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2885 {
2886 	int i, len = 0;
2887 
2888 	for (i = 0; i < num_sge; i++)
2889 		len += sg_list[i].length;
2890 
2891 	return len;
2892 }
2893 
2894 static void swap_wqe_data64(u64 *p)
2895 {
2896 	int i;
2897 
2898 	for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2899 		*p = cpu_to_be64(cpu_to_le64(*p));
2900 }
2901 
2902 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2903 				       struct qedr_qp *qp, u8 *wqe_size,
2904 				       const struct ib_send_wr *wr,
2905 				       const struct ib_send_wr **bad_wr,
2906 				       u8 *bits, u8 bit)
2907 {
2908 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2909 	char *seg_prt, *wqe;
2910 	int i, seg_siz;
2911 
2912 	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2913 		DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2914 		*bad_wr = wr;
2915 		return 0;
2916 	}
2917 
2918 	if (!data_size)
2919 		return data_size;
2920 
2921 	*bits |= bit;
2922 
2923 	seg_prt = NULL;
2924 	wqe = NULL;
2925 	seg_siz = 0;
2926 
2927 	/* Copy data inline */
2928 	for (i = 0; i < wr->num_sge; i++) {
2929 		u32 len = wr->sg_list[i].length;
2930 		void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2931 
2932 		while (len > 0) {
2933 			u32 cur;
2934 
2935 			/* New segment required */
2936 			if (!seg_siz) {
2937 				wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2938 				seg_prt = wqe;
2939 				seg_siz = sizeof(struct rdma_sq_common_wqe);
2940 				(*wqe_size)++;
2941 			}
2942 
2943 			/* Calculate currently allowed length */
2944 			cur = min_t(u32, len, seg_siz);
2945 			memcpy(seg_prt, src, cur);
2946 
2947 			/* Update segment variables */
2948 			seg_prt += cur;
2949 			seg_siz -= cur;
2950 
2951 			/* Update sge variables */
2952 			src += cur;
2953 			len -= cur;
2954 
2955 			/* Swap fully-completed segments */
2956 			if (!seg_siz)
2957 				swap_wqe_data64((u64 *)wqe);
2958 		}
2959 	}
2960 
2961 	/* swap last not completed segment */
2962 	if (seg_siz)
2963 		swap_wqe_data64((u64 *)wqe);
2964 
2965 	return data_size;
2966 }
2967 
/* Fill an RQ SGE: address (via DMA_REGPAIR_LE), length and flags.
 * All macro parameters are parenthesized so that expression arguments
 * (e.g. "sges + 1") expand correctly.
 */
#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, (vaddr));		\
		(sge)->length = cpu_to_le32((vlength));		\
		(sge)->flags = cpu_to_le32((vflags));		\
	} while (0)
2974 
/* Fill an SRQ WQE header: work request id (via DMA_REGPAIR_LE) and the
 * number of SGEs. All macro parameters are parenthesized so that
 * expression arguments expand correctly.
 */
#define SRQ_HDR_SET(hdr, vwr_id, num_sge)			\
	do {							\
		DMA_REGPAIR_LE((hdr)->wr_id, (vwr_id));		\
		(hdr)->num_sges = (num_sge);			\
	} while (0)
2980 
/* Fill an SRQ SGE: address (via DMA_REGPAIR_LE), length and local key.
 * All macro parameters are parenthesized so that expression arguments
 * expand correctly.
 */
#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)			\
	do {							\
		DMA_REGPAIR_LE((sge)->addr, (vaddr));		\
		(sge)->length = cpu_to_le32((vlength));		\
		(sge)->l_key = cpu_to_le32((vlkey));		\
	} while (0)
2987 
2988 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2989 				const struct ib_send_wr *wr)
2990 {
2991 	u32 data_size = 0;
2992 	int i;
2993 
2994 	for (i = 0; i < wr->num_sge; i++) {
2995 		struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2996 
2997 		DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2998 		sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2999 		sge->length = cpu_to_le32(wr->sg_list[i].length);
3000 		data_size += wr->sg_list[i].length;
3001 	}
3002 
3003 	if (wqe_size)
3004 		*wqe_size += wr->num_sge;
3005 
3006 	return data_size;
3007 }
3008 
3009 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
3010 				     struct qedr_qp *qp,
3011 				     struct rdma_sq_rdma_wqe_1st *rwqe,
3012 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
3013 				     const struct ib_send_wr *wr,
3014 				     const struct ib_send_wr **bad_wr)
3015 {
3016 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
3017 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
3018 
3019 	if (wr->send_flags & IB_SEND_INLINE &&
3020 	    (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
3021 	     wr->opcode == IB_WR_RDMA_WRITE)) {
3022 		u8 flags = 0;
3023 
3024 		SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
3025 		return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
3026 						   bad_wr, &rwqe->flags, flags);
3027 	}
3028 
3029 	return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
3030 }
3031 
3032 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
3033 				     struct qedr_qp *qp,
3034 				     struct rdma_sq_send_wqe_1st *swqe,
3035 				     struct rdma_sq_send_wqe_2st *swqe2,
3036 				     const struct ib_send_wr *wr,
3037 				     const struct ib_send_wr **bad_wr)
3038 {
3039 	memset(swqe2, 0, sizeof(*swqe2));
3040 	if (wr->send_flags & IB_SEND_INLINE) {
3041 		u8 flags = 0;
3042 
3043 		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
3044 		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
3045 						   bad_wr, &swqe->flags, flags);
3046 	}
3047 
3048 	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
3049 }
3050 
3051 static int qedr_prepare_reg(struct qedr_qp *qp,
3052 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
3053 			    const struct ib_reg_wr *wr)
3054 {
3055 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
3056 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
3057 
3058 	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
3059 	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
3060 	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
3061 	fwqe1->l_key = wr->key;
3062 
3063 	fwqe2->access_ctrl = 0;
3064 
3065 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
3066 		   !!(wr->access & IB_ACCESS_REMOTE_READ));
3067 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
3068 		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
3069 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
3070 		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
3071 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
3072 	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
3073 		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
3074 	fwqe2->fmr_ctrl = 0;
3075 
3076 	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
3077 		   ilog2(mr->ibmr.page_size) - 12);
3078 
3079 	fwqe2->length_hi = 0;
3080 	fwqe2->length_lo = mr->ibmr.length;
3081 	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
3082 	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
3083 
3084 	qp->wqe_wr_id[qp->sq.prod].mr = mr;
3085 
3086 	return 0;
3087 }
3088 
3089 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
3090 {
3091 	switch (opcode) {
3092 	case IB_WR_RDMA_WRITE:
3093 	case IB_WR_RDMA_WRITE_WITH_IMM:
3094 		return IB_WC_RDMA_WRITE;
3095 	case IB_WR_SEND_WITH_IMM:
3096 	case IB_WR_SEND:
3097 	case IB_WR_SEND_WITH_INV:
3098 		return IB_WC_SEND;
3099 	case IB_WR_RDMA_READ:
3100 	case IB_WR_RDMA_READ_WITH_INV:
3101 		return IB_WC_RDMA_READ;
3102 	case IB_WR_ATOMIC_CMP_AND_SWP:
3103 		return IB_WC_COMP_SWAP;
3104 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3105 		return IB_WC_FETCH_ADD;
3106 	case IB_WR_REG_MR:
3107 		return IB_WC_REG_MR;
3108 	case IB_WR_LOCAL_INV:
3109 		return IB_WC_LOCAL_INV;
3110 	default:
3111 		return IB_WC_SEND;
3112 	}
3113 }
3114 
3115 static inline bool qedr_can_post_send(struct qedr_qp *qp,
3116 				      const struct ib_send_wr *wr)
3117 {
3118 	int wq_is_full, err_wr, pbl_is_full;
3119 	struct qedr_dev *dev = qp->dev;
3120 
3121 	/* prevent SQ overflow and/or processing of a bad WR */
3122 	err_wr = wr->num_sge > qp->sq.max_sges;
3123 	wq_is_full = qedr_wq_is_full(&qp->sq);
3124 	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
3125 		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
3126 	if (wq_is_full || err_wr || pbl_is_full) {
3127 		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
3128 			DP_ERR(dev,
3129 			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
3130 			       qp);
3131 			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
3132 		}
3133 
3134 		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
3135 			DP_ERR(dev,
3136 			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
3137 			       qp);
3138 			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
3139 		}
3140 
3141 		if (pbl_is_full &&
3142 		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
3143 			DP_ERR(dev,
3144 			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
3145 			       qp);
3146 			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
3147 		}
3148 		return false;
3149 	}
3150 	return true;
3151 }
3152 
3153 static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3154 			    const struct ib_send_wr **bad_wr)
3155 {
3156 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3157 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3158 	struct rdma_sq_atomic_wqe_1st *awqe1;
3159 	struct rdma_sq_atomic_wqe_2nd *awqe2;
3160 	struct rdma_sq_atomic_wqe_3rd *awqe3;
3161 	struct rdma_sq_send_wqe_2st *swqe2;
3162 	struct rdma_sq_local_inv_wqe *iwqe;
3163 	struct rdma_sq_rdma_wqe_2nd *rwqe2;
3164 	struct rdma_sq_send_wqe_1st *swqe;
3165 	struct rdma_sq_rdma_wqe_1st *rwqe;
3166 	struct rdma_sq_fmr_wqe_1st *fwqe1;
3167 	struct rdma_sq_common_wqe *wqe;
3168 	u32 length;
3169 	int rc = 0;
3170 	bool comp;
3171 
3172 	if (!qedr_can_post_send(qp, wr)) {
3173 		*bad_wr = wr;
3174 		return -ENOMEM;
3175 	}
3176 
3177 	wqe = qed_chain_produce(&qp->sq.pbl);
3178 	qp->wqe_wr_id[qp->sq.prod].signaled =
3179 		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
3180 
3181 	wqe->flags = 0;
3182 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
3183 		   !!(wr->send_flags & IB_SEND_SOLICITED));
3184 	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
3185 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
3186 	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
3187 		   !!(wr->send_flags & IB_SEND_FENCE));
3188 	wqe->prev_wqe_size = qp->prev_wqe_size;
3189 
3190 	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
3191 
3192 	switch (wr->opcode) {
3193 	case IB_WR_SEND_WITH_IMM:
3194 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3195 			rc = -EINVAL;
3196 			*bad_wr = wr;
3197 			break;
3198 		}
3199 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3200 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3201 		swqe->wqe_size = 2;
3202 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3203 
3204 		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3205 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3206 						   wr, bad_wr);
3207 		swqe->length = cpu_to_le32(length);
3208 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3209 		qp->prev_wqe_size = swqe->wqe_size;
3210 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3211 		break;
3212 	case IB_WR_SEND:
3213 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3214 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3215 
3216 		swqe->wqe_size = 2;
3217 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3218 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3219 						   wr, bad_wr);
3220 		swqe->length = cpu_to_le32(length);
3221 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3222 		qp->prev_wqe_size = swqe->wqe_size;
3223 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3224 		break;
3225 	case IB_WR_SEND_WITH_INV:
3226 		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3227 		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3228 		swqe2 = qed_chain_produce(&qp->sq.pbl);
3229 		swqe->wqe_size = 2;
3230 		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3231 		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3232 						   wr, bad_wr);
3233 		swqe->length = cpu_to_le32(length);
3234 		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3235 		qp->prev_wqe_size = swqe->wqe_size;
3236 		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3237 		break;
3238 
3239 	case IB_WR_RDMA_WRITE_WITH_IMM:
3240 		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3241 			rc = -EINVAL;
3242 			*bad_wr = wr;
3243 			break;
3244 		}
3245 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3246 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3247 
3248 		rwqe->wqe_size = 2;
3249 		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
3250 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3251 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3252 						   wr, bad_wr);
3253 		rwqe->length = cpu_to_le32(length);
3254 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3255 		qp->prev_wqe_size = rwqe->wqe_size;
3256 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3257 		break;
3258 	case IB_WR_RDMA_WRITE:
3259 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3260 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3261 
3262 		rwqe->wqe_size = 2;
3263 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3264 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3265 						   wr, bad_wr);
3266 		rwqe->length = cpu_to_le32(length);
3267 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3268 		qp->prev_wqe_size = rwqe->wqe_size;
3269 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3270 		break;
3271 	case IB_WR_RDMA_READ_WITH_INV:
3272 		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3273 		/* fallthrough -- same is identical to RDMA READ */
3274 
3275 	case IB_WR_RDMA_READ:
3276 		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3277 		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3278 
3279 		rwqe->wqe_size = 2;
3280 		rwqe2 = qed_chain_produce(&qp->sq.pbl);
3281 		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3282 						   wr, bad_wr);
3283 		rwqe->length = cpu_to_le32(length);
3284 		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3285 		qp->prev_wqe_size = rwqe->wqe_size;
3286 		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3287 		break;
3288 
3289 	case IB_WR_ATOMIC_CMP_AND_SWP:
3290 	case IB_WR_ATOMIC_FETCH_AND_ADD:
3291 		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3292 		awqe1->wqe_size = 4;
3293 
3294 		awqe2 = qed_chain_produce(&qp->sq.pbl);
3295 		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3296 		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3297 
3298 		awqe3 = qed_chain_produce(&qp->sq.pbl);
3299 
3300 		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3301 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3302 			DMA_REGPAIR_LE(awqe3->swap_data,
3303 				       atomic_wr(wr)->compare_add);
3304 		} else {
3305 			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3306 			DMA_REGPAIR_LE(awqe3->swap_data,
3307 				       atomic_wr(wr)->swap);
3308 			DMA_REGPAIR_LE(awqe3->cmp_data,
3309 				       atomic_wr(wr)->compare_add);
3310 		}
3311 
3312 		qedr_prepare_sq_sges(qp, NULL, wr);
3313 
3314 		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3315 		qp->prev_wqe_size = awqe1->wqe_size;
3316 		break;
3317 
3318 	case IB_WR_LOCAL_INV:
3319 		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3320 		iwqe->wqe_size = 1;
3321 
3322 		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3323 		iwqe->inv_l_key = wr->ex.invalidate_rkey;
3324 		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3325 		qp->prev_wqe_size = iwqe->wqe_size;
3326 		break;
3327 	case IB_WR_REG_MR:
3328 		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3329 		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3330 		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3331 		fwqe1->wqe_size = 2;
3332 
3333 		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3334 		if (rc) {
3335 			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3336 			*bad_wr = wr;
3337 			break;
3338 		}
3339 
3340 		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3341 		qp->prev_wqe_size = fwqe1->wqe_size;
3342 		break;
3343 	default:
3344 		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3345 		rc = -EINVAL;
3346 		*bad_wr = wr;
3347 		break;
3348 	}
3349 
3350 	if (*bad_wr) {
3351 		u16 value;
3352 
3353 		/* Restore prod to its position before
3354 		 * this WR was processed
3355 		 */
3356 		value = le16_to_cpu(qp->sq.db_data.data.value);
3357 		qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3358 
3359 		/* Restore prev_wqe_size */
3360 		qp->prev_wqe_size = wqe->prev_wqe_size;
3361 		rc = -EINVAL;
3362 		DP_ERR(dev, "POST SEND FAILED\n");
3363 	}
3364 
3365 	return rc;
3366 }
3367 
3368 int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
3369 		   const struct ib_send_wr **bad_wr)
3370 {
3371 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3372 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3373 	unsigned long flags;
3374 	int rc = 0;
3375 
3376 	*bad_wr = NULL;
3377 
3378 	if (qp->qp_type == IB_QPT_GSI)
3379 		return qedr_gsi_post_send(ibqp, wr, bad_wr);
3380 
3381 	spin_lock_irqsave(&qp->q_lock, flags);
3382 
3383 	if (rdma_protocol_roce(&dev->ibdev, 1)) {
3384 		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3385 		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
3386 		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
3387 			spin_unlock_irqrestore(&qp->q_lock, flags);
3388 			*bad_wr = wr;
3389 			DP_DEBUG(dev, QEDR_MSG_CQ,
3390 				 "QP in wrong state! QP icid=0x%x state %d\n",
3391 				 qp->icid, qp->state);
3392 			return -EINVAL;
3393 		}
3394 	}
3395 
3396 	while (wr) {
3397 		rc = __qedr_post_send(ibqp, wr, bad_wr);
3398 		if (rc)
3399 			break;
3400 
3401 		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
3402 
3403 		qedr_inc_sw_prod(&qp->sq);
3404 
3405 		qp->sq.db_data.data.value++;
3406 
3407 		wr = wr->next;
3408 	}
3409 
3410 	/* Trigger doorbell
3411 	 * If there was a failure in the first WR then it will be triggered in
3412 	 * vane. However this is not harmful (as long as the producer value is
3413 	 * unchanged). For performance reasons we avoid checking for this
3414 	 * redundant doorbell.
3415 	 *
3416 	 * qp->wqe_wr_id is accessed during qedr_poll_cq, as
3417 	 * soon as we give the doorbell, we could get a completion
3418 	 * for this wr, therefore we need to make sure that the
3419 	 * memory is updated before giving the doorbell.
3420 	 * During qedr_poll_cq, rmb is called before accessing the
3421 	 * cqe. This covers for the smp_rmb as well.
3422 	 */
3423 	smp_wmb();
3424 	writel(qp->sq.db_data.raw, qp->sq.db);
3425 
3426 	spin_unlock_irqrestore(&qp->q_lock, flags);
3427 
3428 	return rc;
3429 }
3430 
3431 static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
3432 {
3433 	u32 used;
3434 
3435 	/* Calculate number of elements used based on producer
3436 	 * count and consumer count and subtract it from max
3437 	 * work request supported so that we get elements left.
3438 	 */
3439 	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
3440 
3441 	return hw_srq->max_wr - used;
3442 }
3443 
3444 int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
3445 		       const struct ib_recv_wr **bad_wr)
3446 {
3447 	struct qedr_srq *srq = get_qedr_srq(ibsrq);
3448 	struct qedr_srq_hwq_info *hw_srq;
3449 	struct qedr_dev *dev = srq->dev;
3450 	struct qed_chain *pbl;
3451 	unsigned long flags;
3452 	int status = 0;
3453 	u32 num_sge;
3454 	u32 offset;
3455 
3456 	spin_lock_irqsave(&srq->lock, flags);
3457 
3458 	hw_srq = &srq->hw_srq;
3459 	pbl = &srq->hw_srq.pbl;
3460 	while (wr) {
3461 		struct rdma_srq_wqe_header *hdr;
3462 		int i;
3463 
3464 		if (!qedr_srq_elem_left(hw_srq) ||
3465 		    wr->num_sge > srq->hw_srq.max_sges) {
3466 			DP_ERR(dev, "Can't post WR  (%d,%d) || (%d > %d)\n",
3467 			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
3468 			       wr->num_sge, srq->hw_srq.max_sges);
3469 			status = -ENOMEM;
3470 			*bad_wr = wr;
3471 			break;
3472 		}
3473 
3474 		hdr = qed_chain_produce(pbl);
3475 		num_sge = wr->num_sge;
3476 		/* Set number of sge and work request id in header */
3477 		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
3478 
3479 		srq->hw_srq.wr_prod_cnt++;
3480 		hw_srq->wqe_prod++;
3481 		hw_srq->sge_prod++;
3482 
3483 		DP_DEBUG(dev, QEDR_MSG_SRQ,
3484 			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
3485 			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
3486 
3487 		for (i = 0; i < wr->num_sge; i++) {
3488 			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
3489 
3490 			/* Set SGE length, lkey and address */
3491 			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
3492 				    wr->sg_list[i].length, wr->sg_list[i].lkey);
3493 
3494 			DP_DEBUG(dev, QEDR_MSG_SRQ,
3495 				 "[%d]: len %d key %x addr %x:%x\n",
3496 				 i, srq_sge->length, srq_sge->l_key,
3497 				 srq_sge->addr.hi, srq_sge->addr.lo);
3498 			hw_srq->sge_prod++;
3499 		}
3500 
3501 		/* Flush WQE and SGE information before
3502 		 * updating producer.
3503 		 */
3504 		wmb();
3505 
3506 		/* SRQ producer is 8 bytes. Need to update SGE producer index
3507 		 * in first 4 bytes and need to update WQE producer in
3508 		 * next 4 bytes.
3509 		 */
3510 		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
3511 		offset = offsetof(struct rdma_srq_producers, wqe_prod);
3512 		*((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) =
3513 			hw_srq->wqe_prod;
3514 
3515 		/* Flush producer after updating it. */
3516 		wmb();
3517 		wr = wr->next;
3518 	}
3519 
3520 	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
3521 		 qed_chain_get_elem_left(pbl));
3522 	spin_unlock_irqrestore(&srq->lock, flags);
3523 
3524 	return status;
3525 }
3526 
3527 int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
3528 		   const struct ib_recv_wr **bad_wr)
3529 {
3530 	struct qedr_qp *qp = get_qedr_qp(ibqp);
3531 	struct qedr_dev *dev = qp->dev;
3532 	unsigned long flags;
3533 	int status = 0;
3534 
3535 	if (qp->qp_type == IB_QPT_GSI)
3536 		return qedr_gsi_post_recv(ibqp, wr, bad_wr);
3537 
3538 	spin_lock_irqsave(&qp->q_lock, flags);
3539 
3540 	if (qp->state == QED_ROCE_QP_STATE_RESET) {
3541 		spin_unlock_irqrestore(&qp->q_lock, flags);
3542 		*bad_wr = wr;
3543 		return -EINVAL;
3544 	}
3545 
3546 	while (wr) {
3547 		int i;
3548 
3549 		if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
3550 		    QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
3551 		    wr->num_sge > qp->rq.max_sges) {
3552 			DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
3553 			       qed_chain_get_elem_left_u32(&qp->rq.pbl),
3554 			       QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
3555 			       qp->rq.max_sges);
3556 			status = -ENOMEM;
3557 			*bad_wr = wr;
3558 			break;
3559 		}
3560 		for (i = 0; i < wr->num_sge; i++) {
3561 			u32 flags = 0;
3562 			struct rdma_rq_sge *rqe =
3563 			    qed_chain_produce(&qp->rq.pbl);
3564 
3565 			/* First one must include the number
3566 			 * of SGE in the list
3567 			 */
3568 			if (!i)
3569 				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
3570 					  wr->num_sge);
3571 
3572 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO,
3573 				  wr->sg_list[i].lkey);
3574 
3575 			RQ_SGE_SET(rqe, wr->sg_list[i].addr,
3576 				   wr->sg_list[i].length, flags);
3577 		}
3578 
3579 		/* Special case of no sges. FW requires between 1-4 sges...
3580 		 * in this case we need to post 1 sge with length zero. this is
3581 		 * because rdma write with immediate consumes an RQ.
3582 		 */
3583 		if (!wr->num_sge) {
3584 			u32 flags = 0;
3585 			struct rdma_rq_sge *rqe =
3586 			    qed_chain_produce(&qp->rq.pbl);
3587 
3588 			/* First one must include the number
3589 			 * of SGE in the list
3590 			 */
3591 			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY_LO, 0);
3592 			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
3593 
3594 			RQ_SGE_SET(rqe, 0, 0, flags);
3595 			i = 1;
3596 		}
3597 
3598 		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
3599 		qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
3600 
3601 		qedr_inc_sw_prod(&qp->rq);
3602 
3603 		/* qp->rqe_wr_id is accessed during qedr_poll_cq, as
3604 		 * soon as we give the doorbell, we could get a completion
3605 		 * for this wr, therefore we need to make sure that the
3606 		 * memory is update before giving the doorbell.
3607 		 * During qedr_poll_cq, rmb is called before accessing the
3608 		 * cqe. This covers for the smp_rmb as well.
3609 		 */
3610 		smp_wmb();
3611 
3612 		qp->rq.db_data.data.value++;
3613 
3614 		writel(qp->rq.db_data.raw, qp->rq.db);
3615 
3616 		if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
3617 			writel(qp->rq.iwarp_db2_data.raw, qp->rq.iwarp_db2);
3618 		}
3619 
3620 		wr = wr->next;
3621 	}
3622 
3623 	spin_unlock_irqrestore(&qp->q_lock, flags);
3624 
3625 	return status;
3626 }
3627 
3628 static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
3629 {
3630 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3631 
3632 	return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) ==
3633 		cq->pbl_toggle;
3634 }
3635 
3636 static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
3637 {
3638 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3639 	struct qedr_qp *qp;
3640 
3641 	qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi,
3642 						   resp_cqe->qp_handle.lo,
3643 						   u64);
3644 	return qp;
3645 }
3646 
3647 static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
3648 {
3649 	struct rdma_cqe_requester *resp_cqe = &cqe->req;
3650 
3651 	return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE);
3652 }
3653 
3654 /* Return latest CQE (needs processing) */
3655 static union rdma_cqe *get_cqe(struct qedr_cq *cq)
3656 {
3657 	return cq->latest_cqe;
3658 }
3659 
3660 /* In fmr we need to increase the number of fmr completed counter for the fmr
3661  * algorithm determining whether we can free a pbl or not.
3662  * we need to perform this whether the work request was signaled or not. for
3663  * this purpose we call this function from the condition that checks if a wr
3664  * should be skipped, to make sure we don't miss it ( possibly this fmr
3665  * operation was not signalted)
3666  */
3667 static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
3668 {
3669 	if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR)
3670 		qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3671 }
3672 
3673 static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
3674 		       struct qedr_cq *cq, int num_entries,
3675 		       struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
3676 		       int force)
3677 {
3678 	u16 cnt = 0;
3679 
3680 	while (num_entries && qp->sq.wqe_cons != hw_cons) {
3681 		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
3682 			qedr_chk_if_fmr(qp);
3683 			/* skip WC */
3684 			goto next_cqe;
3685 		}
3686 
3687 		/* fill WC */
3688 		wc->status = status;
3689 		wc->vendor_err = 0;
3690 		wc->wc_flags = 0;
3691 		wc->src_qp = qp->id;
3692 		wc->qp = &qp->ibqp;
3693 
3694 		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
3695 		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
3696 
3697 		switch (wc->opcode) {
3698 		case IB_WC_RDMA_WRITE:
3699 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3700 			break;
3701 		case IB_WC_COMP_SWAP:
3702 		case IB_WC_FETCH_ADD:
3703 			wc->byte_len = 8;
3704 			break;
3705 		case IB_WC_REG_MR:
3706 			qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
3707 			break;
3708 		case IB_WC_RDMA_READ:
3709 		case IB_WC_SEND:
3710 			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
3711 			break;
3712 		default:
3713 			break;
3714 		}
3715 
3716 		num_entries--;
3717 		wc++;
3718 		cnt++;
3719 next_cqe:
3720 		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
3721 			qed_chain_consume(&qp->sq.pbl);
3722 		qedr_inc_sw_cons(&qp->sq);
3723 	}
3724 
3725 	return cnt;
3726 }
3727 
3728 static int qedr_poll_cq_req(struct qedr_dev *dev,
3729 			    struct qedr_qp *qp, struct qedr_cq *cq,
3730 			    int num_entries, struct ib_wc *wc,
3731 			    struct rdma_cqe_requester *req)
3732 {
3733 	int cnt = 0;
3734 
3735 	switch (req->status) {
3736 	case RDMA_CQE_REQ_STS_OK:
3737 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3738 				  IB_WC_SUCCESS, 0);
3739 		break;
3740 	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
3741 		if (qp->state != QED_ROCE_QP_STATE_ERR)
3742 			DP_DEBUG(dev, QEDR_MSG_CQ,
3743 				 "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3744 				 cq->icid, qp->icid);
3745 		cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons,
3746 				  IB_WC_WR_FLUSH_ERR, 1);
3747 		break;
3748 	default:
3749 		/* process all WQE before the cosumer */
3750 		qp->state = QED_ROCE_QP_STATE_ERR;
3751 		cnt = process_req(dev, qp, cq, num_entries, wc,
3752 				  req->sq_cons - 1, IB_WC_SUCCESS, 0);
3753 		wc += cnt;
3754 		/* if we have extra WC fill it with actual error info */
3755 		if (cnt < num_entries) {
3756 			enum ib_wc_status wc_status;
3757 
3758 			switch (req->status) {
3759 			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
3760 				DP_ERR(dev,
3761 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3762 				       cq->icid, qp->icid);
3763 				wc_status = IB_WC_BAD_RESP_ERR;
3764 				break;
3765 			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
3766 				DP_ERR(dev,
3767 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3768 				       cq->icid, qp->icid);
3769 				wc_status = IB_WC_LOC_LEN_ERR;
3770 				break;
3771 			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
3772 				DP_ERR(dev,
3773 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3774 				       cq->icid, qp->icid);
3775 				wc_status = IB_WC_LOC_QP_OP_ERR;
3776 				break;
3777 			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
3778 				DP_ERR(dev,
3779 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3780 				       cq->icid, qp->icid);
3781 				wc_status = IB_WC_LOC_PROT_ERR;
3782 				break;
3783 			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
3784 				DP_ERR(dev,
3785 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3786 				       cq->icid, qp->icid);
3787 				wc_status = IB_WC_MW_BIND_ERR;
3788 				break;
3789 			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
3790 				DP_ERR(dev,
3791 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3792 				       cq->icid, qp->icid);
3793 				wc_status = IB_WC_REM_INV_REQ_ERR;
3794 				break;
3795 			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
3796 				DP_ERR(dev,
3797 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3798 				       cq->icid, qp->icid);
3799 				wc_status = IB_WC_REM_ACCESS_ERR;
3800 				break;
3801 			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
3802 				DP_ERR(dev,
3803 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3804 				       cq->icid, qp->icid);
3805 				wc_status = IB_WC_REM_OP_ERR;
3806 				break;
3807 			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
3808 				DP_ERR(dev,
3809 				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3810 				       cq->icid, qp->icid);
3811 				wc_status = IB_WC_RNR_RETRY_EXC_ERR;
3812 				break;
3813 			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
3814 				DP_ERR(dev,
3815 				       "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3816 				       cq->icid, qp->icid);
3817 				wc_status = IB_WC_RETRY_EXC_ERR;
3818 				break;
3819 			default:
3820 				DP_ERR(dev,
3821 				       "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
3822 				       cq->icid, qp->icid);
3823 				wc_status = IB_WC_GENERAL_ERR;
3824 			}
3825 			cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons,
3826 					   wc_status, 1);
3827 		}
3828 	}
3829 
3830 	return cnt;
3831 }
3832 
3833 static inline int qedr_cqe_resp_status_to_ib(u8 status)
3834 {
3835 	switch (status) {
3836 	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
3837 		return IB_WC_LOC_ACCESS_ERR;
3838 	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
3839 		return IB_WC_LOC_LEN_ERR;
3840 	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
3841 		return IB_WC_LOC_QP_OP_ERR;
3842 	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
3843 		return IB_WC_LOC_PROT_ERR;
3844 	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
3845 		return IB_WC_MW_BIND_ERR;
3846 	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
3847 		return IB_WC_REM_INV_RD_REQ_ERR;
3848 	case RDMA_CQE_RESP_STS_OK:
3849 		return IB_WC_SUCCESS;
3850 	default:
3851 		return IB_WC_GENERAL_ERR;
3852 	}
3853 }
3854 
3855 static inline int qedr_set_ok_cqe_resp_wc(struct rdma_cqe_responder *resp,
3856 					  struct ib_wc *wc)
3857 {
3858 	wc->status = IB_WC_SUCCESS;
3859 	wc->byte_len = le32_to_cpu(resp->length);
3860 
3861 	if (resp->flags & QEDR_RESP_IMM) {
3862 		wc->ex.imm_data = cpu_to_be32(le32_to_cpu(resp->imm_data_or_inv_r_Key));
3863 		wc->wc_flags |= IB_WC_WITH_IMM;
3864 
3865 		if (resp->flags & QEDR_RESP_RDMA)
3866 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
3867 
3868 		if (resp->flags & QEDR_RESP_INV)
3869 			return -EINVAL;
3870 
3871 	} else if (resp->flags & QEDR_RESP_INV) {
3872 		wc->ex.imm_data = le32_to_cpu(resp->imm_data_or_inv_r_Key);
3873 		wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3874 
3875 		if (resp->flags & QEDR_RESP_RDMA)
3876 			return -EINVAL;
3877 
3878 	} else if (resp->flags & QEDR_RESP_RDMA) {
3879 		return -EINVAL;
3880 	}
3881 
3882 	return 0;
3883 }
3884 
3885 static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3886 			       struct qedr_cq *cq, struct ib_wc *wc,
3887 			       struct rdma_cqe_responder *resp, u64 wr_id)
3888 {
3889 	/* Must fill fields before qedr_set_ok_cqe_resp_wc() */
3890 	wc->opcode = IB_WC_RECV;
3891 	wc->wc_flags = 0;
3892 
3893 	if (likely(resp->status == RDMA_CQE_RESP_STS_OK)) {
3894 		if (qedr_set_ok_cqe_resp_wc(resp, wc))
3895 			DP_ERR(dev,
3896 			       "CQ %p (icid=%d) has invalid CQE responder flags=0x%x\n",
3897 			       cq, cq->icid, resp->flags);
3898 
3899 	} else {
3900 		wc->status = qedr_cqe_resp_status_to_ib(resp->status);
3901 		if (wc->status == IB_WC_GENERAL_ERR)
3902 			DP_ERR(dev,
3903 			       "CQ %p (icid=%d) contains an invalid CQE status %d\n",
3904 			       cq, cq->icid, resp->status);
3905 	}
3906 
3907 	/* Fill the rest of the WC */
3908 	wc->vendor_err = 0;
3909 	wc->src_qp = qp->id;
3910 	wc->qp = &qp->ibqp;
3911 	wc->wr_id = wr_id;
3912 }
3913 
3914 static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
3915 				struct qedr_cq *cq, struct ib_wc *wc,
3916 				struct rdma_cqe_responder *resp)
3917 {
3918 	struct qedr_srq *srq = qp->srq;
3919 	u64 wr_id;
3920 
3921 	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
3922 			 le32_to_cpu(resp->srq_wr_id.lo), u64);
3923 
3924 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
3925 		wc->status = IB_WC_WR_FLUSH_ERR;
3926 		wc->vendor_err = 0;
3927 		wc->wr_id = wr_id;
3928 		wc->byte_len = 0;
3929 		wc->src_qp = qp->id;
3930 		wc->qp = &qp->ibqp;
3931 		wc->wr_id = wr_id;
3932 	} else {
3933 		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3934 	}
3935 	srq->hw_srq.wr_cons_cnt++;
3936 
3937 	return 1;
3938 }
3939 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
3940 			    struct qedr_cq *cq, struct ib_wc *wc,
3941 			    struct rdma_cqe_responder *resp)
3942 {
3943 	u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3944 
3945 	__process_resp_one(dev, qp, cq, wc, resp, wr_id);
3946 
3947 	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3948 		qed_chain_consume(&qp->rq.pbl);
3949 	qedr_inc_sw_cons(&qp->rq);
3950 
3951 	return 1;
3952 }
3953 
3954 static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
3955 			      int num_entries, struct ib_wc *wc, u16 hw_cons)
3956 {
3957 	u16 cnt = 0;
3958 
3959 	while (num_entries && qp->rq.wqe_cons != hw_cons) {
3960 		/* fill WC */
3961 		wc->status = IB_WC_WR_FLUSH_ERR;
3962 		wc->vendor_err = 0;
3963 		wc->wc_flags = 0;
3964 		wc->src_qp = qp->id;
3965 		wc->byte_len = 0;
3966 		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
3967 		wc->qp = &qp->ibqp;
3968 		num_entries--;
3969 		wc++;
3970 		cnt++;
3971 		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
3972 			qed_chain_consume(&qp->rq.pbl);
3973 		qedr_inc_sw_cons(&qp->rq);
3974 	}
3975 
3976 	return cnt;
3977 }
3978 
3979 static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
3980 				 struct rdma_cqe_responder *resp, int *update)
3981 {
3982 	if (le16_to_cpu(resp->rq_cons_or_srq_id) == qp->rq.wqe_cons) {
3983 		consume_cqe(cq);
3984 		*update |= 1;
3985 	}
3986 }
3987 
/* Handle one responder CQE of an SRQ-attached QP: fill a single WC and
 * consume the CQE.  Returns the number of WCs filled.
 */
static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
				 struct qedr_cq *cq, int num_entries,
				 struct ib_wc *wc,
				 struct rdma_cqe_responder *resp)
{
	int filled = process_resp_one_srq(dev, qp, cq, wc, resp);

	consume_cqe(cq);

	return filled;
}
4000 
4001 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
4002 			     struct qedr_cq *cq, int num_entries,
4003 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
4004 			     int *update)
4005 {
4006 	int cnt;
4007 
4008 	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
4009 		cnt = process_resp_flush(qp, cq, num_entries, wc,
4010 					 resp->rq_cons_or_srq_id);
4011 		try_consume_resp_cqe(cq, qp, resp, update);
4012 	} else {
4013 		cnt = process_resp_one(dev, qp, cq, wc, resp);
4014 		consume_cqe(cq);
4015 		*update |= 1;
4016 	}
4017 
4018 	return cnt;
4019 }
4020 
4021 static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
4022 				struct rdma_cqe_requester *req, int *update)
4023 {
4024 	if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) {
4025 		consume_cqe(cq);
4026 		*update |= 1;
4027 	}
4028 }
4029 
4030 int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
4031 {
4032 	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
4033 	struct qedr_cq *cq = get_qedr_cq(ibcq);
4034 	union rdma_cqe *cqe;
4035 	u32 old_cons, new_cons;
4036 	unsigned long flags;
4037 	int update = 0;
4038 	int done = 0;
4039 
4040 	if (cq->destroyed) {
4041 		DP_ERR(dev,
4042 		       "warning: poll was invoked after destroy for cq %p (icid=%d)\n",
4043 		       cq, cq->icid);
4044 		return 0;
4045 	}
4046 
4047 	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
4048 		return qedr_gsi_poll_cq(ibcq, num_entries, wc);
4049 
4050 	spin_lock_irqsave(&cq->cq_lock, flags);
4051 	cqe = cq->latest_cqe;
4052 	old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4053 	while (num_entries && is_valid_cqe(cq, cqe)) {
4054 		struct qedr_qp *qp;
4055 		int cnt = 0;
4056 
4057 		/* prevent speculative reads of any field of CQE */
4058 		rmb();
4059 
4060 		qp = cqe_get_qp(cqe);
4061 		if (!qp) {
4062 			WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
4063 			break;
4064 		}
4065 
4066 		wc->qp = &qp->ibqp;
4067 
4068 		switch (cqe_get_type(cqe)) {
4069 		case RDMA_CQE_TYPE_REQUESTER:
4070 			cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
4071 					       &cqe->req);
4072 			try_consume_req_cqe(cq, qp, &cqe->req, &update);
4073 			break;
4074 		case RDMA_CQE_TYPE_RESPONDER_RQ:
4075 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
4076 						&cqe->resp, &update);
4077 			break;
4078 		case RDMA_CQE_TYPE_RESPONDER_SRQ:
4079 			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
4080 						    wc, &cqe->resp);
4081 			update = 1;
4082 			break;
4083 		case RDMA_CQE_TYPE_INVALID:
4084 		default:
4085 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
4086 			       cqe_get_type(cqe));
4087 		}
4088 		num_entries -= cnt;
4089 		wc += cnt;
4090 		done += cnt;
4091 
4092 		cqe = get_cqe(cq);
4093 	}
4094 	new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
4095 
4096 	cq->cq_cons += new_cons - old_cons;
4097 
4098 	if (update)
4099 		/* doorbell notifies abount latest VALID entry,
4100 		 * but chain already point to the next INVALID one
4101 		 */
4102 		doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
4103 
4104 	spin_unlock_irqrestore(&cq->cq_lock, flags);
4105 	return done;
4106 }
4107 
4108 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
4109 		     u8 port_num,
4110 		     const struct ib_wc *in_wc,
4111 		     const struct ib_grh *in_grh,
4112 		     const struct ib_mad_hdr *mad_hdr,
4113 		     size_t in_mad_size, struct ib_mad_hdr *out_mad,
4114 		     size_t *out_mad_size, u16 *out_mad_pkey_index)
4115 {
4116 	struct qedr_dev *dev = get_qedr_dev(ibdev);
4117 
4118 	DP_DEBUG(dev, QEDR_MSG_GSI,
4119 		 "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
4120 		 mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod,
4121 		 mad_hdr->class_specific, mad_hdr->class_version,
4122 		 mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status);
4123 	return IB_MAD_RESULT_SUCCESS;
4124 }
4125