xref: /freebsd/sys/dev/irdma/irdma_verbs.c (revision fd6b1cc34e4175c1e223f42540debb74cfe3c3e6)
1 /*-
2  * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3  *
4  * Copyright (c) 2015 - 2023 Intel Corporation
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenFabrics.org BSD license below:
11  *
12  *   Redistribution and use in source and binary forms, with or
13  *   without modification, are permitted provided that the following
14  *   conditions are met:
15  *
16  *    - Redistributions of source code must retain the above
17  *	copyright notice, this list of conditions and the following
18  *	disclaimer.
19  *
20  *    - Redistributions in binary form must reproduce the above
21  *	copyright notice, this list of conditions and the following
22  *	disclaimer in the documentation and/or other materials
23  *	provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include "irdma_main.h"
36 
37 /**
38  * irdma_query_device - get device attributes
39  * @ibdev: device pointer from stack
40  * @props: returning device attributes
41  * @udata: user data
42  */
43 static int
44 irdma_query_device(struct ib_device *ibdev,
45 		   struct ib_device_attr *props,
46 		   struct ib_udata *udata)
47 {
48 	struct irdma_device *iwdev = to_iwdev(ibdev);
49 	struct irdma_pci_f *rf = iwdev->rf;
50 	struct pci_dev *pcidev = iwdev->rf->pcidev;
51 	struct irdma_hw_attrs *hw_attrs = &rf->sc_dev.hw_attrs;
52 
53 	if (udata->inlen || udata->outlen)
54 		return -EINVAL;
55 
56 	memset(props, 0, sizeof(*props));
57 	addrconf_addr_eui48((u8 *)&props->sys_image_guid,
58 			    if_getlladdr(iwdev->netdev));
59 	props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
60 	    irdma_fw_minor_ver(&rf->sc_dev);
61 	props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
62 	    IB_DEVICE_MEM_MGT_EXTENSIONS;
63 	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
64 	props->vendor_id = pcidev->vendor;
65 	props->vendor_part_id = pcidev->device;
66 	props->hw_ver = pcidev->revision;
67 	props->page_size_cap = hw_attrs->page_size_cap;
68 	props->max_mr_size = hw_attrs->max_mr_size;
69 	props->max_qp = rf->max_qp - rf->used_qps;
70 	props->max_qp_wr = hw_attrs->max_qp_wr;
71 	set_max_sge(props, rf);
72 	props->max_cq = rf->max_cq - rf->used_cqs;
73 	props->max_cqe = rf->max_cqe - 1;
74 	props->max_mr = rf->max_mr - rf->used_mrs;
75 	props->max_pd = rf->max_pd - rf->used_pds;
76 	props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
77 	props->max_qp_rd_atom = hw_attrs->max_hw_ird;
78 	props->max_qp_init_rd_atom = hw_attrs->max_hw_ord;
79 	if (rdma_protocol_roce(ibdev, 1)) {
80 		props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
81 		props->max_pkeys = IRDMA_PKEY_TBL_SZ;
82 		props->max_ah = rf->max_ah;
83 		if (hw_attrs->uk_attrs.hw_rev == IRDMA_GEN_2) {
84 			props->max_mcast_grp = rf->max_mcg;
85 			props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
86 			props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX;
87 		}
88 	}
89 	props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR;
90 	if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2)
91 		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
92 
93 	return 0;
94 }
95 
96 static int
97 irdma_mmap_legacy(struct irdma_ucontext *ucontext,
98 		  struct vm_area_struct *vma)
99 {
100 	u64 pfn;
101 
102 	if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
103 		return -EINVAL;
104 
105 	vma->vm_private_data = ucontext;
106 	pfn = ((uintptr_t)ucontext->iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET] +
107 	       pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT;
108 
109 	return rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn, PAGE_SIZE,
110 				 pgprot_noncached(vma->vm_page_prot), NULL);
111 }
112 
/**
 * irdma_mmap_free - release a driver mmap entry
 * @rdma_entry: core mmap entry embedded in the driver entry to free
 */
static void
irdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	/* The core entry is embedded in the driver entry; free the container. */
	kfree(to_irdma_mmap_entry(rdma_entry));
}
120 
121 struct rdma_user_mmap_entry *
122 irdma_user_mmap_entry_insert(struct irdma_ucontext *ucontext, u64 bar_offset,
123 			     enum irdma_mmap_flag mmap_flag, u64 *mmap_offset)
124 {
125 	struct irdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
126 	int ret;
127 
128 	if (!entry)
129 		return NULL;
130 
131 	entry->bar_offset = bar_offset;
132 	entry->mmap_flag = mmap_flag;
133 
134 	ret = rdma_user_mmap_entry_insert(&ucontext->ibucontext,
135 					  &entry->rdma_entry, PAGE_SIZE);
136 	if (ret) {
137 		kfree(entry);
138 		return NULL;
139 	}
140 	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
141 
142 	return &entry->rdma_entry;
143 }
144 
145 /**
146  * irdma_mmap - user memory map
147  * @context: context created during alloc
148  * @vma: kernel info for user memory map
149  */
150 static int
151 irdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
152 {
153 	struct rdma_user_mmap_entry *rdma_entry;
154 	struct irdma_user_mmap_entry *entry;
155 	struct irdma_ucontext *ucontext;
156 	u64 pfn;
157 	int ret;
158 
159 	ucontext = to_ucontext(context);
160 
161 	/* Legacy support for libi40iw with hard-coded mmap key */
162 	if (ucontext->legacy_mode)
163 		return irdma_mmap_legacy(ucontext, vma);
164 
165 	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
166 	if (!rdma_entry) {
167 		irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
168 			    "pgoff[0x%lx] does not have valid entry\n",
169 			    vma->vm_pgoff);
170 		return -EINVAL;
171 	}
172 
173 	entry = to_irdma_mmap_entry(rdma_entry);
174 	irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
175 		    "bar_offset [0x%lx] mmap_flag [%d]\n", entry->bar_offset,
176 		    entry->mmap_flag);
177 
178 	pfn = (entry->bar_offset +
179 	       pci_resource_start(ucontext->iwdev->rf->pcidev, 0)) >> PAGE_SHIFT;
180 
181 	switch (entry->mmap_flag) {
182 	case IRDMA_MMAP_IO_NC:
183 		ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
184 					pgprot_noncached(vma->vm_page_prot),
185 					rdma_entry);
186 		break;
187 	case IRDMA_MMAP_IO_WC:
188 		ret = rdma_user_mmap_io(context, vma, pfn, PAGE_SIZE,
189 					pgprot_writecombine(vma->vm_page_prot),
190 					rdma_entry);
191 		break;
192 	default:
193 		ret = -EINVAL;
194 	}
195 
196 	if (ret)
197 		irdma_debug(&ucontext->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
198 			    "bar_offset [0x%lx] mmap_flag[%d] err[%d]\n",
199 			    entry->bar_offset, entry->mmap_flag, ret);
200 	rdma_user_mmap_entry_put(rdma_entry);
201 
202 	return ret;
203 }
204 
205 /**
206  * irdma_alloc_push_page - allocate a push page for qp
207  * @iwqp: qp pointer
208  */
209 static void
210 irdma_alloc_push_page(struct irdma_qp *iwqp)
211 {
212 	struct irdma_cqp_request *cqp_request;
213 	struct cqp_cmds_info *cqp_info;
214 	struct irdma_device *iwdev = iwqp->iwdev;
215 	struct irdma_sc_qp *qp = &iwqp->sc_qp;
216 	int status;
217 
218 	cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
219 	if (!cqp_request)
220 		return;
221 
222 	cqp_info = &cqp_request->info;
223 	cqp_info->cqp_cmd = IRDMA_OP_MANAGE_PUSH_PAGE;
224 	cqp_info->post_sq = 1;
225 	cqp_info->in.u.manage_push_page.info.push_idx = 0;
226 	cqp_info->in.u.manage_push_page.info.qs_handle =
227 	    qp->vsi->qos[qp->user_pri].qs_handle;
228 	cqp_info->in.u.manage_push_page.info.free_page = 0;
229 	cqp_info->in.u.manage_push_page.info.push_page_type = 0;
230 	cqp_info->in.u.manage_push_page.cqp = &iwdev->rf->cqp.sc_cqp;
231 	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
232 
233 	status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
234 	if (!status && cqp_request->compl_info.op_ret_val <
235 	    iwdev->rf->sc_dev.hw_attrs.max_hw_device_pages) {
236 		qp->push_idx = cqp_request->compl_info.op_ret_val;
237 		qp->push_offset = 0;
238 	}
239 
240 	irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
241 }
242 
243 /**
244  * irdma_get_pbl - Retrieve pbl from a list given a virtual
245  * address
246  * @va: user virtual address
247  * @pbl_list: pbl list to search in (QP's or CQ's)
248  */
249 struct irdma_pbl *
250 irdma_get_pbl(unsigned long va,
251 	      struct list_head *pbl_list)
252 {
253 	struct irdma_pbl *iwpbl;
254 
255 	list_for_each_entry(iwpbl, pbl_list, list) {
256 		if (iwpbl->user_base == va) {
257 			list_del(&iwpbl->list);
258 			iwpbl->on_list = false;
259 			return iwpbl;
260 		}
261 	}
262 
263 	return NULL;
264 }
265 
266 /**
267  * irdma_clean_cqes - clean cq entries for qp
268  * @iwqp: qp ptr (user or kernel)
269  * @iwcq: cq ptr
270  */
271 void
272 irdma_clean_cqes(struct irdma_qp *iwqp, struct irdma_cq *iwcq)
273 {
274 	struct irdma_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
275 	unsigned long flags;
276 
277 	spin_lock_irqsave(&iwcq->lock, flags);
278 	irdma_uk_clean_cq(&iwqp->sc_qp.qp_uk, ukcq);
279 	spin_unlock_irqrestore(&iwcq->lock, flags);
280 }
281 
282 static u64 irdma_compute_push_wqe_offset(struct irdma_device *iwdev, u32 page_idx){
283 	u64 bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
284 
285 	if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) {
286 		/* skip over db page */
287 		bar_off += IRDMA_HW_PAGE_SIZE;
288 		/* skip over reserved space */
289 		bar_off += IRDMA_PF_BAR_RSVD;
290 	}
291 
292 	/* push wqe page */
293 	bar_off += (u64)page_idx * IRDMA_HW_PAGE_SIZE;
294 
295 	return bar_off;
296 }
297 
298 void
299 irdma_remove_push_mmap_entries(struct irdma_qp *iwqp)
300 {
301 	if (iwqp->push_db_mmap_entry) {
302 		rdma_user_mmap_entry_remove(iwqp->push_db_mmap_entry);
303 		iwqp->push_db_mmap_entry = NULL;
304 	}
305 	if (iwqp->push_wqe_mmap_entry) {
306 		rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry);
307 		iwqp->push_wqe_mmap_entry = NULL;
308 	}
309 }
310 
311 static int
312 irdma_setup_push_mmap_entries(struct irdma_ucontext *ucontext,
313 			      struct irdma_qp *iwqp,
314 			      u64 *push_wqe_mmap_key,
315 			      u64 *push_db_mmap_key)
316 {
317 	struct irdma_device *iwdev = ucontext->iwdev;
318 	u64 bar_off;
319 
320 	WARN_ON_ONCE(iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_2);
321 
322 	bar_off = irdma_compute_push_wqe_offset(iwdev, iwqp->sc_qp.push_idx);
323 
324 	iwqp->push_wqe_mmap_entry = irdma_user_mmap_entry_insert(ucontext,
325 								 bar_off, IRDMA_MMAP_IO_WC,
326 								 push_wqe_mmap_key);
327 	if (!iwqp->push_wqe_mmap_entry)
328 		return -ENOMEM;
329 
330 	/* push doorbell page */
331 	bar_off += IRDMA_HW_PAGE_SIZE;
332 	iwqp->push_db_mmap_entry = irdma_user_mmap_entry_insert(ucontext,
333 								bar_off, IRDMA_MMAP_IO_NC,
334 								push_db_mmap_key);
335 	if (!iwqp->push_db_mmap_entry) {
336 		rdma_user_mmap_entry_remove(iwqp->push_wqe_mmap_entry);
337 		return -ENOMEM;
338 	}
339 
340 	return 0;
341 }
342 
343 /**
344  * irdma_setup_virt_qp - setup for allocation of virtual qp
345  * @iwdev: irdma device
346  * @iwqp: qp ptr
347  * @init_info: initialize info to return
348  */
349 void
350 irdma_setup_virt_qp(struct irdma_device *iwdev,
351 		    struct irdma_qp *iwqp,
352 		    struct irdma_qp_init_info *init_info)
353 {
354 	struct irdma_pbl *iwpbl = iwqp->iwpbl;
355 	struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
356 
357 	iwqp->page = qpmr->sq_page;
358 	init_info->shadow_area_pa = qpmr->shadow;
359 	if (iwpbl->pbl_allocated) {
360 		init_info->virtual_map = true;
361 		init_info->sq_pa = qpmr->sq_pbl.idx;
362 		init_info->rq_pa = qpmr->rq_pbl.idx;
363 	} else {
364 		init_info->sq_pa = qpmr->sq_pbl.addr;
365 		init_info->rq_pa = qpmr->rq_pbl.addr;
366 	}
367 }
368 
369 /**
370  * irdma_setup_umode_qp - setup sq and rq size in user mode qp
371  * @udata: user data
372  * @iwdev: iwarp device
373  * @iwqp: qp ptr (user or kernel)
374  * @info: initialize info to return
375  * @init_attr: Initial QP create attributes
376  */
377 int
378 irdma_setup_umode_qp(struct ib_udata *udata,
379 		     struct irdma_device *iwdev,
380 		     struct irdma_qp *iwqp,
381 		     struct irdma_qp_init_info *info,
382 		     struct ib_qp_init_attr *init_attr)
383 {
384 	struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
385 	struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
386 	struct irdma_create_qp_req req = {0};
387 	unsigned long flags;
388 	int ret;
389 
390 	ret = ib_copy_from_udata(&req, udata,
391 				 min(sizeof(req), udata->inlen));
392 	if (ret) {
393 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
394 			    "ib_copy_from_data fail\n");
395 		return ret;
396 	}
397 
398 	iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
399 	iwqp->user_mode = 1;
400 	if (req.user_wqe_bufs) {
401 		info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode;
402 		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
403 		iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
404 					    &ucontext->qp_reg_mem_list);
405 		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
406 
407 		if (!iwqp->iwpbl) {
408 			ret = -ENODATA;
409 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
410 				    "no pbl info\n");
411 			return ret;
412 		}
413 	}
414 
415 	if (!ucontext->use_raw_attrs) {
416 		/**
417 		 * Maintain backward compat with older ABI which passes sq and
418 		 * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr.
419 		 * There is no way to compute the correct value of
420 		 * iwqp->max_send_wr/max_recv_wr in the kernel.
421 		 */
422 		iwqp->max_send_wr = init_attr->cap.max_send_wr;
423 		iwqp->max_recv_wr = init_attr->cap.max_recv_wr;
424 		ukinfo->sq_size = init_attr->cap.max_send_wr;
425 		ukinfo->rq_size = init_attr->cap.max_recv_wr;
426 		irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, &ukinfo->rq_shift);
427 	} else {
428 		ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
429 						   &ukinfo->sq_shift);
430 		if (ret)
431 			return ret;
432 
433 		ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
434 						   &ukinfo->rq_shift);
435 		if (ret)
436 			return ret;
437 
438 		iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
439 		iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
440 		ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
441 		ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
442 	}
443 	if (req.comp_mask & IRDMA_CREATE_QP_USE_START_WQE_IDX &&
444 	    iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE)
445 		ukinfo->start_wqe_idx = 4;
446 	irdma_setup_virt_qp(iwdev, iwqp, info);
447 
448 	return 0;
449 }
450 
451 /**
452  * irdma_setup_kmode_qp - setup initialization for kernel mode qp
453  * @iwdev: iwarp device
454  * @iwqp: qp ptr (user or kernel)
455  * @info: initialize info to return
456  * @init_attr: Initial QP create attributes
457  */
458 int
459 irdma_setup_kmode_qp(struct irdma_device *iwdev,
460 		     struct irdma_qp *iwqp,
461 		     struct irdma_qp_init_info *info,
462 		     struct ib_qp_init_attr *init_attr)
463 {
464 	struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem;
465 	u32 size;
466 	int status;
467 	struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
468 
469 	status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
470 					      &ukinfo->sq_shift);
471 	if (status)
472 		return status;
473 
474 	status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
475 					      &ukinfo->rq_shift);
476 	if (status)
477 		return status;
478 
479 	iwqp->kqp.sq_wrid_mem =
480 	    kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL);
481 	if (!iwqp->kqp.sq_wrid_mem)
482 		return -ENOMEM;
483 
484 	iwqp->kqp.rq_wrid_mem =
485 	    kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL);
486 	if (!iwqp->kqp.rq_wrid_mem) {
487 		kfree(iwqp->kqp.sq_wrid_mem);
488 		iwqp->kqp.sq_wrid_mem = NULL;
489 		return -ENOMEM;
490 	}
491 
492 	iwqp->kqp.sig_trk_mem = kcalloc(ukinfo->sq_depth, sizeof(u32), GFP_KERNEL);
493 	memset(iwqp->kqp.sig_trk_mem, 0, ukinfo->sq_depth * sizeof(u32));
494 	if (!iwqp->kqp.sig_trk_mem) {
495 		kfree(iwqp->kqp.sq_wrid_mem);
496 		iwqp->kqp.sq_wrid_mem = NULL;
497 		kfree(iwqp->kqp.rq_wrid_mem);
498 		iwqp->kqp.rq_wrid_mem = NULL;
499 		return -ENOMEM;
500 	}
501 	ukinfo->sq_sigwrtrk_array = (void *)iwqp->kqp.sig_trk_mem;
502 	ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem;
503 	ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem;
504 
505 	size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE;
506 	size += (IRDMA_SHADOW_AREA_SIZE << 3);
507 
508 	mem->size = size;
509 	mem->va = irdma_allocate_dma_mem(&iwdev->rf->hw, mem, mem->size,
510 					 256);
511 	if (!mem->va) {
512 		kfree(iwqp->kqp.sq_wrid_mem);
513 		iwqp->kqp.sq_wrid_mem = NULL;
514 		kfree(iwqp->kqp.rq_wrid_mem);
515 		iwqp->kqp.rq_wrid_mem = NULL;
516 		return -ENOMEM;
517 	}
518 
519 	ukinfo->sq = mem->va;
520 	info->sq_pa = mem->pa;
521 	ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth];
522 	info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE);
523 	ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem;
524 	info->shadow_area_pa = info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE);
525 	ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
526 	ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
527 	ukinfo->qp_id = iwqp->ibqp.qp_num;
528 
529 	iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
530 	iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
531 	init_attr->cap.max_send_wr = iwqp->max_send_wr;
532 	init_attr->cap.max_recv_wr = iwqp->max_recv_wr;
533 
534 	return 0;
535 }
536 
537 int
538 irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp)
539 {
540 	struct irdma_pci_f *rf = iwqp->iwdev->rf;
541 	struct irdma_cqp_request *cqp_request;
542 	struct cqp_cmds_info *cqp_info;
543 	struct irdma_create_qp_info *qp_info;
544 	int status;
545 
546 	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
547 	if (!cqp_request)
548 		return -ENOMEM;
549 
550 	cqp_info = &cqp_request->info;
551 	qp_info = &cqp_request->info.in.u.qp_create.info;
552 	memset(qp_info, 0, sizeof(*qp_info));
553 	qp_info->mac_valid = true;
554 	qp_info->cq_num_valid = true;
555 	qp_info->next_iwarp_state = IRDMA_QP_STATE_IDLE;
556 
557 	cqp_info->cqp_cmd = IRDMA_OP_QP_CREATE;
558 	cqp_info->post_sq = 1;
559 	cqp_info->in.u.qp_create.qp = &iwqp->sc_qp;
560 	cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
561 	status = irdma_handle_cqp_op(rf, cqp_request);
562 	irdma_put_cqp_request(&rf->cqp, cqp_request);
563 
564 	return status;
565 }
566 
567 void
568 irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
569 				   struct irdma_qp_host_ctx_info *ctx_info)
570 {
571 	struct irdma_device *iwdev = iwqp->iwdev;
572 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
573 	struct irdma_roce_offload_info *roce_info;
574 	struct irdma_udp_offload_info *udp_info;
575 
576 	udp_info = &iwqp->udp_info;
577 	udp_info->snd_mss = ib_mtu_enum_to_int(ib_mtu_int_to_enum(iwdev->vsi.mtu));
578 	udp_info->cwnd = iwdev->roce_cwnd;
579 	udp_info->rexmit_thresh = 2;
580 	udp_info->rnr_nak_thresh = 2;
581 	udp_info->src_port = 0xc000;
582 	udp_info->dst_port = ROCE_V2_UDP_DPORT;
583 	roce_info = &iwqp->roce_info;
584 	ether_addr_copy(roce_info->mac_addr, if_getlladdr(iwdev->netdev));
585 
586 	roce_info->rd_en = true;
587 	roce_info->wr_rdresp_en = true;
588 	roce_info->dcqcn_en = false;
589 	roce_info->rtomin = iwdev->roce_rtomin;
590 
591 	roce_info->ack_credits = iwdev->roce_ackcreds;
592 	roce_info->ird_size = dev->hw_attrs.max_hw_ird;
593 	roce_info->ord_size = dev->hw_attrs.max_hw_ord;
594 
595 	if (!iwqp->user_mode) {
596 		roce_info->priv_mode_en = true;
597 		roce_info->fast_reg_en = true;
598 		roce_info->udprivcq_en = true;
599 	}
600 	roce_info->roce_tver = 0;
601 
602 	ctx_info->roce_info = &iwqp->roce_info;
603 	ctx_info->udp_info = &iwqp->udp_info;
604 	irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
605 }
606 
607 void
608 irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp,
609 				 struct irdma_qp_host_ctx_info *ctx_info)
610 {
611 	struct irdma_device *iwdev = iwqp->iwdev;
612 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
613 	struct irdma_iwarp_offload_info *iwarp_info;
614 
615 	iwarp_info = &iwqp->iwarp_info;
616 	ether_addr_copy(iwarp_info->mac_addr, if_getlladdr(iwdev->netdev));
617 	iwarp_info->rd_en = true;
618 	iwarp_info->wr_rdresp_en = true;
619 	iwarp_info->ecn_en = true;
620 	iwarp_info->rtomin = 5;
621 
622 	if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
623 		iwarp_info->ib_rd_en = true;
624 	if (!iwqp->user_mode) {
625 		iwarp_info->priv_mode_en = true;
626 		iwarp_info->fast_reg_en = true;
627 	}
628 	iwarp_info->ddp_ver = 1;
629 	iwarp_info->rdmap_ver = 1;
630 
631 	ctx_info->iwarp_info = &iwqp->iwarp_info;
632 	ctx_info->iwarp_info_valid = true;
633 	irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
634 	ctx_info->iwarp_info_valid = false;
635 }
636 
637 int
638 irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
639 			struct irdma_device *iwdev)
640 {
641 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
642 	struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
643 
644 	if (init_attr->create_flags)
645 		return -EOPNOTSUPP;
646 
647 	if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
648 	    init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
649 	    init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
650 	    init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta ||
651 	    init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags)
652 		return -EINVAL;
653 
654 	if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
655 		if (init_attr->qp_type != IB_QPT_RC &&
656 		    init_attr->qp_type != IB_QPT_UD &&
657 		    init_attr->qp_type != IB_QPT_GSI)
658 			return -EOPNOTSUPP;
659 	} else {
660 		if (init_attr->qp_type != IB_QPT_RC)
661 			return -EOPNOTSUPP;
662 	}
663 
664 	return 0;
665 }
666 
667 void
668 irdma_sched_qp_flush_work(struct irdma_qp *iwqp)
669 {
670 	unsigned long flags;
671 
672 	if (iwqp->sc_qp.qp_uk.destroy_pending)
673 		return;
674 	irdma_qp_add_ref(&iwqp->ibqp);
675 	spin_lock_irqsave(&iwqp->dwork_flush_lock, flags);
676 	if (mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
677 			     msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS)))
678 		irdma_qp_rem_ref(&iwqp->ibqp);
679 	spin_unlock_irqrestore(&iwqp->dwork_flush_lock, flags);
680 }
681 
682 void
683 irdma_flush_worker(struct work_struct *work)
684 {
685 	struct delayed_work *dwork = to_delayed_work(work);
686 	struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
687 
688 	irdma_generate_flush_completions(iwqp);
689 	/* For the add in irdma_sched_qp_flush_work */
690 	irdma_qp_rem_ref(&iwqp->ibqp);
691 }
692 
693 static int
694 irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
695 {
696 	int acc_flags = 0;
697 
698 	if (rdma_protocol_roce(iwqp->ibqp.device, 1)) {
699 		if (iwqp->roce_info.wr_rdresp_en) {
700 			acc_flags |= IB_ACCESS_LOCAL_WRITE;
701 			acc_flags |= IB_ACCESS_REMOTE_WRITE;
702 		}
703 		if (iwqp->roce_info.rd_en)
704 			acc_flags |= IB_ACCESS_REMOTE_READ;
705 	} else {
706 		if (iwqp->iwarp_info.wr_rdresp_en) {
707 			acc_flags |= IB_ACCESS_LOCAL_WRITE;
708 			acc_flags |= IB_ACCESS_REMOTE_WRITE;
709 		}
710 		if (iwqp->iwarp_info.rd_en)
711 			acc_flags |= IB_ACCESS_REMOTE_READ;
712 	}
713 	return acc_flags;
714 }
715 
716 /**
717  * irdma_query_qp - query qp attributes
718  * @ibqp: qp pointer
719  * @attr: attributes pointer
720  * @attr_mask: Not used
721  * @init_attr: qp attributes to return
722  */
723 static int
724 irdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
725 	       int attr_mask, struct ib_qp_init_attr *init_attr)
726 {
727 	struct irdma_qp *iwqp = to_iwqp(ibqp);
728 	struct irdma_sc_qp *qp = &iwqp->sc_qp;
729 
730 	memset(attr, 0, sizeof(*attr));
731 	memset(init_attr, 0, sizeof(*init_attr));
732 
733 	attr->qp_state = iwqp->ibqp_state;
734 	attr->cur_qp_state = iwqp->ibqp_state;
735 	attr->cap.max_send_wr = iwqp->max_send_wr;
736 	attr->cap.max_recv_wr = iwqp->max_recv_wr;
737 	attr->cap.max_inline_data = qp->qp_uk.max_inline_data;
738 	attr->cap.max_send_sge = qp->qp_uk.max_sq_frag_cnt;
739 	attr->cap.max_recv_sge = qp->qp_uk.max_rq_frag_cnt;
740 	attr->qp_access_flags = irdma_get_ib_acc_flags(iwqp);
741 	attr->port_num = 1;
742 	if (rdma_protocol_roce(ibqp->device, 1)) {
743 		attr->path_mtu = ib_mtu_int_to_enum(iwqp->udp_info.snd_mss);
744 		attr->qkey = iwqp->roce_info.qkey;
745 		attr->rq_psn = iwqp->udp_info.epsn;
746 		attr->sq_psn = iwqp->udp_info.psn_nxt;
747 		attr->dest_qp_num = iwqp->roce_info.dest_qp;
748 		attr->pkey_index = iwqp->roce_info.p_key;
749 		attr->retry_cnt = iwqp->udp_info.rexmit_thresh;
750 		attr->rnr_retry = iwqp->udp_info.rnr_nak_thresh;
751 		attr->max_rd_atomic = iwqp->roce_info.ord_size;
752 		attr->max_dest_rd_atomic = iwqp->roce_info.ird_size;
753 	}
754 
755 	init_attr->event_handler = iwqp->ibqp.event_handler;
756 	init_attr->qp_context = iwqp->ibqp.qp_context;
757 	init_attr->send_cq = iwqp->ibqp.send_cq;
758 	init_attr->recv_cq = iwqp->ibqp.recv_cq;
759 	init_attr->cap = attr->cap;
760 
761 	return 0;
762 }
763 
764 static int
765 irdma_wait_for_suspend(struct irdma_qp *iwqp)
766 {
767 	if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
768 				!iwqp->suspend_pending,
769 				msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
770 		iwqp->suspend_pending = false;
771 		irdma_dev_warn(&iwqp->iwdev->ibdev,
772 			       "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
773 			       iwqp->ibqp.qp_num, iwqp->last_aeq);
774 		return -EBUSY;
775 	}
776 
777 	return 0;
778 }
779 
780 /**
781  * irdma_modify_qp_roce - modify qp request
782  * @ibqp: qp's pointer for modify
783  * @attr: access attributes
784  * @attr_mask: state mask
785  * @udata: user data
786  */
787 int
788 irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
789 		     int attr_mask, struct ib_udata *udata)
790 {
791 #define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
792 #define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
793 	struct irdma_pd *iwpd = to_iwpd(ibqp->pd);
794 	struct irdma_qp *iwqp = to_iwqp(ibqp);
795 	struct irdma_device *iwdev = iwqp->iwdev;
796 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
797 	struct irdma_qp_host_ctx_info *ctx_info;
798 	struct irdma_roce_offload_info *roce_info;
799 	struct irdma_udp_offload_info *udp_info;
800 	struct irdma_modify_qp_info info = {0};
801 	struct irdma_modify_qp_resp uresp = {};
802 	struct irdma_modify_qp_req ureq;
803 	unsigned long flags;
804 	u8 issue_modify_qp = 0;
805 	int ret = 0;
806 
807 	ctx_info = &iwqp->ctx_info;
808 	roce_info = &iwqp->roce_info;
809 	udp_info = &iwqp->udp_info;
810 
811 	if (udata) {
812 		if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
813 		    (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
814 			return -EINVAL;
815 	}
816 
817 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
818 		return -EOPNOTSUPP;
819 
820 	if (attr_mask & IB_QP_DEST_QPN)
821 		roce_info->dest_qp = attr->dest_qp_num;
822 
823 	if (attr_mask & IB_QP_PKEY_INDEX) {
824 		ret = irdma_query_pkey(ibqp->device, 0, attr->pkey_index,
825 				       &roce_info->p_key);
826 		if (ret)
827 			return ret;
828 	}
829 
830 	if (attr_mask & IB_QP_QKEY)
831 		roce_info->qkey = attr->qkey;
832 
833 	if (attr_mask & IB_QP_PATH_MTU)
834 		udp_info->snd_mss = ib_mtu_enum_to_int(attr->path_mtu);
835 
836 	if (attr_mask & IB_QP_SQ_PSN) {
837 		udp_info->psn_nxt = attr->sq_psn;
838 		udp_info->lsn = 0xffff;
839 		udp_info->psn_una = attr->sq_psn;
840 		udp_info->psn_max = attr->sq_psn;
841 	}
842 
843 	if (attr_mask & IB_QP_RQ_PSN)
844 		udp_info->epsn = attr->rq_psn;
845 
846 	if (attr_mask & IB_QP_RNR_RETRY)
847 		udp_info->rnr_nak_thresh = attr->rnr_retry;
848 
849 	if (attr_mask & IB_QP_RETRY_CNT)
850 		udp_info->rexmit_thresh = attr->retry_cnt;
851 
852 	ctx_info->roce_info->pd_id = iwpd->sc_pd.pd_id;
853 
854 	if (attr_mask & IB_QP_AV) {
855 		struct irdma_av *av = &iwqp->roce_ah.av;
856 		u16 vlan_id = VLAN_N_VID;
857 		u32 local_ip[4] = {};
858 
859 		memset(&iwqp->roce_ah, 0, sizeof(iwqp->roce_ah));
860 		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
861 			udp_info->ttl = attr->ah_attr.grh.hop_limit;
862 			udp_info->flow_label = attr->ah_attr.grh.flow_label;
863 			udp_info->tos = attr->ah_attr.grh.traffic_class;
864 
865 			udp_info->src_port = kc_rdma_get_udp_sport(udp_info->flow_label,
866 								   ibqp->qp_num,
867 								   roce_info->dest_qp);
868 
869 			irdma_qp_rem_qos(&iwqp->sc_qp);
870 			dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri);
871 			if (iwqp->sc_qp.vsi->dscp_mode)
872 				ctx_info->user_pri =
873 				    iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)];
874 			else
875 				ctx_info->user_pri = rt_tos2priority(udp_info->tos);
876 		}
877 		ret = kc_irdma_set_roce_cm_info(iwqp, attr, &vlan_id);
878 		if (ret)
879 			return ret;
880 		if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri))
881 			return -ENOMEM;
882 		iwqp->sc_qp.user_pri = ctx_info->user_pri;
883 		irdma_qp_add_qos(&iwqp->sc_qp);
884 
885 		if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
886 			vlan_id = 0;
887 		if (vlan_id < VLAN_N_VID) {
888 			udp_info->insert_vlan_tag = true;
889 			udp_info->vlan_tag = vlan_id |
890 			    ctx_info->user_pri << VLAN_PRIO_SHIFT;
891 		} else {
892 			udp_info->insert_vlan_tag = false;
893 		}
894 
895 		av->attrs = attr->ah_attr;
896 		rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
897 		if (av->net_type == RDMA_NETWORK_IPV6) {
898 			__be32 *daddr =
899 			av->dgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32;
900 			__be32 *saddr =
901 			av->sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32;
902 
903 			irdma_copy_ip_ntohl(&udp_info->dest_ip_addr[0], daddr);
904 			irdma_copy_ip_ntohl(&udp_info->local_ipaddr[0], saddr);
905 
906 			udp_info->ipv4 = false;
907 			irdma_copy_ip_ntohl(local_ip, daddr);
908 		} else if (av->net_type == RDMA_NETWORK_IPV4) {
909 			__be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr;
910 			__be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr;
911 
912 			local_ip[0] = ntohl(daddr);
913 
914 			udp_info->ipv4 = true;
915 			udp_info->dest_ip_addr[0] = 0;
916 			udp_info->dest_ip_addr[1] = 0;
917 			udp_info->dest_ip_addr[2] = 0;
918 			udp_info->dest_ip_addr[3] = local_ip[0];
919 
920 			udp_info->local_ipaddr[0] = 0;
921 			udp_info->local_ipaddr[1] = 0;
922 			udp_info->local_ipaddr[2] = 0;
923 			udp_info->local_ipaddr[3] = ntohl(saddr);
924 		} else {
925 			return -EINVAL;
926 		}
927 		udp_info->arp_idx =
928 		    irdma_add_arp(iwdev->rf, local_ip,
929 				  ah_attr_to_dmac(attr->ah_attr));
930 	}
931 
932 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
933 		if (attr->max_rd_atomic > dev->hw_attrs.max_hw_ord) {
934 			irdma_dev_err(&iwdev->ibdev,
935 				      "rd_atomic = %d, above max_hw_ord=%d\n",
936 				      attr->max_rd_atomic,
937 				      dev->hw_attrs.max_hw_ord);
938 			return -EINVAL;
939 		}
940 		if (attr->max_rd_atomic)
941 			roce_info->ord_size = attr->max_rd_atomic;
942 		info.ord_valid = true;
943 	}
944 
945 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
946 		if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) {
947 			irdma_dev_err(&iwdev->ibdev,
948 				      "rd_atomic = %d, above max_hw_ird=%d\n",
949 				      attr->max_rd_atomic,
950 				      dev->hw_attrs.max_hw_ird);
951 			return -EINVAL;
952 		}
953 		if (attr->max_dest_rd_atomic)
954 			roce_info->ird_size = attr->max_dest_rd_atomic;
955 	}
956 
957 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
958 		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
959 			roce_info->wr_rdresp_en = true;
960 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
961 			roce_info->wr_rdresp_en = true;
962 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
963 			roce_info->rd_en = true;
964 	}
965 
966 	wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
967 
968 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
969 		    "caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d attr_mask=0x%x\n",
970 		    __builtin_return_address(0), ibqp->qp_num, attr->qp_state,
971 		    iwqp->ibqp_state, iwqp->iwarp_state, attr_mask);
972 
973 	spin_lock_irqsave(&iwqp->lock, flags);
974 	if (attr_mask & IB_QP_STATE) {
975 		if (!ib_modify_qp_is_ok(iwqp->ibqp_state, attr->qp_state,
976 					iwqp->ibqp.qp_type, attr_mask)) {
977 			irdma_dev_warn(&iwdev->ibdev,
978 				       "modify_qp invalid for qp_id=%d, old_state=0x%x, new_state=0x%x\n",
979 				       iwqp->ibqp.qp_num, iwqp->ibqp_state,
980 				       attr->qp_state);
981 			ret = -EINVAL;
982 			goto exit;
983 		}
984 		info.curr_iwarp_state = iwqp->iwarp_state;
985 
986 		switch (attr->qp_state) {
987 		case IB_QPS_INIT:
988 			if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
989 				ret = -EINVAL;
990 				goto exit;
991 			}
992 
993 			if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
994 				info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
995 				issue_modify_qp = 1;
996 			}
997 			break;
998 		case IB_QPS_RTR:
999 			if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
1000 				ret = -EINVAL;
1001 				goto exit;
1002 			}
1003 			info.arp_cache_idx_valid = true;
1004 			info.cq_num_valid = true;
1005 			info.next_iwarp_state = IRDMA_QP_STATE_RTR;
1006 			issue_modify_qp = 1;
1007 			break;
1008 		case IB_QPS_RTS:
1009 			if (iwqp->ibqp_state < IB_QPS_RTR ||
1010 			    iwqp->ibqp_state == IB_QPS_ERR) {
1011 				ret = -EINVAL;
1012 				goto exit;
1013 			}
1014 
1015 			info.arp_cache_idx_valid = true;
1016 			info.cq_num_valid = true;
1017 			info.ord_valid = true;
1018 			info.next_iwarp_state = IRDMA_QP_STATE_RTS;
1019 			issue_modify_qp = 1;
1020 			if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2)
1021 				iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp);
1022 			udp_info->cwnd = iwdev->roce_cwnd;
1023 			roce_info->ack_credits = iwdev->roce_ackcreds;
1024 			if (iwdev->push_mode && udata &&
1025 			    iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX &&
1026 			    dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
1027 				spin_unlock_irqrestore(&iwqp->lock, flags);
1028 				irdma_alloc_push_page(iwqp);
1029 				spin_lock_irqsave(&iwqp->lock, flags);
1030 			}
1031 			break;
1032 		case IB_QPS_SQD:
1033 			if (iwqp->iwarp_state == IRDMA_QP_STATE_SQD)
1034 				goto exit;
1035 
1036 			if (iwqp->iwarp_state != IRDMA_QP_STATE_RTS) {
1037 				ret = -EINVAL;
1038 				goto exit;
1039 			}
1040 
1041 			info.next_iwarp_state = IRDMA_QP_STATE_SQD;
1042 			issue_modify_qp = 1;
1043 			iwqp->suspend_pending = true;
1044 			break;
1045 		case IB_QPS_SQE:
1046 		case IB_QPS_ERR:
1047 		case IB_QPS_RESET:
1048 			if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
1049 				spin_unlock_irqrestore(&iwqp->lock, flags);
1050 				if (udata && udata->inlen) {
1051 					if (ib_copy_from_udata(&ureq, udata,
1052 							       min(sizeof(ureq), udata->inlen)))
1053 						return -EINVAL;
1054 
1055 					irdma_flush_wqes(iwqp,
1056 							 (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) |
1057 							 (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) |
1058 							 IRDMA_REFLUSH);
1059 				}
1060 				return 0;
1061 			}
1062 
1063 			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
1064 			issue_modify_qp = 1;
1065 			break;
1066 		default:
1067 			ret = -EINVAL;
1068 			goto exit;
1069 		}
1070 
1071 		iwqp->ibqp_state = attr->qp_state;
1072 	}
1073 
1074 	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
1075 	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
1076 	irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
1077 	spin_unlock_irqrestore(&iwqp->lock, flags);
1078 
1079 	if (attr_mask & IB_QP_STATE) {
1080 		if (issue_modify_qp) {
1081 			ctx_info->rem_endpoint_idx = udp_info->arp_idx;
1082 			if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
1083 				return -EINVAL;
1084 			if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
1085 				ret = irdma_wait_for_suspend(iwqp);
1086 				if (ret)
1087 					return ret;
1088 			}
1089 			spin_lock_irqsave(&iwqp->lock, flags);
1090 			if (iwqp->iwarp_state == info.curr_iwarp_state) {
1091 				iwqp->iwarp_state = info.next_iwarp_state;
1092 				iwqp->ibqp_state = attr->qp_state;
1093 			}
1094 			if (iwqp->ibqp_state > IB_QPS_RTS &&
1095 			    !iwqp->flush_issued) {
1096 				spin_unlock_irqrestore(&iwqp->lock, flags);
1097 				irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
1098 						 IRDMA_FLUSH_RQ |
1099 						 IRDMA_FLUSH_WAIT);
1100 				iwqp->flush_issued = 1;
1101 
1102 			} else {
1103 				spin_unlock_irqrestore(&iwqp->lock, flags);
1104 			}
1105 		} else {
1106 			iwqp->ibqp_state = attr->qp_state;
1107 		}
1108 		if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
1109 			struct irdma_ucontext *ucontext;
1110 
1111 			ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
1112 			if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX &&
1113 			    !iwqp->push_wqe_mmap_entry &&
1114 			    !irdma_setup_push_mmap_entries(ucontext, iwqp,
1115 							   &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) {
1116 				uresp.push_valid = 1;
1117 				uresp.push_offset = iwqp->sc_qp.push_offset;
1118 			}
1119 			uresp.rd_fence_rate = iwdev->rd_fence_rate;
1120 			ret = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
1121 								  udata->outlen));
1122 			if (ret) {
1123 				irdma_remove_push_mmap_entries(iwqp);
1124 				irdma_debug(&iwdev->rf->sc_dev,
1125 					    IRDMA_DEBUG_VERBS,
1126 					    "copy_to_udata failed\n");
1127 				return ret;
1128 			}
1129 		}
1130 	}
1131 
1132 	return 0;
1133 exit:
1134 	spin_unlock_irqrestore(&iwqp->lock, flags);
1135 
1136 	return ret;
1137 }
1138 
1139 /**
1140  * irdma_modify_qp - modify qp request
1141  * @ibqp: qp's pointer for modify
1142  * @attr: access attributes
1143  * @attr_mask: state mask
1144  * @udata: user data
1145  */
1146 int
1147 irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1148 		struct ib_udata *udata)
1149 {
     	/*
     	 * Overview: validates the user buffers and attr_mask, maps the
     	 * requested IB state to an IRDMA_QP_STATE_* value under iwqp->lock,
     	 * optionally issues a hardware modify through irdma_hw_modify_qp(),
     	 * then handles connection teardown and, on GEN_2 and later hardware,
     	 * reports push-page mmap keys back to user space.  Works on the
     	 * QP's tcp_info/iwarp_info offload contexts.
     	 *
     	 * Returns 0 on success or a negative errno.
     	 */
1150 #define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush)
1151 #define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid)
1152 	struct irdma_qp *iwqp = to_iwqp(ibqp);
1153 	struct irdma_device *iwdev = iwqp->iwdev;
1154 	struct irdma_sc_dev *dev = &iwdev->rf->sc_dev;
1155 	struct irdma_qp_host_ctx_info *ctx_info;
1156 	struct irdma_tcp_offload_info *tcp_info;
1157 	struct irdma_iwarp_offload_info *offload_info;
1158 	struct irdma_modify_qp_info info = {0};
1159 	struct irdma_modify_qp_resp uresp = {};
1160 	struct irdma_modify_qp_req ureq = {};
1161 	u8 issue_modify_qp = 0;
1162 	u8 dont_wait = 0;
1163 	int err;
1164 	unsigned long flags;
1165 
     	/*
     	 * A user buffer may be absent (length 0), but if present it must be
     	 * at least as long as the minimum request/response layout.
     	 */
1166 	if (udata) {
1167 		if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) ||
1168 		    (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN))
1169 			return -EINVAL;
1170 	}
1171 
     	/* Any attribute bit outside the standard set is refused. */
1172 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1173 		return -EOPNOTSUPP;
1174 
1175 	ctx_info = &iwqp->ctx_info;
1176 	offload_info = &iwqp->iwarp_info;
1177 	tcp_info = &iwqp->tcp_info;
     	/* Sleep until hw_mod_qp_pend reads as zero for this QP. */
1178 	wait_event(iwqp->mod_qp_waitq, !atomic_read(&iwqp->hw_mod_qp_pend));
1179 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
1180 		    "caller: %pS qp_id=%d to_ibqpstate=%d ibqpstate=%d irdma_qpstate=%d last_aeq=%d hw_tcp_state=%d hw_iwarp_state=%d attr_mask=0x%x\n",
1181 		    __builtin_return_address(0), ibqp->qp_num, attr->qp_state,
1182 		    iwqp->ibqp_state, iwqp->iwarp_state, iwqp->last_aeq,
1183 		    iwqp->hw_tcp_state, iwqp->hw_iwarp_state, attr_mask);
1184 
1185 	spin_lock_irqsave(&iwqp->lock, flags);
1186 	if (attr_mask & IB_QP_STATE) {
     		/*
     		 * Remember the state seen now; the post-hardware commit below
     		 * is applied only if iwarp_state still equals this value.
     		 */
1187 		info.curr_iwarp_state = iwqp->iwarp_state;
1188 		switch (attr->qp_state) {
1189 		case IB_QPS_INIT:
1190 		case IB_QPS_RTR:
     			/* Refused once the QP has moved past IDLE. */
1191 			if (iwqp->iwarp_state > IRDMA_QP_STATE_IDLE) {
1192 				err = -EINVAL;
1193 				goto exit;
1194 			}
1195 
     			/* Hardware is only touched for INVALID -> IDLE. */
1196 			if (iwqp->iwarp_state == IRDMA_QP_STATE_INVALID) {
1197 				info.next_iwarp_state = IRDMA_QP_STATE_IDLE;
1198 				issue_modify_qp = 1;
1199 			}
     			/*
     			 * For user QPs on GEN_2+ with push mode enabled and no
     			 * push page yet, allocate one.  The QP lock is released
     			 * around the allocation call.
     			 */
1200 			if (iwdev->push_mode && udata &&
1201 			    iwqp->sc_qp.push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX &&
1202 			    dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
1203 				spin_unlock_irqrestore(&iwqp->lock, flags);
1204 				irdma_alloc_push_page(iwqp);
1205 				spin_lock_irqsave(&iwqp->lock, flags);
1206 			}
1207 			break;
1208 		case IB_QPS_RTS:
     			/* Requires a state no higher than RTS and a cm_id. */
1209 			if (iwqp->iwarp_state > IRDMA_QP_STATE_RTS ||
1210 			    !iwqp->cm_id) {
1211 				err = -EINVAL;
1212 				goto exit;
1213 			}
1214 
1215 			issue_modify_qp = 1;
1216 			iwqp->hw_tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
1217 			iwqp->hte_added = 1;
1218 			info.next_iwarp_state = IRDMA_QP_STATE_RTS;
1219 			info.tcp_ctx_valid = true;
1220 			info.ord_valid = true;
1221 			info.arp_cache_idx_valid = true;
1222 			info.cq_num_valid = true;
1223 			break;
1224 		case IB_QPS_SQD:
     			/*
     			 * SQD is mapped to the CLOSING state.  The first two
     			 * checks return success without doing anything.
     			 */
1225 			if (iwqp->hw_iwarp_state > IRDMA_QP_STATE_RTS) {
1226 				err = 0;
1227 				goto exit;
1228 			}
1229 
1230 			if (iwqp->iwarp_state == IRDMA_QP_STATE_CLOSING ||
1231 			    iwqp->iwarp_state < IRDMA_QP_STATE_RTS) {
1232 				err = 0;
1233 				goto exit;
1234 			}
1235 
1236 			if (iwqp->iwarp_state > IRDMA_QP_STATE_CLOSING) {
1237 				err = -EINVAL;
1238 				goto exit;
1239 			}
1240 
1241 			info.next_iwarp_state = IRDMA_QP_STATE_CLOSING;
1242 			issue_modify_qp = 1;
1243 			break;
1244 		case IB_QPS_SQE:
     			/* SQE is mapped to the TERMINATE state. */
1245 			if (iwqp->iwarp_state >= IRDMA_QP_STATE_TERMINATE) {
1246 				err = -EINVAL;
1247 				goto exit;
1248 			}
1249 
1250 			info.next_iwarp_state = IRDMA_QP_STATE_TERMINATE;
1251 			issue_modify_qp = 1;
1252 			break;
1253 		case IB_QPS_ERR:
1254 		case IB_QPS_RESET:
     			/*
     			 * Already in ERROR: no state change is issued.  If the
     			 * caller supplied a request buffer, re-flush the queues
     			 * it selects (IRDMA_REFLUSH) and return success.
     			 */
1255 			if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
1256 				spin_unlock_irqrestore(&iwqp->lock, flags);
1257 				if (udata && udata->inlen) {
1258 					if (ib_copy_from_udata(&ureq, udata,
1259 							       min(sizeof(ureq), udata->inlen)))
1260 						return -EINVAL;
1261 
1262 					irdma_flush_wqes(iwqp,
1263 							 (ureq.sq_flush ? IRDMA_FLUSH_SQ : 0) |
1264 							 (ureq.rq_flush ? IRDMA_FLUSH_RQ : 0) |
1265 							 IRDMA_REFLUSH);
1266 				}
1267 				return 0;
1268 			}
1269 
     			/*
     			 * When term_flags is set, the terminate timer is
     			 * deleted with the QP lock released.
     			 */
1270 			if (iwqp->sc_qp.term_flags) {
1271 				spin_unlock_irqrestore(&iwqp->lock, flags);
1272 				irdma_terminate_del_timer(&iwqp->sc_qp);
1273 				spin_lock_irqsave(&iwqp->lock, flags);
1274 			}
1275 			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
     			/*
     			 * Request a TCP reset only when the hardware TCP state
     			 * is above CLOSED, is not TIME_WAIT, and iw_status is
     			 * set; otherwise flag dont_wait for the teardown below.
     			 */
1276 			if (iwqp->hw_tcp_state > IRDMA_TCP_STATE_CLOSED &&
1277 			    iwdev->iw_status &&
1278 			    iwqp->hw_tcp_state != IRDMA_TCP_STATE_TIME_WAIT)
1279 				info.reset_tcp_conn = true;
1280 			else
1281 				dont_wait = 1;
1282 
1283 			issue_modify_qp = 1;
     			/* Same value as assigned above; repeated in the original. */
1284 			info.next_iwarp_state = IRDMA_QP_STATE_ERROR;
1285 			break;
1286 		default:
1287 			err = -EINVAL;
1288 			goto exit;
1289 		}
1290 
     		/*
     		 * Record the requested IB state now; it is written again
     		 * after the hardware modify if iwarp_state is unchanged.
     		 */
1291 		iwqp->ibqp_state = attr->qp_state;
1292 	}
     	/* Translate IB access flags into the offload info enables. */
1293 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1294 		ctx_info->iwarp_info_valid = true;
1295 		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
1296 			offload_info->wr_rdresp_en = true;
1297 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
1298 			offload_info->wr_rdresp_en = true;
1299 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
1300 			offload_info->rd_en = true;
1301 	}
1302 
     	/* The host QP context is rewritten only when iwarp_info_valid is set. */
1303 	if (ctx_info->iwarp_info_valid) {
1304 		ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
1305 		ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
1306 		irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
1307 	}
1308 	spin_unlock_irqrestore(&iwqp->lock, flags);
1309 
1310 	if (attr_mask & IB_QP_STATE) {
     		/* Any failure from the hardware modify is reported as -EINVAL. */
1311 		if (issue_modify_qp) {
1312 			ctx_info->rem_endpoint_idx = tcp_info->arp_idx;
1313 			if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
1314 				return -EINVAL;
1315 		}
1316 
     		/*
     		 * Commit the new state only if iwarp_state still holds the
     		 * value sampled before the lock was dropped.
     		 */
1317 		spin_lock_irqsave(&iwqp->lock, flags);
1318 		if (iwqp->iwarp_state == info.curr_iwarp_state) {
1319 			iwqp->iwarp_state = info.next_iwarp_state;
1320 			iwqp->ibqp_state = attr->qp_state;
1321 		}
1322 		spin_unlock_irqrestore(&iwqp->lock, flags);
1323 	}
1324 
     	/* Connection teardown after a hardware modify to a state beyond RTS. */
1325 	if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
1326 		if (dont_wait) {
     			/*
     			 * No TCP reset was requested: mark the TCP state
     			 * CLOSED (if it was non-zero) and disconnect now.
     			 */
1327 			if (iwqp->hw_tcp_state) {
1328 				spin_lock_irqsave(&iwqp->lock, flags);
1329 				iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
1330 				iwqp->last_aeq = IRDMA_AE_RESET_SENT;
1331 				spin_unlock_irqrestore(&iwqp->lock, flags);
1332 			}
1333 			irdma_cm_disconn(iwqp);
1334 		} else {
1335 			int close_timer_started;
1336 
     			/*
     			 * Take a cm_node reference under ht_lock, schedule the
     			 * close timer only on the first increment of
     			 * close_timer_started (and only if cm_id is set), then
     			 * drop the reference again.
     			 */
1337 			spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
1338 
1339 			if (iwqp->cm_node) {
1340 				atomic_inc(&iwqp->cm_node->refcnt);
1341 				spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
1342 				close_timer_started = atomic_inc_return(&iwqp->close_timer_started);
1343 				if (iwqp->cm_id && close_timer_started == 1)
1344 					irdma_schedule_cm_timer(iwqp->cm_node,
1345 								(struct irdma_puda_buf *)iwqp,
1346 								IRDMA_TIMER_TYPE_CLOSE, 1, 0);
1347 
1348 				irdma_rem_ref_cm_node(iwqp->cm_node);
1349 			} else {
1350 				spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
1351 			}
1352 		}
1353 	}
     	/*
     	 * On GEN_2+ with a response buffer, report push-page mmap keys (when
     	 * a push page exists and has not been mapped yet) and rd_fence_rate.
     	 * If the copy to user space fails, the push mmap entries are removed.
     	 */
1354 	if (attr_mask & IB_QP_STATE && udata && udata->outlen &&
1355 	    dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
1356 		struct irdma_ucontext *ucontext;
1357 
1358 		ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
1359 		if (iwqp->sc_qp.push_idx != IRDMA_INVALID_PUSH_PAGE_INDEX &&
1360 		    !iwqp->push_wqe_mmap_entry &&
1361 		    !irdma_setup_push_mmap_entries(ucontext, iwqp,
1362 						   &uresp.push_wqe_mmap_key, &uresp.push_db_mmap_key)) {
1363 			uresp.push_valid = 1;
1364 			uresp.push_offset = iwqp->sc_qp.push_offset;
1365 		}
1366 		uresp.rd_fence_rate = iwdev->rd_fence_rate;
1367 
1368 		err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
1369 							  udata->outlen));
1370 		if (err) {
1371 			irdma_remove_push_mmap_entries(iwqp);
1372 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
1373 				    "copy_to_udata failed\n");
1374 			return err;
1375 		}
1376 	}
1377 
1378 	return 0;
     /* Common exit for paths that leave while still holding iwqp->lock. */
1379 exit:
1380 	spin_unlock_irqrestore(&iwqp->lock, flags);
1381 
1382 	return err;
1383 }
1384 
1385 /**
1386  * irdma_cq_free_rsrc - free up resources for cq
1387  * @rf: RDMA PCI function
1388  * @iwcq: cq ptr
1389  */
1390 void
1391 irdma_cq_free_rsrc(struct irdma_pci_f *rf, struct irdma_cq *iwcq)
1392 {
1393 	struct irdma_sc_cq *cq = &iwcq->sc_cq;
1394 
1395 	if (!iwcq->user_mode) {
1396 		irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem);
1397 		irdma_free_dma_mem(rf->sc_dev.hw, &iwcq->kmem_shadow);
1398 	}
1399 
1400 	irdma_free_rsrc(rf, rf->allocated_cqs, cq->cq_uk.cq_id);
1401 }
1402 
1403 /**
1404  * irdma_free_cqbuf - worker to free a cq buffer
1405  * @work: provides access to the cq buffer to free
1406  */
1407 static void
1408 irdma_free_cqbuf(struct work_struct *work)
1409 {
1410 	struct irdma_cq_buf *cq_buf = container_of(work, struct irdma_cq_buf, work);
1411 
1412 	irdma_free_dma_mem(cq_buf->hw, &cq_buf->kmem_buf);
1413 	kfree(cq_buf);
1414 }
1415 
1416 /**
1417  * irdma_process_resize_list - remove resized cq buffers from the resize_list
1418  * @iwcq: cq which owns the resize_list
1419  * @iwdev: irdma device
1420  * @lcqe_buf: the buffer where the last cqe is received
1421  */
1422 int
1423 irdma_process_resize_list(struct irdma_cq *iwcq,
1424 			  struct irdma_device *iwdev,
1425 			  struct irdma_cq_buf *lcqe_buf)
1426 {
1427 	struct list_head *tmp_node, *list_node;
1428 	struct irdma_cq_buf *cq_buf;
1429 	int cnt = 0;
1430 
1431 	list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) {
1432 		cq_buf = list_entry(list_node, struct irdma_cq_buf, list);
1433 		if (cq_buf == lcqe_buf)
1434 			return cnt;
1435 
1436 		list_del(&cq_buf->list);
1437 		queue_work(iwdev->cleanup_wq, &cq_buf->work);
1438 		cnt++;
1439 	}
1440 
1441 	return cnt;
1442 }
1443 
1444 /**
1445  * irdma_resize_cq - resize cq
1446  * @ibcq: cq to be resized
1447  * @entries: desired cq size
1448  * @udata: user data
1449  */
1450 static int
1451 irdma_resize_cq(struct ib_cq *ibcq, int entries,
1452 		struct ib_udata *udata)
1453 {
     	/*
     	 * Overview: computes the new cq size, locates (user mode) or
     	 * allocates (kernel mode) the new cq buffer, issues an
     	 * IRDMA_OP_CQ_MODIFY command with cq_resize set, and on success
     	 * switches the cq over to the new buffer under iwcq->lock.
     	 *
     	 * Returns 0 on success or a negative errno.
     	 */
1454 #define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer)
1455 	struct irdma_cq *iwcq = to_iwcq(ibcq);
1456 	struct irdma_sc_dev *dev = iwcq->sc_cq.dev;
1457 	struct irdma_cqp_request *cqp_request;
1458 	struct cqp_cmds_info *cqp_info;
1459 	struct irdma_modify_cq_info *m_info;
1460 	struct irdma_modify_cq_info info = {0};
1461 	struct irdma_dma_mem kmem_buf;
1462 	struct irdma_cq_mr *cqmr_buf;
1463 	struct irdma_pbl *iwpbl_buf;
1464 	struct irdma_device *iwdev;
1465 	struct irdma_pci_f *rf;
1466 	struct irdma_cq_buf *cq_buf = NULL;
1467 	unsigned long flags;
1468 	int ret;
1469 
1470 	iwdev = to_iwdev(ibcq->device);
1471 	rf = iwdev->rf;
1472 
     	/* Resize is only available when the device advertises the feature. */
1473 	if (!(rf->sc_dev.hw_attrs.uk_attrs.feature_flags &
1474 	      IRDMA_FEATURE_CQ_RESIZE))
1475 		return -EOPNOTSUPP;
1476 
     	/* A user request must be long enough to carry user_cq_buffer. */
1477 	if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN)
1478 		return -EINVAL;
1479 
1480 	if (entries > rf->max_cqe)
1481 		return -EINVAL;
1482 
     	/*
     	 * For cqs not flagged user_mode the requested count is bumped by
     	 * one and, on GEN_2 and later hardware, doubled.
     	 */
1483 	if (!iwcq->user_mode) {
1484 		entries++;
1485 		if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
1486 			entries *= 2;
1487 	}
1488 
     	/* The cq size is never set below 4 entries. */
1489 	info.cq_size = max(entries, 4);
1490 
     	/* Return early when the computed size equals the current cq_size - 1. */
1491 	if (info.cq_size == iwcq->sc_cq.cq_uk.cq_size - 1)
1492 		return 0;
1493 
1494 	if (udata) {
1495 		struct irdma_resize_cq_req req = {};
1496 		struct irdma_ucontext *ucontext =
1497 		rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
1498 
1499 		/* CQ resize not supported with legacy GEN_1 libi40iw */
1500 		if (ucontext->legacy_mode)
1501 			return -EOPNOTSUPP;
1502 
1503 		if (ib_copy_from_udata(&req, udata,
1504 				       min(sizeof(req), udata->inlen)))
1505 			return -EINVAL;
1506 
     		/*
     		 * Look up the new buffer by its user address in the
     		 * context's cq_reg_mem_list; the list is walked under its
     		 * own lock.  A missing entry is reported as -ENOMEM.
     		 */
1507 		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
1508 		iwpbl_buf = irdma_get_pbl((unsigned long)req.user_cq_buffer,
1509 					  &ucontext->cq_reg_mem_list);
1510 		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
1511 
1512 		if (!iwpbl_buf)
1513 			return -ENOMEM;
1514 
     		/*
     		 * With a PBL allocated the buffer is described by its first
     		 * PBL index; otherwise by a single physical address.
     		 */
1515 		cqmr_buf = &iwpbl_buf->cq_mr;
1516 		if (iwpbl_buf->pbl_allocated) {
1517 			info.virtual_map = true;
1518 			info.pbl_chunk_size = 1;
1519 			info.first_pm_pbl_idx = cqmr_buf->cq_pbl.idx;
1520 		} else {
1521 			info.cq_pa = cqmr_buf->cq_pbl.addr;
1522 		}
1523 	} else {
1524 		/* Kmode CQ resize */
1525 		int rsize;
1526 
     		/*
     		 * Allocate the new cq ring, with its byte size rounded up to
     		 * a multiple of 256.  NOTE(review): the trailing 256 passed to
     		 * irdma_allocate_dma_mem() is presumably the alignment --
     		 * confirm against that helper.
     		 */
1527 		rsize = info.cq_size * sizeof(struct irdma_cqe);
1528 		kmem_buf.size = round_up(rsize, 256);
1529 		kmem_buf.va = irdma_allocate_dma_mem(dev->hw, &kmem_buf,
1530 						     kmem_buf.size, 256);
1531 		if (!kmem_buf.va)
1532 			return -ENOMEM;
1533 
1534 		info.cq_base = kmem_buf.va;
1535 		info.cq_pa = kmem_buf.pa;
     		/* Tracking node used below to hand the old buffer to the resize_list. */
1536 		cq_buf = kzalloc(sizeof(*cq_buf), GFP_KERNEL);
1537 		if (!cq_buf) {
1538 			ret = -ENOMEM;
1539 			goto error;
1540 		}
1541 	}
1542 
1543 	cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
1544 	if (!cqp_request) {
1545 		ret = -ENOMEM;
1546 		goto error;
1547 	}
1548 
1549 	info.shadow_read_threshold = iwcq->sc_cq.shadow_read_threshold;
1550 	info.cq_resize = true;
1551 
     	/* The request carries its own copy of the modify info. */
1552 	cqp_info = &cqp_request->info;
1553 	m_info = &cqp_info->in.u.cq_modify.info;
1554 	memcpy(m_info, &info, sizeof(*m_info));
1555 
1556 	cqp_info->cqp_cmd = IRDMA_OP_CQ_MODIFY;
1557 	cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq;
1558 	cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request;
1559 	cqp_info->post_sq = 1;
1560 	ret = irdma_handle_cqp_op(rf, cqp_request);
1561 	irdma_put_cqp_request(&rf->cqp, cqp_request);
1562 	if (ret)
1563 		goto error;
1564 
1565 	spin_lock_irqsave(&iwcq->lock, flags);
     	/*
     	 * Kernel mode only (cq_buf is NULL for user cqs): snapshot the old
     	 * buffer and cq_uk state into cq_buf, append it to the resize_list
     	 * with irdma_free_cqbuf as its work handler, and make the new
     	 * buffer the cq's kmem.
     	 */
1566 	if (cq_buf) {
1567 		cq_buf->kmem_buf = iwcq->kmem;
1568 		cq_buf->hw = dev->hw;
1569 		memcpy(&cq_buf->cq_uk, &iwcq->sc_cq.cq_uk, sizeof(cq_buf->cq_uk));
1570 		INIT_WORK(&cq_buf->work, irdma_free_cqbuf);
1571 		list_add_tail(&cq_buf->list, &iwcq->resize_list);
1572 		iwcq->kmem = kmem_buf;
1573 	}
1574 
1575 	irdma_sc_cq_resize(&iwcq->sc_cq, &info);
     	/* The size reported through ibcq->cqe is one less than cq_size. */
1576 	ibcq->cqe = info.cq_size - 1;
1577 	spin_unlock_irqrestore(&iwcq->lock, flags);
1578 
1579 	return 0;
     /*
      * Error unwind: kmem_buf is only valid on the kernel-mode path, so it is
      * freed only when there is no udata.  cq_buf is NULL unless the
      * kernel-mode allocation succeeded.
      */
1580 error:
1581 	if (!udata)
1582 		irdma_free_dma_mem(dev->hw, &kmem_buf);
1583 	kfree(cq_buf);
1584 
1585 	return ret;
1586 }
1587 
1588 /**
1589  * irdma_get_mr_access - get hw MR access permissions from IB access flags
1590  * @access: IB access flags
1591  * @hw_rev: Hardware version
1592  */
1593 static inline u16 irdma_get_mr_access(int access, u8 hw_rev)
1594 {
1595 	u16 hw_access = 0;
1596 
1597 	hw_access |= (access & IB_ACCESS_LOCAL_WRITE) ?
1598 	    IRDMA_ACCESS_FLAGS_LOCALWRITE : 0;
1599 	hw_access |= (access & IB_ACCESS_REMOTE_WRITE) ?
1600 	    IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0;
1601 	hw_access |= (access & IB_ACCESS_REMOTE_READ) ?
1602 	    IRDMA_ACCESS_FLAGS_REMOTEREAD : 0;
1603 	hw_access |= (access & IB_ZERO_BASED) ?
1604 	    IRDMA_ACCESS_FLAGS_ZERO_BASED : 0;
1605 	hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD;
1606 
1607 	return hw_access;
1608 }
1609 
1610 /**
1611  * irdma_free_stag - free stag resource
1612  * @iwdev: irdma device
1613  * @stag: stag to free
1614  */
1615 void
1616 irdma_free_stag(struct irdma_device *iwdev, u32 stag)
1617 {
1618 	u32 stag_idx;
1619 
1620 	stag_idx = (stag & iwdev->rf->mr_stagmask) >> IRDMA_CQPSQ_STAG_IDX_S;
1621 	irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_mrs, stag_idx);
1622 }
1623 
1624 /**
1625  * irdma_create_stag - create random stag
1626  * @iwdev: irdma device
1627  */
1628 u32
1629 irdma_create_stag(struct irdma_device *iwdev)
1630 {
1631 	u32 stag;
1632 	u32 stag_index = 0;
1633 	u32 next_stag_index;
1634 	u32 driver_key;
1635 	u32 random;
1636 	u8 consumer_key;
1637 	int ret;
1638 
1639 	get_random_bytes(&random, sizeof(random));
1640 	consumer_key = (u8)random;
1641 
1642 	driver_key = random & ~iwdev->rf->mr_stagmask;
1643 	next_stag_index = (random & iwdev->rf->mr_stagmask) >> 8;
1644 	next_stag_index %= iwdev->rf->max_mr;
1645 
1646 	ret = irdma_alloc_rsrc(iwdev->rf, iwdev->rf->allocated_mrs,
1647 			       iwdev->rf->max_mr, &stag_index,
1648 			       &next_stag_index);
1649 	if (ret)
1650 		return 0;
1651 	stag = stag_index << IRDMA_CQPSQ_STAG_IDX_S;
1652 	stag |= driver_key;
1653 	stag += (u32)consumer_key;
1654 
1655 	return stag;
1656 }
1657 
/**
 * irdma_check_mem_contiguous - check if pbls stored in arr are contiguous
 * @arr: lvl1 pbl array
 * @npages: page count
 * @pg_size: page size
 *
 * Entry i is expected to equal arr[0] + i * pg_size.  The offset is computed
 * in 64 bits: a 32-bit pg_size * index product wraps once the region reaches
 * 4 GiB, which would make a contiguous region look fragmented.
 *
 * Returns true if every entry matches (trivially true for @npages <= 1),
 * false otherwise.
 */
static bool
irdma_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
{
	u32 pg_idx;

	/* Index 0 always matches itself, so start the comparison at 1. */
	for (pg_idx = 1; pg_idx < npages; pg_idx++) {
		if (arr[0] + (u64)pg_size * pg_idx != arr[pg_idx])
			return false;
	}

	return true;
}
1677 
1678 /**
1679  * irdma_check_mr_contiguous - check if MR is physically contiguous
1680  * @palloc: pbl allocation struct
1681  * @pg_size: page size
1682  */
1683 static bool
1684 irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
1685 			  u32 pg_size)
1686 {
1687 	struct irdma_pble_level2 *lvl2 = &palloc->level2;
1688 	struct irdma_pble_info *leaf = lvl2->leaf;
1689 	u64 *arr = NULL;
1690 	u64 *start_addr = NULL;
1691 	int i;
1692 	bool ret;
1693 
1694 	if (palloc->level == PBLE_LEVEL_1) {
1695 		arr = palloc->level1.addr;
1696 		ret = irdma_check_mem_contiguous(arr, palloc->total_cnt,
1697 						 pg_size);
1698 		return ret;
1699 	}
1700 
1701 	start_addr = leaf->addr;
1702 
1703 	for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
1704 		arr = leaf->addr;
1705 		if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
1706 			return false;
1707 		ret = irdma_check_mem_contiguous(arr, leaf->cnt, pg_size);
1708 		if (!ret)
1709 			return false;
1710 	}
1711 
1712 	return true;
1713 }
1714 
1715 /**
1716  * irdma_setup_pbles - copy user pg address to pble's
1717  * @rf: RDMA PCI function
1718  * @iwmr: mr pointer for this memory registration
1719  * @lvl: requested pble levels
1720  */
1721 static int
1722 irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
1723 		  u8 lvl)
1724 {
1725 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
1726 	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
1727 	struct irdma_pble_info *pinfo;
1728 	u64 *pbl;
1729 	int status;
1730 	enum irdma_pble_level level = PBLE_LEVEL_1;
1731 
1732 	if (lvl) {
1733 		status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
1734 					lvl);
1735 		if (status)
1736 			return status;
1737 
1738 		iwpbl->pbl_allocated = true;
1739 		level = palloc->level;
1740 		pinfo = (level == PBLE_LEVEL_1) ? &palloc->level1 :
1741 		    palloc->level2.leaf;
1742 		pbl = pinfo->addr;
1743 	} else {
1744 		pbl = iwmr->pgaddrmem;
1745 	}
1746 
1747 	irdma_copy_user_pgaddrs(iwmr, pbl, level);
1748 
1749 	if (lvl)
1750 		iwmr->pgaddrmem[0] = *pbl;
1751 
1752 	return 0;
1753 }
1754 
1755 /**
1756  * irdma_handle_q_mem - handle memory for qp and cq
1757  * @iwdev: irdma device
1758  * @req: information for q memory management
1759  * @iwpbl: pble struct
1760  * @lvl: pble level mask
1761  */
1762 static int
1763 irdma_handle_q_mem(struct irdma_device *iwdev,
1764 		   struct irdma_mem_reg_req *req,
1765 		   struct irdma_pbl *iwpbl, u8 lvl)
1766 {
1767 	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
1768 	struct irdma_mr *iwmr = iwpbl->iwmr;
1769 	struct irdma_qp_mr *qpmr = &iwpbl->qp_mr;
1770 	struct irdma_cq_mr *cqmr = &iwpbl->cq_mr;
1771 	struct irdma_hmc_pble *hmc_p;
1772 	u64 *arr = iwmr->pgaddrmem;
1773 	u32 pg_size, total;
1774 	int err = 0;
1775 	bool ret = true;
1776 
1777 	pg_size = iwmr->page_size;
1778 	err = irdma_setup_pbles(iwdev->rf, iwmr, lvl);
1779 	if (err)
1780 		return err;
1781 
1782 	if (lvl)
1783 		arr = palloc->level1.addr;
1784 
1785 	switch (iwmr->type) {
1786 	case IRDMA_MEMREG_TYPE_QP:
1787 		total = req->sq_pages + req->rq_pages;
1788 		hmc_p = &qpmr->sq_pbl;
1789 		qpmr->shadow = (dma_addr_t) arr[total];
1790 		if (lvl) {
1791 			ret = irdma_check_mem_contiguous(arr, req->sq_pages,
1792 							 pg_size);
1793 			if (ret)
1794 				ret = irdma_check_mem_contiguous(&arr[req->sq_pages],
1795 								 req->rq_pages,
1796 								 pg_size);
1797 		}
1798 
1799 		if (!ret) {
1800 			hmc_p->idx = palloc->level1.idx;
1801 			hmc_p = &qpmr->rq_pbl;
1802 			hmc_p->idx = palloc->level1.idx + req->sq_pages;
1803 		} else {
1804 			hmc_p->addr = arr[0];
1805 			hmc_p = &qpmr->rq_pbl;
1806 			hmc_p->addr = arr[req->sq_pages];
1807 		}
1808 		break;
1809 	case IRDMA_MEMREG_TYPE_CQ:
1810 		hmc_p = &cqmr->cq_pbl;
1811 
1812 		if (!cqmr->split)
1813 			cqmr->shadow = (dma_addr_t) arr[req->cq_pages];
1814 
1815 		if (lvl)
1816 			ret = irdma_check_mem_contiguous(arr, req->cq_pages,
1817 							 pg_size);
1818 
1819 		if (!ret)
1820 			hmc_p->idx = palloc->level1.idx;
1821 		else
1822 			hmc_p->addr = arr[0];
1823 		break;
1824 	default:
1825 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "MR type error\n");
1826 		err = -EINVAL;
1827 	}
1828 
1829 	if (lvl && ret) {
1830 		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
1831 		iwpbl->pbl_allocated = false;
1832 	}
1833 
1834 	return err;
1835 }
1836 
1837 /**
1838  * irdma_hw_alloc_stag - cqp command to allocate stag
1839  * @iwdev: irdma device
1840  * @iwmr: irdma mr pointer
1841  */
1842 int
1843 irdma_hw_alloc_stag(struct irdma_device *iwdev,
1844 		    struct irdma_mr *iwmr)
1845 {
1846 	struct irdma_allocate_stag_info *info;
1847 	struct ib_pd *pd = iwmr->ibmr.pd;
1848 	struct irdma_pd *iwpd = to_iwpd(pd);
1849 	struct irdma_cqp_request *cqp_request;
1850 	struct cqp_cmds_info *cqp_info;
1851 	int status;
1852 
1853 	cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
1854 	if (!cqp_request)
1855 		return -ENOMEM;
1856 
1857 	cqp_info = &cqp_request->info;
1858 	info = &cqp_info->in.u.alloc_stag.info;
1859 	memset(info, 0, sizeof(*info));
1860 	info->page_size = PAGE_SIZE;
1861 	info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
1862 	info->pd_id = iwpd->sc_pd.pd_id;
1863 	info->total_len = iwmr->len;
1864 	info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ? true : false;
1865 	info->remote_access = true;
1866 	cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG;
1867 	cqp_info->post_sq = 1;
1868 	cqp_info->in.u.alloc_stag.dev = &iwdev->rf->sc_dev;
1869 	cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
1870 	status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
1871 	irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
1872 	if (!status)
1873 		iwmr->is_hwreg = 1;
1874 
1875 	return status;
1876 }
1877 
1878 /**
1879  * irdma_set_page - populate pbl list for fmr
1880  * @ibmr: ib mem to access iwarp mr pointer
1881  * @addr: page dma address fro pbl list
1882  */
1883 static int
1884 irdma_set_page(struct ib_mr *ibmr, u64 addr)
1885 {
1886 	struct irdma_mr *iwmr = to_iwmr(ibmr);
1887 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
1888 	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
1889 	u64 *pbl;
1890 
1891 	if (unlikely(iwmr->npages == iwmr->page_cnt))
1892 		return -ENOMEM;
1893 
1894 	if (palloc->level == PBLE_LEVEL_2) {
1895 		struct irdma_pble_info *palloc_info =
1896 		palloc->level2.leaf + (iwmr->npages >> PBLE_512_SHIFT);
1897 
1898 		palloc_info->addr[iwmr->npages & (PBLE_PER_PAGE - 1)] = addr;
1899 	} else {
1900 		pbl = palloc->level1.addr;
1901 		pbl[iwmr->npages] = addr;
1902 	}
1903 
1904 	iwmr->npages++;
1905 	return 0;
1906 }
1907 
1908 /**
1909  * irdma_map_mr_sg - map of sg list for fmr
1910  * @ibmr: ib mem to access iwarp mr pointer
1911  * @sg: scatter gather list
1912  * @sg_nents: number of sg pages
1913  * @sg_offset: scatter gather list for fmr
1914  */
1915 static int
1916 irdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1917 		int sg_nents, unsigned int *sg_offset)
1918 {
1919 	struct irdma_mr *iwmr = to_iwmr(ibmr);
1920 
1921 	iwmr->npages = 0;
1922 
1923 	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, irdma_set_page);
1924 }
1925 
1926 /**
1927  * irdma_hwreg_mr - send cqp command for memory registration
1928  * @iwdev: irdma device
1929  * @iwmr: irdma mr pointer
1930  * @access: access for MR
1931  */
1932 int
1933 irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
1934 	       u16 access)
1935 {
1936 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
1937 	struct irdma_reg_ns_stag_info *stag_info;
1938 	struct ib_pd *pd = iwmr->ibmr.pd;
1939 	struct irdma_pd *iwpd = to_iwpd(pd);
1940 	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
1941 	struct irdma_cqp_request *cqp_request;
1942 	struct cqp_cmds_info *cqp_info;
1943 	int ret;
1944 
1945 	cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
1946 	if (!cqp_request)
1947 		return -ENOMEM;
1948 
1949 	cqp_info = &cqp_request->info;
1950 	stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
1951 	memset(stag_info, 0, sizeof(*stag_info));
1952 	stag_info->va = iwpbl->user_base;
1953 	stag_info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
1954 	stag_info->stag_key = (u8)iwmr->stag;
1955 	stag_info->total_len = iwmr->len;
1956 	stag_info->all_memory = (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) ? true : false;
1957 	stag_info->access_rights = irdma_get_mr_access(access,
1958 						       iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev);
1959 	stag_info->pd_id = iwpd->sc_pd.pd_id;
1960 	if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED)
1961 		stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED;
1962 	else
1963 		stag_info->addr_type = IRDMA_ADDR_TYPE_VA_BASED;
1964 	stag_info->page_size = iwmr->page_size;
1965 
1966 	if (iwpbl->pbl_allocated) {
1967 		if (palloc->level == PBLE_LEVEL_1) {
1968 			stag_info->first_pm_pbl_index = palloc->level1.idx;
1969 			stag_info->chunk_size = 1;
1970 		} else {
1971 			stag_info->first_pm_pbl_index = palloc->level2.root.idx;
1972 			stag_info->chunk_size = 3;
1973 		}
1974 	} else {
1975 		stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
1976 	}
1977 
1978 	cqp_info->cqp_cmd = IRDMA_OP_MR_REG_NON_SHARED;
1979 	cqp_info->post_sq = 1;
1980 	cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev;
1981 	cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
1982 	ret = irdma_handle_cqp_op(iwdev->rf, cqp_request);
1983 	irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
1984 
1985 	if (!ret)
1986 		iwmr->is_hwreg = 1;
1987 
1988 	return ret;
1989 }
1990 
1991 /*
1992  * irdma_alloc_iwmr - Allocate iwmr @region - memory region @pd - protection domain @virt - virtual address @reg_type -
1993  * registration type
1994  */
1995 static struct irdma_mr *
1996 irdma_alloc_iwmr(struct ib_umem *region,
1997 		 struct ib_pd *pd, u64 virt,
1998 		 enum irdma_memreg_type reg_type)
1999 {
2000 	struct irdma_pbl *iwpbl;
2001 	struct irdma_mr *iwmr;
2002 
2003 	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
2004 	if (!iwmr)
2005 		return ERR_PTR(-ENOMEM);
2006 
2007 	iwpbl = &iwmr->iwpbl;
2008 	iwpbl->iwmr = iwmr;
2009 	iwmr->region = region;
2010 	iwmr->ibmr.pd = pd;
2011 	iwmr->ibmr.device = pd->device;
2012 	iwmr->ibmr.iova = virt;
2013 	iwmr->type = reg_type;
2014 
2015 	/* Some OOT versions of irdma_copy_user_pg_addr require the pg mask */
2016 	iwmr->page_msk = ~(IRDMA_HW_PAGE_SIZE - 1);
2017 	iwmr->page_size = IRDMA_HW_PAGE_SIZE;
2018 	iwmr->len = region->length;
2019 	iwpbl->user_base = virt;
2020 	iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size, virt);
2021 
2022 	return iwmr;
2023 }
2024 
/*
 * irdma_free_iwmr - Free an iwmr structure
 * @iwmr: irdma mr to free
 *
 * Only the structure itself is handed to kfree(); nothing it points to is
 * released here.
 */
static void
irdma_free_iwmr(struct irdma_mr *iwmr)
{
	kfree(iwmr);
}
2030 
2031 /*
2032  * irdma_reg_user_mr_type_mem - Handle memory registration
2033  * @iwmr - irdma mr
2034  * @access - access rights
2035  * @create_stag - flag to create stag or not
2036  */
2037 static int
2038 irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access,
2039 			   bool create_stag)
2040 {
2041 	struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
2042 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
2043 	u32 stag = 0;
2044 	int err;
2045 	u8 lvl;
2046 
2047 	lvl = iwmr->page_cnt != 1 ? PBLE_LEVEL_1 | PBLE_LEVEL_2 : PBLE_LEVEL_0;
2048 
2049 	err = irdma_setup_pbles(iwdev->rf, iwmr, lvl);
2050 	if (err)
2051 		return err;
2052 
2053 	if (lvl) {
2054 		err = irdma_check_mr_contiguous(&iwpbl->pble_alloc,
2055 						iwmr->page_size);
2056 		if (err) {
2057 			irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
2058 			iwpbl->pbl_allocated = false;
2059 		}
2060 	}
2061 
2062 	if (create_stag) {
2063 		stag = irdma_create_stag(iwdev);
2064 		if (!stag) {
2065 			err = -ENOMEM;
2066 			goto free_pble;
2067 		}
2068 
2069 		iwmr->stag = stag;
2070 		iwmr->ibmr.rkey = stag;
2071 		iwmr->ibmr.lkey = stag;
2072 	}
2073 	iwmr->access = access;
2074 	err = irdma_hwreg_mr(iwdev, iwmr, access);
2075 	if (err)
2076 		goto err_hwreg;
2077 
2078 	return 0;
2079 
2080 err_hwreg:
2081 	if (stag)
2082 		irdma_free_stag(iwdev, stag);
2083 
2084 free_pble:
2085 	if (iwpbl->pble_alloc.level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
2086 		irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
2087 
2088 	return err;
2089 }
2090 
2091 /*
2092  * irdma_reg_user_mr_type_qp - Handle QP memory registration @req - memory reg req @udata - user info @iwmr - irdma mr
2093  */
2094 static int
2095 irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
2096 			  struct ib_udata *udata,
2097 			  struct irdma_mr *iwmr)
2098 {
2099 	struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
2100 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
2101 	struct irdma_ucontext *ucontext;
2102 	unsigned long flags;
2103 	u32 total;
2104 	int err;
2105 	u8 lvl;
2106 
2107 	total = req.sq_pages + req.rq_pages + IRDMA_SHADOW_PGCNT;
2108 	if (total > iwmr->page_cnt)
2109 		return -EINVAL;
2110 
2111 	total = req.sq_pages + req.rq_pages;
2112 	lvl = total > 2 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
2113 	err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
2114 	if (err)
2115 		return err;
2116 
2117 	ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
2118 	spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
2119 	list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
2120 	iwpbl->on_list = true;
2121 	spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
2122 
2123 	return 0;
2124 }
2125 
2126 /*
2127  * irdma_reg_user_mr_type_cq - Handle CQ memory registration @req - memory reg req @udata - user info @iwmr - irdma mr
2128  */
2129 static int
2130 irdma_reg_user_mr_type_cq(struct irdma_mem_reg_req req,
2131 			  struct ib_udata *udata,
2132 			  struct irdma_mr *iwmr)
2133 {
2134 	struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
2135 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
2136 	struct irdma_ucontext *ucontext;
2137 	unsigned long flags;
2138 	u32 total;
2139 	int err;
2140 	u8 lvl;
2141 
2142 	total = req.cq_pages +
2143 	    ((iwdev->rf->sc_dev.hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_CQ_RESIZE) ? 0 : IRDMA_SHADOW_PGCNT);
2144 	if (total > iwmr->page_cnt)
2145 		return -EINVAL;
2146 
2147 	lvl = req.cq_pages > 1 ? PBLE_LEVEL_1 : PBLE_LEVEL_0;
2148 	err = irdma_handle_q_mem(iwdev, &req, iwpbl, lvl);
2149 	if (err)
2150 		return err;
2151 
2152 	ucontext = rdma_udata_to_drv_context(udata, struct irdma_ucontext, ibucontext);
2153 	spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
2154 	list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
2155 	iwpbl->on_list = true;
2156 	spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
2157 
2158 	return 0;
2159 }
2160 
2161 /**
2162  * irdma_reg_user_mr - Register a user memory region
2163  * @pd: ptr of pd
2164  * @start: virtual start address
2165  * @len: length of mr
2166  * @virt: virtual address
2167  * @access: access of mr
2168  * @udata: user data
2169  */
2170 static struct ib_mr *
2171 irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
2172 		  u64 virt, int access,
2173 		  struct ib_udata *udata)
2174 {
2175 #define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages)
2176 	struct irdma_device *iwdev = to_iwdev(pd->device);
2177 	struct irdma_mem_reg_req req = {};
2178 	struct ib_umem *region;
2179 	struct irdma_mr *iwmr;
2180 	int err;
2181 
2182 	if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
2183 		return ERR_PTR(-EINVAL);
2184 
2185 	if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN)
2186 		return ERR_PTR(-EINVAL);
2187 
2188 	region = ib_umem_get(pd->uobject->context, start, len, access, 0);
2189 
2190 	if (IS_ERR(region)) {
2191 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
2192 			    "Failed to create ib_umem region\n");
2193 		return (struct ib_mr *)region;
2194 	}
2195 
2196 	if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) {
2197 		ib_umem_release(region);
2198 		return ERR_PTR(-EFAULT);
2199 	}
2200 
2201 	iwmr = irdma_alloc_iwmr(region, pd, virt, req.reg_type);
2202 	if (IS_ERR(iwmr)) {
2203 		ib_umem_release(region);
2204 		return (struct ib_mr *)iwmr;
2205 	}
2206 
2207 	switch (req.reg_type) {
2208 	case IRDMA_MEMREG_TYPE_QP:
2209 		err = irdma_reg_user_mr_type_qp(req, udata, iwmr);
2210 		if (err)
2211 			goto error;
2212 
2213 		break;
2214 	case IRDMA_MEMREG_TYPE_CQ:
2215 		err = irdma_reg_user_mr_type_cq(req, udata, iwmr);
2216 		if (err)
2217 			goto error;
2218 
2219 		break;
2220 	case IRDMA_MEMREG_TYPE_MEM:
2221 		err = irdma_reg_user_mr_type_mem(iwmr, access, true);
2222 		if (err)
2223 			goto error;
2224 
2225 		break;
2226 	default:
2227 		err = -EINVAL;
2228 		goto error;
2229 	}
2230 
2231 	return &iwmr->ibmr;
2232 
2233 error:
2234 	ib_umem_release(region);
2235 	irdma_free_iwmr(iwmr);
2236 
2237 	return ERR_PTR(err);
2238 }
2239 
2240 int
2241 irdma_hwdereg_mr(struct ib_mr *ib_mr)
2242 {
2243 	struct irdma_device *iwdev = to_iwdev(ib_mr->device);
2244 	struct irdma_mr *iwmr = to_iwmr(ib_mr);
2245 	struct irdma_pd *iwpd = to_iwpd(ib_mr->pd);
2246 	struct irdma_dealloc_stag_info *info;
2247 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
2248 	struct irdma_cqp_request *cqp_request;
2249 	struct cqp_cmds_info *cqp_info;
2250 	int status;
2251 
2252 	/*
2253 	 * Skip HW MR de-register when it is already de-registered during an MR re-reregister and the re-registration
2254 	 * fails
2255 	 */
2256 	if (!iwmr->is_hwreg)
2257 		return 0;
2258 
2259 	cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
2260 	if (!cqp_request)
2261 		return -ENOMEM;
2262 
2263 	cqp_info = &cqp_request->info;
2264 	info = &cqp_info->in.u.dealloc_stag.info;
2265 	memset(info, 0, sizeof(*info));
2266 	info->pd_id = iwpd->sc_pd.pd_id;
2267 	info->stag_idx = RS_64_1(ib_mr->rkey, IRDMA_CQPSQ_STAG_IDX_S);
2268 	info->mr = true;
2269 	if (iwpbl->pbl_allocated)
2270 		info->dealloc_pbl = true;
2271 
2272 	cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
2273 	cqp_info->post_sq = 1;
2274 	cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
2275 	cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
2276 	status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
2277 	irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
2278 
2279 	if (!status)
2280 		iwmr->is_hwreg = 0;
2281 
2282 	return status;
2283 }
2284 
2285 /*
2286  * irdma_rereg_mr_trans - Re-register a user MR for a change translation. @iwmr: ptr of iwmr @start: virtual start
2287  * address @len: length of mr @virt: virtual address
2288  *
2289  * Re-register a user memory region when a change translation is requested. Re-register a new region while reusing the
2290  * stag from the original registration.
2291  */
2292 struct ib_mr *
2293 irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len,
2294 		     u64 virt, struct ib_udata *udata)
2295 {
2296 	struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
2297 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
2298 	struct ib_pd *pd = iwmr->ibmr.pd;
2299 	struct ib_umem *region;
2300 	int err;
2301 
2302 	region = ib_umem_get(pd->uobject->context, start, len, iwmr->access, 0);
2303 
2304 	if (IS_ERR(region)) {
2305 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
2306 			    "Failed to create ib_umem region\n");
2307 		return (struct ib_mr *)region;
2308 	}
2309 
2310 	iwmr->region = region;
2311 	iwmr->ibmr.iova = virt;
2312 	iwmr->ibmr.pd = pd;
2313 	iwmr->page_size = PAGE_SIZE;
2314 
2315 	iwmr->len = region->length;
2316 	iwpbl->user_base = virt;
2317 	iwmr->page_cnt = irdma_ib_umem_num_dma_blocks(region, iwmr->page_size,
2318 						      virt);
2319 
2320 	err = irdma_reg_user_mr_type_mem(iwmr, iwmr->access, false);
2321 	if (err)
2322 		goto err;
2323 
2324 	return &iwmr->ibmr;
2325 
2326 err:
2327 	ib_umem_release(region);
2328 	return ERR_PTR(err);
2329 }
2330 
2331 /**
2332  * irdma_reg_phys_mr - register kernel physical memory
2333  * @pd: ibpd pointer
2334  * @addr: physical address of memory to register
2335  * @size: size of memory to register
2336  * @access: Access rights
2337  * @iova_start: start of virtual address for physical buffers
2338  */
2339 struct ib_mr *
2340 irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access,
2341 		  u64 *iova_start)
2342 {
2343 	struct irdma_device *iwdev = to_iwdev(pd->device);
2344 	struct irdma_pbl *iwpbl;
2345 	struct irdma_mr *iwmr;
2346 	u32 stag;
2347 	int ret;
2348 
2349 	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
2350 	if (!iwmr)
2351 		return ERR_PTR(-ENOMEM);
2352 
2353 	iwmr->ibmr.pd = pd;
2354 	iwmr->ibmr.device = pd->device;
2355 	iwpbl = &iwmr->iwpbl;
2356 	iwpbl->iwmr = iwmr;
2357 	iwmr->type = IRDMA_MEMREG_TYPE_MEM;
2358 	iwpbl->user_base = *iova_start;
2359 	stag = irdma_create_stag(iwdev);
2360 	if (!stag) {
2361 		ret = -ENOMEM;
2362 		goto err;
2363 	}
2364 
2365 	iwmr->stag = stag;
2366 	iwmr->ibmr.iova = *iova_start;
2367 	iwmr->ibmr.rkey = stag;
2368 	iwmr->ibmr.lkey = stag;
2369 	iwmr->page_cnt = 1;
2370 	iwmr->pgaddrmem[0] = addr;
2371 	iwmr->len = size;
2372 	iwmr->page_size = SZ_4K;
2373 	ret = irdma_hwreg_mr(iwdev, iwmr, access);
2374 	if (ret) {
2375 		irdma_free_stag(iwdev, stag);
2376 		goto err;
2377 	}
2378 
2379 	return &iwmr->ibmr;
2380 
2381 err:
2382 	kfree(iwmr);
2383 
2384 	return ERR_PTR(ret);
2385 }
2386 
2387 /**
2388  * irdma_get_dma_mr - register physical mem
2389  * @pd: ptr of pd
2390  * @acc: access for memory
2391  */
2392 static struct ib_mr *
2393 irdma_get_dma_mr(struct ib_pd *pd, int acc)
2394 {
2395 	u64 kva = 0;
2396 
2397 	return irdma_reg_phys_mr(pd, 0, 0, acc, &kva);
2398 }
2399 
2400 /**
2401  * irdma_del_memlist - Deleting pbl list entries for CQ/QP
2402  * @iwmr: iwmr for IB's user page addresses
2403  * @ucontext: ptr to user context
2404  */
2405 void
2406 irdma_del_memlist(struct irdma_mr *iwmr,
2407 		  struct irdma_ucontext *ucontext)
2408 {
2409 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
2410 	unsigned long flags;
2411 
2412 	switch (iwmr->type) {
2413 	case IRDMA_MEMREG_TYPE_CQ:
2414 		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
2415 		if (iwpbl->on_list) {
2416 			iwpbl->on_list = false;
2417 			list_del(&iwpbl->list);
2418 		}
2419 		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
2420 		break;
2421 	case IRDMA_MEMREG_TYPE_QP:
2422 		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
2423 		if (iwpbl->on_list) {
2424 			iwpbl->on_list = false;
2425 			list_del(&iwpbl->list);
2426 		}
2427 		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
2428 		break;
2429 	default:
2430 		break;
2431 	}
2432 }
2433 
/**
 * irdma_post_send -  kernel application wr
 * @ibqp: qp ptr for wr
 * @ib_wr: work request ptr
 * @bad_wr: return of bad wr if err
 *
 * Walks the chain of work requests starting at @ib_wr while holding the
 * qp lock and hands each one to the matching irdma_uk_* / irdma_sc_*
 * helper.  Processing stops at the first work request that fails; that
 * work request is stored in @bad_wr and its error code is returned.
 * Returns 0 when every work request in the chain was accepted.
 */
static int
irdma_post_send(struct ib_qp *ibqp,
		const struct ib_send_wr *ib_wr,
		const struct ib_send_wr **bad_wr)
{
	struct irdma_qp *iwqp;
	struct irdma_qp_uk *ukqp;
	struct irdma_sc_dev *dev;
	struct irdma_post_sq_info info;
	int err = 0;
	unsigned long flags;
	bool inv_stag;
	struct irdma_ah *ah;

	iwqp = to_iwqp(ibqp);
	ukqp = &iwqp->sc_qp.qp_uk;
	dev = &iwqp->iwdev->rf->sc_dev;

	spin_lock_irqsave(&iwqp->lock, flags);
	while (ib_wr) {
		/* per-WR state is rebuilt from scratch on every iteration */
		memset(&info, 0, sizeof(info));
		inv_stag = false;
		info.wr_id = (ib_wr->wr_id);
		if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
			info.signaled = true;
		if (ib_wr->send_flags & IB_SEND_FENCE)
			info.read_fence = true;
		switch (ib_wr->opcode) {
		case IB_WR_SEND_WITH_IMM:
			/* immediate data is only accepted if the qp advertises it */
			if (ukqp->qp_caps & IRDMA_SEND_WITH_IMM) {
				info.imm_data_valid = true;
				info.imm_data = ntohl(ib_wr->ex.imm_data);
			} else {
				err = -EINVAL;
				break;
			}
			/* fallthrough */
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_INV:
			if (ib_wr->opcode == IB_WR_SEND ||
			    ib_wr->opcode == IB_WR_SEND_WITH_IMM) {
				if (ib_wr->send_flags & IB_SEND_SOLICITED)
					info.op_type = IRDMA_OP_TYPE_SEND_SOL;
				else
					info.op_type = IRDMA_OP_TYPE_SEND;
			} else {
				if (ib_wr->send_flags & IB_SEND_SOLICITED)
					info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV;
				else
					info.op_type = IRDMA_OP_TYPE_SEND_INV;
				info.stag_to_inv = ib_wr->ex.invalidate_rkey;
			}

			info.op.send.num_sges = ib_wr->num_sge;
			info.op.send.sg_list = ib_wr->sg_list;
			/* UD and GSI sends additionally carry AH, qkey and remote qpn */
			if (iwqp->ibqp.qp_type == IB_QPT_UD ||
			    iwqp->ibqp.qp_type == IB_QPT_GSI) {
				ah = to_iwah(ud_wr(ib_wr)->ah);
				info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
				info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
				info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
			}

			if (ib_wr->send_flags & IB_SEND_INLINE)
				err = irdma_uk_inline_send(ukqp, &info, false);
			else
				err = irdma_uk_send(ukqp, &info, false);
			break;
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* immediate data is only accepted if the qp advertises it */
			if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
				info.imm_data_valid = true;
				info.imm_data = ntohl(ib_wr->ex.imm_data);
			} else {
				err = -EINVAL;
				break;
			}
			/* fallthrough */
		case IB_WR_RDMA_WRITE:
			if (ib_wr->send_flags & IB_SEND_SOLICITED)
				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL;
			else
				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;

			info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
			info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
			info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
			info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
			if (ib_wr->send_flags & IB_SEND_INLINE)
				err = irdma_uk_inline_rdma_write(ukqp, &info, false);
			else
				err = irdma_uk_rdma_write(ukqp, &info, false);
			break;
		case IB_WR_RDMA_READ_WITH_INV:
			inv_stag = true;
			/* fallthrough */
		case IB_WR_RDMA_READ:
			/* reject reads with more sges than the device attribute allows */
			if (ib_wr->num_sge >
			    dev->hw_attrs.uk_attrs.max_hw_read_sges) {
				err = -EINVAL;
				break;
			}
			info.op_type = IRDMA_OP_TYPE_RDMA_READ;
			info.op.rdma_read.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
			info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
			info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
			info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
			err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false);
			break;
		case IB_WR_LOCAL_INV:
			info.op_type = IRDMA_OP_TYPE_INV_STAG;
			info.local_fence = info.read_fence;
			info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
			err = irdma_uk_stag_local_invalidate(ukqp, &info, true);
			break;
		case IB_WR_REG_MR:{
				struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
				struct irdma_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
				struct irdma_fast_reg_stag_info stag_info = {0};

				stag_info.signaled = info.signaled;
				stag_info.read_fence = info.read_fence;
				stag_info.access_rights =
				    irdma_get_mr_access(reg_wr(ib_wr)->access,
							dev->hw_attrs.uk_attrs.hw_rev);
				/* low 8 bits of the key are the stag key, the rest the index */
				stag_info.stag_key = reg_wr(ib_wr)->key & 0xff;
				stag_info.stag_idx = reg_wr(ib_wr)->key >> 8;
				stag_info.page_size = reg_wr(ib_wr)->mr->page_size;
				stag_info.wr_id = ib_wr->wr_id;
				stag_info.addr_type = IRDMA_ADDR_TYPE_VA_BASED;
				stag_info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
				stag_info.total_len = iwmr->ibmr.length;
				if (palloc->level == PBLE_LEVEL_2) {
					stag_info.chunk_size = 3;
					stag_info.first_pm_pbl_index = palloc->level2.root.idx;
				} else {
					stag_info.chunk_size = 1;
					stag_info.first_pm_pbl_index = palloc->level1.idx;
				}
				stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
				err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info,
								true);
				break;
			}
		default:
			err = -EINVAL;
			irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
				    "upost_send bad opcode = 0x%x\n",
				    ib_wr->opcode);
			break;
		}

		/* stop at the first failing WR; ib_wr keeps pointing at it */
		if (err)
			break;
		ib_wr = ib_wr->next;
	}

	/*
	 * Without a pending flush, irdma_uk_qp_post_wr() is called under the
	 * lock when the hw state is at most RTS.  With a flush issued, the
	 * lock is dropped first and the flush work is scheduled instead.
	 */
	if (!iwqp->flush_issued) {
		if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
			irdma_uk_qp_post_wr(ukqp);
		spin_unlock_irqrestore(&iwqp->lock, flags);
	} else {
		spin_unlock_irqrestore(&iwqp->lock, flags);
		irdma_sched_qp_flush_work(iwqp);
	}

	if (err)
		*bad_wr = ib_wr;

	return err;
}
2610 
2611 /**
2612  * irdma_post_recv - post receive wr for kernel application
2613  * @ibqp: ib qp pointer
2614  * @ib_wr: work request for receive
2615  * @bad_wr: bad wr caused an error
2616  */
2617 static int
2618 irdma_post_recv(struct ib_qp *ibqp,
2619 		const struct ib_recv_wr *ib_wr,
2620 		const struct ib_recv_wr **bad_wr)
2621 {
2622 	struct irdma_qp *iwqp = to_iwqp(ibqp);
2623 	struct irdma_qp_uk *ukqp = &iwqp->sc_qp.qp_uk;
2624 	struct irdma_post_rq_info post_recv = {0};
2625 	unsigned long flags;
2626 	int err = 0;
2627 
2628 	spin_lock_irqsave(&iwqp->lock, flags);
2629 
2630 	while (ib_wr) {
2631 		if (ib_wr->num_sge > ukqp->max_rq_frag_cnt) {
2632 			err = -EINVAL;
2633 			goto out;
2634 		}
2635 		post_recv.num_sges = ib_wr->num_sge;
2636 		post_recv.wr_id = ib_wr->wr_id;
2637 		post_recv.sg_list = ib_wr->sg_list;
2638 		err = irdma_uk_post_receive(ukqp, &post_recv);
2639 		if (err) {
2640 			irdma_debug(&iwqp->iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
2641 				    "post_recv err %d\n", err);
2642 			goto out;
2643 		}
2644 
2645 		ib_wr = ib_wr->next;
2646 	}
2647 
2648 out:
2649 	spin_unlock_irqrestore(&iwqp->lock, flags);
2650 	if (iwqp->flush_issued)
2651 		irdma_sched_qp_flush_work(iwqp);
2652 
2653 	if (err)
2654 		*bad_wr = ib_wr;
2655 
2656 	return err;
2657 }
2658 
2659 /**
2660  * irdma_flush_err_to_ib_wc_status - return change flush error code to IB status
2661  * @opcode: iwarp flush code
2662  */
2663 static enum ib_wc_status
2664 irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
2665 {
2666 	switch (opcode) {
2667 	case FLUSH_PROT_ERR:
2668 		return IB_WC_LOC_PROT_ERR;
2669 	case FLUSH_REM_ACCESS_ERR:
2670 		return IB_WC_REM_ACCESS_ERR;
2671 	case FLUSH_LOC_QP_OP_ERR:
2672 		return IB_WC_LOC_QP_OP_ERR;
2673 	case FLUSH_REM_OP_ERR:
2674 		return IB_WC_REM_OP_ERR;
2675 	case FLUSH_LOC_LEN_ERR:
2676 		return IB_WC_LOC_LEN_ERR;
2677 	case FLUSH_GENERAL_ERR:
2678 		return IB_WC_WR_FLUSH_ERR;
2679 	case FLUSH_MW_BIND_ERR:
2680 		return IB_WC_MW_BIND_ERR;
2681 	case FLUSH_REM_INV_REQ_ERR:
2682 		return IB_WC_REM_INV_REQ_ERR;
2683 	case FLUSH_RETRY_EXC_ERR:
2684 		return IB_WC_RETRY_EXC_ERR;
2685 	case FLUSH_FATAL_ERR:
2686 	default:
2687 		return IB_WC_FATAL_ERR;
2688 	}
2689 }
2690 
2691 /**
2692  * irdma_process_cqe - process cqe info
2693  * @entry: processed cqe
2694  * @cq_poll_info: cqe info
2695  */
2696 static void
2697 irdma_process_cqe(struct ib_wc *entry,
2698 		  struct irdma_cq_poll_info *cq_poll_info)
2699 {
2700 	struct irdma_sc_qp *qp;
2701 
2702 	entry->wc_flags = 0;
2703 	entry->pkey_index = 0;
2704 	entry->wr_id = cq_poll_info->wr_id;
2705 
2706 	qp = cq_poll_info->qp_handle;
2707 	entry->qp = qp->qp_uk.back_qp;
2708 
2709 	if (cq_poll_info->error) {
2710 		entry->status = (cq_poll_info->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
2711 		    irdma_flush_err_to_ib_wc_status(cq_poll_info->minor_err) : IB_WC_GENERAL_ERR;
2712 
2713 		entry->vendor_err = cq_poll_info->major_err << 16 |
2714 		    cq_poll_info->minor_err;
2715 	} else {
2716 		entry->status = IB_WC_SUCCESS;
2717 		if (cq_poll_info->imm_valid) {
2718 			entry->ex.imm_data = htonl(cq_poll_info->imm_data);
2719 			entry->wc_flags |= IB_WC_WITH_IMM;
2720 		}
2721 		if (cq_poll_info->ud_smac_valid) {
2722 			ether_addr_copy(entry->smac, cq_poll_info->ud_smac);
2723 			entry->wc_flags |= IB_WC_WITH_SMAC;
2724 		}
2725 
2726 		if (cq_poll_info->ud_vlan_valid) {
2727 			u16 vlan = cq_poll_info->ud_vlan & EVL_VLID_MASK;
2728 
2729 			entry->sl = cq_poll_info->ud_vlan >> VLAN_PRIO_SHIFT;
2730 			if (vlan) {
2731 				entry->vlan_id = vlan;
2732 				entry->wc_flags |= IB_WC_WITH_VLAN;
2733 			}
2734 		} else {
2735 			entry->sl = 0;
2736 		}
2737 	}
2738 
2739 	if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
2740 		set_ib_wc_op_sq(cq_poll_info, entry);
2741 	} else {
2742 		set_ib_wc_op_rq(cq_poll_info, entry,
2743 				qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
2744 				true : false);
2745 		if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
2746 		    cq_poll_info->stag_invalid_set) {
2747 			entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
2748 			entry->wc_flags |= IB_WC_WITH_INVALIDATE;
2749 		}
2750 	}
2751 
2752 	if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) {
2753 		entry->src_qp = cq_poll_info->ud_src_qpn;
2754 		entry->slid = 0;
2755 		entry->wc_flags |=
2756 		    (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
2757 		entry->network_hdr_type = cq_poll_info->ipv4 ?
2758 		    RDMA_NETWORK_IPV4 :
2759 		    RDMA_NETWORK_IPV6;
2760 	} else {
2761 		entry->src_qp = cq_poll_info->qp_id;
2762 	}
2763 
2764 	entry->byte_len = cq_poll_info->bytes_xfered;
2765 }
2766 
/**
 * irdma_poll_one - poll one entry of the CQ
 * @ukcq: ukcq to poll
 * @cur_cqe: current CQE info to be filled in
 * @entry: ib_wc object that receives the processed completion
 *
 * Polls a single completion with irdma_uk_cq_poll_cmpl() and, when one
 * was obtained, converts it into @entry.
 *
 * Returns the internal irdma device error code or 0 on success
 */
static inline int
irdma_poll_one(struct irdma_cq_uk *ukcq,
	       struct irdma_cq_poll_info *cur_cqe,
	       struct ib_wc *entry)
{
	int ret;

	ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe);
	if (!ret)
		irdma_process_cqe(entry, cur_cqe);

	return ret;
}
2789 
/**
 * __irdma_poll_cq - poll cq for completion (kernel apps)
 * @iwcq: cq to poll
 * @num_entries: number of entries to poll
 * @entry: wr of a completed entry
 *
 * Polls the buffers on the CQ's resize list first and then the current
 * CQ, storing up to @num_entries completions in @entry.  When the current
 * CQ reports -ENOENT, irdma_generated_cmpls() is tried as a further
 * source of completions.
 *
 * Returns the number of completions stored in @entry, or the error code
 * of a poll that failed with anything other than -ENOENT or -EFAULT.
 */
static int
__irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc *entry)
{
	struct list_head *tmp_node, *list_node;
	struct irdma_cq_buf *last_buf = NULL;
	struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe;
	struct irdma_cq_buf *cq_buf;
	int ret;
	struct irdma_device *iwdev;
	struct irdma_cq_uk *ukcq;
	bool cq_new_cqe = false;
	int resized_bufs = 0;
	int npolled = 0;

	iwdev = to_iwdev(iwcq->ibcq.device);
	ukcq = &iwcq->sc_cq.cq_uk;

	/* go through the list of previously resized CQ buffers */
	list_for_each_safe(list_node, tmp_node, &iwcq->resize_list) {
		cq_buf = container_of(list_node, struct irdma_cq_buf, list);
		while (npolled < num_entries) {
			ret = irdma_poll_one(&cq_buf->cq_uk, cur_cqe, entry + npolled);
			if (!ret) {
				++npolled;
				cq_new_cqe = true;
				continue;
			}
			/* -ENOENT ends polling of this buffer; move on to the next one */
			if (ret == -ENOENT)
				break;
			/* QP using the CQ is destroyed. Skip reporting this CQE */
			if (ret == -EFAULT) {
				cq_new_cqe = true;
				continue;
			}
			goto error;
		}

		/* save the resized CQ buffer which received the last cqe */
		if (cq_new_cqe)
			last_buf = cq_buf;
		cq_new_cqe = false;
	}

	/* check the current CQ for new cqes */
	while (npolled < num_entries) {
		ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
		/* fall back to irdma_generated_cmpls() when the CQ reports -ENOENT */
		if (ret == -ENOENT) {
			ret = irdma_generated_cmpls(iwcq, cur_cqe);
			if (!ret)
				irdma_process_cqe(entry + npolled, cur_cqe);
		}
		if (!ret) {
			++npolled;
			cq_new_cqe = true;
			continue;
		}

		if (ret == -ENOENT)
			break;
		/* QP using the CQ is destroyed. Skip reporting this CQE */
		if (ret == -EFAULT) {
			cq_new_cqe = true;
			continue;
		}
		goto error;
	}

	if (cq_new_cqe)
		/* all previous CQ resizes are complete */
		resized_bufs = irdma_process_resize_list(iwcq, iwdev, NULL);
	else if (last_buf)
		/* only CQ resizes up to the last_buf are complete */
		resized_bufs = irdma_process_resize_list(iwcq, iwdev, last_buf);
	if (resized_bufs)
		/* report to the HW the number of complete CQ resizes */
		irdma_uk_cq_set_resized_cnt(ukcq, resized_bufs);

	return npolled;
error:
	/* the error path returns without touching the resize list */
	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
		    "%s: Error polling CQ, irdma_err: %d\n", __func__, ret);

	return ret;
}
2880 
2881 /**
2882  * irdma_poll_cq - poll cq for completion (kernel apps)
2883  * @ibcq: cq to poll
2884  * @num_entries: number of entries to poll
2885  * @entry: wr of a completed entry
2886  */
2887 static int
2888 irdma_poll_cq(struct ib_cq *ibcq, int num_entries,
2889 	      struct ib_wc *entry)
2890 {
2891 	struct irdma_cq *iwcq;
2892 	unsigned long flags;
2893 	int ret;
2894 
2895 	iwcq = to_iwcq(ibcq);
2896 
2897 	spin_lock_irqsave(&iwcq->lock, flags);
2898 	ret = __irdma_poll_cq(iwcq, num_entries, entry);
2899 	spin_unlock_irqrestore(&iwcq->lock, flags);
2900 
2901 	return ret;
2902 }
2903 
2904 /**
2905  * irdma_req_notify_cq - arm cq kernel application
2906  * @ibcq: cq to arm
2907  * @notify_flags: notofication flags
2908  */
2909 static int
2910 irdma_req_notify_cq(struct ib_cq *ibcq,
2911 		    enum ib_cq_notify_flags notify_flags)
2912 {
2913 	struct irdma_cq *iwcq;
2914 	struct irdma_cq_uk *ukcq;
2915 	unsigned long flags;
2916 	enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT;
2917 	bool promo_event = false;
2918 	int ret = 0;
2919 
2920 	iwcq = to_iwcq(ibcq);
2921 	ukcq = &iwcq->sc_cq.cq_uk;
2922 
2923 	spin_lock_irqsave(&iwcq->lock, flags);
2924 	if (notify_flags == IB_CQ_SOLICITED) {
2925 		cq_notify = IRDMA_CQ_COMPL_SOLICITED;
2926 	} else {
2927 		if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED)
2928 			promo_event = true;
2929 	}
2930 
2931 	if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
2932 		iwcq->last_notify = cq_notify;
2933 		irdma_uk_cq_request_notification(ukcq, cq_notify);
2934 	}
2935 
2936 	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
2937 	    (!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
2938 		ret = 1;
2939 	spin_unlock_irqrestore(&iwcq->lock, flags);
2940 
2941 	return ret;
2942 }
2943 
2944 /**
2945  * mcast_list_add -  Add a new mcast item to list
2946  * @rf: RDMA PCI function
2947  * @new_elem: pointer to element to add
2948  */
2949 static void
2950 mcast_list_add(struct irdma_pci_f *rf,
2951 	       struct mc_table_list *new_elem)
2952 {
2953 	list_add(&new_elem->list, &rf->mc_qht_list.list);
2954 }
2955 
2956 /**
2957  * mcast_list_del - Remove an mcast item from list
2958  * @mc_qht_elem: pointer to mcast table list element
2959  */
2960 static void
2961 mcast_list_del(struct mc_table_list *mc_qht_elem)
2962 {
2963 	if (mc_qht_elem)
2964 		list_del(&mc_qht_elem->list);
2965 }
2966 
2967 /**
2968  * mcast_list_lookup_ip - Search mcast list for address
2969  * @rf: RDMA PCI function
2970  * @ip_mcast: pointer to mcast IP address
2971  */
2972 static struct mc_table_list *
2973 mcast_list_lookup_ip(struct irdma_pci_f *rf,
2974 		     u32 *ip_mcast)
2975 {
2976 	struct mc_table_list *mc_qht_el;
2977 	struct list_head *pos, *q;
2978 
2979 	list_for_each_safe(pos, q, &rf->mc_qht_list.list) {
2980 		mc_qht_el = list_entry(pos, struct mc_table_list, list);
2981 		if (!memcmp(mc_qht_el->mc_info.dest_ip, ip_mcast,
2982 			    sizeof(mc_qht_el->mc_info.dest_ip)))
2983 			return mc_qht_el;
2984 	}
2985 
2986 	return NULL;
2987 }
2988 
2989 /**
2990  * irdma_mcast_cqp_op - perform a mcast cqp operation
2991  * @iwdev: irdma device
2992  * @mc_grp_ctx: mcast group info
2993  * @op: operation
2994  *
2995  * returns error status
2996  */
2997 static int
2998 irdma_mcast_cqp_op(struct irdma_device *iwdev,
2999 		   struct irdma_mcast_grp_info *mc_grp_ctx, u8 op)
3000 {
3001 	struct cqp_cmds_info *cqp_info;
3002 	struct irdma_cqp_request *cqp_request;
3003 	int status;
3004 
3005 	cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
3006 	if (!cqp_request)
3007 		return -ENOMEM;
3008 
3009 	cqp_request->info.in.u.mc_create.info = *mc_grp_ctx;
3010 	cqp_info = &cqp_request->info;
3011 	cqp_info->cqp_cmd = op;
3012 	cqp_info->post_sq = 1;
3013 	cqp_info->in.u.mc_create.scratch = (uintptr_t)cqp_request;
3014 	cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp;
3015 	status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
3016 	irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
3017 
3018 	return status;
3019 }
3020 
3021 /**
3022  * irdma_attach_mcast - attach a qp to a multicast group
3023  * @ibqp: ptr to qp
3024  * @ibgid: pointer to global ID
3025  * @lid: local ID
3026  *
3027  * returns error status
3028  */
3029 static int
3030 irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
3031 {
3032 	struct irdma_qp *iwqp = to_iwqp(ibqp);
3033 	struct irdma_device *iwdev = iwqp->iwdev;
3034 	struct irdma_pci_f *rf = iwdev->rf;
3035 	struct mc_table_list *mc_qht_elem;
3036 	struct irdma_mcast_grp_ctx_entry_info mcg_info = {0};
3037 	unsigned long flags;
3038 	u32 ip_addr[4] = {0};
3039 	u32 mgn;
3040 	u32 no_mgs;
3041 	int ret = 0;
3042 	bool ipv4;
3043 	u16 vlan_id;
3044 	union irdma_sockaddr sgid_addr;
3045 	unsigned char dmac[ETHER_ADDR_LEN];
3046 
3047 	rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
3048 
3049 	if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) {
3050 		irdma_copy_ip_ntohl(ip_addr,
3051 				    sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32);
3052 		irdma_get_vlan_mac_ipv6(iwqp->cm_id, ip_addr, &vlan_id, NULL);
3053 		ipv4 = false;
3054 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
3055 			    "qp_id=%d, IP6address=%x:%x:%x:%x\n", ibqp->qp_num,
3056 			    IRDMA_PRINT_IP6(ip_addr));
3057 		irdma_mcast_mac_v6(ip_addr, dmac);
3058 	} else {
3059 		ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
3060 		ipv4 = true;
3061 		vlan_id = irdma_get_vlan_ipv4(iwqp->cm_id, ip_addr);
3062 		irdma_mcast_mac_v4(ip_addr, dmac);
3063 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
3064 			    "qp_id=%d, IP4address=%x, MAC=%x:%x:%x:%x:%x:%x\n",
3065 			    ibqp->qp_num, ip_addr[0], dmac[0], dmac[1], dmac[2],
3066 			    dmac[3], dmac[4], dmac[5]);
3067 	}
3068 
3069 	spin_lock_irqsave(&rf->qh_list_lock, flags);
3070 	mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
3071 	if (!mc_qht_elem) {
3072 		struct irdma_dma_mem *dma_mem_mc;
3073 
3074 		spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3075 		mc_qht_elem = kzalloc(sizeof(*mc_qht_elem), GFP_KERNEL);
3076 		if (!mc_qht_elem)
3077 			return -ENOMEM;
3078 
3079 		mc_qht_elem->mc_info.ipv4_valid = ipv4;
3080 		memcpy(mc_qht_elem->mc_info.dest_ip, ip_addr,
3081 		       sizeof(mc_qht_elem->mc_info.dest_ip));
3082 		ret = irdma_alloc_rsrc(rf, rf->allocated_mcgs, rf->max_mcg,
3083 				       &mgn, &rf->next_mcg);
3084 		if (ret) {
3085 			kfree(mc_qht_elem);
3086 			return -ENOMEM;
3087 		}
3088 
3089 		mc_qht_elem->mc_info.mgn = mgn;
3090 		dma_mem_mc = &mc_qht_elem->mc_grp_ctx.dma_mem_mc;
3091 		dma_mem_mc->size = sizeof(u64)* IRDMA_MAX_MGS_PER_CTX;
3092 		dma_mem_mc->va = irdma_allocate_dma_mem(&rf->hw, dma_mem_mc,
3093 							dma_mem_mc->size,
3094 							IRDMA_HW_PAGE_SIZE);
3095 		if (!dma_mem_mc->va) {
3096 			irdma_free_rsrc(rf, rf->allocated_mcgs, mgn);
3097 			kfree(mc_qht_elem);
3098 			return -ENOMEM;
3099 		}
3100 
3101 		mc_qht_elem->mc_grp_ctx.mg_id = (u16)mgn;
3102 		memcpy(mc_qht_elem->mc_grp_ctx.dest_ip_addr, ip_addr,
3103 		       sizeof(mc_qht_elem->mc_grp_ctx.dest_ip_addr));
3104 		mc_qht_elem->mc_grp_ctx.ipv4_valid = ipv4;
3105 		mc_qht_elem->mc_grp_ctx.vlan_id = vlan_id;
3106 		if (vlan_id < VLAN_N_VID)
3107 			mc_qht_elem->mc_grp_ctx.vlan_valid = true;
3108 		mc_qht_elem->mc_grp_ctx.hmc_fcn_id = iwdev->rf->sc_dev.hmc_fn_id;
3109 		mc_qht_elem->mc_grp_ctx.qs_handle =
3110 		    iwqp->sc_qp.vsi->qos[iwqp->sc_qp.user_pri].qs_handle;
3111 		ether_addr_copy(mc_qht_elem->mc_grp_ctx.dest_mac_addr, dmac);
3112 
3113 		spin_lock_irqsave(&rf->qh_list_lock, flags);
3114 		mcast_list_add(rf, mc_qht_elem);
3115 	} else {
3116 		if (mc_qht_elem->mc_grp_ctx.no_of_mgs ==
3117 		    IRDMA_MAX_MGS_PER_CTX) {
3118 			spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3119 			return -ENOMEM;
3120 		}
3121 	}
3122 
3123 	mcg_info.qp_id = iwqp->ibqp.qp_num;
3124 	no_mgs = mc_qht_elem->mc_grp_ctx.no_of_mgs;
3125 	irdma_sc_add_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
3126 	spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3127 
3128 	/* Only if there is a change do we need to modify or create */
3129 	if (!no_mgs) {
3130 		ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
3131 					 IRDMA_OP_MC_CREATE);
3132 	} else if (no_mgs != mc_qht_elem->mc_grp_ctx.no_of_mgs) {
3133 		ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
3134 					 IRDMA_OP_MC_MODIFY);
3135 	} else {
3136 		return 0;
3137 	}
3138 
3139 	if (ret)
3140 		goto error;
3141 
3142 	return 0;
3143 
3144 error:
3145 	irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
3146 	if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
3147 		mcast_list_del(mc_qht_elem);
3148 		irdma_free_dma_mem(&rf->hw,
3149 				   &mc_qht_elem->mc_grp_ctx.dma_mem_mc);
3150 		irdma_free_rsrc(rf, rf->allocated_mcgs,
3151 				mc_qht_elem->mc_grp_ctx.mg_id);
3152 		kfree(mc_qht_elem);
3153 	}
3154 
3155 	return ret;
3156 }
3157 
3158 /**
3159  * irdma_detach_mcast - detach a qp from a multicast group
3160  * @ibqp: ptr to qp
3161  * @ibgid: pointer to global ID
3162  * @lid: local ID
3163  *
3164  * returns error status
3165  */
3166 static int
3167 irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
3168 {
3169 	struct irdma_qp *iwqp = to_iwqp(ibqp);
3170 	struct irdma_device *iwdev = iwqp->iwdev;
3171 	struct irdma_pci_f *rf = iwdev->rf;
3172 	u32 ip_addr[4] = {0};
3173 	struct mc_table_list *mc_qht_elem;
3174 	struct irdma_mcast_grp_ctx_entry_info mcg_info = {0};
3175 	int ret;
3176 	unsigned long flags;
3177 	union irdma_sockaddr sgid_addr;
3178 
3179 	rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid);
3180 	if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid))
3181 		irdma_copy_ip_ntohl(ip_addr,
3182 				    sgid_addr.saddr_in6.sin6_addr.__u6_addr.__u6_addr32);
3183 	else
3184 		ip_addr[0] = ntohl(sgid_addr.saddr_in.sin_addr.s_addr);
3185 
3186 	spin_lock_irqsave(&rf->qh_list_lock, flags);
3187 	mc_qht_elem = mcast_list_lookup_ip(rf, ip_addr);
3188 	if (!mc_qht_elem) {
3189 		spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3190 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
3191 			    "address not found MCG\n");
3192 		return 0;
3193 	}
3194 
3195 	mcg_info.qp_id = iwqp->ibqp.qp_num;
3196 	irdma_sc_del_mcast_grp(&mc_qht_elem->mc_grp_ctx, &mcg_info);
3197 	if (!mc_qht_elem->mc_grp_ctx.no_of_mgs) {
3198 		mcast_list_del(mc_qht_elem);
3199 		spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3200 		ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
3201 					 IRDMA_OP_MC_DESTROY);
3202 		if (ret) {
3203 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
3204 				    "failed MC_DESTROY MCG\n");
3205 			spin_lock_irqsave(&rf->qh_list_lock, flags);
3206 			mcast_list_add(rf, mc_qht_elem);
3207 			spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3208 			return -EAGAIN;
3209 		}
3210 
3211 		irdma_free_dma_mem(&rf->hw,
3212 				   &mc_qht_elem->mc_grp_ctx.dma_mem_mc);
3213 		irdma_free_rsrc(rf, rf->allocated_mcgs,
3214 				mc_qht_elem->mc_grp_ctx.mg_id);
3215 		kfree(mc_qht_elem);
3216 	} else {
3217 		spin_unlock_irqrestore(&rf->qh_list_lock, flags);
3218 		ret = irdma_mcast_cqp_op(iwdev, &mc_qht_elem->mc_grp_ctx,
3219 					 IRDMA_OP_MC_MODIFY);
3220 		if (ret) {
3221 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS,
3222 				    "failed Modify MCG\n");
3223 			return ret;
3224 		}
3225 	}
3226 
3227 	return 0;
3228 }
3229 
3230 /**
3231  * irdma_query_ah - Query address handle
3232  * @ibah: pointer to address handle
3233  * @ah_attr: address handle attributes
3234  */
3235 static int
3236 irdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
3237 {
3238 	struct irdma_ah *ah = to_iwah(ibah);
3239 
3240 	memset(ah_attr, 0, sizeof(*ah_attr));
3241 	if (ah->av.attrs.ah_flags & IB_AH_GRH) {
3242 		ah_attr->ah_flags = IB_AH_GRH;
3243 		ah_attr->grh.flow_label = ah->sc_ah.ah_info.flow_label;
3244 		ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos;
3245 		ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl;
3246 		ah_attr->grh.sgid_index = ah->sgid_index;
3247 		ah_attr->grh.sgid_index = ah->sgid_index;
3248 		memcpy(&ah_attr->grh.dgid, &ah->dgid,
3249 		       sizeof(ah_attr->grh.dgid));
3250 	}
3251 
3252 	return 0;
3253 }
3254 
3255 static if_t irdma_get_netdev(struct ib_device *ibdev, u8 port_num){
3256 	struct irdma_device *iwdev = to_iwdev(ibdev);
3257 
3258 	if (iwdev->netdev) {
3259 		dev_hold(iwdev->netdev);
3260 		return iwdev->netdev;
3261 	}
3262 
3263 	return NULL;
3264 }
3265 
3266 static void
3267 irdma_set_device_ops(struct ib_device *ibdev)
3268 {
3269 	struct ib_device *dev_ops = ibdev;
3270 
3271 	dev_ops->ops.driver_id = RDMA_DRIVER_I40IW;
3272 	dev_ops->ops.size_ib_ah = IRDMA_SET_RDMA_OBJ_SIZE(ib_ah, irdma_ah, ibah);
3273 	dev_ops->ops.size_ib_cq = IRDMA_SET_RDMA_OBJ_SIZE(ib_cq, irdma_cq, ibcq);
3274 	dev_ops->ops.size_ib_pd = IRDMA_SET_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd);
3275 	dev_ops->ops.size_ib_ucontext = IRDMA_SET_RDMA_OBJ_SIZE(ib_ucontext,
3276 								irdma_ucontext,
3277 								ibucontext);
3278 
3279 	dev_ops->alloc_hw_stats = irdma_alloc_hw_stats;
3280 	dev_ops->alloc_mr = irdma_alloc_mr;
3281 	dev_ops->alloc_pd = irdma_alloc_pd;
3282 	dev_ops->alloc_ucontext = irdma_alloc_ucontext;
3283 	dev_ops->create_cq = irdma_create_cq;
3284 	dev_ops->create_qp = irdma_create_qp;
3285 	dev_ops->dealloc_pd = irdma_dealloc_pd;
3286 	dev_ops->dealloc_ucontext = irdma_dealloc_ucontext;
3287 	dev_ops->dereg_mr = irdma_dereg_mr;
3288 	dev_ops->destroy_cq = irdma_destroy_cq;
3289 	dev_ops->destroy_qp = irdma_destroy_qp;
3290 	dev_ops->disassociate_ucontext = irdma_disassociate_ucontext;
3291 	dev_ops->get_dev_fw_str = irdma_get_dev_fw_str;
3292 	dev_ops->get_dma_mr = irdma_get_dma_mr;
3293 	dev_ops->get_hw_stats = irdma_get_hw_stats;
3294 	dev_ops->get_netdev = irdma_get_netdev;
3295 	dev_ops->map_mr_sg = irdma_map_mr_sg;
3296 	dev_ops->mmap = irdma_mmap;
3297 	dev_ops->mmap_free = irdma_mmap_free;
3298 	dev_ops->poll_cq = irdma_poll_cq;
3299 	dev_ops->post_recv = irdma_post_recv;
3300 	dev_ops->post_send = irdma_post_send;
3301 	dev_ops->query_device = irdma_query_device;
3302 	dev_ops->query_port = irdma_query_port;
3303 	dev_ops->modify_port = irdma_modify_port;
3304 	dev_ops->query_qp = irdma_query_qp;
3305 	dev_ops->reg_user_mr = irdma_reg_user_mr;
3306 	dev_ops->rereg_user_mr = irdma_rereg_user_mr;
3307 	dev_ops->req_notify_cq = irdma_req_notify_cq;
3308 	dev_ops->resize_cq = irdma_resize_cq;
3309 }
3310 
3311 static void
3312 irdma_set_device_mcast_ops(struct ib_device *ibdev)
3313 {
3314 	struct ib_device *dev_ops = ibdev;
3315 
3316 	dev_ops->attach_mcast = irdma_attach_mcast;
3317 	dev_ops->detach_mcast = irdma_detach_mcast;
3318 }
3319 
3320 static void
3321 irdma_set_device_roce_ops(struct ib_device *ibdev)
3322 {
3323 	struct ib_device *dev_ops = ibdev;
3324 
3325 	dev_ops->create_ah = irdma_create_ah;
3326 	dev_ops->destroy_ah = irdma_destroy_ah;
3327 	dev_ops->get_link_layer = irdma_get_link_layer;
3328 	dev_ops->get_port_immutable = irdma_roce_port_immutable;
3329 	dev_ops->modify_qp = irdma_modify_qp_roce;
3330 	dev_ops->query_ah = irdma_query_ah;
3331 	dev_ops->query_gid = irdma_query_gid_roce;
3332 	dev_ops->query_pkey = irdma_query_pkey;
3333 	ibdev->add_gid = irdma_add_gid;
3334 	ibdev->del_gid = irdma_del_gid;
3335 }
3336 
3337 static void
3338 irdma_set_device_iw_ops(struct ib_device *ibdev)
3339 {
3340 	struct ib_device *dev_ops = ibdev;
3341 
3342 	ibdev->uverbs_cmd_mask |=
3343 	    (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
3344 	    (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
3345 
3346 	dev_ops->create_ah = irdma_create_ah_stub;
3347 	dev_ops->destroy_ah = irdma_destroy_ah_stub;
3348 	dev_ops->get_port_immutable = irdma_iw_port_immutable;
3349 	dev_ops->modify_qp = irdma_modify_qp;
3350 	dev_ops->query_gid = irdma_query_gid;
3351 	dev_ops->query_pkey = irdma_iw_query_pkey;
3352 }
3353 
/**
 * irdma_set_device_gen1_ops - hook for GEN_1-only verbs
 * @ibdev: ib device to populate
 *
 * Intentionally empty: no callbacks are installed here.
 */
static inline void
irdma_set_device_gen1_ops(struct ib_device *ibdev)
{
	/* Nothing to install. */
}
3358 
3359 /**
3360  * irdma_init_roce_device - initialization of roce rdma device
3361  * @iwdev: irdma device
3362  */
3363 static void
3364 irdma_init_roce_device(struct irdma_device *iwdev)
3365 {
3366 	kc_set_roce_uverbs_cmd_mask(iwdev);
3367 	iwdev->ibdev.node_type = RDMA_NODE_IB_CA;
3368 	addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
3369 			    if_getlladdr(iwdev->netdev));
3370 	irdma_set_device_roce_ops(&iwdev->ibdev);
3371 	if (iwdev->rf->rdma_ver == IRDMA_GEN_2)
3372 		irdma_set_device_mcast_ops(&iwdev->ibdev);
3373 }
3374 
3375 /**
3376  * irdma_init_iw_device - initialization of iwarp rdma device
3377  * @iwdev: irdma device
3378  */
3379 static int
3380 irdma_init_iw_device(struct irdma_device *iwdev)
3381 {
3382 	if_t netdev = iwdev->netdev;
3383 
3384 	iwdev->ibdev.node_type = RDMA_NODE_RNIC;
3385 	addrconf_addr_eui48((u8 *)&iwdev->ibdev.node_guid,
3386 			    if_getlladdr(netdev));
3387 	iwdev->ibdev.iwcm = kzalloc(sizeof(*iwdev->ibdev.iwcm), GFP_KERNEL);
3388 	if (!iwdev->ibdev.iwcm)
3389 		return -ENOMEM;
3390 
3391 	iwdev->ibdev.iwcm->add_ref = irdma_qp_add_ref;
3392 	iwdev->ibdev.iwcm->rem_ref = irdma_qp_rem_ref;
3393 	iwdev->ibdev.iwcm->get_qp = irdma_get_qp;
3394 	iwdev->ibdev.iwcm->connect = irdma_connect;
3395 	iwdev->ibdev.iwcm->accept = irdma_accept;
3396 	iwdev->ibdev.iwcm->reject = irdma_reject;
3397 	iwdev->ibdev.iwcm->create_listen = irdma_create_listen;
3398 	iwdev->ibdev.iwcm->destroy_listen = irdma_destroy_listen;
3399 	memcpy(iwdev->ibdev.iwcm->ifname, if_name(netdev),
3400 	       sizeof(iwdev->ibdev.iwcm->ifname));
3401 	irdma_set_device_iw_ops(&iwdev->ibdev);
3402 
3403 	return 0;
3404 }
3405 
3406 /**
3407  * irdma_init_rdma_device - initialization of rdma device
3408  * @iwdev: irdma device
3409  */
3410 static int
3411 irdma_init_rdma_device(struct irdma_device *iwdev)
3412 {
3413 	int ret;
3414 
3415 	iwdev->ibdev.owner = THIS_MODULE;
3416 	iwdev->ibdev.uverbs_abi_ver = IRDMA_ABI_VER;
3417 	kc_set_rdma_uverbs_cmd_mask(iwdev);
3418 
3419 	if (iwdev->roce_mode) {
3420 		irdma_init_roce_device(iwdev);
3421 	} else {
3422 		ret = irdma_init_iw_device(iwdev);
3423 		if (ret)
3424 			return ret;
3425 	}
3426 
3427 	iwdev->ibdev.phys_port_cnt = 1;
3428 	iwdev->ibdev.num_comp_vectors = iwdev->rf->ceqs_count;
3429 	iwdev->ibdev.dev.parent = iwdev->rf->dev_ctx.dev;
3430 	set_ibdev_dma_device(iwdev->ibdev, &iwdev->rf->pcidev->dev);
3431 	irdma_set_device_ops(&iwdev->ibdev);
3432 	if (iwdev->rf->rdma_ver == IRDMA_GEN_1)
3433 		irdma_set_device_gen1_ops(&iwdev->ibdev);
3434 
3435 	return 0;
3436 }
3437 
3438 /**
3439  * irdma_port_ibevent - indicate port event
3440  * @iwdev: irdma device
3441  */
3442 void
3443 irdma_port_ibevent(struct irdma_device *iwdev)
3444 {
3445 	struct ib_event event;
3446 
3447 	event.device = &iwdev->ibdev;
3448 	event.element.port_num = 1;
3449 	event.event =
3450 	    iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3451 	ib_dispatch_event(&event);
3452 }
3453 
3454 /**
3455  * irdma_ib_unregister_device - unregister rdma device from IB
3456  * core
3457  * @iwdev: irdma device
3458  */
3459 void
3460 irdma_ib_unregister_device(struct irdma_device *iwdev)
3461 {
3462 	iwdev->iw_status = 0;
3463 	irdma_port_ibevent(iwdev);
3464 	ib_unregister_device(&iwdev->ibdev);
3465 	dev_put(iwdev->netdev);
3466 	kfree(iwdev->ibdev.iwcm);
3467 	iwdev->ibdev.iwcm = NULL;
3468 }
3469 
3470 /**
3471  * irdma_ib_register_device - register irdma device to IB core
3472  * @iwdev: irdma device
3473  */
3474 int
3475 irdma_ib_register_device(struct irdma_device *iwdev)
3476 {
3477 	int ret;
3478 
3479 	ret = irdma_init_rdma_device(iwdev);
3480 	if (ret)
3481 		return ret;
3482 
3483 	dev_hold(iwdev->netdev);
3484 	sprintf(iwdev->ibdev.name, "irdma-%s", if_name(iwdev->netdev));
3485 	ret = ib_register_device(&iwdev->ibdev, NULL);
3486 	if (ret)
3487 		goto error;
3488 
3489 	iwdev->iw_status = 1;
3490 	irdma_port_ibevent(iwdev);
3491 
3492 	return 0;
3493 
3494 error:
3495 	kfree(iwdev->ibdev.iwcm);
3496 	iwdev->ibdev.iwcm = NULL;
3497 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_VERBS, "Register RDMA device fail\n");
3498 
3499 	return ret;
3500 }
3501