xref: /linux/drivers/infiniband/hw/erdma/erdma_verbs.c (revision 981368e1440b76f68b1ac8f5fb14e739f80ecc4e)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
8 /* Copyright (c) 2008-2019, IBM Corporation */
9 
10 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
11 
12 #include <linux/vmalloc.h>
13 #include <net/addrconf.h>
14 #include <rdma/erdma-abi.h>
15 #include <rdma/ib_umem.h>
16 #include <rdma/uverbs_ioctl.h>
17 
18 #include "erdma.h"
19 #include "erdma_cm.h"
20 #include "erdma_verbs.h"
21 
22 static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
23 				      u64 *addr0, u64 *addr1)
24 {
25 	struct erdma_mtt *mtt = mem->mtt;
26 
27 	if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
28 		*addr0 = mtt->buf_dma;
29 		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
30 				   ERDMA_MR_MTT_1LEVEL);
31 	} else {
32 		*addr0 = mtt->buf[0];
33 		memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
34 		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
35 				   ERDMA_MR_MTT_0LEVEL);
36 	}
37 }
38 
39 static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
40 {
41 	struct erdma_dev *dev = to_edev(qp->ibqp.device);
42 	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
43 	struct erdma_cmdq_create_qp_req req;
44 	struct erdma_uqp *user_qp;
45 	u64 resp0, resp1;
46 	int err;
47 
48 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
49 				CMDQ_OPCODE_CREATE_QP);
50 
51 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
52 			      ilog2(qp->attrs.sq_size)) |
53 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
54 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
55 			      ilog2(qp->attrs.rq_size)) |
56 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
57 
58 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
59 		u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
60 
61 		req.sq_cqn_mtt_cfg =
62 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
63 				   pgsz_range) |
64 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
65 		req.rq_cqn_mtt_cfg =
66 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
67 				   pgsz_range) |
68 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
69 
70 		req.sq_mtt_cfg =
71 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
72 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
73 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
74 				   ERDMA_MR_MTT_0LEVEL);
75 		req.rq_mtt_cfg = req.sq_mtt_cfg;
76 
77 		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
78 		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
79 		req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
80 					  (qp->attrs.sq_size << SQEBB_SHIFT);
81 		req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
82 					  (qp->attrs.rq_size << RQE_SHIFT);
83 	} else {
84 		user_qp = &qp->user_qp;
85 		req.sq_cqn_mtt_cfg = FIELD_PREP(
86 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
87 			ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
88 		req.sq_cqn_mtt_cfg |=
89 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
90 
91 		req.rq_cqn_mtt_cfg = FIELD_PREP(
92 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
93 			ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
94 		req.rq_cqn_mtt_cfg |=
95 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
96 
97 		req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
98 		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
99 					     user_qp->sq_mem.mtt_nents);
100 
101 		req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
102 		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
103 					     user_qp->rq_mem.mtt_nents);
104 
105 		assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
106 					  &req.sq_buf_addr, req.sq_mtt_entry);
107 		assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
108 					  &req.rq_buf_addr, req.rq_mtt_entry);
109 
110 		req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
111 		req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
112 
113 		if (uctx->ext_db.enable) {
114 			req.sq_cqn_mtt_cfg |=
115 				FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
116 			req.db_cfg =
117 				FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
118 					   uctx->ext_db.sdb_off) |
119 				FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
120 					   uctx->ext_db.rdb_off);
121 		}
122 	}
123 
124 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
125 				  &resp1);
126 	if (!err)
127 		qp->attrs.cookie =
128 			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
129 
130 	return err;
131 }
132 
133 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
134 {
135 	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
136 	struct erdma_cmdq_reg_mr_req req;
137 	u32 mtt_level;
138 
139 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
140 
141 	if (mr->type == ERDMA_MR_TYPE_FRMR ||
142 	    mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
143 		if (mr->mem.mtt->continuous) {
144 			req.phy_addr[0] = mr->mem.mtt->buf_dma;
145 			mtt_level = ERDMA_MR_MTT_1LEVEL;
146 		} else {
147 			req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
148 			mtt_level = mr->mem.mtt->level;
149 		}
150 	} else {
151 		memcpy(req.phy_addr, mr->mem.mtt->buf,
152 		       MTT_SIZE(mr->mem.page_cnt));
153 		mtt_level = ERDMA_MR_MTT_0LEVEL;
154 	}
155 
156 	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
157 		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
158 		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
159 	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
160 		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
161 		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
162 	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
163 			      ilog2(mr->mem.page_size)) |
164 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
165 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
166 
167 	if (mr->type == ERDMA_MR_TYPE_DMA)
168 		goto post_cmd;
169 
170 	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
171 		req.start_va = mr->mem.va;
172 		req.size = mr->mem.len;
173 	}
174 
175 	if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
176 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
177 		req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
178 				       PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
179 		req.size_h = upper_32_bits(mr->mem.len);
180 		req.mtt_cnt_h = mr->mem.page_cnt >> 20;
181 	}
182 
183 post_cmd:
184 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
185 }
186 
187 static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
188 {
189 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
190 	struct erdma_cmdq_create_cq_req req;
191 	struct erdma_mem *mem;
192 	u32 page_size;
193 
194 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
195 				CMDQ_OPCODE_CREATE_CQ);
196 
197 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
198 		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
199 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
200 
201 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
202 		page_size = SZ_32M;
203 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
204 				       ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
205 		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
206 		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
207 
208 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
209 			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
210 				       ERDMA_MR_MTT_0LEVEL);
211 
212 		req.first_page_offset = 0;
213 		req.cq_db_info_addr =
214 			cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
215 	} else {
216 		mem = &cq->user_cq.qbuf_mem;
217 		req.cfg0 |=
218 			FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
219 				   ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
220 		if (mem->mtt_nents == 1) {
221 			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
222 			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
223 			req.cfg1 |=
224 				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
225 					   ERDMA_MR_MTT_0LEVEL);
226 		} else {
227 			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
228 			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
229 			req.cfg1 |=
230 				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
231 					   ERDMA_MR_MTT_1LEVEL);
232 		}
233 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
234 				       mem->mtt_nents);
235 
236 		req.first_page_offset = mem->page_offset;
237 		req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
238 
239 		if (uctx->ext_db.enable) {
240 			req.cfg1 |= FIELD_PREP(
241 				ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
242 			req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
243 					      uctx->ext_db.cdb_off);
244 		}
245 	}
246 
247 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
248 }
249 
250 static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
251 {
252 	int idx;
253 	unsigned long flags;
254 
255 	spin_lock_irqsave(&res_cb->lock, flags);
256 	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
257 				 res_cb->next_alloc_idx);
258 	if (idx == res_cb->max_cap) {
259 		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
260 		if (idx == res_cb->max_cap) {
261 			res_cb->next_alloc_idx = 1;
262 			spin_unlock_irqrestore(&res_cb->lock, flags);
263 			return -ENOSPC;
264 		}
265 	}
266 
267 	set_bit(idx, res_cb->bitmap);
268 	res_cb->next_alloc_idx = idx + 1;
269 	spin_unlock_irqrestore(&res_cb->lock, flags);
270 
271 	return idx;
272 }
273 
274 static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
275 {
276 	unsigned long flags;
277 	u32 used;
278 
279 	spin_lock_irqsave(&res_cb->lock, flags);
280 	used = __test_and_clear_bit(idx, res_cb->bitmap);
281 	spin_unlock_irqrestore(&res_cb->lock, flags);
282 	WARN_ON(!used);
283 }
284 
285 static struct rdma_user_mmap_entry *
286 erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
287 			     u32 size, u8 mmap_flag, u64 *mmap_offset)
288 {
289 	struct erdma_user_mmap_entry *entry =
290 		kzalloc(sizeof(*entry), GFP_KERNEL);
291 	int ret;
292 
293 	if (!entry)
294 		return NULL;
295 
296 	entry->address = (u64)address;
297 	entry->mmap_flag = mmap_flag;
298 
299 	size = PAGE_ALIGN(size);
300 
301 	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
302 					  size);
303 	if (ret) {
304 		kfree(entry);
305 		return NULL;
306 	}
307 
308 	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
309 
310 	return &entry->rdma_entry;
311 }
312 
313 int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
314 		       struct ib_udata *unused)
315 {
316 	struct erdma_dev *dev = to_edev(ibdev);
317 
318 	memset(attr, 0, sizeof(*attr));
319 
320 	attr->max_mr_size = dev->attrs.max_mr_size;
321 	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
322 	attr->vendor_part_id = dev->pdev->device;
323 	attr->hw_ver = dev->pdev->revision;
324 	attr->max_qp = dev->attrs.max_qp - 1;
325 	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
326 	attr->max_qp_rd_atom = dev->attrs.max_ord;
327 	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
328 	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
329 	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
330 	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
331 	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
332 	attr->max_send_sge = dev->attrs.max_send_sge;
333 	attr->max_recv_sge = dev->attrs.max_recv_sge;
334 	attr->max_sge_rd = dev->attrs.max_sge_rd;
335 	attr->max_cq = dev->attrs.max_cq - 1;
336 	attr->max_cqe = dev->attrs.max_cqe;
337 	attr->max_mr = dev->attrs.max_mr;
338 	attr->max_pd = dev->attrs.max_pd;
339 	attr->max_mw = dev->attrs.max_mw;
340 	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
341 	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
342 
343 	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
344 		attr->atomic_cap = IB_ATOMIC_GLOB;
345 
346 	attr->fw_ver = dev->attrs.fw_version;
347 
348 	if (dev->netdev)
349 		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
350 				    dev->netdev->dev_addr);
351 
352 	return 0;
353 }
354 
355 int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
356 		    union ib_gid *gid)
357 {
358 	struct erdma_dev *dev = to_edev(ibdev);
359 
360 	memset(gid, 0, sizeof(*gid));
361 	ether_addr_copy(gid->raw, dev->attrs.peer_addr);
362 
363 	return 0;
364 }
365 
366 int erdma_query_port(struct ib_device *ibdev, u32 port,
367 		     struct ib_port_attr *attr)
368 {
369 	struct erdma_dev *dev = to_edev(ibdev);
370 	struct net_device *ndev = dev->netdev;
371 
372 	memset(attr, 0, sizeof(*attr));
373 
374 	attr->gid_tbl_len = 1;
375 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
376 	attr->max_msg_sz = -1;
377 
378 	if (!ndev)
379 		goto out;
380 
381 	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
382 	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
383 	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
384 	if (netif_running(ndev) && netif_carrier_ok(ndev))
385 		dev->state = IB_PORT_ACTIVE;
386 	else
387 		dev->state = IB_PORT_DOWN;
388 	attr->state = dev->state;
389 
390 out:
391 	if (dev->state == IB_PORT_ACTIVE)
392 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
393 	else
394 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
395 
396 	return 0;
397 }
398 
399 int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
400 			     struct ib_port_immutable *port_immutable)
401 {
402 	port_immutable->gid_tbl_len = 1;
403 	port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
404 
405 	return 0;
406 }
407 
408 int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
409 {
410 	struct erdma_pd *pd = to_epd(ibpd);
411 	struct erdma_dev *dev = to_edev(ibpd->device);
412 	int pdn;
413 
414 	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
415 	if (pdn < 0)
416 		return pdn;
417 
418 	pd->pdn = pdn;
419 
420 	return 0;
421 }
422 
423 int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
424 {
425 	struct erdma_pd *pd = to_epd(ibpd);
426 	struct erdma_dev *dev = to_edev(ibpd->device);
427 
428 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
429 
430 	return 0;
431 }
432 
433 static void erdma_flush_worker(struct work_struct *work)
434 {
435 	struct delayed_work *dwork = to_delayed_work(work);
436 	struct erdma_qp *qp =
437 		container_of(dwork, struct erdma_qp, reflush_dwork);
438 	struct erdma_cmdq_reflush_req req;
439 
440 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
441 				CMDQ_OPCODE_REFLUSH);
442 	req.qpn = QP_ID(qp);
443 	req.sq_pi = qp->kern_qp.sq_pi;
444 	req.rq_pi = qp->kern_qp.rq_pi;
445 	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
446 }
447 
448 static int erdma_qp_validate_cap(struct erdma_dev *dev,
449 				 struct ib_qp_init_attr *attrs)
450 {
451 	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
452 	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
453 	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
454 	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
455 	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
456 	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
457 		return -EINVAL;
458 	}
459 
460 	return 0;
461 }
462 
463 static int erdma_qp_validate_attr(struct erdma_dev *dev,
464 				  struct ib_qp_init_attr *attrs)
465 {
466 	if (attrs->qp_type != IB_QPT_RC)
467 		return -EOPNOTSUPP;
468 
469 	if (attrs->srq)
470 		return -EOPNOTSUPP;
471 
472 	if (!attrs->send_cq || !attrs->recv_cq)
473 		return -EOPNOTSUPP;
474 
475 	return 0;
476 }
477 
478 static void free_kernel_qp(struct erdma_qp *qp)
479 {
480 	struct erdma_dev *dev = qp->dev;
481 
482 	vfree(qp->kern_qp.swr_tbl);
483 	vfree(qp->kern_qp.rwr_tbl);
484 
485 	if (qp->kern_qp.sq_buf)
486 		dma_free_coherent(
487 			&dev->pdev->dev,
488 			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
489 			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
490 
491 	if (qp->kern_qp.rq_buf)
492 		dma_free_coherent(
493 			&dev->pdev->dev,
494 			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
495 			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
496 }
497 
498 static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
499 			  struct ib_qp_init_attr *attrs)
500 {
501 	struct erdma_kqp *kqp = &qp->kern_qp;
502 	int size;
503 
504 	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
505 		kqp->sig_all = 1;
506 
507 	kqp->sq_pi = 0;
508 	kqp->sq_ci = 0;
509 	kqp->rq_pi = 0;
510 	kqp->rq_ci = 0;
511 	kqp->hw_sq_db =
512 		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
513 	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
514 
515 	kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
516 	kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
517 	if (!kqp->swr_tbl || !kqp->rwr_tbl)
518 		goto err_out;
519 
520 	size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
521 	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
522 					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
523 	if (!kqp->sq_buf)
524 		goto err_out;
525 
526 	size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
527 	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
528 					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
529 	if (!kqp->rq_buf)
530 		goto err_out;
531 
532 	kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
533 	kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
534 
535 	return 0;
536 
537 err_out:
538 	free_kernel_qp(qp);
539 	return -ENOMEM;
540 }
541 
542 static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
543 {
544 	struct erdma_mtt *mtt = mem->mtt;
545 	struct ib_block_iter biter;
546 	u32 idx = 0;
547 
548 	while (mtt->low_level)
549 		mtt = mtt->low_level;
550 
551 	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
552 		mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
553 }
554 
555 static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
556 					       size_t size)
557 {
558 	struct erdma_mtt *mtt;
559 
560 	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
561 	if (!mtt)
562 		return ERR_PTR(-ENOMEM);
563 
564 	mtt->size = size;
565 	mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
566 	if (!mtt->buf)
567 		goto err_free_mtt;
568 
569 	mtt->continuous = true;
570 	mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
571 				      DMA_TO_DEVICE);
572 	if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
573 		goto err_free_mtt_buf;
574 
575 	return mtt;
576 
577 err_free_mtt_buf:
578 	kfree(mtt->buf);
579 
580 err_free_mtt:
581 	kfree(mtt);
582 
583 	return ERR_PTR(-ENOMEM);
584 }
585 
586 static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
587 				     struct erdma_mtt *mtt)
588 {
589 	dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE);
590 	vfree(mtt->sglist);
591 }
592 
593 static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
594 				      struct erdma_mtt *mtt)
595 {
596 	erdma_destroy_mtt_buf_sg(dev, mtt);
597 	vfree(mtt->buf);
598 	kfree(mtt);
599 }
600 
601 static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
602 				  struct erdma_mtt *low_mtt)
603 {
604 	struct scatterlist *sg;
605 	u32 idx = 0, i;
606 
607 	for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
608 		mtt->buf[idx++] = sg_dma_address(sg);
609 }
610 
611 static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
612 {
613 	struct scatterlist *sglist;
614 	void *buf = mtt->buf;
615 	u32 npages, i, nsg;
616 	struct page *pg;
617 
618 	/* Failed if buf is not page aligned */
619 	if ((uintptr_t)buf & ~PAGE_MASK)
620 		return -EINVAL;
621 
622 	npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
623 	sglist = vzalloc(npages * sizeof(*sglist));
624 	if (!sglist)
625 		return -ENOMEM;
626 
627 	sg_init_table(sglist, npages);
628 	for (i = 0; i < npages; i++) {
629 		pg = vmalloc_to_page(buf);
630 		if (!pg)
631 			goto err;
632 		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
633 		buf += PAGE_SIZE;
634 	}
635 
636 	nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
637 	if (!nsg)
638 		goto err;
639 
640 	mtt->sglist = sglist;
641 	mtt->nsg = nsg;
642 
643 	return 0;
644 err:
645 	vfree(sglist);
646 
647 	return -ENOMEM;
648 }
649 
650 static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
651 						  size_t size)
652 {
653 	struct erdma_mtt *mtt;
654 	int ret = -ENOMEM;
655 
656 	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
657 	if (!mtt)
658 		return NULL;
659 
660 	mtt->size = ALIGN(size, PAGE_SIZE);
661 	mtt->buf = vzalloc(mtt->size);
662 	mtt->continuous = false;
663 	if (!mtt->buf)
664 		goto err_free_mtt;
665 
666 	ret = erdma_create_mtt_buf_sg(dev, mtt);
667 	if (ret)
668 		goto err_free_mtt_buf;
669 
670 	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
671 		  mtt->size, mtt->nsg);
672 
673 	return mtt;
674 
675 err_free_mtt_buf:
676 	vfree(mtt->buf);
677 
678 err_free_mtt:
679 	kfree(mtt);
680 
681 	return ERR_PTR(ret);
682 }
683 
684 static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
685 					  bool force_continuous)
686 {
687 	struct erdma_mtt *mtt, *tmp_mtt;
688 	int ret, level = 0;
689 
690 	ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
691 		  force_continuous);
692 
693 	if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
694 		force_continuous = true;
695 
696 	if (force_continuous)
697 		return erdma_create_cont_mtt(dev, size);
698 
699 	mtt = erdma_create_scatter_mtt(dev, size);
700 	if (IS_ERR(mtt))
701 		return mtt;
702 	level = 1;
703 
704 	/* convergence the mtt table. */
705 	while (mtt->nsg != 1 && level <= 3) {
706 		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
707 		if (IS_ERR(tmp_mtt)) {
708 			ret = PTR_ERR(tmp_mtt);
709 			goto err_free_mtt;
710 		}
711 		erdma_init_middle_mtt(tmp_mtt, mtt);
712 		tmp_mtt->low_level = mtt;
713 		mtt = tmp_mtt;
714 		level++;
715 	}
716 
717 	if (level > 3) {
718 		ret = -ENOMEM;
719 		goto err_free_mtt;
720 	}
721 
722 	mtt->level = level;
723 	ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
724 		  mtt->level, mtt->sglist[0].dma_address);
725 
726 	return mtt;
727 err_free_mtt:
728 	while (mtt) {
729 		tmp_mtt = mtt->low_level;
730 		erdma_destroy_scatter_mtt(dev, mtt);
731 		mtt = tmp_mtt;
732 	}
733 
734 	return ERR_PTR(ret);
735 }
736 
737 static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
738 {
739 	struct erdma_mtt *tmp_mtt;
740 
741 	if (mtt->continuous) {
742 		dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
743 				 DMA_TO_DEVICE);
744 		kfree(mtt->buf);
745 		kfree(mtt);
746 	} else {
747 		while (mtt) {
748 			tmp_mtt = mtt->low_level;
749 			erdma_destroy_scatter_mtt(dev, mtt);
750 			mtt = tmp_mtt;
751 		}
752 	}
753 }
754 
755 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
756 			   u64 start, u64 len, int access, u64 virt,
757 			   unsigned long req_page_size, bool force_continuous)
758 {
759 	int ret = 0;
760 
761 	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
762 	if (IS_ERR(mem->umem)) {
763 		ret = PTR_ERR(mem->umem);
764 		mem->umem = NULL;
765 		return ret;
766 	}
767 
768 	mem->va = virt;
769 	mem->len = len;
770 	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
771 	mem->page_offset = start & (mem->page_size - 1);
772 	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
773 	mem->page_cnt = mem->mtt_nents;
774 	mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
775 				    force_continuous);
776 	if (IS_ERR(mem->mtt)) {
777 		ret = PTR_ERR(mem->mtt);
778 		goto error_ret;
779 	}
780 
781 	erdma_fill_bottom_mtt(dev, mem);
782 
783 	return 0;
784 
785 error_ret:
786 	if (mem->umem) {
787 		ib_umem_release(mem->umem);
788 		mem->umem = NULL;
789 	}
790 
791 	return ret;
792 }
793 
794 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
795 {
796 	if (mem->mtt)
797 		erdma_destroy_mtt(dev, mem->mtt);
798 
799 	if (mem->umem) {
800 		ib_umem_release(mem->umem);
801 		mem->umem = NULL;
802 	}
803 }
804 
805 static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
806 				    u64 dbrecords_va,
807 				    struct erdma_user_dbrecords_page **dbr_page,
808 				    dma_addr_t *dma_addr)
809 {
810 	struct erdma_user_dbrecords_page *page = NULL;
811 	int rv = 0;
812 
813 	mutex_lock(&ctx->dbrecords_page_mutex);
814 
815 	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
816 		if (page->va == (dbrecords_va & PAGE_MASK))
817 			goto found;
818 
819 	page = kmalloc(sizeof(*page), GFP_KERNEL);
820 	if (!page) {
821 		rv = -ENOMEM;
822 		goto out;
823 	}
824 
825 	page->va = (dbrecords_va & PAGE_MASK);
826 	page->refcnt = 0;
827 
828 	page->umem = ib_umem_get(ctx->ibucontext.device,
829 				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
830 	if (IS_ERR(page->umem)) {
831 		rv = PTR_ERR(page->umem);
832 		kfree(page);
833 		goto out;
834 	}
835 
836 	list_add(&page->list, &ctx->dbrecords_page_list);
837 
838 found:
839 	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
840 		    (dbrecords_va & ~PAGE_MASK);
841 	*dbr_page = page;
842 	page->refcnt++;
843 
844 out:
845 	mutex_unlock(&ctx->dbrecords_page_mutex);
846 	return rv;
847 }
848 
849 static void
850 erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
851 			   struct erdma_user_dbrecords_page **dbr_page)
852 {
853 	if (!ctx || !(*dbr_page))
854 		return;
855 
856 	mutex_lock(&ctx->dbrecords_page_mutex);
857 	if (--(*dbr_page)->refcnt == 0) {
858 		list_del(&(*dbr_page)->list);
859 		ib_umem_release((*dbr_page)->umem);
860 		kfree(*dbr_page);
861 	}
862 
863 	*dbr_page = NULL;
864 	mutex_unlock(&ctx->dbrecords_page_mutex);
865 }
866 
867 static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
868 			u64 va, u32 len, u64 db_info_va)
869 {
870 	dma_addr_t db_info_dma_addr;
871 	u32 rq_offset;
872 	int ret;
873 
874 	if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
875 		   qp->attrs.rq_size * RQE_SIZE))
876 		return -EINVAL;
877 
878 	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
879 			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
880 			      (SZ_1M - SZ_4K), true);
881 	if (ret)
882 		return ret;
883 
884 	rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
885 	qp->user_qp.rq_offset = rq_offset;
886 
887 	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
888 			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
889 			      (SZ_1M - SZ_4K), true);
890 	if (ret)
891 		goto put_sq_mtt;
892 
893 	ret = erdma_map_user_dbrecords(uctx, db_info_va,
894 				       &qp->user_qp.user_dbr_page,
895 				       &db_info_dma_addr);
896 	if (ret)
897 		goto put_rq_mtt;
898 
899 	qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
900 	qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
901 
902 	return 0;
903 
904 put_rq_mtt:
905 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
906 
907 put_sq_mtt:
908 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
909 
910 	return ret;
911 }
912 
913 static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
914 {
915 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
916 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
917 	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
918 }
919 
920 int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
921 		    struct ib_udata *udata)
922 {
923 	struct erdma_qp *qp = to_eqp(ibqp);
924 	struct erdma_dev *dev = to_edev(ibqp->device);
925 	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
926 		udata, struct erdma_ucontext, ibucontext);
927 	struct erdma_ureq_create_qp ureq;
928 	struct erdma_uresp_create_qp uresp;
929 	int ret;
930 
931 	ret = erdma_qp_validate_cap(dev, attrs);
932 	if (ret)
933 		goto err_out;
934 
935 	ret = erdma_qp_validate_attr(dev, attrs);
936 	if (ret)
937 		goto err_out;
938 
939 	qp->scq = to_ecq(attrs->send_cq);
940 	qp->rcq = to_ecq(attrs->recv_cq);
941 	qp->dev = dev;
942 	qp->attrs.cc = dev->attrs.cc;
943 
944 	init_rwsem(&qp->state_lock);
945 	kref_init(&qp->ref);
946 	init_completion(&qp->safe_free);
947 
948 	ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
949 			      XA_LIMIT(1, dev->attrs.max_qp - 1),
950 			      &dev->next_alloc_qpn, GFP_KERNEL);
951 	if (ret < 0) {
952 		ret = -ENOMEM;
953 		goto err_out;
954 	}
955 
956 	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
957 					       ERDMA_MAX_WQEBB_PER_SQE);
958 	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
959 
960 	if (uctx) {
961 		ret = ib_copy_from_udata(&ureq, udata,
962 					 min(sizeof(ureq), udata->inlen));
963 		if (ret)
964 			goto err_out_xa;
965 
966 		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
967 				   ureq.db_record_va);
968 		if (ret)
969 			goto err_out_xa;
970 
971 		memset(&uresp, 0, sizeof(uresp));
972 
973 		uresp.num_sqe = qp->attrs.sq_size;
974 		uresp.num_rqe = qp->attrs.rq_size;
975 		uresp.qp_id = QP_ID(qp);
976 		uresp.rq_offset = qp->user_qp.rq_offset;
977 
978 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
979 		if (ret)
980 			goto err_out_cmd;
981 	} else {
982 		init_kernel_qp(dev, qp, attrs);
983 	}
984 
985 	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
986 	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
987 	qp->attrs.state = ERDMA_QP_STATE_IDLE;
988 	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
989 
990 	ret = create_qp_cmd(uctx, qp);
991 	if (ret)
992 		goto err_out_cmd;
993 
994 	spin_lock_init(&qp->lock);
995 
996 	return 0;
997 
998 err_out_cmd:
999 	if (uctx)
1000 		free_user_qp(qp, uctx);
1001 	else
1002 		free_kernel_qp(qp);
1003 err_out_xa:
1004 	xa_erase(&dev->qp_xa, QP_ID(qp));
1005 err_out:
1006 	return ret;
1007 }
1008 
1009 static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
1010 {
1011 	int stag_idx;
1012 
1013 	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
1014 	if (stag_idx < 0)
1015 		return stag_idx;
1016 
1017 	/* For now, we always let key field be zero. */
1018 	*stag = (stag_idx << 8);
1019 
1020 	return 0;
1021 }
1022 
1023 struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
1024 {
1025 	struct erdma_dev *dev = to_edev(ibpd->device);
1026 	struct erdma_mr *mr;
1027 	u32 stag;
1028 	int ret;
1029 
1030 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1031 	if (!mr)
1032 		return ERR_PTR(-ENOMEM);
1033 
1034 	ret = erdma_create_stag(dev, &stag);
1035 	if (ret)
1036 		goto out_free;
1037 
1038 	mr->type = ERDMA_MR_TYPE_DMA;
1039 
1040 	mr->ibmr.lkey = stag;
1041 	mr->ibmr.rkey = stag;
1042 	mr->ibmr.pd = ibpd;
1043 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
1044 	ret = regmr_cmd(dev, mr);
1045 	if (ret)
1046 		goto out_remove_stag;
1047 
1048 	return &mr->ibmr;
1049 
1050 out_remove_stag:
1051 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
1052 		       mr->ibmr.lkey >> 8);
1053 
1054 out_free:
1055 	kfree(mr);
1056 
1057 	return ERR_PTR(ret);
1058 }
1059 
1060 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
1061 				u32 max_num_sg)
1062 {
1063 	struct erdma_mr *mr;
1064 	struct erdma_dev *dev = to_edev(ibpd->device);
1065 	int ret;
1066 	u32 stag;
1067 
1068 	if (mr_type != IB_MR_TYPE_MEM_REG)
1069 		return ERR_PTR(-EOPNOTSUPP);
1070 
1071 	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
1072 		return ERR_PTR(-EINVAL);
1073 
1074 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1075 	if (!mr)
1076 		return ERR_PTR(-ENOMEM);
1077 
1078 	ret = erdma_create_stag(dev, &stag);
1079 	if (ret)
1080 		goto out_free;
1081 
1082 	mr->type = ERDMA_MR_TYPE_FRMR;
1083 
1084 	mr->ibmr.lkey = stag;
1085 	mr->ibmr.rkey = stag;
1086 	mr->ibmr.pd = ibpd;
1087 	/* update it in FRMR. */
1088 	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
1089 		     ERDMA_MR_ACC_RW;
1090 
1091 	mr->mem.page_size = PAGE_SIZE; /* update it later. */
1092 	mr->mem.page_cnt = max_num_sg;
1093 	mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
1094 	if (IS_ERR(mr->mem.mtt)) {
1095 		ret = PTR_ERR(mr->mem.mtt);
1096 		goto out_remove_stag;
1097 	}
1098 
1099 	ret = regmr_cmd(dev, mr);
1100 	if (ret)
1101 		goto out_destroy_mtt;
1102 
1103 	return &mr->ibmr;
1104 
1105 out_destroy_mtt:
1106 	erdma_destroy_mtt(dev, mr->mem.mtt);
1107 
1108 out_remove_stag:
1109 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
1110 		       mr->ibmr.lkey >> 8);
1111 
1112 out_free:
1113 	kfree(mr);
1114 
1115 	return ERR_PTR(ret);
1116 }
1117 
1118 static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
1119 {
1120 	struct erdma_mr *mr = to_emr(ibmr);
1121 
1122 	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
1123 		return -1;
1124 
1125 	mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
1126 	mr->mem.mtt_nents++;
1127 
1128 	return 0;
1129 }
1130 
1131 int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1132 		    unsigned int *sg_offset)
1133 {
1134 	struct erdma_mr *mr = to_emr(ibmr);
1135 	int num;
1136 
1137 	mr->mem.mtt_nents = 0;
1138 
1139 	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
1140 			     erdma_set_page);
1141 
1142 	return num;
1143 }
1144 
1145 struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
1146 				u64 virt, int access, struct ib_udata *udata)
1147 {
1148 	struct erdma_mr *mr = NULL;
1149 	struct erdma_dev *dev = to_edev(ibpd->device);
1150 	u32 stag;
1151 	int ret;
1152 
1153 	if (!len || len > dev->attrs.max_mr_size)
1154 		return ERR_PTR(-EINVAL);
1155 
1156 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1157 	if (!mr)
1158 		return ERR_PTR(-ENOMEM);
1159 
1160 	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
1161 			      SZ_2G - SZ_4K, false);
1162 	if (ret)
1163 		goto err_out_free;
1164 
1165 	ret = erdma_create_stag(dev, &stag);
1166 	if (ret)
1167 		goto err_out_put_mtt;
1168 
1169 	mr->ibmr.lkey = mr->ibmr.rkey = stag;
1170 	mr->ibmr.pd = ibpd;
1171 	mr->mem.va = virt;
1172 	mr->mem.len = len;
1173 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
1174 	mr->valid = 1;
1175 	mr->type = ERDMA_MR_TYPE_NORMAL;
1176 
1177 	ret = regmr_cmd(dev, mr);
1178 	if (ret)
1179 		goto err_out_mr;
1180 
1181 	return &mr->ibmr;
1182 
1183 err_out_mr:
1184 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
1185 		       mr->ibmr.lkey >> 8);
1186 
1187 err_out_put_mtt:
1188 	put_mtt_entries(dev, &mr->mem);
1189 
1190 err_out_free:
1191 	kfree(mr);
1192 
1193 	return ERR_PTR(ret);
1194 }
1195 
1196 int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1197 {
1198 	struct erdma_mr *mr;
1199 	struct erdma_dev *dev = to_edev(ibmr->device);
1200 	struct erdma_cmdq_dereg_mr_req req;
1201 	int ret;
1202 
1203 	mr = to_emr(ibmr);
1204 
1205 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1206 				CMDQ_OPCODE_DEREG_MR);
1207 
1208 	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
1209 		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
1210 
1211 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1212 	if (ret)
1213 		return ret;
1214 
1215 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
1216 
1217 	put_mtt_entries(dev, &mr->mem);
1218 
1219 	kfree(mr);
1220 	return 0;
1221 }
1222 
1223 int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1224 {
1225 	struct erdma_cq *cq = to_ecq(ibcq);
1226 	struct erdma_dev *dev = to_edev(ibcq->device);
1227 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1228 		udata, struct erdma_ucontext, ibucontext);
1229 	int err;
1230 	struct erdma_cmdq_destroy_cq_req req;
1231 
1232 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1233 				CMDQ_OPCODE_DESTROY_CQ);
1234 	req.cqn = cq->cqn;
1235 
1236 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1237 	if (err)
1238 		return err;
1239 
1240 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
1241 		dma_free_coherent(&dev->pdev->dev,
1242 				  WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1243 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1244 	} else {
1245 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1246 		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
1247 	}
1248 
1249 	xa_erase(&dev->cq_xa, cq->cqn);
1250 
1251 	return 0;
1252 }
1253 
1254 int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1255 {
1256 	struct erdma_qp *qp = to_eqp(ibqp);
1257 	struct erdma_dev *dev = to_edev(ibqp->device);
1258 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1259 		udata, struct erdma_ucontext, ibucontext);
1260 	struct erdma_qp_attrs qp_attrs;
1261 	int err;
1262 	struct erdma_cmdq_destroy_qp_req req;
1263 
1264 	down_write(&qp->state_lock);
1265 	qp_attrs.state = ERDMA_QP_STATE_ERROR;
1266 	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
1267 	up_write(&qp->state_lock);
1268 
1269 	cancel_delayed_work_sync(&qp->reflush_dwork);
1270 
1271 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1272 				CMDQ_OPCODE_DESTROY_QP);
1273 	req.qpn = QP_ID(qp);
1274 
1275 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1276 	if (err)
1277 		return err;
1278 
1279 	erdma_qp_put(qp);
1280 	wait_for_completion(&qp->safe_free);
1281 
1282 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
1283 		vfree(qp->kern_qp.swr_tbl);
1284 		vfree(qp->kern_qp.rwr_tbl);
1285 		dma_free_coherent(
1286 			&dev->pdev->dev,
1287 			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
1288 			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
1289 		dma_free_coherent(
1290 			&dev->pdev->dev,
1291 			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
1292 			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
1293 	} else {
1294 		put_mtt_entries(dev, &qp->user_qp.sq_mem);
1295 		put_mtt_entries(dev, &qp->user_qp.rq_mem);
1296 		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
1297 	}
1298 
1299 	if (qp->cep)
1300 		erdma_cep_put(qp->cep);
1301 	xa_erase(&dev->qp_xa, QP_ID(qp));
1302 
1303 	return 0;
1304 }
1305 
/* Take a reference on the erdma QP that embeds @ibqp. */
void erdma_qp_get_ref(struct ib_qp *ibqp)
{
	struct erdma_qp *qp = to_eqp(ibqp);

	erdma_qp_get(qp);
}
1310 
/* Drop a reference on the erdma QP that embeds @ibqp. */
void erdma_qp_put_ref(struct ib_qp *ibqp)
{
	struct erdma_qp *qp = to_eqp(ibqp);

	erdma_qp_put(qp);
}
1315 
1316 int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
1317 {
1318 	struct rdma_user_mmap_entry *rdma_entry;
1319 	struct erdma_user_mmap_entry *entry;
1320 	pgprot_t prot;
1321 	int err;
1322 
1323 	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
1324 	if (!rdma_entry)
1325 		return -EINVAL;
1326 
1327 	entry = to_emmap(rdma_entry);
1328 
1329 	switch (entry->mmap_flag) {
1330 	case ERDMA_MMAP_IO_NC:
1331 		/* map doorbell. */
1332 		prot = pgprot_device(vma->vm_page_prot);
1333 		break;
1334 	default:
1335 		err = -EINVAL;
1336 		goto put_entry;
1337 	}
1338 
1339 	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
1340 				prot, rdma_entry);
1341 
1342 put_entry:
1343 	rdma_user_mmap_entry_put(rdma_entry);
1344 	return err;
1345 }
1346 
/* Free the erdma mmap entry that embeds @rdma_entry. */
void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	kfree(to_emmap(rdma_entry));
}
1353 
1354 static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
1355 			      bool ext_db_en)
1356 {
1357 	struct erdma_cmdq_ext_db_req req = {};
1358 	u64 val0, val1;
1359 	int ret;
1360 
1361 	/*
1362 	 * CAP_SYS_RAWIO is required if hardware does not support extend
1363 	 * doorbell mechanism.
1364 	 */
1365 	if (!ext_db_en && !capable(CAP_SYS_RAWIO))
1366 		return -EPERM;
1367 
1368 	if (!ext_db_en) {
1369 		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
1370 		ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
1371 		ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
1372 		return 0;
1373 	}
1374 
1375 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1376 				CMDQ_OPCODE_ALLOC_DB);
1377 
1378 	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
1379 		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
1380 		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
1381 
1382 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1);
1383 	if (ret)
1384 		return ret;
1385 
1386 	ctx->ext_db.enable = true;
1387 	ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
1388 	ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
1389 	ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);
1390 
1391 	ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
1392 	ctx->cdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
1393 	ctx->rdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);
1394 
1395 	return 0;
1396 }
1397 
1398 static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
1399 {
1400 	struct erdma_cmdq_ext_db_req req = {};
1401 	int ret;
1402 
1403 	if (!ctx->ext_db.enable)
1404 		return;
1405 
1406 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1407 				CMDQ_OPCODE_FREE_DB);
1408 
1409 	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
1410 		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
1411 		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
1412 
1413 	req.sdb_off = ctx->ext_db.sdb_off;
1414 	req.rdb_off = ctx->ext_db.rdb_off;
1415 	req.cdb_off = ctx->ext_db.cdb_off;
1416 
1417 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1418 	if (ret)
1419 		ibdev_err_ratelimited(&dev->ibdev,
1420 				      "free db resources failed %d", ret);
1421 }
1422 
1423 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
1424 {
1425 	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
1426 	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
1427 	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
1428 }
1429 
1430 int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
1431 {
1432 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1433 	struct erdma_dev *dev = to_edev(ibctx->device);
1434 	int ret;
1435 	struct erdma_uresp_alloc_ctx uresp = {};
1436 
1437 	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
1438 		ret = -ENOMEM;
1439 		goto err_out;
1440 	}
1441 
1442 	if (udata->outlen < sizeof(uresp)) {
1443 		ret = -EINVAL;
1444 		goto err_out;
1445 	}
1446 
1447 	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
1448 	mutex_init(&ctx->dbrecords_page_mutex);
1449 
1450 	ret = alloc_db_resources(dev, ctx,
1451 				 !!(dev->attrs.cap_flags &
1452 				    ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
1453 	if (ret)
1454 		goto err_out;
1455 
1456 	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
1457 		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
1458 	if (!ctx->sq_db_mmap_entry) {
1459 		ret = -ENOMEM;
1460 		goto err_free_ext_db;
1461 	}
1462 
1463 	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
1464 		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
1465 	if (!ctx->rq_db_mmap_entry) {
1466 		ret = -EINVAL;
1467 		goto err_put_mmap_entries;
1468 	}
1469 
1470 	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
1471 		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
1472 	if (!ctx->cq_db_mmap_entry) {
1473 		ret = -EINVAL;
1474 		goto err_put_mmap_entries;
1475 	}
1476 
1477 	uresp.dev_id = dev->pdev->device;
1478 
1479 	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1480 	if (ret)
1481 		goto err_put_mmap_entries;
1482 
1483 	return 0;
1484 
1485 err_put_mmap_entries:
1486 	erdma_uctx_user_mmap_entries_remove(ctx);
1487 
1488 err_free_ext_db:
1489 	free_db_resources(dev, ctx);
1490 
1491 err_out:
1492 	atomic_dec(&dev->num_ctx);
1493 	return ret;
1494 }
1495 
1496 void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
1497 {
1498 	struct erdma_dev *dev = to_edev(ibctx->device);
1499 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1500 
1501 	erdma_uctx_user_mmap_entries_remove(ctx);
1502 	free_db_resources(dev, ctx);
1503 	atomic_dec(&dev->num_ctx);
1504 }
1505 
1506 static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
1507 	[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
1508 	[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
1509 	[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
1510 	[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
1511 	[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
1512 	[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
1513 	[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
1514 };
1515 
1516 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1517 		    struct ib_udata *udata)
1518 {
1519 	struct erdma_qp_attrs new_attrs;
1520 	enum erdma_qp_attr_mask erdma_attr_mask = 0;
1521 	struct erdma_qp *qp = to_eqp(ibqp);
1522 	int ret = 0;
1523 
1524 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1525 		return -EOPNOTSUPP;
1526 
1527 	memset(&new_attrs, 0, sizeof(new_attrs));
1528 
1529 	if (attr_mask & IB_QP_STATE) {
1530 		new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
1531 
1532 		erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
1533 	}
1534 
1535 	down_write(&qp->state_lock);
1536 
1537 	ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
1538 
1539 	up_write(&qp->state_lock);
1540 
1541 	return ret;
1542 }
1543 
1544 int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
1545 		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1546 {
1547 	struct erdma_qp *qp;
1548 	struct erdma_dev *dev;
1549 
1550 	if (ibqp && qp_attr && qp_init_attr) {
1551 		qp = to_eqp(ibqp);
1552 		dev = to_edev(ibqp->device);
1553 	} else {
1554 		return -EINVAL;
1555 	}
1556 
1557 	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1558 	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1559 
1560 	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
1561 	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
1562 	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
1563 	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
1564 
1565 	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
1566 	qp_attr->max_rd_atomic = qp->attrs.irq_size;
1567 	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
1568 
1569 	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
1570 				   IB_ACCESS_REMOTE_WRITE |
1571 				   IB_ACCESS_REMOTE_READ;
1572 
1573 	qp_init_attr->cap = qp_attr->cap;
1574 
1575 	return 0;
1576 }
1577 
1578 static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
1579 			      struct erdma_ureq_create_cq *ureq)
1580 {
1581 	int ret;
1582 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1583 
1584 	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
1585 			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
1586 			      true);
1587 	if (ret)
1588 		return ret;
1589 
1590 	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
1591 				       &cq->user_cq.user_dbr_page,
1592 				       &cq->user_cq.db_info_dma_addr);
1593 	if (ret)
1594 		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
1595 
1596 	return ret;
1597 }
1598 
1599 static int erdma_init_kernel_cq(struct erdma_cq *cq)
1600 {
1601 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1602 
1603 	cq->kern_cq.qbuf =
1604 		dma_alloc_coherent(&dev->pdev->dev,
1605 				   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1606 				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
1607 	if (!cq->kern_cq.qbuf)
1608 		return -ENOMEM;
1609 
1610 	cq->kern_cq.db_record =
1611 		(u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
1612 	spin_lock_init(&cq->kern_cq.lock);
1613 	/* use default cqdb addr */
1614 	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
1615 
1616 	return 0;
1617 }
1618 
1619 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1620 		    struct ib_udata *udata)
1621 {
1622 	struct erdma_cq *cq = to_ecq(ibcq);
1623 	struct erdma_dev *dev = to_edev(ibcq->device);
1624 	unsigned int depth = attr->cqe;
1625 	int ret;
1626 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1627 		udata, struct erdma_ucontext, ibucontext);
1628 
1629 	if (depth > dev->attrs.max_cqe)
1630 		return -EINVAL;
1631 
1632 	depth = roundup_pow_of_two(depth);
1633 	cq->ibcq.cqe = depth;
1634 	cq->depth = depth;
1635 	cq->assoc_eqn = attr->comp_vector + 1;
1636 
1637 	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
1638 			      XA_LIMIT(1, dev->attrs.max_cq - 1),
1639 			      &dev->next_alloc_cqn, GFP_KERNEL);
1640 	if (ret < 0)
1641 		return ret;
1642 
1643 	if (!rdma_is_kernel_res(&ibcq->res)) {
1644 		struct erdma_ureq_create_cq ureq;
1645 		struct erdma_uresp_create_cq uresp;
1646 
1647 		ret = ib_copy_from_udata(&ureq, udata,
1648 					 min(udata->inlen, sizeof(ureq)));
1649 		if (ret)
1650 			goto err_out_xa;
1651 
1652 		ret = erdma_init_user_cq(ctx, cq, &ureq);
1653 		if (ret)
1654 			goto err_out_xa;
1655 
1656 		uresp.cq_id = cq->cqn;
1657 		uresp.num_cqe = depth;
1658 
1659 		ret = ib_copy_to_udata(udata, &uresp,
1660 				       min(sizeof(uresp), udata->outlen));
1661 		if (ret)
1662 			goto err_free_res;
1663 	} else {
1664 		ret = erdma_init_kernel_cq(cq);
1665 		if (ret)
1666 			goto err_out_xa;
1667 	}
1668 
1669 	ret = create_cq_cmd(ctx, cq);
1670 	if (ret)
1671 		goto err_free_res;
1672 
1673 	return 0;
1674 
1675 err_free_res:
1676 	if (!rdma_is_kernel_res(&ibcq->res)) {
1677 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1678 		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
1679 	} else {
1680 		dma_free_coherent(&dev->pdev->dev,
1681 				  WARPPED_BUFSIZE(depth << CQE_SHIFT),
1682 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1683 	}
1684 
1685 err_out_xa:
1686 	xa_erase(&dev->cq_xa, cq->cqn);
1687 
1688 	return ret;
1689 }
1690 
1691 void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
1692 {
1693 	struct erdma_cmdq_config_mtu_req req;
1694 
1695 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1696 				CMDQ_OPCODE_CONF_MTU);
1697 	req.mtu = mtu;
1698 
1699 	erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1700 }
1701 
1702 void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
1703 {
1704 	struct ib_event event;
1705 
1706 	event.device = &dev->ibdev;
1707 	event.element.port_num = 1;
1708 	event.event = reason;
1709 
1710 	ib_dispatch_event(&event);
1711 }
1712