xref: /linux/drivers/infiniband/hw/erdma/erdma_verbs.c (revision e65e175b07bef5974045cc42238de99057669ca7)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
8 /* Copyright (c) 2008-2019, IBM Corporation */
9 
10 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
11 
12 #include <linux/vmalloc.h>
13 #include <net/addrconf.h>
14 #include <rdma/erdma-abi.h>
15 #include <rdma/ib_umem.h>
16 #include <rdma/uverbs_ioctl.h>
17 
18 #include "erdma.h"
19 #include "erdma_cm.h"
20 #include "erdma_verbs.h"
21 
22 static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
23 {
24 	struct erdma_cmdq_create_qp_req req;
25 	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
26 	struct erdma_uqp *user_qp;
27 	u64 resp0, resp1;
28 	int err;
29 
30 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
31 				CMDQ_OPCODE_CREATE_QP);
32 
33 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
34 			      ilog2(qp->attrs.sq_size)) |
35 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
36 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
37 			      ilog2(qp->attrs.rq_size)) |
38 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
39 
40 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
41 		u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT;
42 
43 		req.sq_cqn_mtt_cfg =
44 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
45 				   pgsz_range) |
46 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
47 		req.rq_cqn_mtt_cfg =
48 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
49 				   pgsz_range) |
50 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
51 
52 		req.sq_mtt_cfg =
53 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
54 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
55 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
56 				   ERDMA_MR_INLINE_MTT);
57 		req.rq_mtt_cfg = req.sq_mtt_cfg;
58 
59 		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
60 		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
61 		req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
62 					  (qp->attrs.sq_size << SQEBB_SHIFT);
63 		req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
64 					  (qp->attrs.rq_size << RQE_SHIFT);
65 	} else {
66 		user_qp = &qp->user_qp;
67 		req.sq_cqn_mtt_cfg = FIELD_PREP(
68 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
69 			ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT);
70 		req.sq_cqn_mtt_cfg |=
71 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
72 
73 		req.rq_cqn_mtt_cfg = FIELD_PREP(
74 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
75 			ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT);
76 		req.rq_cqn_mtt_cfg |=
77 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
78 
79 		req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
80 		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
81 					     user_qp->sq_mtt.mtt_nents) |
82 				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
83 					     user_qp->sq_mtt.mtt_type);
84 
85 		req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
86 		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
87 					     user_qp->rq_mtt.mtt_nents) |
88 				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
89 					     user_qp->rq_mtt.mtt_type);
90 
91 		req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
92 		req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];
93 
94 		req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
95 		req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
96 	}
97 
98 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
99 				  &resp1);
100 	if (!err)
101 		qp->attrs.cookie =
102 			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
103 
104 	return err;
105 }
106 
107 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
108 {
109 	struct erdma_cmdq_reg_mr_req req;
110 	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
111 	u64 *phy_addr;
112 	int i;
113 
114 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
115 
116 	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
117 		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
118 		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
119 	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
120 		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
121 		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
122 	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
123 			      ilog2(mr->mem.page_size)) |
124 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
125 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
126 
127 	if (mr->type == ERDMA_MR_TYPE_DMA)
128 		goto post_cmd;
129 
130 	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
131 		req.start_va = mr->mem.va;
132 		req.size = mr->mem.len;
133 	}
134 
135 	if (mr->type == ERDMA_MR_TYPE_FRMR ||
136 	    mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
137 		phy_addr = req.phy_addr;
138 		*phy_addr = mr->mem.mtt_entry[0];
139 	} else {
140 		phy_addr = req.phy_addr;
141 		for (i = 0; i < mr->mem.mtt_nents; i++)
142 			*phy_addr++ = mr->mem.mtt_entry[i];
143 	}
144 
145 post_cmd:
146 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
147 }
148 
149 static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
150 {
151 	struct erdma_cmdq_create_cq_req req;
152 	u32 page_size;
153 	struct erdma_mem *mtt;
154 
155 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
156 				CMDQ_OPCODE_CREATE_CQ);
157 
158 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
159 		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
160 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
161 
162 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
163 		page_size = SZ_32M;
164 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
165 				       ilog2(page_size) - PAGE_SHIFT);
166 		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
167 		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
168 
169 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
170 			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
171 				       ERDMA_MR_INLINE_MTT);
172 
173 		req.first_page_offset = 0;
174 		req.cq_db_info_addr =
175 			cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
176 	} else {
177 		mtt = &cq->user_cq.qbuf_mtt;
178 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
179 				       ilog2(mtt->page_size) - PAGE_SHIFT);
180 		if (mtt->mtt_nents == 1) {
181 			req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
182 			req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
183 		} else {
184 			req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
185 			req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
186 		}
187 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
188 				       mtt->mtt_nents);
189 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
190 				       mtt->mtt_type);
191 
192 		req.first_page_offset = mtt->page_offset;
193 		req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
194 	}
195 
196 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
197 }
198 
199 static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
200 {
201 	int idx;
202 	unsigned long flags;
203 
204 	spin_lock_irqsave(&res_cb->lock, flags);
205 	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
206 				 res_cb->next_alloc_idx);
207 	if (idx == res_cb->max_cap) {
208 		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
209 		if (idx == res_cb->max_cap) {
210 			res_cb->next_alloc_idx = 1;
211 			spin_unlock_irqrestore(&res_cb->lock, flags);
212 			return -ENOSPC;
213 		}
214 	}
215 
216 	set_bit(idx, res_cb->bitmap);
217 	res_cb->next_alloc_idx = idx + 1;
218 	spin_unlock_irqrestore(&res_cb->lock, flags);
219 
220 	return idx;
221 }
222 
223 static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
224 {
225 	unsigned long flags;
226 	u32 used;
227 
228 	spin_lock_irqsave(&res_cb->lock, flags);
229 	used = __test_and_clear_bit(idx, res_cb->bitmap);
230 	spin_unlock_irqrestore(&res_cb->lock, flags);
231 	WARN_ON(!used);
232 }
233 
234 static struct rdma_user_mmap_entry *
235 erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
236 			     u32 size, u8 mmap_flag, u64 *mmap_offset)
237 {
238 	struct erdma_user_mmap_entry *entry =
239 		kzalloc(sizeof(*entry), GFP_KERNEL);
240 	int ret;
241 
242 	if (!entry)
243 		return NULL;
244 
245 	entry->address = (u64)address;
246 	entry->mmap_flag = mmap_flag;
247 
248 	size = PAGE_ALIGN(size);
249 
250 	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
251 					  size);
252 	if (ret) {
253 		kfree(entry);
254 		return NULL;
255 	}
256 
257 	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
258 
259 	return &entry->rdma_entry;
260 }
261 
262 int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
263 		       struct ib_udata *unused)
264 {
265 	struct erdma_dev *dev = to_edev(ibdev);
266 
267 	memset(attr, 0, sizeof(*attr));
268 
269 	attr->max_mr_size = dev->attrs.max_mr_size;
270 	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
271 	attr->vendor_part_id = dev->pdev->device;
272 	attr->hw_ver = dev->pdev->revision;
273 	attr->max_qp = dev->attrs.max_qp - 1;
274 	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
275 	attr->max_qp_rd_atom = dev->attrs.max_ord;
276 	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
277 	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
278 	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
279 	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
280 	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
281 	attr->max_send_sge = dev->attrs.max_send_sge;
282 	attr->max_recv_sge = dev->attrs.max_recv_sge;
283 	attr->max_sge_rd = dev->attrs.max_sge_rd;
284 	attr->max_cq = dev->attrs.max_cq - 1;
285 	attr->max_cqe = dev->attrs.max_cqe;
286 	attr->max_mr = dev->attrs.max_mr;
287 	attr->max_pd = dev->attrs.max_pd;
288 	attr->max_mw = dev->attrs.max_mw;
289 	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
290 	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
291 
292 	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
293 		attr->atomic_cap = IB_ATOMIC_GLOB;
294 
295 	attr->fw_ver = dev->attrs.fw_version;
296 
297 	if (dev->netdev)
298 		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
299 				    dev->netdev->dev_addr);
300 
301 	return 0;
302 }
303 
304 int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
305 		    union ib_gid *gid)
306 {
307 	struct erdma_dev *dev = to_edev(ibdev);
308 
309 	memset(gid, 0, sizeof(*gid));
310 	ether_addr_copy(gid->raw, dev->attrs.peer_addr);
311 
312 	return 0;
313 }
314 
315 int erdma_query_port(struct ib_device *ibdev, u32 port,
316 		     struct ib_port_attr *attr)
317 {
318 	struct erdma_dev *dev = to_edev(ibdev);
319 	struct net_device *ndev = dev->netdev;
320 
321 	memset(attr, 0, sizeof(*attr));
322 
323 	attr->gid_tbl_len = 1;
324 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
325 	attr->max_msg_sz = -1;
326 
327 	if (!ndev)
328 		goto out;
329 
330 	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
331 	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
332 	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
333 	if (netif_running(ndev) && netif_carrier_ok(ndev))
334 		dev->state = IB_PORT_ACTIVE;
335 	else
336 		dev->state = IB_PORT_DOWN;
337 	attr->state = dev->state;
338 
339 out:
340 	if (dev->state == IB_PORT_ACTIVE)
341 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
342 	else
343 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
344 
345 	return 0;
346 }
347 
348 int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
349 			     struct ib_port_immutable *port_immutable)
350 {
351 	port_immutable->gid_tbl_len = 1;
352 	port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
353 
354 	return 0;
355 }
356 
357 int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
358 {
359 	struct erdma_pd *pd = to_epd(ibpd);
360 	struct erdma_dev *dev = to_edev(ibpd->device);
361 	int pdn;
362 
363 	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
364 	if (pdn < 0)
365 		return pdn;
366 
367 	pd->pdn = pdn;
368 
369 	return 0;
370 }
371 
372 int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
373 {
374 	struct erdma_pd *pd = to_epd(ibpd);
375 	struct erdma_dev *dev = to_edev(ibpd->device);
376 
377 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
378 
379 	return 0;
380 }
381 
382 static void erdma_flush_worker(struct work_struct *work)
383 {
384 	struct delayed_work *dwork = to_delayed_work(work);
385 	struct erdma_qp *qp =
386 		container_of(dwork, struct erdma_qp, reflush_dwork);
387 	struct erdma_cmdq_reflush_req req;
388 
389 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
390 				CMDQ_OPCODE_REFLUSH);
391 	req.qpn = QP_ID(qp);
392 	req.sq_pi = qp->kern_qp.sq_pi;
393 	req.rq_pi = qp->kern_qp.rq_pi;
394 	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
395 }
396 
397 static int erdma_qp_validate_cap(struct erdma_dev *dev,
398 				 struct ib_qp_init_attr *attrs)
399 {
400 	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
401 	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
402 	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
403 	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
404 	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
405 	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
406 		return -EINVAL;
407 	}
408 
409 	return 0;
410 }
411 
412 static int erdma_qp_validate_attr(struct erdma_dev *dev,
413 				  struct ib_qp_init_attr *attrs)
414 {
415 	if (attrs->qp_type != IB_QPT_RC)
416 		return -EOPNOTSUPP;
417 
418 	if (attrs->srq)
419 		return -EOPNOTSUPP;
420 
421 	if (!attrs->send_cq || !attrs->recv_cq)
422 		return -EOPNOTSUPP;
423 
424 	return 0;
425 }
426 
427 static void free_kernel_qp(struct erdma_qp *qp)
428 {
429 	struct erdma_dev *dev = qp->dev;
430 
431 	vfree(qp->kern_qp.swr_tbl);
432 	vfree(qp->kern_qp.rwr_tbl);
433 
434 	if (qp->kern_qp.sq_buf)
435 		dma_free_coherent(
436 			&dev->pdev->dev,
437 			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
438 			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
439 
440 	if (qp->kern_qp.rq_buf)
441 		dma_free_coherent(
442 			&dev->pdev->dev,
443 			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
444 			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
445 }
446 
447 static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
448 			  struct ib_qp_init_attr *attrs)
449 {
450 	struct erdma_kqp *kqp = &qp->kern_qp;
451 	int size;
452 
453 	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
454 		kqp->sig_all = 1;
455 
456 	kqp->sq_pi = 0;
457 	kqp->sq_ci = 0;
458 	kqp->rq_pi = 0;
459 	kqp->rq_ci = 0;
460 	kqp->hw_sq_db =
461 		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
462 	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
463 
464 	kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
465 	kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
466 	if (!kqp->swr_tbl || !kqp->rwr_tbl)
467 		goto err_out;
468 
469 	size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
470 	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
471 					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
472 	if (!kqp->sq_buf)
473 		goto err_out;
474 
475 	size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
476 	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
477 					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
478 	if (!kqp->rq_buf)
479 		goto err_out;
480 
481 	kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
482 	kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
483 
484 	return 0;
485 
486 err_out:
487 	free_kernel_qp(qp);
488 	return -ENOMEM;
489 }
490 
491 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
492 			   u64 start, u64 len, int access, u64 virt,
493 			   unsigned long req_page_size, u8 force_indirect_mtt)
494 {
495 	struct ib_block_iter biter;
496 	uint64_t *phy_addr = NULL;
497 	int ret = 0;
498 
499 	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
500 	if (IS_ERR(mem->umem)) {
501 		ret = PTR_ERR(mem->umem);
502 		mem->umem = NULL;
503 		return ret;
504 	}
505 
506 	mem->va = virt;
507 	mem->len = len;
508 	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
509 	mem->page_offset = start & (mem->page_size - 1);
510 	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
511 	mem->page_cnt = mem->mtt_nents;
512 
513 	if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
514 	    force_indirect_mtt) {
515 		mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
516 		mem->mtt_buf =
517 			alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
518 		if (!mem->mtt_buf) {
519 			ret = -ENOMEM;
520 			goto error_ret;
521 		}
522 		phy_addr = mem->mtt_buf;
523 	} else {
524 		mem->mtt_type = ERDMA_MR_INLINE_MTT;
525 		phy_addr = mem->mtt_entry;
526 	}
527 
528 	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
529 		*phy_addr = rdma_block_iter_dma_address(&biter);
530 		phy_addr++;
531 	}
532 
533 	if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
534 		mem->mtt_entry[0] =
535 			dma_map_single(&dev->pdev->dev, mem->mtt_buf,
536 				       MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
537 		if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
538 			free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
539 			mem->mtt_buf = NULL;
540 			ret = -ENOMEM;
541 			goto error_ret;
542 		}
543 	}
544 
545 	return 0;
546 
547 error_ret:
548 	if (mem->umem) {
549 		ib_umem_release(mem->umem);
550 		mem->umem = NULL;
551 	}
552 
553 	return ret;
554 }
555 
556 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
557 {
558 	if (mem->mtt_buf) {
559 		dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
560 				 MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
561 		free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
562 	}
563 
564 	if (mem->umem) {
565 		ib_umem_release(mem->umem);
566 		mem->umem = NULL;
567 	}
568 }
569 
570 static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
571 				    u64 dbrecords_va,
572 				    struct erdma_user_dbrecords_page **dbr_page,
573 				    dma_addr_t *dma_addr)
574 {
575 	struct erdma_user_dbrecords_page *page = NULL;
576 	int rv = 0;
577 
578 	mutex_lock(&ctx->dbrecords_page_mutex);
579 
580 	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
581 		if (page->va == (dbrecords_va & PAGE_MASK))
582 			goto found;
583 
584 	page = kmalloc(sizeof(*page), GFP_KERNEL);
585 	if (!page) {
586 		rv = -ENOMEM;
587 		goto out;
588 	}
589 
590 	page->va = (dbrecords_va & PAGE_MASK);
591 	page->refcnt = 0;
592 
593 	page->umem = ib_umem_get(ctx->ibucontext.device,
594 				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
595 	if (IS_ERR(page->umem)) {
596 		rv = PTR_ERR(page->umem);
597 		kfree(page);
598 		goto out;
599 	}
600 
601 	list_add(&page->list, &ctx->dbrecords_page_list);
602 
603 found:
604 	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
605 		    (dbrecords_va & ~PAGE_MASK);
606 	*dbr_page = page;
607 	page->refcnt++;
608 
609 out:
610 	mutex_unlock(&ctx->dbrecords_page_mutex);
611 	return rv;
612 }
613 
614 static void
615 erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
616 			   struct erdma_user_dbrecords_page **dbr_page)
617 {
618 	if (!ctx || !(*dbr_page))
619 		return;
620 
621 	mutex_lock(&ctx->dbrecords_page_mutex);
622 	if (--(*dbr_page)->refcnt == 0) {
623 		list_del(&(*dbr_page)->list);
624 		ib_umem_release((*dbr_page)->umem);
625 		kfree(*dbr_page);
626 	}
627 
628 	*dbr_page = NULL;
629 	mutex_unlock(&ctx->dbrecords_page_mutex);
630 }
631 
632 static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
633 			u64 va, u32 len, u64 db_info_va)
634 {
635 	dma_addr_t db_info_dma_addr;
636 	u32 rq_offset;
637 	int ret;
638 
639 	if (len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) +
640 		   qp->attrs.rq_size * RQE_SIZE))
641 		return -EINVAL;
642 
643 	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
644 			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
645 			      (SZ_1M - SZ_4K), 1);
646 	if (ret)
647 		return ret;
648 
649 	rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT);
650 	qp->user_qp.rq_offset = rq_offset;
651 
652 	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
653 			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
654 			      (SZ_1M - SZ_4K), 1);
655 	if (ret)
656 		goto put_sq_mtt;
657 
658 	ret = erdma_map_user_dbrecords(uctx, db_info_va,
659 				       &qp->user_qp.user_dbr_page,
660 				       &db_info_dma_addr);
661 	if (ret)
662 		goto put_rq_mtt;
663 
664 	qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
665 	qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
666 
667 	return 0;
668 
669 put_rq_mtt:
670 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
671 
672 put_sq_mtt:
673 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
674 
675 	return ret;
676 }
677 
678 static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
679 {
680 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
681 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
682 	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
683 }
684 
685 int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
686 		    struct ib_udata *udata)
687 {
688 	struct erdma_qp *qp = to_eqp(ibqp);
689 	struct erdma_dev *dev = to_edev(ibqp->device);
690 	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
691 		udata, struct erdma_ucontext, ibucontext);
692 	struct erdma_ureq_create_qp ureq;
693 	struct erdma_uresp_create_qp uresp;
694 	int ret;
695 
696 	ret = erdma_qp_validate_cap(dev, attrs);
697 	if (ret)
698 		goto err_out;
699 
700 	ret = erdma_qp_validate_attr(dev, attrs);
701 	if (ret)
702 		goto err_out;
703 
704 	qp->scq = to_ecq(attrs->send_cq);
705 	qp->rcq = to_ecq(attrs->recv_cq);
706 	qp->dev = dev;
707 	qp->attrs.cc = dev->attrs.cc;
708 
709 	init_rwsem(&qp->state_lock);
710 	kref_init(&qp->ref);
711 	init_completion(&qp->safe_free);
712 
713 	ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
714 			      XA_LIMIT(1, dev->attrs.max_qp - 1),
715 			      &dev->next_alloc_qpn, GFP_KERNEL);
716 	if (ret < 0) {
717 		ret = -ENOMEM;
718 		goto err_out;
719 	}
720 
721 	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
722 					       ERDMA_MAX_WQEBB_PER_SQE);
723 	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
724 
725 	if (uctx) {
726 		ret = ib_copy_from_udata(&ureq, udata,
727 					 min(sizeof(ureq), udata->inlen));
728 		if (ret)
729 			goto err_out_xa;
730 
731 		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
732 				   ureq.db_record_va);
733 		if (ret)
734 			goto err_out_xa;
735 
736 		memset(&uresp, 0, sizeof(uresp));
737 
738 		uresp.num_sqe = qp->attrs.sq_size;
739 		uresp.num_rqe = qp->attrs.rq_size;
740 		uresp.qp_id = QP_ID(qp);
741 		uresp.rq_offset = qp->user_qp.rq_offset;
742 
743 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
744 		if (ret)
745 			goto err_out_cmd;
746 	} else {
747 		init_kernel_qp(dev, qp, attrs);
748 	}
749 
750 	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
751 	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
752 	qp->attrs.state = ERDMA_QP_STATE_IDLE;
753 	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
754 
755 	ret = create_qp_cmd(dev, qp);
756 	if (ret)
757 		goto err_out_cmd;
758 
759 	spin_lock_init(&qp->lock);
760 
761 	return 0;
762 
763 err_out_cmd:
764 	if (uctx)
765 		free_user_qp(qp, uctx);
766 	else
767 		free_kernel_qp(qp);
768 err_out_xa:
769 	xa_erase(&dev->qp_xa, QP_ID(qp));
770 err_out:
771 	return ret;
772 }
773 
774 static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
775 {
776 	int stag_idx;
777 
778 	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
779 	if (stag_idx < 0)
780 		return stag_idx;
781 
782 	/* For now, we always let key field be zero. */
783 	*stag = (stag_idx << 8);
784 
785 	return 0;
786 }
787 
788 struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
789 {
790 	struct erdma_dev *dev = to_edev(ibpd->device);
791 	struct erdma_mr *mr;
792 	u32 stag;
793 	int ret;
794 
795 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
796 	if (!mr)
797 		return ERR_PTR(-ENOMEM);
798 
799 	ret = erdma_create_stag(dev, &stag);
800 	if (ret)
801 		goto out_free;
802 
803 	mr->type = ERDMA_MR_TYPE_DMA;
804 
805 	mr->ibmr.lkey = stag;
806 	mr->ibmr.rkey = stag;
807 	mr->ibmr.pd = ibpd;
808 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
809 	ret = regmr_cmd(dev, mr);
810 	if (ret)
811 		goto out_remove_stag;
812 
813 	return &mr->ibmr;
814 
815 out_remove_stag:
816 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
817 		       mr->ibmr.lkey >> 8);
818 
819 out_free:
820 	kfree(mr);
821 
822 	return ERR_PTR(ret);
823 }
824 
825 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
826 				u32 max_num_sg)
827 {
828 	struct erdma_mr *mr;
829 	struct erdma_dev *dev = to_edev(ibpd->device);
830 	int ret;
831 	u32 stag;
832 
833 	if (mr_type != IB_MR_TYPE_MEM_REG)
834 		return ERR_PTR(-EOPNOTSUPP);
835 
836 	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
837 		return ERR_PTR(-EINVAL);
838 
839 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
840 	if (!mr)
841 		return ERR_PTR(-ENOMEM);
842 
843 	ret = erdma_create_stag(dev, &stag);
844 	if (ret)
845 		goto out_free;
846 
847 	mr->type = ERDMA_MR_TYPE_FRMR;
848 
849 	mr->ibmr.lkey = stag;
850 	mr->ibmr.rkey = stag;
851 	mr->ibmr.pd = ibpd;
852 	/* update it in FRMR. */
853 	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
854 		     ERDMA_MR_ACC_RW;
855 
856 	mr->mem.page_size = PAGE_SIZE; /* update it later. */
857 	mr->mem.page_cnt = max_num_sg;
858 	mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
859 	mr->mem.mtt_buf =
860 		alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
861 	if (!mr->mem.mtt_buf) {
862 		ret = -ENOMEM;
863 		goto out_remove_stag;
864 	}
865 
866 	mr->mem.mtt_entry[0] =
867 		dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
868 			       MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
869 	if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
870 		ret = -ENOMEM;
871 		goto out_free_mtt;
872 	}
873 
874 	ret = regmr_cmd(dev, mr);
875 	if (ret)
876 		goto out_dma_unmap;
877 
878 	return &mr->ibmr;
879 
880 out_dma_unmap:
881 	dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
882 			 MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
883 out_free_mtt:
884 	free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
885 
886 out_remove_stag:
887 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
888 		       mr->ibmr.lkey >> 8);
889 
890 out_free:
891 	kfree(mr);
892 
893 	return ERR_PTR(ret);
894 }
895 
896 static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
897 {
898 	struct erdma_mr *mr = to_emr(ibmr);
899 
900 	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
901 		return -1;
902 
903 	*((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
904 	mr->mem.mtt_nents++;
905 
906 	return 0;
907 }
908 
909 int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
910 		    unsigned int *sg_offset)
911 {
912 	struct erdma_mr *mr = to_emr(ibmr);
913 	int num;
914 
915 	mr->mem.mtt_nents = 0;
916 
917 	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
918 			     erdma_set_page);
919 
920 	return num;
921 }
922 
923 struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
924 				u64 virt, int access, struct ib_udata *udata)
925 {
926 	struct erdma_mr *mr = NULL;
927 	struct erdma_dev *dev = to_edev(ibpd->device);
928 	u32 stag;
929 	int ret;
930 
931 	if (!len || len > dev->attrs.max_mr_size)
932 		return ERR_PTR(-EINVAL);
933 
934 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
935 	if (!mr)
936 		return ERR_PTR(-ENOMEM);
937 
938 	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
939 			      SZ_2G - SZ_4K, 0);
940 	if (ret)
941 		goto err_out_free;
942 
943 	ret = erdma_create_stag(dev, &stag);
944 	if (ret)
945 		goto err_out_put_mtt;
946 
947 	mr->ibmr.lkey = mr->ibmr.rkey = stag;
948 	mr->ibmr.pd = ibpd;
949 	mr->mem.va = virt;
950 	mr->mem.len = len;
951 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
952 	mr->valid = 1;
953 	mr->type = ERDMA_MR_TYPE_NORMAL;
954 
955 	ret = regmr_cmd(dev, mr);
956 	if (ret)
957 		goto err_out_mr;
958 
959 	return &mr->ibmr;
960 
961 err_out_mr:
962 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
963 		       mr->ibmr.lkey >> 8);
964 
965 err_out_put_mtt:
966 	put_mtt_entries(dev, &mr->mem);
967 
968 err_out_free:
969 	kfree(mr);
970 
971 	return ERR_PTR(ret);
972 }
973 
974 int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
975 {
976 	struct erdma_mr *mr;
977 	struct erdma_dev *dev = to_edev(ibmr->device);
978 	struct erdma_cmdq_dereg_mr_req req;
979 	int ret;
980 
981 	mr = to_emr(ibmr);
982 
983 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
984 				CMDQ_OPCODE_DEREG_MR);
985 
986 	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
987 		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
988 
989 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
990 	if (ret)
991 		return ret;
992 
993 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
994 
995 	put_mtt_entries(dev, &mr->mem);
996 
997 	kfree(mr);
998 	return 0;
999 }
1000 
1001 int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1002 {
1003 	struct erdma_cq *cq = to_ecq(ibcq);
1004 	struct erdma_dev *dev = to_edev(ibcq->device);
1005 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1006 		udata, struct erdma_ucontext, ibucontext);
1007 	int err;
1008 	struct erdma_cmdq_destroy_cq_req req;
1009 
1010 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1011 				CMDQ_OPCODE_DESTROY_CQ);
1012 	req.cqn = cq->cqn;
1013 
1014 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1015 	if (err)
1016 		return err;
1017 
1018 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
1019 		dma_free_coherent(&dev->pdev->dev,
1020 				  WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1021 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1022 	} else {
1023 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1024 		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1025 	}
1026 
1027 	xa_erase(&dev->cq_xa, cq->cqn);
1028 
1029 	return 0;
1030 }
1031 
1032 int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1033 {
1034 	struct erdma_qp *qp = to_eqp(ibqp);
1035 	struct erdma_dev *dev = to_edev(ibqp->device);
1036 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1037 		udata, struct erdma_ucontext, ibucontext);
1038 	struct erdma_qp_attrs qp_attrs;
1039 	int err;
1040 	struct erdma_cmdq_destroy_qp_req req;
1041 
1042 	down_write(&qp->state_lock);
1043 	qp_attrs.state = ERDMA_QP_STATE_ERROR;
1044 	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
1045 	up_write(&qp->state_lock);
1046 
1047 	cancel_delayed_work_sync(&qp->reflush_dwork);
1048 
1049 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1050 				CMDQ_OPCODE_DESTROY_QP);
1051 	req.qpn = QP_ID(qp);
1052 
1053 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1054 	if (err)
1055 		return err;
1056 
1057 	erdma_qp_put(qp);
1058 	wait_for_completion(&qp->safe_free);
1059 
1060 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
1061 		vfree(qp->kern_qp.swr_tbl);
1062 		vfree(qp->kern_qp.rwr_tbl);
1063 		dma_free_coherent(
1064 			&dev->pdev->dev,
1065 			WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
1066 			qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
1067 		dma_free_coherent(
1068 			&dev->pdev->dev,
1069 			WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
1070 			qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
1071 	} else {
1072 		put_mtt_entries(dev, &qp->user_qp.sq_mtt);
1073 		put_mtt_entries(dev, &qp->user_qp.rq_mtt);
1074 		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
1075 	}
1076 
1077 	if (qp->cep)
1078 		erdma_cep_put(qp->cep);
1079 	xa_erase(&dev->qp_xa, QP_ID(qp));
1080 
1081 	return 0;
1082 }
1083 
/* Take a reference on the erdma QP that embeds @ibqp. */
void erdma_qp_get_ref(struct ib_qp *ibqp)
{
	struct erdma_qp *qp = to_eqp(ibqp);

	erdma_qp_get(qp);
}
1088 
/* Drop a reference on the erdma QP that embeds @ibqp. */
void erdma_qp_put_ref(struct ib_qp *ibqp)
{
	struct erdma_qp *qp = to_eqp(ibqp);

	erdma_qp_put(qp);
}
1093 
1094 int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
1095 {
1096 	struct rdma_user_mmap_entry *rdma_entry;
1097 	struct erdma_user_mmap_entry *entry;
1098 	pgprot_t prot;
1099 	int err;
1100 
1101 	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
1102 	if (!rdma_entry)
1103 		return -EINVAL;
1104 
1105 	entry = to_emmap(rdma_entry);
1106 
1107 	switch (entry->mmap_flag) {
1108 	case ERDMA_MMAP_IO_NC:
1109 		/* map doorbell. */
1110 		prot = pgprot_device(vma->vm_page_prot);
1111 		break;
1112 	default:
1113 		return -EINVAL;
1114 	}
1115 
1116 	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
1117 				prot, rdma_entry);
1118 
1119 	rdma_user_mmap_entry_put(rdma_entry);
1120 	return err;
1121 }
1122 
/* Free the erdma mmap entry that wraps @rdma_entry. */
void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
{
	kfree(to_emmap(rdma_entry));
}
1129 
/*
 * Values stored in erdma_ucontext::sdb_type, recording which bitmap (if
 * any) the context's SQ doorbell slot was taken from.
 */
/* Slot taken from dev->sdb_page. */
#define ERDMA_SDB_PAGE 0
/* Slot taken from dev->sdb_entry. */
#define ERDMA_SDB_ENTRY 1
/* No bitmap slot held; the context uses ERDMA_SDB_SHARED_PAGE_INDEX. */
#define ERDMA_SDB_SHARED 2
1133 
1134 static void alloc_db_resources(struct erdma_dev *dev,
1135 			       struct erdma_ucontext *ctx)
1136 {
1137 	u32 bitmap_idx;
1138 	struct erdma_devattr *attrs = &dev->attrs;
1139 
1140 	if (attrs->disable_dwqe)
1141 		goto alloc_normal_db;
1142 
1143 	/* Try to alloc independent SDB page. */
1144 	spin_lock(&dev->db_bitmap_lock);
1145 	bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
1146 	if (bitmap_idx != attrs->dwqe_pages) {
1147 		set_bit(bitmap_idx, dev->sdb_page);
1148 		spin_unlock(&dev->db_bitmap_lock);
1149 
1150 		ctx->sdb_type = ERDMA_SDB_PAGE;
1151 		ctx->sdb_idx = bitmap_idx;
1152 		ctx->sdb_page_idx = bitmap_idx;
1153 		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1154 			   (bitmap_idx << PAGE_SHIFT);
1155 		ctx->sdb_page_off = 0;
1156 
1157 		return;
1158 	}
1159 
1160 	bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
1161 	if (bitmap_idx != attrs->dwqe_entries) {
1162 		set_bit(bitmap_idx, dev->sdb_entry);
1163 		spin_unlock(&dev->db_bitmap_lock);
1164 
1165 		ctx->sdb_type = ERDMA_SDB_ENTRY;
1166 		ctx->sdb_idx = bitmap_idx;
1167 		ctx->sdb_page_idx = attrs->dwqe_pages +
1168 				    bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1169 		ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1170 
1171 		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1172 			   (ctx->sdb_page_idx << PAGE_SHIFT);
1173 
1174 		return;
1175 	}
1176 
1177 	spin_unlock(&dev->db_bitmap_lock);
1178 
1179 alloc_normal_db:
1180 	ctx->sdb_type = ERDMA_SDB_SHARED;
1181 	ctx->sdb_idx = 0;
1182 	ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
1183 	ctx->sdb_page_off = 0;
1184 
1185 	ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
1186 }
1187 
1188 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
1189 {
1190 	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
1191 	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
1192 	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
1193 }
1194 
1195 int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
1196 {
1197 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1198 	struct erdma_dev *dev = to_edev(ibctx->device);
1199 	int ret;
1200 	struct erdma_uresp_alloc_ctx uresp = {};
1201 
1202 	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
1203 		ret = -ENOMEM;
1204 		goto err_out;
1205 	}
1206 
1207 	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
1208 	mutex_init(&ctx->dbrecords_page_mutex);
1209 
1210 	alloc_db_resources(dev, ctx);
1211 
1212 	ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
1213 	ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
1214 
1215 	if (udata->outlen < sizeof(uresp)) {
1216 		ret = -EINVAL;
1217 		goto err_out;
1218 	}
1219 
1220 	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
1221 		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
1222 	if (!ctx->sq_db_mmap_entry) {
1223 		ret = -ENOMEM;
1224 		goto err_out;
1225 	}
1226 
1227 	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
1228 		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
1229 	if (!ctx->rq_db_mmap_entry) {
1230 		ret = -EINVAL;
1231 		goto err_out;
1232 	}
1233 
1234 	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
1235 		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
1236 	if (!ctx->cq_db_mmap_entry) {
1237 		ret = -EINVAL;
1238 		goto err_out;
1239 	}
1240 
1241 	uresp.dev_id = dev->pdev->device;
1242 	uresp.sdb_type = ctx->sdb_type;
1243 	uresp.sdb_offset = ctx->sdb_page_off;
1244 
1245 	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1246 	if (ret)
1247 		goto err_out;
1248 
1249 	return 0;
1250 
1251 err_out:
1252 	erdma_uctx_user_mmap_entries_remove(ctx);
1253 	atomic_dec(&dev->num_ctx);
1254 	return ret;
1255 }
1256 
1257 void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
1258 {
1259 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1260 	struct erdma_dev *dev = to_edev(ibctx->device);
1261 
1262 	spin_lock(&dev->db_bitmap_lock);
1263 	if (ctx->sdb_type == ERDMA_SDB_PAGE)
1264 		clear_bit(ctx->sdb_idx, dev->sdb_page);
1265 	else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
1266 		clear_bit(ctx->sdb_idx, dev->sdb_entry);
1267 
1268 	erdma_uctx_user_mmap_entries_remove(ctx);
1269 
1270 	spin_unlock(&dev->db_bitmap_lock);
1271 
1272 	atomic_dec(&dev->num_ctx);
1273 }
1274 
1275 static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
1276 	[IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
1277 	[IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
1278 	[IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
1279 	[IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
1280 	[IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
1281 	[IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
1282 	[IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
1283 };
1284 
1285 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1286 		    struct ib_udata *udata)
1287 {
1288 	struct erdma_qp_attrs new_attrs;
1289 	enum erdma_qp_attr_mask erdma_attr_mask = 0;
1290 	struct erdma_qp *qp = to_eqp(ibqp);
1291 	int ret = 0;
1292 
1293 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1294 		return -EOPNOTSUPP;
1295 
1296 	memset(&new_attrs, 0, sizeof(new_attrs));
1297 
1298 	if (attr_mask & IB_QP_STATE) {
1299 		new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
1300 
1301 		erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
1302 	}
1303 
1304 	down_write(&qp->state_lock);
1305 
1306 	ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
1307 
1308 	up_write(&qp->state_lock);
1309 
1310 	return ret;
1311 }
1312 
1313 int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
1314 		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1315 {
1316 	struct erdma_qp *qp;
1317 	struct erdma_dev *dev;
1318 
1319 	if (ibqp && qp_attr && qp_init_attr) {
1320 		qp = to_eqp(ibqp);
1321 		dev = to_edev(ibqp->device);
1322 	} else {
1323 		return -EINVAL;
1324 	}
1325 
1326 	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1327 	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1328 
1329 	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
1330 	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
1331 	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
1332 	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
1333 
1334 	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
1335 	qp_attr->max_rd_atomic = qp->attrs.irq_size;
1336 	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
1337 
1338 	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
1339 				   IB_ACCESS_REMOTE_WRITE |
1340 				   IB_ACCESS_REMOTE_READ;
1341 
1342 	qp_init_attr->cap = qp_attr->cap;
1343 
1344 	return 0;
1345 }
1346 
1347 static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
1348 			      struct erdma_ureq_create_cq *ureq)
1349 {
1350 	int ret;
1351 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1352 
1353 	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
1354 			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
1355 			      1);
1356 	if (ret)
1357 		return ret;
1358 
1359 	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
1360 				       &cq->user_cq.user_dbr_page,
1361 				       &cq->user_cq.db_info_dma_addr);
1362 	if (ret)
1363 		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1364 
1365 	return ret;
1366 }
1367 
1368 static int erdma_init_kernel_cq(struct erdma_cq *cq)
1369 {
1370 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1371 
1372 	cq->kern_cq.qbuf =
1373 		dma_alloc_coherent(&dev->pdev->dev,
1374 				   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1375 				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
1376 	if (!cq->kern_cq.qbuf)
1377 		return -ENOMEM;
1378 
1379 	cq->kern_cq.db_record =
1380 		(u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
1381 	spin_lock_init(&cq->kern_cq.lock);
1382 	/* use default cqdb addr */
1383 	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
1384 
1385 	return 0;
1386 }
1387 
1388 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1389 		    struct ib_udata *udata)
1390 {
1391 	struct erdma_cq *cq = to_ecq(ibcq);
1392 	struct erdma_dev *dev = to_edev(ibcq->device);
1393 	unsigned int depth = attr->cqe;
1394 	int ret;
1395 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1396 		udata, struct erdma_ucontext, ibucontext);
1397 
1398 	if (depth > dev->attrs.max_cqe)
1399 		return -EINVAL;
1400 
1401 	depth = roundup_pow_of_two(depth);
1402 	cq->ibcq.cqe = depth;
1403 	cq->depth = depth;
1404 	cq->assoc_eqn = attr->comp_vector + 1;
1405 
1406 	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
1407 			      XA_LIMIT(1, dev->attrs.max_cq - 1),
1408 			      &dev->next_alloc_cqn, GFP_KERNEL);
1409 	if (ret < 0)
1410 		return ret;
1411 
1412 	if (!rdma_is_kernel_res(&ibcq->res)) {
1413 		struct erdma_ureq_create_cq ureq;
1414 		struct erdma_uresp_create_cq uresp;
1415 
1416 		ret = ib_copy_from_udata(&ureq, udata,
1417 					 min(udata->inlen, sizeof(ureq)));
1418 		if (ret)
1419 			goto err_out_xa;
1420 
1421 		ret = erdma_init_user_cq(ctx, cq, &ureq);
1422 		if (ret)
1423 			goto err_out_xa;
1424 
1425 		uresp.cq_id = cq->cqn;
1426 		uresp.num_cqe = depth;
1427 
1428 		ret = ib_copy_to_udata(udata, &uresp,
1429 				       min(sizeof(uresp), udata->outlen));
1430 		if (ret)
1431 			goto err_free_res;
1432 	} else {
1433 		ret = erdma_init_kernel_cq(cq);
1434 		if (ret)
1435 			goto err_out_xa;
1436 	}
1437 
1438 	ret = create_cq_cmd(dev, cq);
1439 	if (ret)
1440 		goto err_free_res;
1441 
1442 	return 0;
1443 
1444 err_free_res:
1445 	if (!rdma_is_kernel_res(&ibcq->res)) {
1446 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1447 		put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1448 	} else {
1449 		dma_free_coherent(&dev->pdev->dev,
1450 				  WARPPED_BUFSIZE(depth << CQE_SHIFT),
1451 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1452 	}
1453 
1454 err_out_xa:
1455 	xa_erase(&dev->cq_xa, cq->cqn);
1456 
1457 	return ret;
1458 }
1459 
1460 void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
1461 {
1462 	struct erdma_cmdq_config_mtu_req req;
1463 
1464 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1465 				CMDQ_OPCODE_CONF_MTU);
1466 	req.mtu = mtu;
1467 
1468 	erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1469 }
1470 
1471 void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
1472 {
1473 	struct ib_event event;
1474 
1475 	event.device = &dev->ibdev;
1476 	event.element.port_num = 1;
1477 	event.event = reason;
1478 
1479 	ib_dispatch_event(&event);
1480 }
1481