xref: /linux/drivers/infiniband/hw/erdma/erdma_verbs.c (revision 54b3bce9721141f6aee4909591b5c02e7ba4bd7b)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6 
7 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
8 /* Copyright (c) 2008-2019, IBM Corporation */
9 
10 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
11 
12 #include <linux/vmalloc.h>
13 #include <net/addrconf.h>
14 #include <rdma/erdma-abi.h>
15 #include <rdma/iter.h>
16 #include <rdma/uverbs_ioctl.h>
17 
18 #include "erdma.h"
19 #include "erdma_cm.h"
20 #include "erdma_verbs.h"
21 
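/*
 * Pack a queue-buffer MTT into a CREATE_QP command. Up to
 * ERDMA_MAX_INLINE_MTT_ENTRIES page addresses ride inline in the command
 * itself (a level-0 MTT); larger mappings instead pass the DMA address of
 * the MTT buffer, which the hardware dereferences (a level-1 MTT).
 */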
22 static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
23 				      u64 *addr0, u64 *addr1)
24 {
25 	struct erdma_mtt *mtt = mem->mtt;
26 
27 	if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
28 		*addr0 = mtt->buf_dma;
29 		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
30 				   ERDMA_MR_MTT_1LEVEL);
31 	} else {
32 		*addr0 = mtt->buf[0];
33 		memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
34 		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
35 				   ERDMA_MR_MTT_0LEVEL);
36 	}
37 }
38 
39 static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
40 {
41 	struct erdma_dev *dev = to_edev(qp->ibqp.device);
42 	struct erdma_pd *pd = to_epd(qp->ibqp.pd);
43 	struct erdma_cmdq_create_qp_req req;
44 	struct erdma_uqp *user_qp;
45 	u64 resp0, resp1;
46 	int err;
47 
48 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
49 				CMDQ_OPCODE_CREATE_QP);
50 
51 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
52 			      ilog2(qp->attrs.sq_size)) |
53 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
54 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
55 			      ilog2(qp->attrs.rq_size)) |
56 		   FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
57 
58 	if (qp->ibqp.qp_type == IB_QPT_RC)
59 		req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
60 				      ERDMA_QPT_RC);
61 	else
62 		req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
63 				      ERDMA_QPT_UD);
64 
65 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
66 		u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
67 
68 		req.sq_cqn_mtt_cfg =
69 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
70 				   pgsz_range) |
71 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
72 		req.rq_cqn_mtt_cfg =
73 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
74 				   pgsz_range) |
75 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
76 
77 		req.sq_mtt_cfg =
78 			FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
79 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
80 			FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
81 				   ERDMA_MR_MTT_0LEVEL);
82 		req.rq_mtt_cfg = req.sq_mtt_cfg;
83 
84 		req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
85 		req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
86 		req.sq_dbrec_dma = qp->kern_qp.sq_dbrec_dma;
87 		req.rq_dbrec_dma = qp->kern_qp.rq_dbrec_dma;
88 	} else {
89 		user_qp = &qp->user_qp;
90 		req.sq_cqn_mtt_cfg = FIELD_PREP(
91 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
92 			ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
93 		req.sq_cqn_mtt_cfg |=
94 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
95 
96 		req.rq_cqn_mtt_cfg = FIELD_PREP(
97 			ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
98 			ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
99 		req.rq_cqn_mtt_cfg |=
100 			FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
101 
102 		req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
103 		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
104 					     user_qp->sq_mem.mtt_nents);
105 
106 		req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
107 		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
108 					     user_qp->rq_mem.mtt_nents);
109 
110 		assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
111 					  &req.sq_buf_addr, req.sq_mtt_entry);
112 		assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
113 					  &req.rq_buf_addr, req.rq_mtt_entry);
114 
115 		req.sq_dbrec_dma = user_qp->sq_dbrec_dma;
116 		req.rq_dbrec_dma = user_qp->rq_dbrec_dma;
117 
118 		if (uctx->ext_db.enable) {
119 			req.sq_cqn_mtt_cfg |=
120 				FIELD_PREP(ERDMA_CMD_CREATE_QP_DB_CFG_MASK, 1);
121 			req.db_cfg =
122 				FIELD_PREP(ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK,
123 					   uctx->ext_db.sdb_off) |
124 				FIELD_PREP(ERDMA_CMD_CREATE_QP_RQDB_CFG_MASK,
125 					   uctx->ext_db.rdb_off);
126 		}
127 	}
128 
129 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1,
130 				  true);
131 	if (!err && erdma_device_iwarp(dev))
132 		qp->attrs.iwarp.cookie =
133 			FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
134 
135 	return err;
136 }
137 
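/*
 * Build and post a REG_MR command. The 32-bit STag splits into an 8-bit
 * key (low byte) and a 24-bit MPT index; the MTT is carried inline, as a
 * one-level table, or as a multi-level table depending on its size and
 * on whether its backing buffer is physically continuous.
 */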
138 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
139 {
140 	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
141 	u32 mtt_level = ERDMA_MR_MTT_0LEVEL;
142 	struct erdma_cmdq_reg_mr_req req;
143 
144 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
145 
146 	if (mr->type == ERDMA_MR_TYPE_FRMR ||
147 	    mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
148 		if (mr->mem.mtt->continuous) {
149 			req.phy_addr[0] = mr->mem.mtt->buf_dma;
150 			mtt_level = ERDMA_MR_MTT_1LEVEL;
151 		} else {
152 			req.phy_addr[0] = mr->mem.mtt->dma_addrs[0];
153 			mtt_level = mr->mem.mtt->level;
154 		}
155 	} else if (mr->type != ERDMA_MR_TYPE_DMA) {
156 		memcpy(req.phy_addr, mr->mem.mtt->buf,
157 		       MTT_SIZE(mr->mem.page_cnt));
158 	}
159 
160 	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
161 		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
162 		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
163 	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
164 		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
165 		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
166 	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
167 			      ilog2(mr->mem.page_size)) |
168 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
169 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
170 
171 	if (mr->type == ERDMA_MR_TYPE_DMA)
172 		goto post_cmd;
173 
174 	if (mr->type == ERDMA_MR_TYPE_NORMAL) {
175 		req.start_va = mr->mem.va;
176 		req.size = mr->mem.len;
177 	}
178 
179 	if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
180 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
181 		req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
182 				       PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
183 		req.size_h = upper_32_bits(mr->mem.len);
184 		req.mtt_cnt_h = mr->mem.page_cnt >> 20;
185 	}
186 
187 post_cmd:
188 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
189 				   true);
190 }
191 
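/*
 * Build the CREATE_CQ command. Kernel CQs always use one physically
 * continuous buffer, advertised as a single 32 MiB hardware "page" with
 * one MTT entry; user CQs pass either the block address directly (one
 * MTT entry, level 0) or the MTT buffer's DMA address (level 1), the
 * same encoding create_qp_cmd() uses.
 */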
192 static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
193 {
194 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
195 	struct erdma_cmdq_create_cq_req req;
196 	struct erdma_mem *mem;
197 	u32 page_size;
198 
199 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
200 				CMDQ_OPCODE_CREATE_CQ);
201 
202 	req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
203 		   FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
204 	req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
205 
206 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
207 		page_size = SZ_32M;
208 		req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
209 				       ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
210 		req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
211 		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
212 
213 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
214 			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
215 				       ERDMA_MR_MTT_0LEVEL);
216 
217 		req.first_page_offset = 0;
218 		req.cq_dbrec_dma = cq->kern_cq.dbrec_dma;
219 	} else {
220 		mem = &cq->user_cq.qbuf_mem;
221 		req.cfg0 |=
222 			FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
223 				   ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
224 		if (mem->mtt_nents == 1) {
225 			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
226 			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
227 			req.cfg1 |=
228 				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
229 					   ERDMA_MR_MTT_0LEVEL);
230 		} else {
231 			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
232 			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
233 			req.cfg1 |=
234 				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
235 					   ERDMA_MR_MTT_1LEVEL);
236 		}
237 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
238 				       mem->mtt_nents);
239 
240 		req.first_page_offset = mem->page_offset;
241 		req.cq_dbrec_dma = cq->user_cq.dbrec_dma;
242 
243 		if (uctx->ext_db.enable) {
244 			req.cfg1 |= FIELD_PREP(
245 				ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK, 1);
246 			req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_DB_CFG_MASK,
247 					      uctx->ext_db.cdb_off);
248 		}
249 	}
250 
251 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
252 				   true);
253 }
254 
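/*
 * Round-robin index allocator: scan the bitmap from the cursor so that
 * recently freed indexes are not reused immediately, falling back to a
 * full scan once the end is reached. Typical usage (sketch, mirroring
 * erdma_alloc_pd() below):
 *
 *	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
 *	if (pdn < 0)
 *		return pdn;
 *	...
 *	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pdn);
 */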
255 static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
256 {
257 	int idx;
258 	unsigned long flags;
259 
260 	spin_lock_irqsave(&res_cb->lock, flags);
261 	idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
262 				 res_cb->next_alloc_idx);
263 	if (idx == res_cb->max_cap) {
264 		idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
265 		if (idx == res_cb->max_cap) {
266 			res_cb->next_alloc_idx = 1;
267 			spin_unlock_irqrestore(&res_cb->lock, flags);
268 			return -ENOSPC;
269 		}
270 	}
271 
272 	set_bit(idx, res_cb->bitmap);
273 	res_cb->next_alloc_idx = idx + 1;
274 	spin_unlock_irqrestore(&res_cb->lock, flags);
275 
276 	return idx;
277 }
278 
279 static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
280 {
281 	unsigned long flags;
282 	u32 used;
283 
284 	spin_lock_irqsave(&res_cb->lock, flags);
285 	used = __test_and_clear_bit(idx, res_cb->bitmap);
286 	spin_unlock_irqrestore(&res_cb->lock, flags);
287 	WARN_ON(!used);
288 }
289 
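/*
 * Wrap a doorbell address in an rdma_user_mmap entry so userspace can
 * map it later through erdma_mmap(); the returned offset is handed back
 * to userspace in the alloc_ucontext response.
 */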
290 static struct rdma_user_mmap_entry *
291 erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
292 			     u32 size, u8 mmap_flag, u64 *mmap_offset)
293 {
294 	struct erdma_user_mmap_entry *entry = kzalloc_obj(*entry);
295 	int ret;
296 
297 	if (!entry)
298 		return NULL;
299 
300 	entry->address = (u64)address;
301 	entry->mmap_flag = mmap_flag;
302 
303 	size = PAGE_ALIGN(size);
304 
305 	ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
306 					  size);
307 	if (ret) {
308 		kfree(entry);
309 		return NULL;
310 	}
311 
312 	*mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
313 
314 	return &entry->rdma_entry;
315 }
316 
317 int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
318 		       struct ib_udata *unused)
319 {
320 	struct erdma_dev *dev = to_edev(ibdev);
321 
322 	memset(attr, 0, sizeof(*attr));
323 
324 	attr->max_mr_size = dev->attrs.max_mr_size;
325 	attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
326 	attr->vendor_part_id = dev->pdev->device;
327 	attr->hw_ver = dev->pdev->revision;
328 	attr->max_qp = dev->attrs.max_qp - 1;
329 	attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
330 	attr->max_qp_rd_atom = dev->attrs.max_ord;
331 	attr->max_qp_init_rd_atom = dev->attrs.max_ird;
332 	attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
333 	attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
334 	attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
335 	ibdev->local_dma_lkey = dev->attrs.local_dma_key;
336 	attr->max_send_sge = dev->attrs.max_send_sge;
337 	attr->max_recv_sge = dev->attrs.max_recv_sge;
338 	attr->max_sge_rd = dev->attrs.max_sge_rd;
339 	attr->max_cq = dev->attrs.max_cq - 1;
340 	attr->max_cqe = dev->attrs.max_cqe;
341 	attr->max_mr = dev->attrs.max_mr;
342 	attr->max_pd = dev->attrs.max_pd;
343 	attr->max_mw = dev->attrs.max_mw;
344 	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
345 	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
346 
347 	if (erdma_device_rocev2(dev)) {
348 		attr->max_pkeys = ERDMA_MAX_PKEYS;
349 		attr->max_ah = dev->attrs.max_ah;
350 	}
351 
352 	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
353 		attr->atomic_cap = IB_ATOMIC_GLOB;
354 
355 	attr->fw_ver = dev->attrs.fw_version;
356 
357 	if (dev->netdev)
358 		addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
359 				    dev->netdev->dev_addr);
360 
361 	return 0;
362 }
363 
364 int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
365 		    union ib_gid *gid)
366 {
367 	struct erdma_dev *dev = to_edev(ibdev);
368 
369 	memset(gid, 0, sizeof(*gid));
370 	ether_addr_copy(gid->raw, dev->attrs.peer_addr);
371 
372 	return 0;
373 }
374 
375 int erdma_query_port(struct ib_device *ibdev, u32 port,
376 		     struct ib_port_attr *attr)
377 {
378 	struct erdma_dev *dev = to_edev(ibdev);
379 	struct net_device *ndev = dev->netdev;
380 
381 	memset(attr, 0, sizeof(*attr));
382 
383 	if (erdma_device_iwarp(dev)) {
384 		attr->gid_tbl_len = 1;
385 	} else {
386 		attr->gid_tbl_len = dev->attrs.max_gid;
387 		attr->ip_gids = true;
388 		attr->pkey_tbl_len = ERDMA_MAX_PKEYS;
389 	}
390 
391 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
392 	attr->max_msg_sz = -1;
393 
394 	if (!ndev)
395 		goto out;
396 
397 	ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
398 	attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
399 	attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
400 	attr->state = ib_get_curr_port_state(ndev);
401 
402 out:
403 	if (attr->state == IB_PORT_ACTIVE)
404 		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
405 	else
406 		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
407 
408 	return 0;
409 }
410 
411 int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
412 			     struct ib_port_immutable *port_immutable)
413 {
414 	struct erdma_dev *dev = to_edev(ibdev);
415 
416 	if (erdma_device_iwarp(dev)) {
417 		port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
418 		port_immutable->gid_tbl_len = 1;
419 	} else {
420 		port_immutable->core_cap_flags =
421 			RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
422 		port_immutable->max_mad_size = IB_MGMT_MAD_SIZE;
423 		port_immutable->gid_tbl_len = dev->attrs.max_gid;
424 		port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS;
425 	}
426 
427 	return 0;
428 }
429 
430 int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
431 {
432 	struct erdma_pd *pd = to_epd(ibpd);
433 	struct erdma_dev *dev = to_edev(ibpd->device);
434 	int pdn;
435 
436 	pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
437 	if (pdn < 0)
438 		return pdn;
439 
440 	pd->pdn = pdn;
441 
442 	return 0;
443 }
444 
445 int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
446 {
447 	struct erdma_pd *pd = to_epd(ibpd);
448 	struct erdma_dev *dev = to_edev(ibpd->device);
449 
450 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
451 
452 	return 0;
453 }
454 
455 static void erdma_flush_worker(struct work_struct *work)
456 {
457 	struct delayed_work *dwork = to_delayed_work(work);
458 	struct erdma_qp *qp =
459 		container_of(dwork, struct erdma_qp, reflush_dwork);
460 	struct erdma_cmdq_reflush_req req;
461 
462 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
463 				CMDQ_OPCODE_REFLUSH);
464 	req.qpn = QP_ID(qp);
465 	req.sq_pi = qp->kern_qp.sq_pi;
466 	req.rq_pi = qp->kern_qp.rq_pi;
467 	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL,
468 			    true);
469 }
470 
471 static int erdma_qp_validate_cap(struct erdma_dev *dev,
472 				 struct ib_qp_init_attr *attrs)
473 {
474 	if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
475 	    (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
476 	    (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
477 	    (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
478 	    (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
479 	    !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
480 		return -EINVAL;
481 	}
482 
483 	return 0;
484 }
485 
486 static int erdma_qp_validate_attr(struct erdma_dev *dev,
487 				  struct ib_qp_init_attr *attrs)
488 {
489 	if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC)
490 		return -EOPNOTSUPP;
491 
492 	if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC &&
493 	    attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI)
494 		return -EOPNOTSUPP;
495 
496 	if (attrs->srq)
497 		return -EOPNOTSUPP;
498 
499 	if (!attrs->send_cq || !attrs->recv_cq)
500 		return -EOPNOTSUPP;
501 
502 	return 0;
503 }
504 
505 static void free_kernel_qp(struct erdma_qp *qp)
506 {
507 	struct erdma_dev *dev = qp->dev;
508 
509 	vfree(qp->kern_qp.swr_tbl);
510 	vfree(qp->kern_qp.rwr_tbl);
511 
512 	if (qp->kern_qp.sq_buf)
513 		dma_free_coherent(&dev->pdev->dev,
514 				  qp->attrs.sq_size << SQEBB_SHIFT,
515 				  qp->kern_qp.sq_buf,
516 				  qp->kern_qp.sq_buf_dma_addr);
517 
518 	if (qp->kern_qp.sq_dbrec)
519 		dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec,
520 			      qp->kern_qp.sq_dbrec_dma);
521 
522 	if (qp->kern_qp.rq_buf)
523 		dma_free_coherent(&dev->pdev->dev,
524 				  qp->attrs.rq_size << RQE_SHIFT,
525 				  qp->kern_qp.rq_buf,
526 				  qp->kern_qp.rq_buf_dma_addr);
527 
528 	if (qp->kern_qp.rq_dbrec)
529 		dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec,
530 			      qp->kern_qp.rq_dbrec_dma);
531 }
532 
533 static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
534 			  struct ib_qp_init_attr *attrs)
535 {
536 	struct erdma_kqp *kqp = &qp->kern_qp;
537 	int size;
538 
539 	if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
540 		kqp->sig_all = 1;
541 
542 	kqp->sq_pi = 0;
543 	kqp->sq_ci = 0;
544 	kqp->rq_pi = 0;
545 	kqp->rq_ci = 0;
546 	kqp->hw_sq_db =
547 		dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
548 	kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
549 
550 	kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
551 	kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
552 	if (!kqp->swr_tbl || !kqp->rwr_tbl)
553 		goto err_out;
554 
555 	size = qp->attrs.sq_size << SQEBB_SHIFT;
556 	kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
557 					 &kqp->sq_buf_dma_addr, GFP_KERNEL);
558 	if (!kqp->sq_buf)
559 		goto err_out;
560 
561 	kqp->sq_dbrec =
562 		dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma);
563 	if (!kqp->sq_dbrec)
564 		goto err_out;
565 
566 	size = qp->attrs.rq_size << RQE_SHIFT;
567 	kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
568 					 &kqp->rq_buf_dma_addr, GFP_KERNEL);
569 	if (!kqp->rq_buf)
570 		goto err_out;
571 
572 	kqp->rq_dbrec =
573 		dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma);
574 	if (!kqp->rq_dbrec)
575 		goto err_out;
576 
577 	return 0;
578 
579 err_out:
580 	free_kernel_qp(qp);
581 	return -ENOMEM;
582 }
583 
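/*
 * Walk down to the lowest MTT level and fill it with the DMA address of
 * every page_size-sized block of the pinned user memory. Upper levels,
 * if any, were already chained together when the MTT was created.
 */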
584 static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
585 {
586 	struct erdma_mtt *mtt = mem->mtt;
587 	struct ib_block_iter biter;
588 	u32 idx = 0;
589 
590 	while (mtt->low_level)
591 		mtt = mtt->low_level;
592 
593 	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
594 		mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
595 }
596 
597 static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
598 					       size_t size)
599 {
600 	struct erdma_mtt *mtt;
601 
602 	mtt = kzalloc_obj(*mtt);
603 	if (!mtt)
604 		return ERR_PTR(-ENOMEM);
605 
606 	mtt->size = size;
607 	mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
608 	if (!mtt->buf)
609 		goto err_free_mtt;
610 
611 	mtt->continuous = true;
612 	mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
613 				      DMA_TO_DEVICE);
614 	if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
615 		goto err_free_mtt_buf;
616 
617 	return mtt;
618 
619 err_free_mtt_buf:
620 	kfree(mtt->buf);
621 
622 err_free_mtt:
623 	kfree(mtt);
624 
625 	return ERR_PTR(-ENOMEM);
626 }
627 
628 static void erdma_unmap_page_list(struct erdma_dev *dev, dma_addr_t *pg_dma,
629 				  u32 npages)
630 {
631 	u32 i;
632 
633 	for (i = 0; i < npages; i++)
634 		dma_unmap_page(&dev->pdev->dev, pg_dma[i], PAGE_SIZE,
635 			       DMA_TO_DEVICE);
636 }
637 
638 static void erdma_destroy_mtt_buf_dma_addrs(struct erdma_dev *dev,
639 					    struct erdma_mtt *mtt)
640 {
641 	erdma_unmap_page_list(dev, mtt->dma_addrs, mtt->npages);
642 	vfree(mtt->dma_addrs);
643 }
644 
645 static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
646 				      struct erdma_mtt *mtt)
647 {
648 	erdma_destroy_mtt_buf_dma_addrs(dev, mtt);
649 	vfree(mtt->buf);
650 	kfree(mtt);
651 }
652 
653 static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
654 				  struct erdma_mtt *low_mtt)
655 {
656 	dma_addr_t *pg_addr = mtt->buf;
657 	u32 i;
658 
659 	for (i = 0; i < low_mtt->npages; i++)
660 		pg_addr[i] = low_mtt->dma_addrs[i];
661 }
662 
663 static u32 vmalloc_to_dma_addrs(struct erdma_dev *dev, dma_addr_t **dma_addrs,
664 				void *buf, u64 len)
665 {
666 	dma_addr_t *pg_dma;
667 	struct page *pg;
668 	u32 npages, i;
669 	void *addr;
670 
671 	npages = (PAGE_ALIGN((u64)buf + len) - PAGE_ALIGN_DOWN((u64)buf)) >>
672 		 PAGE_SHIFT;
673 	pg_dma = vcalloc(npages, sizeof(*pg_dma));
674 	if (!pg_dma)
675 		return 0;
676 
677 	addr = buf;
678 	for (i = 0; i < npages; i++) {
679 		pg = vmalloc_to_page(addr);
680 		if (!pg)
681 			goto err;
682 
683 		pg_dma[i] = dma_map_page(&dev->pdev->dev, pg, 0, PAGE_SIZE,
684 					 DMA_TO_DEVICE);
685 		if (dma_mapping_error(&dev->pdev->dev, pg_dma[i]))
686 			goto err;
687 
688 		addr += PAGE_SIZE;
689 	}
690 
691 	*dma_addrs = pg_dma;
692 
693 	return npages;
694 err:
695 	erdma_unmap_page_list(dev, pg_dma, i);
696 	vfree(pg_dma);
697 
698 	return 0;
699 }
700 
701 static int erdma_create_mtt_buf_dma_addrs(struct erdma_dev *dev,
702 					  struct erdma_mtt *mtt)
703 {
704 	dma_addr_t *addrs;
705 	u32 npages;
706 
707 	/* Fail if buf is not page aligned. */
708 	if ((uintptr_t)mtt->buf & ~PAGE_MASK)
709 		return -EINVAL;
710 
711 	npages = vmalloc_to_dma_addrs(dev, &addrs, mtt->buf, mtt->size);
712 	if (!npages)
713 		return -ENOMEM;
714 
715 	mtt->dma_addrs = addrs;
716 	mtt->npages = npages;
717 
718 	return 0;
719 }
720 
721 static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
722 						  size_t size)
723 {
724 	struct erdma_mtt *mtt;
725 	int ret = -ENOMEM;
726 
727 	mtt = kzalloc_obj(*mtt);
728 	if (!mtt)
729 		return ERR_PTR(-ENOMEM);
730 
731 	mtt->size = ALIGN(size, PAGE_SIZE);
732 	mtt->buf = vzalloc(mtt->size);
733 	mtt->continuous = false;
734 	if (!mtt->buf)
735 		goto err_free_mtt;
736 
737 	ret = erdma_create_mtt_buf_dma_addrs(dev, mtt);
738 	if (ret)
739 		goto err_free_mtt_buf;
740 
741 	ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, npages:%u\n",
742 		  mtt->size, mtt->npages);
743 
744 	return mtt;
745 
746 err_free_mtt_buf:
747 	vfree(mtt->buf);
748 
749 err_free_mtt:
750 	kfree(mtt);
751 
752 	return ERR_PTR(ret);
753 }
754 
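/*
 * Build the MTT for a registration. When the device cannot walk
 * scattered MTT pages (no MTT_VA capability) a physically continuous
 * buffer must be used. Otherwise scatter MTTs are stacked: each new
 * level indexes the pages of the level below it, until the top level
 * fits in a single page, capped at three levels. Assuming 8-byte MTT
 * entries and 4 KiB pages (MTT_SIZE() is defined elsewhere), one page
 * holds 512 entries, so three levels cover up to 512^3 data pages.
 */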
755 static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
756 					  bool force_continuous)
757 {
758 	struct erdma_mtt *mtt, *tmp_mtt;
759 	int ret, level = 0;
760 
761 	ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
762 		  force_continuous);
763 
764 	if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
765 		force_continuous = true;
766 
767 	if (force_continuous)
768 		return erdma_create_cont_mtt(dev, size);
769 
770 	mtt = erdma_create_scatter_mtt(dev, size);
771 	if (IS_ERR(mtt))
772 		return mtt;
773 	level = 1;
774 
775 	/* Converge the MTT: add levels until the top fits in a single page. */
776 	while (mtt->npages != 1 && level <= 3) {
777 		tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->npages));
778 		if (IS_ERR(tmp_mtt)) {
779 			ret = PTR_ERR(tmp_mtt);
780 			goto err_free_mtt;
781 		}
782 		erdma_init_middle_mtt(tmp_mtt, mtt);
783 		tmp_mtt->low_level = mtt;
784 		mtt = tmp_mtt;
785 		level++;
786 	}
787 
788 	if (level > 3) {
789 		ret = -ENOMEM;
790 		goto err_free_mtt;
791 	}
792 
793 	mtt->level = level;
794 	ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
795 		  mtt->level, mtt->dma_addrs[0]);
796 
797 	return mtt;
798 err_free_mtt:
799 	while (mtt) {
800 		tmp_mtt = mtt->low_level;
801 		erdma_destroy_scatter_mtt(dev, mtt);
802 		mtt = tmp_mtt;
803 	}
804 
805 	return ERR_PTR(ret);
806 }
807 
808 static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
809 {
810 	struct erdma_mtt *tmp_mtt;
811 
812 	if (mtt->continuous) {
813 		dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
814 				 DMA_TO_DEVICE);
815 		kfree(mtt->buf);
816 		kfree(mtt);
817 	} else {
818 		while (mtt) {
819 			tmp_mtt = mtt->low_level;
820 			erdma_destroy_scatter_mtt(dev, mtt);
821 			mtt = tmp_mtt;
822 		}
823 	}
824 }
825 
826 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
827 			   u64 start, u64 len, int access, u64 virt,
828 			   unsigned long req_page_size, bool force_continuous)
829 {
830 	int ret = 0;
831 
832 	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
833 	if (IS_ERR(mem->umem)) {
834 		ret = PTR_ERR(mem->umem);
835 		mem->umem = NULL;
836 		return ret;
837 	}
838 
839 	mem->va = virt;
840 	mem->len = len;
841 	mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
842 	mem->page_offset = start & (mem->page_size - 1);
843 	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
844 	mem->page_cnt = mem->mtt_nents;
845 	mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
846 				    force_continuous);
847 	if (IS_ERR(mem->mtt)) {
848 		ret = PTR_ERR(mem->mtt);
849 		goto error_ret;
850 	}
851 
852 	erdma_fill_bottom_mtt(dev, mem);
853 
854 	return 0;
855 
856 error_ret:
857 	if (mem->umem) {
858 		ib_umem_release(mem->umem);
859 		mem->umem = NULL;
860 	}
861 
862 	return ret;
863 }
864 
865 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
866 {
867 	if (mem->mtt)
868 		erdma_destroy_mtt(dev, mem->mtt);
869 
870 	if (mem->umem) {
871 		ib_umem_release(mem->umem);
872 		mem->umem = NULL;
873 	}
874 }
875 
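/*
 * Doorbell records live in user memory. Pin the containing page once
 * per ucontext and refcount it, so multiple QPs/CQs whose records share
 * a page reuse the same pinned umem; the DMA address returned points at
 * the record itself, not at the page.
 */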
876 static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
877 				    u64 dbrecords_va,
878 				    struct erdma_user_dbrecords_page **dbr_page,
879 				    dma_addr_t *dma_addr)
880 {
881 	struct erdma_user_dbrecords_page *page = NULL;
882 	int rv = 0;
883 
884 	mutex_lock(&ctx->dbrecords_page_mutex);
885 
886 	list_for_each_entry(page, &ctx->dbrecords_page_list, list)
887 		if (page->va == (dbrecords_va & PAGE_MASK))
888 			goto found;
889 
890 	page = kmalloc_obj(*page);
891 	if (!page) {
892 		rv = -ENOMEM;
893 		goto out;
894 	}
895 
896 	page->va = (dbrecords_va & PAGE_MASK);
897 	page->refcnt = 0;
898 
899 	page->umem = ib_umem_get(ctx->ibucontext.device,
900 				 dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
901 	if (IS_ERR(page->umem)) {
902 		rv = PTR_ERR(page->umem);
903 		kfree(page);
904 		goto out;
905 	}
906 
907 	list_add(&page->list, &ctx->dbrecords_page_list);
908 
909 found:
910 	*dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
911 		    (dbrecords_va & ~PAGE_MASK);
912 	*dbr_page = page;
913 	page->refcnt++;
914 
915 out:
916 	mutex_unlock(&ctx->dbrecords_page_mutex);
917 	return rv;
918 }
919 
920 static void
921 erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
922 			   struct erdma_user_dbrecords_page **dbr_page)
923 {
924 	if (!ctx || !(*dbr_page))
925 		return;
926 
927 	mutex_lock(&ctx->dbrecords_page_mutex);
928 	if (--(*dbr_page)->refcnt == 0) {
929 		list_del(&(*dbr_page)->list);
930 		ib_umem_release((*dbr_page)->umem);
931 		kfree(*dbr_page);
932 	}
933 
934 	*dbr_page = NULL;
935 	mutex_unlock(&ctx->dbrecords_page_mutex);
936 }
937 
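/*
 * A user QP hands the driver one contiguous VA range: the SQ at offset
 * zero, then the RQ starting at the next hardware-page boundary. Each
 * half is pinned and gets its own MTT, and the two doorbell records sit
 * back to back (ERDMA_DB_SIZE apart) in the user-supplied dbrec page.
 */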
938 static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
939 			u64 va, u32 len, u64 dbrec_va)
940 {
941 	dma_addr_t dbrec_dma;
942 	u32 rq_offset;
943 	int ret;
944 
945 	if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
946 		   qp->attrs.rq_size * RQE_SIZE))
947 		return -EINVAL;
948 
949 	ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
950 			      qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
951 			      (SZ_1M - SZ_4K), true);
952 	if (ret)
953 		return ret;
954 
955 	rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
956 	qp->user_qp.rq_offset = rq_offset;
957 
958 	ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
959 			      qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
960 			      (SZ_1M - SZ_4K), true);
961 	if (ret)
962 		goto put_sq_mtt;
963 
964 	ret = erdma_map_user_dbrecords(uctx, dbrec_va,
965 				       &qp->user_qp.user_dbr_page,
966 				       &dbrec_dma);
967 	if (ret)
968 		goto put_rq_mtt;
969 
970 	qp->user_qp.sq_dbrec_dma = dbrec_dma;
971 	qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE;
972 
973 	return 0;
974 
975 put_rq_mtt:
976 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
977 
978 put_sq_mtt:
979 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
980 
981 	return ret;
982 }
983 
984 static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
985 {
986 	put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
987 	put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
988 	erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
989 }
990 
991 int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
992 		    struct ib_udata *udata)
993 {
994 	struct erdma_qp *qp = to_eqp(ibqp);
995 	struct erdma_dev *dev = to_edev(ibqp->device);
996 	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
997 		udata, struct erdma_ucontext, ibucontext);
998 	struct erdma_ureq_create_qp ureq;
999 	struct erdma_uresp_create_qp uresp;
1000 	void *old_entry;
1001 	int ret = 0;
1002 
1003 	ret = erdma_qp_validate_cap(dev, attrs);
1004 	if (ret)
1005 		goto err_out;
1006 
1007 	ret = erdma_qp_validate_attr(dev, attrs);
1008 	if (ret)
1009 		goto err_out;
1010 
1011 	qp->scq = to_ecq(attrs->send_cq);
1012 	qp->rcq = to_ecq(attrs->recv_cq);
1013 	qp->dev = dev;
1014 	qp->attrs.cc = dev->attrs.cc;
1015 
1016 	init_rwsem(&qp->state_lock);
1017 	kref_init(&qp->ref);
1018 	init_completion(&qp->safe_free);
1019 
1020 	if (qp->ibqp.qp_type == IB_QPT_GSI) {
1021 		old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
1022 		if (xa_is_err(old_entry))
1023 			ret = xa_err(old_entry);
1024 		else
1025 			qp->ibqp.qp_num = 1;
1026 	} else {
1027 		ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
1028 				      XA_LIMIT(1, dev->attrs.max_qp - 1),
1029 				      &dev->next_alloc_qpn, GFP_KERNEL);
1030 	}
1031 
1032 	if (ret < 0) {
1033 		ret = -ENOMEM;
1034 		goto err_out;
1035 	}
1036 
1037 	qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
1038 					       ERDMA_MAX_WQEBB_PER_SQE);
1039 	qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
1040 
1041 	if (uctx) {
1042 		ret = ib_copy_validate_udata_in(udata, ureq, rsvd0);
1043 		if (ret)
1044 			goto err_out_xa;
1045 
1046 		ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
1047 				   ureq.db_record_va);
1048 		if (ret)
1049 			goto err_out_xa;
1050 
1051 		memset(&uresp, 0, sizeof(uresp));
1052 
1053 		uresp.num_sqe = qp->attrs.sq_size;
1054 		uresp.num_rqe = qp->attrs.rq_size;
1055 		uresp.qp_id = QP_ID(qp);
1056 		uresp.rq_offset = qp->user_qp.rq_offset;
1057 
1058 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1059 		if (ret)
1060 			goto err_out_cmd;
1061 	} else {
1062 		ret = init_kernel_qp(dev, qp, attrs);
1063 		if (ret)
1064 			goto err_out_xa;
1065 	}
1066 
1067 	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
1068 	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
1069 
1070 	if (erdma_device_iwarp(qp->dev))
1071 		qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
1072 	else
1073 		qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET;
1074 
1075 	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
1076 
1077 	ret = create_qp_cmd(uctx, qp);
1078 	if (ret)
1079 		goto err_out_cmd;
1080 
1081 	spin_lock_init(&qp->lock);
1082 
1083 	return 0;
1084 
1085 err_out_cmd:
1086 	if (uctx)
1087 		free_user_qp(qp, uctx);
1088 	else
1089 		free_kernel_qp(qp);
1090 err_out_xa:
1091 	xa_erase(&dev->qp_xa, QP_ID(qp));
1092 err_out:
1093 	return ret;
1094 }
1095 
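/*
 * An STag is <24-bit MPT index><8-bit key>; only the index is allocated
 * here and the key byte stays zero, so lkey == rkey == (stag_idx << 8).
 */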
1096 static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
1097 {
1098 	int stag_idx;
1099 
1100 	stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
1101 	if (stag_idx < 0)
1102 		return stag_idx;
1103 
1104 	/* For now, we always keep the key field zero. */
1105 	*stag = (stag_idx << 8);
1106 
1107 	return 0;
1108 }
1109 
1110 struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
1111 {
1112 	struct erdma_dev *dev = to_edev(ibpd->device);
1113 	struct erdma_mr *mr;
1114 	u32 stag;
1115 	int ret;
1116 
1117 	mr = kzalloc_obj(*mr);
1118 	if (!mr)
1119 		return ERR_PTR(-ENOMEM);
1120 
1121 	ret = erdma_create_stag(dev, &stag);
1122 	if (ret)
1123 		goto out_free;
1124 
1125 	mr->type = ERDMA_MR_TYPE_DMA;
1126 
1127 	mr->ibmr.lkey = stag;
1128 	mr->ibmr.rkey = stag;
1129 	mr->ibmr.pd = ibpd;
1130 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
1131 	ret = regmr_cmd(dev, mr);
1132 	if (ret)
1133 		goto out_remove_stag;
1134 
1135 	return &mr->ibmr;
1136 
1137 out_remove_stag:
1138 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
1139 		       mr->ibmr.lkey >> 8);
1140 
1141 out_free:
1142 	kfree(mr);
1143 
1144 	return ERR_PTR(ret);
1145 }
1146 
1147 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
1148 				u32 max_num_sg)
1149 {
1150 	struct erdma_mr *mr;
1151 	struct erdma_dev *dev = to_edev(ibpd->device);
1152 	int ret;
1153 	u32 stag;
1154 
1155 	if (mr_type != IB_MR_TYPE_MEM_REG)
1156 		return ERR_PTR(-EOPNOTSUPP);
1157 
1158 	if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
1159 		return ERR_PTR(-EINVAL);
1160 
1161 	mr = kzalloc_obj(*mr);
1162 	if (!mr)
1163 		return ERR_PTR(-ENOMEM);
1164 
1165 	ret = erdma_create_stag(dev, &stag);
1166 	if (ret)
1167 		goto out_free;
1168 
1169 	mr->type = ERDMA_MR_TYPE_FRMR;
1170 
1171 	mr->ibmr.lkey = stag;
1172 	mr->ibmr.rkey = stag;
1173 	mr->ibmr.pd = ibpd;
1174 	/* Updated when the FRMR work request is posted. */
1175 	mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
1176 		     ERDMA_MR_ACC_RW;
1177 
1178 	mr->mem.page_size = PAGE_SIZE; /* update it later. */
1179 	mr->mem.page_cnt = max_num_sg;
1180 	mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
1181 	if (IS_ERR(mr->mem.mtt)) {
1182 		ret = PTR_ERR(mr->mem.mtt);
1183 		goto out_remove_stag;
1184 	}
1185 
1186 	ret = regmr_cmd(dev, mr);
1187 	if (ret)
1188 		goto out_destroy_mtt;
1189 
1190 	return &mr->ibmr;
1191 
1192 out_destroy_mtt:
1193 	erdma_destroy_mtt(dev, mr->mem.mtt);
1194 
1195 out_remove_stag:
1196 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
1197 		       mr->ibmr.lkey >> 8);
1198 
1199 out_free:
1200 	kfree(mr);
1201 
1202 	return ERR_PTR(ret);
1203 }
1204 
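/*
 * Page-collection callback for ib_sg_to_pages(): append one page address
 * to the FRMR's level-0 MTT, refusing further pages once page_cnt (the
 * max_num_sg passed at MR allocation time) entries are in place.
 */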
1205 static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
1206 {
1207 	struct erdma_mr *mr = to_emr(ibmr);
1208 
1209 	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
1210 		return -1;
1211 
1212 	mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
1213 	mr->mem.mtt_nents++;
1214 
1215 	return 0;
1216 }
1217 
1218 int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1219 		    unsigned int *sg_offset)
1220 {
1221 	struct erdma_mr *mr = to_emr(ibmr);
1222 	int num;
1223 
1224 	mr->mem.mtt_nents = 0;
1225 
1226 	num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
1227 			     erdma_set_page);
1228 
1229 	return num;
1230 }
1231 
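/*
 * Register a user MR: pin the range, pick the best page size from the
 * SZ_2G - SZ_4K bitmap (every power of two from 4 KiB up to 1 GiB),
 * build a possibly multi-level MTT, allocate an STag, then post REG_MR.
 * Unlike the queue-buffer mappings above, scattered MTTs are allowed
 * here (force_continuous is false).
 */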
1232 struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
1233 				u64 virt, int access, struct ib_dmah *dmah,
1234 				struct ib_udata *udata)
1235 {
1236 	struct erdma_mr *mr = NULL;
1237 	struct erdma_dev *dev = to_edev(ibpd->device);
1238 	u32 stag;
1239 	int ret;
1240 
1241 	if (dmah)
1242 		return ERR_PTR(-EOPNOTSUPP);
1243 
1244 	if (!len || len > dev->attrs.max_mr_size)
1245 		return ERR_PTR(-EINVAL);
1246 
1247 	mr = kzalloc_obj(*mr);
1248 	if (!mr)
1249 		return ERR_PTR(-ENOMEM);
1250 
1251 	ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
1252 			      SZ_2G - SZ_4K, false);
1253 	if (ret)
1254 		goto err_out_free;
1255 
1256 	ret = erdma_create_stag(dev, &stag);
1257 	if (ret)
1258 		goto err_out_put_mtt;
1259 
1260 	mr->ibmr.lkey = mr->ibmr.rkey = stag;
1261 	mr->ibmr.pd = ibpd;
1262 	mr->mem.va = virt;
1263 	mr->mem.len = len;
1264 	mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
1265 	mr->valid = 1;
1266 	mr->type = ERDMA_MR_TYPE_NORMAL;
1267 
1268 	ret = regmr_cmd(dev, mr);
1269 	if (ret)
1270 		goto err_out_mr;
1271 
1272 	return &mr->ibmr;
1273 
1274 err_out_mr:
1275 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
1276 		       mr->ibmr.lkey >> 8);
1277 
1278 err_out_put_mtt:
1279 	put_mtt_entries(dev, &mr->mem);
1280 
1281 err_out_free:
1282 	kfree(mr);
1283 
1284 	return ERR_PTR(ret);
1285 }
1286 
1287 int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1288 {
1289 	struct erdma_mr *mr;
1290 	struct erdma_dev *dev = to_edev(ibmr->device);
1291 	struct erdma_cmdq_dereg_mr_req req;
1292 	int ret;
1293 
1294 	mr = to_emr(ibmr);
1295 
1296 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1297 				CMDQ_OPCODE_DEREG_MR);
1298 
1299 	req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
1300 		  FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
1301 
1302 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
1303 				  true);
1304 	if (ret)
1305 		return ret;
1306 
1307 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
1308 
1309 	put_mtt_entries(dev, &mr->mem);
1310 
1311 	kfree(mr);
1312 	return 0;
1313 }
1314 
1315 int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1316 {
1317 	struct erdma_cq *cq = to_ecq(ibcq);
1318 	struct erdma_dev *dev = to_edev(ibcq->device);
1319 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1320 		udata, struct erdma_ucontext, ibucontext);
1321 	int err;
1322 	struct erdma_cmdq_destroy_cq_req req;
1323 
1324 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1325 				CMDQ_OPCODE_DESTROY_CQ);
1326 	req.cqn = cq->cqn;
1327 
1328 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
1329 				  true);
1330 	if (err)
1331 		return err;
1332 
1333 	if (rdma_is_kernel_res(&cq->ibcq.res)) {
1334 		dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
1335 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1336 		dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
1337 			      cq->kern_cq.dbrec_dma);
1338 	} else {
1339 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1340 		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
1341 	}
1342 
1343 	xa_erase(&dev->cq_xa, cq->cqn);
1344 
1345 	return 0;
1346 }
1347 
1348 int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1349 {
1350 	struct erdma_qp *qp = to_eqp(ibqp);
1351 	struct erdma_dev *dev = to_edev(ibqp->device);
1352 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1353 		udata, struct erdma_ucontext, ibucontext);
1354 	struct erdma_cmdq_destroy_qp_req req;
1355 	union erdma_mod_qp_params params;
1356 	int err;
1357 
1358 	down_write(&qp->state_lock);
1359 	if (erdma_device_iwarp(dev)) {
1360 		params.iwarp.state = ERDMA_QPS_IWARP_ERROR;
1361 		erdma_modify_qp_state_iwarp(qp, &params.iwarp,
1362 					    ERDMA_QPA_IWARP_STATE);
1363 	} else {
1364 		params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR;
1365 		erdma_modify_qp_state_rocev2(qp, &params.rocev2,
1366 					     ERDMA_QPA_ROCEV2_STATE);
1367 	}
1368 	up_write(&qp->state_lock);
1369 
1370 	cancel_delayed_work_sync(&qp->reflush_dwork);
1371 
1372 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1373 				CMDQ_OPCODE_DESTROY_QP);
1374 	req.qpn = QP_ID(qp);
1375 
1376 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
1377 				  true);
1378 	if (err)
1379 		return err;
1380 
1381 	erdma_qp_put(qp);
1382 	wait_for_completion(&qp->safe_free);
1383 
1384 	if (rdma_is_kernel_res(&qp->ibqp.res)) {
1385 		free_kernel_qp(qp);
1386 	} else {
1387 		put_mtt_entries(dev, &qp->user_qp.sq_mem);
1388 		put_mtt_entries(dev, &qp->user_qp.rq_mem);
1389 		erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
1390 	}
1391 
1392 	if (qp->cep)
1393 		erdma_cep_put(qp->cep);
1394 	xa_erase(&dev->qp_xa, QP_ID(qp));
1395 
1396 	return 0;
1397 }
1398 
1399 void erdma_qp_get_ref(struct ib_qp *ibqp)
1400 {
1401 	erdma_qp_get(to_eqp(ibqp));
1402 }
1403 
1404 void erdma_qp_put_ref(struct ib_qp *ibqp)
1405 {
1406 	erdma_qp_put(to_eqp(ibqp));
1407 }
1408 
1409 int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
1410 {
1411 	struct rdma_user_mmap_entry *rdma_entry;
1412 	struct erdma_user_mmap_entry *entry;
1413 	pgprot_t prot;
1414 	int err;
1415 
1416 	rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
1417 	if (!rdma_entry)
1418 		return -EINVAL;
1419 
1420 	entry = to_emmap(rdma_entry);
1421 
1422 	switch (entry->mmap_flag) {
1423 	case ERDMA_MMAP_IO_NC:
1424 		/* map doorbell. */
1425 		prot = pgprot_device(vma->vm_page_prot);
1426 		break;
1427 	default:
1428 		err = -EINVAL;
1429 		goto put_entry;
1430 	}
1431 
1432 	err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
1433 				prot, rdma_entry);
1434 
1435 put_entry:
1436 	rdma_user_mmap_entry_put(rdma_entry);
1437 	return err;
1438 }
1439 
1440 void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
1441 {
1442 	struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);
1443 
1444 	kfree(entry);
1445 }
1446 
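/*
 * Set up the per-context doorbell addresses. With the extended-doorbell
 * capability the device hands out dedicated doorbell offsets, returned
 * as page indexes within the function BAR in the first response word;
 * without it, every context maps the shared doorbell regions, which is
 * gated on CAP_SYS_RAWIO below.
 */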
1447 static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
1448 			      bool ext_db_en)
1449 {
1450 	struct erdma_cmdq_ext_db_req req = {};
1451 	u64 val0, val1;
1452 	int ret;
1453 
1454 	/*
1455 	 * CAP_SYS_RAWIO is required if the hardware does not support the
1456 	 * extended doorbell mechanism.
1457 	 */
1458 	if (!ext_db_en && !capable(CAP_SYS_RAWIO))
1459 		return -EPERM;
1460 
1461 	if (!ext_db_en) {
1462 		ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET;
1463 		ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
1464 		ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
1465 		return 0;
1466 	}
1467 
1468 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1469 				CMDQ_OPCODE_ALLOC_DB);
1470 
1471 	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
1472 		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
1473 		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
1474 
1475 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1,
1476 				  true);
1477 	if (ret)
1478 		return ret;
1479 
1480 	ctx->ext_db.enable = true;
1481 	ctx->ext_db.sdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_SDB);
1482 	ctx->ext_db.rdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_RDB);
1483 	ctx->ext_db.cdb_off = ERDMA_GET(val0, ALLOC_DB_RESP_CDB);
1484 
1485 	ctx->sdb = dev->func_bar_addr + (ctx->ext_db.sdb_off << PAGE_SHIFT);
1486 	ctx->rdb = dev->func_bar_addr + (ctx->ext_db.rdb_off << PAGE_SHIFT);
1487 	ctx->cdb = dev->func_bar_addr + (ctx->ext_db.cdb_off << PAGE_SHIFT);
1488 
1489 	return 0;
1490 }
1491 
1492 static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
1493 {
1494 	struct erdma_cmdq_ext_db_req req = {};
1495 	int ret;
1496 
1497 	if (!ctx->ext_db.enable)
1498 		return;
1499 
1500 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1501 				CMDQ_OPCODE_FREE_DB);
1502 
1503 	req.cfg = FIELD_PREP(ERDMA_CMD_EXT_DB_CQ_EN_MASK, 1) |
1504 		  FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
1505 		  FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
1506 
1507 	req.sdb_off = ctx->ext_db.sdb_off;
1508 	req.rdb_off = ctx->ext_db.rdb_off;
1509 	req.cdb_off = ctx->ext_db.cdb_off;
1510 
1511 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
1512 				  true);
1513 	if (ret)
1514 		ibdev_err_ratelimited(&dev->ibdev,
1515 				      "free db resources failed %d", ret);
1516 }
1517 
1518 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
1519 {
1520 	rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
1521 	rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
1522 	rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
1523 }
1524 
1525 int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
1526 {
1527 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1528 	struct erdma_dev *dev = to_edev(ibctx->device);
1529 	int ret;
1530 	struct erdma_uresp_alloc_ctx uresp = {};
1531 
1532 	if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
1533 		ret = -ENOMEM;
1534 		goto err_out;
1535 	}
1536 
1537 	if (udata->outlen < sizeof(uresp)) {
1538 		ret = -EINVAL;
1539 		goto err_out;
1540 	}
1541 
1542 	INIT_LIST_HEAD(&ctx->dbrecords_page_list);
1543 	mutex_init(&ctx->dbrecords_page_mutex);
1544 
1545 	ret = alloc_db_resources(dev, ctx,
1546 				 !!(dev->attrs.cap_flags &
1547 				    ERDMA_DEV_CAP_FLAGS_EXTEND_DB));
1548 	if (ret)
1549 		goto err_out;
1550 
1551 	ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
1552 		ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
1553 	if (!ctx->sq_db_mmap_entry) {
1554 		ret = -ENOMEM;
1555 		goto err_free_ext_db;
1556 	}
1557 
1558 	ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
1559 		ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
1560 	if (!ctx->rq_db_mmap_entry) {
1561 		ret = -EINVAL;
1562 		goto err_put_mmap_entries;
1563 	}
1564 
1565 	ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
1566 		ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
1567 	if (!ctx->cq_db_mmap_entry) {
1568 		ret = -EINVAL;
1569 		goto err_put_mmap_entries;
1570 	}
1571 
1572 	uresp.dev_id = dev->pdev->device;
1573 
1574 	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1575 	if (ret)
1576 		goto err_put_mmap_entries;
1577 
1578 	return 0;
1579 
1580 err_put_mmap_entries:
1581 	erdma_uctx_user_mmap_entries_remove(ctx);
1582 
1583 err_free_ext_db:
1584 	free_db_resources(dev, ctx);
1585 
1586 err_out:
1587 	atomic_dec(&dev->num_ctx);
1588 	return ret;
1589 }
1590 
1591 void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
1592 {
1593 	struct erdma_dev *dev = to_edev(ibctx->device);
1594 	struct erdma_ucontext *ctx = to_ectx(ibctx);
1595 
1596 	erdma_uctx_user_mmap_entries_remove(ctx);
1597 	free_db_resources(dev, ctx);
1598 	atomic_dec(&dev->num_ctx);
1599 }
1600 
1601 static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr,
1602 			     struct erdma_av *av, u16 sport)
1603 {
1604 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
1605 
1606 	av->port = rdma_ah_get_port_num(ah_attr);
1607 	av->sgid_index = grh->sgid_index;
1608 	av->hop_limit = grh->hop_limit;
1609 	av->traffic_class = grh->traffic_class;
1610 	av->sl = rdma_ah_get_sl(ah_attr);
1611 
1612 	av->flow_label = grh->flow_label;
1613 	av->udp_sport = sport;
1614 
1615 	ether_addr_copy(av->dmac, ah_attr->roce.dmac);
1616 	memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE);
1617 
1618 	if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid))
1619 		av->ntype = ERDMA_NETWORK_TYPE_IPV4;
1620 	else
1621 		av->ntype = ERDMA_NETWORK_TYPE_IPV6;
1622 }
1623 
1624 static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr)
1625 {
1626 	ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
1627 
1628 	rdma_ah_set_sl(ah_attr, av->sl);
1629 	rdma_ah_set_port_num(ah_attr, av->port);
1630 	rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH);
1631 
1632 	rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index,
1633 			av->hop_limit, av->traffic_class);
1634 	rdma_ah_set_dgid_raw(ah_attr, av->dgid);
1635 }
1636 
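/*
 * Translation tables between the IB verbs QP states and the device's
 * per-protocol states. Note the iWARP reverse mapping is lossy:
 * CLOSING and TERMINATE have no IB equivalent and report as IB_QPS_ERR,
 * and IDLE reports as IB_QPS_INIT.
 */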
1637 static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = {
1638 	[ERDMA_PROTO_IWARP] = {
1639 		[IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE,
1640 		[IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE,
1641 		[IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR,
1642 		[IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS,
1643 		[IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING,
1644 		[IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE,
1645 		[IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR,
1646 	},
1647 	[ERDMA_PROTO_ROCEV2] = {
1648 		[IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET,
1649 		[IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT,
1650 		[IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR,
1651 		[IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS,
1652 		[IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD,
1653 		[IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE,
1654 		[IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR,
1655 	},
1656 };
1657 
1658 static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = {
1659 	[ERDMA_PROTO_IWARP] = {
1660 		[ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT,
1661 		[ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR,
1662 		[ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS,
1663 		[ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR,
1664 		[ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR,
1665 		[ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR,
1666 	},
1667 	[ERDMA_PROTO_ROCEV2] = {
1668 		[ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET,
1669 		[ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT,
1670 		[ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR,
1671 		[ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS,
1672 		[ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD,
1673 		[ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE,
1674 		[ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR,
1675 	},
1676 };
1677 
1678 static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state)
1679 {
1680 	return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state];
1681 }
1682 
1683 static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state)
1684 {
1685 	return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state];
1686 }
1687 
1688 static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state)
1689 {
1690 	return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state];
1691 }
1692 
1693 static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state)
1694 {
1695 	return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state];
1696 }
1697 
1698 static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr,
1699 				int attr_mask)
1700 {
1701 	enum ib_qp_state cur_state, nxt_state;
1702 	struct erdma_dev *dev = qp->dev;
1703 	int ret = -EINVAL;
1704 
1705 	if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) {
1706 		ret = -EOPNOTSUPP;
1707 		goto out;
1708 	}
1709 
1710 	if ((attr_mask & IB_QP_PORT) &&
1711 	    !rdma_is_port_valid(&dev->ibdev, attr->port_num))
1712 		goto out;
1713 
1714 	if (erdma_device_rocev2(dev)) {
1715 		cur_state = (attr_mask & IB_QP_CUR_STATE) ?
1716 				    attr->cur_qp_state :
1717 				    rocev2_to_ib_qps(qp->attrs.rocev2.state);
1718 
1719 		nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state :
1720 							cur_state;
1721 
1722 		if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type,
1723 					attr_mask))
1724 			goto out;
1725 
1726 		if ((attr_mask & IB_QP_AV) &&
1727 		    erdma_check_gid_attr(
1728 			    rdma_ah_read_grh(&attr->ah_attr)->sgid_attr))
1729 			goto out;
1730 
1731 		if ((attr_mask & IB_QP_PKEY_INDEX) &&
1732 		    attr->pkey_index >= ERDMA_MAX_PKEYS)
1733 			goto out;
1734 	}
1735 
1736 	return 0;
1737 
1738 out:
1739 	return ret;
1740 }
1741 
1742 static void erdma_init_mod_qp_params_rocev2(
1743 	struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params,
1744 	int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask)
1745 {
1746 	enum erdma_qpa_mask_rocev2 to_modify_attrs = 0;
1747 	enum erdma_qps_rocev2 cur_state, nxt_state;
1748 	u16 udp_sport;
1749 
1750 	if (ib_attr_mask & IB_QP_CUR_STATE)
1751 		cur_state = ib_to_rocev2_qps(attr->cur_qp_state);
1752 	else
1753 		cur_state = qp->attrs.rocev2.state;
1754 
1755 	if (ib_attr_mask & IB_QP_STATE)
1756 		nxt_state = ib_to_rocev2_qps(attr->qp_state);
1757 	else
1758 		nxt_state = cur_state;
1759 
1760 	to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE;
1761 	params->state = nxt_state;
1762 
1763 	if (ib_attr_mask & IB_QP_QKEY) {
1764 		to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY;
1765 		params->qkey = attr->qkey;
1766 	}
1767 
1768 	if (ib_attr_mask & IB_QP_SQ_PSN) {
1769 		to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN;
1770 		params->sq_psn = attr->sq_psn;
1771 	}
1772 
1773 	if (ib_attr_mask & IB_QP_RQ_PSN) {
1774 		to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN;
1775 		params->rq_psn = attr->rq_psn;
1776 	}
1777 
1778 	if (ib_attr_mask & IB_QP_DEST_QPN) {
1779 		to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN;
1780 		params->dst_qpn = attr->dest_qp_num;
1781 	}
1782 
1783 	if (ib_attr_mask & IB_QP_AV) {
1784 		to_modify_attrs |= ERDMA_QPA_ROCEV2_AV;
1785 		udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
1786 					       QP_ID(qp), params->dst_qpn);
1787 		erdma_attr_to_av(&attr->ah_attr, &params->av, udp_sport);
1788 	}
1789 
1790 	*erdma_attr_mask = to_modify_attrs;
1791 }
1792 
1793 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1794 		    struct ib_udata *udata)
1795 {
1796 	struct erdma_qp *qp = to_eqp(ibqp);
1797 	union erdma_mod_qp_params params;
1798 	int ret = 0, erdma_attr_mask = 0;
1799 
1800 	down_write(&qp->state_lock);
1801 
1802 	ret = erdma_check_qp_attrs(qp, attr, attr_mask);
1803 	if (ret)
1804 		goto out;
1805 
1806 	if (erdma_device_iwarp(qp->dev)) {
1807 		if (attr_mask & IB_QP_STATE) {
1808 			erdma_attr_mask |= ERDMA_QPA_IWARP_STATE;
1809 			params.iwarp.state = ib_to_iwarp_qps(attr->qp_state);
1810 		}
1811 
1812 		ret = erdma_modify_qp_state_iwarp(qp, &params.iwarp,
1813 						  erdma_attr_mask);
1814 	} else {
1815 		erdma_init_mod_qp_params_rocev2(
1816 			qp, &params.rocev2, &erdma_attr_mask, attr, attr_mask);
1817 
1818 		ret = erdma_modify_qp_state_rocev2(qp, &params.rocev2,
1819 						   erdma_attr_mask);
1820 	}
1821 
1822 out:
1823 	up_write(&qp->state_lock);
1824 	return ret;
1825 }
1826 
1827 static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
1828 {
1829 	if (erdma_device_iwarp(qp->dev))
1830 		return iwarp_to_ib_qps(qp->attrs.iwarp.state);
1831 	else
1832 		return rocev2_to_ib_qps(qp->attrs.rocev2.state);
1833 }
1834 
1835 int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
1836 		   int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1837 {
1838 	struct erdma_cmdq_query_qp_req_rocev2 req;
1839 	struct erdma_dev *dev;
1840 	struct erdma_qp *qp;
1841 	u64 resp0, resp1;
1842 	int ret;
1843 
1844 	if (ibqp && qp_attr && qp_init_attr) {
1845 		qp = to_eqp(ibqp);
1846 		dev = to_edev(ibqp->device);
1847 	} else {
1848 		return -EINVAL;
1849 	}
1850 
1851 	qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1852 	qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1853 
1854 	qp_attr->cap.max_send_wr = qp->attrs.sq_size;
1855 	qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
1856 	qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
1857 	qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
1858 
1859 	qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
1860 	qp_attr->max_rd_atomic = qp->attrs.irq_size;
1861 	qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
1862 
1863 	qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
1864 				   IB_ACCESS_REMOTE_WRITE |
1865 				   IB_ACCESS_REMOTE_READ;
1866 
1867 	qp_init_attr->cap = qp_attr->cap;
1868 
1869 	if (erdma_device_rocev2(dev)) {
1870 		/* Query the hardware for the current QP attributes. */
1871 		erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1872 					CMDQ_OPCODE_QUERY_QP);
1873 		req.qpn = QP_ID(qp);
1874 
1875 		ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
1876 					  &resp1, true);
1877 		if (ret)
1878 			return ret;
1879 
1880 		qp_attr->sq_psn =
1881 			FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0);
1882 		qp_attr->rq_psn =
1883 			FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0);
1884 		qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET(
1885 			ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0));
1886 		qp_attr->cur_qp_state = qp_attr->qp_state;
1887 		qp_attr->sq_draining = FIELD_GET(
1888 			ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0);
1889 
1890 		qp_attr->pkey_index = 0;
1891 		qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn;
1892 
1893 		if (qp->ibqp.qp_type == IB_QPT_RC)
1894 			erdma_av_to_attr(&qp->attrs.rocev2.av,
1895 					 &qp_attr->ah_attr);
1896 	} else {
1897 		qp_attr->qp_state = query_qp_state(qp);
1898 		qp_attr->cur_qp_state = qp_attr->qp_state;
1899 	}
1900 
1901 	return 0;
1902 }
1903 
1904 static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
1905 			      struct erdma_ureq_create_cq *ureq)
1906 {
1907 	int ret;
1908 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1909 
1910 	ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
1911 			      ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
1912 			      true);
1913 	if (ret)
1914 		return ret;
1915 
1916 	ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
1917 				       &cq->user_cq.user_dbr_page,
1918 				       &cq->user_cq.dbrec_dma);
1919 	if (ret)
1920 		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
1921 
1922 	return ret;
1923 }
1924 
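/*
 * Kernel CQs get a DMA-coherent queue buffer and a doorbell record from
 * the shared pool; the doorbell register is the default one in the BAR.
 */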
1925 static int erdma_init_kernel_cq(struct erdma_cq *cq)
1926 {
1927 	struct erdma_dev *dev = to_edev(cq->ibcq.device);
1928 
1929 	cq->kern_cq.qbuf =
1930 		dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
1931 				   &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
1932 	if (!cq->kern_cq.qbuf)
1933 		return -ENOMEM;
1934 
1935 	cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL,
1936 					    &cq->kern_cq.dbrec_dma);
1937 	if (!cq->kern_cq.dbrec)
1938 		goto err_out;
1939 
1940 	spin_lock_init(&cq->kern_cq.lock);
	/* Use the default CQ doorbell in the device BAR. */
1942 	cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
1943 
1944 	return 0;
1945 
1946 err_out:
1947 	dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
1948 			  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1949 
1950 	return -ENOMEM;
1951 }
1952 
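/*
 * CQ creation flow: validate and round up the depth, allocate a CQN
 * cyclically from the xarray, set up the user- or kernel-space queue
 * buffer and doorbell record, then issue CREATE_CQ to the device.
 */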
1953 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1954 		    struct uverbs_attr_bundle *attrs)
1955 {
1956 	struct ib_udata *udata = &attrs->driver_udata;
1957 	struct erdma_cq *cq = to_ecq(ibcq);
1958 	struct erdma_dev *dev = to_edev(ibcq->device);
1959 	unsigned int depth = attr->cqe;
1960 	int ret;
1961 	struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1962 		udata, struct erdma_ucontext, ibucontext);
1963 
1964 	if (depth > dev->attrs.max_cqe)
1965 		return -EINVAL;
1966 
1967 	depth = roundup_pow_of_two(depth);
1968 	cq->ibcq.cqe = depth;
1969 	cq->depth = depth;
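	/* Completion vector N maps to EQ N + 1; EQ 0 is not a completion EQ. */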
1970 	cq->assoc_eqn = attr->comp_vector + 1;
1971 
1972 	ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
1973 			      XA_LIMIT(1, dev->attrs.max_cq - 1),
1974 			      &dev->next_alloc_cqn, GFP_KERNEL);
1975 	if (ret < 0)
1976 		return ret;
1977 
1978 	if (!rdma_is_kernel_res(&ibcq->res)) {
1979 		struct erdma_ureq_create_cq ureq;
1980 		struct erdma_uresp_create_cq uresp;
1981 
		ret = ib_copy_from_udata(&ureq, udata,
					 min(udata->inlen, sizeof(ureq)));
1983 		if (ret)
1984 			goto err_out_xa;
1985 
1986 		ret = erdma_init_user_cq(ctx, cq, &ureq);
1987 		if (ret)
1988 			goto err_out_xa;
1989 
1990 		uresp.cq_id = cq->cqn;
1991 		uresp.num_cqe = depth;
1992 
1993 		ret = ib_copy_to_udata(udata, &uresp,
1994 				       min(sizeof(uresp), udata->outlen));
1995 		if (ret)
1996 			goto err_free_res;
1997 	} else {
1998 		ret = erdma_init_kernel_cq(cq);
1999 		if (ret)
2000 			goto err_out_xa;
2001 	}
2002 
2003 	ret = create_cq_cmd(ctx, cq);
2004 	if (ret)
2005 		goto err_free_res;
2006 
2007 	return 0;
2008 
2009 err_free_res:
2010 	if (!rdma_is_kernel_res(&ibcq->res)) {
2011 		erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
2012 		put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
2013 	} else {
2014 		dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
2015 				  cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
2016 		dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
2017 			      cq->kern_cq.dbrec_dma);
2018 	}
2019 
2020 err_out_xa:
2021 	xa_erase(&dev->cq_xa, cq->cqn);
2022 
2023 	return ret;
2024 }
2025 
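/*
 * Nothing to do here: the uverbs core zaps all user mappings when it
 * disassociates a ucontext.
 */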
2026 void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext)
2027 {
2028 }
2029 
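/* Propagate an MTU change on the attached netdev to the device. */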
2030 void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
2031 {
2032 	struct erdma_cmdq_config_mtu_req req;
2033 
2034 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
2035 				CMDQ_OPCODE_CONF_MTU);
2036 	req.mtu = mtu;
2037 
2038 	erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true);
2039 }
2040 
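/* Report an asynchronous event on the (single) port of the device. */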
2041 void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
2042 {
2043 	struct ib_event event;
2044 
2045 	event.device = &dev->ibdev;
2046 	event.element.port_num = 1;
2047 	event.event = reason;
2048 
2049 	ib_dispatch_event(&event);
2050 }
2051 
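/*
 * Counter indices; the order must match the counter layout of the
 * hardware query response, which is copied out wholesale below.
 */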
2052 enum counters {
2053 	ERDMA_STATS_TX_REQS_CNT,
2054 	ERDMA_STATS_TX_PACKETS_CNT,
2055 	ERDMA_STATS_TX_BYTES_CNT,
2056 	ERDMA_STATS_TX_DISABLE_DROP_CNT,
2057 	ERDMA_STATS_TX_BPS_METER_DROP_CNT,
2058 	ERDMA_STATS_TX_PPS_METER_DROP_CNT,
2059 
2060 	ERDMA_STATS_RX_PACKETS_CNT,
2061 	ERDMA_STATS_RX_BYTES_CNT,
2062 	ERDMA_STATS_RX_DISABLE_DROP_CNT,
2063 	ERDMA_STATS_RX_BPS_METER_DROP_CNT,
2064 	ERDMA_STATS_RX_PPS_METER_DROP_CNT,
2065 
2066 	ERDMA_STATS_MAX
2067 };
2068 
2069 static const struct rdma_stat_desc erdma_descs[] = {
2070 	[ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt",
2071 	[ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt",
2072 	[ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt",
2073 	[ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt",
2074 	[ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt",
2075 	[ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt",
2076 	[ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt",
2077 	[ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt",
2078 	[ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt",
2079 	[ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt",
2080 	[ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt",
2081 };
2082 
2083 struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
2084 						u32 port_num)
2085 {
2086 	return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX,
2087 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
2088 }
2089 
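/*
 * Read all hardware counters with one GET_STATS command. The response
 * lands in a DMA buffer and is validated by its magic value before the
 * counters are copied into @stats.
 */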
2090 static int erdma_query_hw_stats(struct erdma_dev *dev,
2091 				struct rdma_hw_stats *stats)
2092 {
2093 	struct erdma_cmdq_query_stats_resp *resp;
2094 	struct erdma_cmdq_query_req req;
2095 	dma_addr_t dma_addr;
2096 	int err;
2097 
2098 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
2099 				CMDQ_OPCODE_GET_STATS);
2100 
2101 	resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr);
2102 	if (!resp)
2103 		return -ENOMEM;
2104 
2105 	req.target_addr = dma_addr;
2106 	req.target_length = ERDMA_HW_RESP_SIZE;
2107 
2108 	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
2109 				  true);
2110 	if (err)
2111 		goto out;
2112 
2113 	if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) {
2114 		err = -EINVAL;
2115 		goto out;
2116 	}
2117 
2118 	memcpy(&stats->value[0], &resp->tx_req_cnt,
2119 	       sizeof(u64) * stats->num_counters);
2120 
2121 out:
2122 	dma_pool_free(dev->resp_pool, resp, dma_addr);
2123 
2124 	return err;
2125 }
2126 
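/* Only per-port counters exist; a device-wide query (port 0) reports none. */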
2127 int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
2128 		       u32 port, int index)
2129 {
2130 	struct erdma_dev *dev = to_edev(ibdev);
2131 	int ret;
2132 
2133 	if (port == 0)
2134 		return 0;
2135 
2136 	ret = erdma_query_hw_stats(dev, stats);
2137 	if (ret)
2138 		return ret;
2139 
2140 	return stats->num_counters;
2141 }
2142 
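/* erdma devices, iWarp and RoCEv2 alike, run over Ethernet only. */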
2143 enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num)
2144 {
2145 	return IB_LINK_LAYER_ETHERNET;
2146 }
2147 
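/*
 * Add or delete a GID table entry in hardware. On add, the network type
 * is derived from whether the GID is an IPv4-mapped IPv6 address.
 */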
2148 static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx,
2149 			 const union ib_gid *gid)
2150 {
2151 	struct erdma_cmdq_set_gid_req req;
2152 	u8 ntype;
2153 
2154 	req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) |
2155 		  FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op);
2156 
2157 	if (op == ERDMA_SET_GID_OP_ADD) {
2158 		if (ipv6_addr_v4mapped((struct in6_addr *)gid))
2159 			ntype = ERDMA_NETWORK_TYPE_IPV4;
2160 		else
2161 			ntype = ERDMA_NETWORK_TYPE_IPV6;
2162 
2163 		req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype);
2164 
2165 		memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE);
2166 	}
2167 
2168 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
2169 				CMDQ_OPCODE_SET_GID);
2170 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
2171 				   true);
2172 }
2173 
2174 int erdma_add_gid(const struct ib_gid_attr *attr, void **context)
2175 {
2176 	struct erdma_dev *dev = to_edev(attr->device);
2177 	int ret;
2178 
2179 	ret = erdma_check_gid_attr(attr);
2180 	if (ret)
2181 		return ret;
2182 
2183 	return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index,
2184 			     &attr->gid);
2185 }
2186 
2187 int erdma_del_gid(const struct ib_gid_attr *attr, void **context)
2188 {
2189 	return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL,
2190 			     attr->index, NULL);
2191 }
2192 
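/* Only the default P_Key is supported. */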
2193 int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
2194 {
2195 	if (index >= ERDMA_MAX_PKEYS)
2196 		return -EINVAL;
2197 
2198 	*pkey = ERDMA_DEFAULT_PKEY;
2199 	return 0;
2200 }
2201 
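/* Serialize an address vector into the command-queue AV format. */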
2202 void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av)
2203 {
2204 	av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) |
2205 		       FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype);
2206 
2207 	av_cfg->traffic_class = av->traffic_class;
2208 	av_cfg->hop_limit = av->hop_limit;
2209 	av_cfg->sl = av->sl;
2210 
2211 	av_cfg->udp_sport = av->udp_sport;
2212 	av_cfg->sgid_index = av->sgid_index;
2213 
2214 	ether_addr_copy(av_cfg->dmac, av->dmac);
2215 	memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE);
2216 }
2217 
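/*
 * Create an address handle: allocate an AH index, pick a UDP source
 * port (from the flow label if set, otherwise spread by AH number over
 * the valid RoCEv2 range), and install the AV in hardware.
 */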
2218 int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
2219 		    struct ib_udata *udata)
2220 {
2221 	const struct ib_global_route *grh =
2222 		rdma_ah_read_grh(init_attr->ah_attr);
2223 	struct erdma_dev *dev = to_edev(ibah->device);
2224 	struct erdma_pd *pd = to_epd(ibah->pd);
2225 	struct erdma_ah *ah = to_eah(ibah);
2226 	struct erdma_cmdq_create_ah_req req;
2227 	u32 udp_sport;
2228 	int ret;
2229 
2230 	ret = erdma_check_gid_attr(grh->sgid_attr);
2231 	if (ret)
2232 		return ret;
2233 
2234 	ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]);
2235 	if (ret < 0)
2236 		return ret;
2237 
2238 	ah->ahn = ret;
2239 
2240 	if (grh->flow_label)
2241 		udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label);
2242 	else
2243 		udp_sport =
2244 			IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF);
2245 
2246 	erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport);
2247 
2248 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
2249 				CMDQ_OPCODE_CREATE_AH);
2250 
2251 	req.pdn = pd->pdn;
2252 	req.ahn = ah->ahn;
2253 	erdma_set_av_cfg(&req.av_cfg, &ah->av);
2254 
2255 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
2256 				  init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
2257 	if (ret) {
2258 		erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
2259 		return ret;
2260 	}
2261 
2262 	return 0;
2263 }
2264 
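/* Return the AH index to the pool only after hardware teardown succeeds. */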
2265 int erdma_destroy_ah(struct ib_ah *ibah, u32 flags)
2266 {
2267 	struct erdma_dev *dev = to_edev(ibah->device);
2268 	struct erdma_pd *pd = to_epd(ibah->pd);
2269 	struct erdma_ah *ah = to_eah(ibah);
2270 	struct erdma_cmdq_destroy_ah_req req;
2271 	int ret;
2272 
2273 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
2274 				CMDQ_OPCODE_DESTROY_AH);
2275 
2276 	req.pdn = pd->pdn;
2277 	req.ahn = ah->ahn;
2278 
2279 	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
2280 				  flags & RDMA_DESTROY_AH_SLEEPABLE);
2281 	if (ret)
2282 		return ret;
2283 
2284 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
2285 
2286 	return 0;
2287 }
2288 
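/* Rebuild the IB AH attributes from the address vector cached at creation. */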
2289 int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
2290 {
2291 	struct erdma_ah *ah = to_eah(ibah);
2292 
2293 	memset(ah_attr, 0, sizeof(*ah_attr));
2294 	erdma_av_to_attr(&ah->av, ah_attr);
2295 
2296 	return 0;
2297 }
2298