xref: /linux/drivers/infiniband/hw/erdma/erdma_qp.c (revision 001821b0e79716c4e17c71d8e053a23599a7a508)
1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 
3 /* Authors: Cheng Xu <chengyou@linux.alibaba.com> */
4 /*          Kai Shen <kaishen@linux.alibaba.com> */
5 /* Copyright (c) 2020-2021, Alibaba Group */
6 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
7 /* Copyright (c) 2008-2019, IBM Corporation */
8 
9 #include "erdma_cm.h"
10 #include "erdma_verbs.h"
11 
12 void erdma_qp_llp_close(struct erdma_qp *qp)
13 {
14 	struct erdma_qp_attrs qp_attrs;
15 
16 	down_write(&qp->state_lock);
17 
18 	switch (qp->attrs.state) {
19 	case ERDMA_QP_STATE_RTS:
20 	case ERDMA_QP_STATE_RTR:
21 	case ERDMA_QP_STATE_IDLE:
22 	case ERDMA_QP_STATE_TERMINATE:
23 		qp_attrs.state = ERDMA_QP_STATE_CLOSING;
24 		erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
25 		break;
26 	case ERDMA_QP_STATE_CLOSING:
27 		qp->attrs.state = ERDMA_QP_STATE_IDLE;
28 		break;
29 	default:
30 		break;
31 	}
32 
33 	if (qp->cep) {
34 		erdma_cep_put(qp->cep);
35 		qp->cep = NULL;
36 	}
37 
38 	up_write(&qp->state_lock);
39 }
40 
41 struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
42 {
43 	struct erdma_qp *qp = find_qp_by_qpn(to_edev(ibdev), id);
44 
45 	if (qp)
46 		return &qp->ibqp;
47 
48 	return NULL;
49 }
50 
51 static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
52 					struct erdma_qp_attrs *attrs,
53 					enum erdma_qp_attr_mask mask)
54 {
55 	int ret;
56 	struct erdma_dev *dev = qp->dev;
57 	struct erdma_cmdq_modify_qp_req req;
58 	struct tcp_sock *tp;
59 	struct erdma_cep *cep = qp->cep;
60 	struct sockaddr_storage local_addr, remote_addr;
61 
62 	if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
63 		return -EINVAL;
64 
65 	if (!(mask & ERDMA_QP_ATTR_MPA))
66 		return -EINVAL;
67 
68 	ret = getname_local(cep->sock, &local_addr);
69 	if (ret < 0)
70 		return ret;
71 
72 	ret = getname_peer(cep->sock, &remote_addr);
73 	if (ret < 0)
74 		return ret;
75 
76 	qp->attrs.state = ERDMA_QP_STATE_RTS;
77 
78 	tp = tcp_sk(qp->cep->sock->sk);
79 
80 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
81 				CMDQ_OPCODE_MODIFY_QP);
82 
83 	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
84 		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
85 		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
86 
87 	req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
88 	req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
89 	req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
90 	req.dport = to_sockaddr_in(remote_addr).sin_port;
91 	req.sport = to_sockaddr_in(local_addr).sin_port;
92 
93 	req.send_nxt = tp->snd_nxt;
94 	/* rsvd tcp seq for mpa-rsp in server. */
95 	if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
96 		req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
97 	req.recv_nxt = tp->rcv_nxt;
98 
99 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
100 }
101 
102 static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
103 					 struct erdma_qp_attrs *attrs,
104 					 enum erdma_qp_attr_mask mask)
105 {
106 	struct erdma_dev *dev = qp->dev;
107 	struct erdma_cmdq_modify_qp_req req;
108 
109 	qp->attrs.state = attrs->state;
110 
111 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
112 				CMDQ_OPCODE_MODIFY_QP);
113 
114 	req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
115 		  FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
116 
117 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
118 }
119 
120 int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
121 			     enum erdma_qp_attr_mask mask)
122 {
123 	bool need_reflush = false;
124 	int drop_conn, ret = 0;
125 
126 	if (!mask)
127 		return 0;
128 
129 	if (!(mask & ERDMA_QP_ATTR_STATE))
130 		return 0;
131 
132 	switch (qp->attrs.state) {
133 	case ERDMA_QP_STATE_IDLE:
134 	case ERDMA_QP_STATE_RTR:
135 		if (attrs->state == ERDMA_QP_STATE_RTS) {
136 			ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
137 		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
138 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
139 			need_reflush = true;
140 			if (qp->cep) {
141 				erdma_cep_put(qp->cep);
142 				qp->cep = NULL;
143 			}
144 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
145 		}
146 		break;
147 	case ERDMA_QP_STATE_RTS:
148 		drop_conn = 0;
149 
150 		if (attrs->state == ERDMA_QP_STATE_CLOSING ||
151 		    attrs->state == ERDMA_QP_STATE_TERMINATE ||
152 		    attrs->state == ERDMA_QP_STATE_ERROR) {
153 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
154 			drop_conn = 1;
155 			need_reflush = true;
156 		}
157 
158 		if (drop_conn)
159 			erdma_qp_cm_drop(qp);
160 
161 		break;
162 	case ERDMA_QP_STATE_TERMINATE:
163 		if (attrs->state == ERDMA_QP_STATE_ERROR)
164 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
165 		break;
166 	case ERDMA_QP_STATE_CLOSING:
167 		if (attrs->state == ERDMA_QP_STATE_IDLE) {
168 			qp->attrs.state = ERDMA_QP_STATE_IDLE;
169 		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
170 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
171 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
172 		} else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
173 			return -ECONNABORTED;
174 		}
175 		break;
176 	default:
177 		break;
178 	}
179 
180 	if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
181 		qp->flags |= ERDMA_QP_IN_FLUSHING;
182 		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
183 				 usecs_to_jiffies(100));
184 	}
185 
186 	return ret;
187 }
188 
189 static void erdma_qp_safe_free(struct kref *ref)
190 {
191 	struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
192 
193 	complete(&qp->safe_free);
194 }
195 
196 void erdma_qp_put(struct erdma_qp *qp)
197 {
198 	WARN_ON(kref_read(&qp->ref) < 1);
199 	kref_put(&qp->ref, erdma_qp_safe_free);
200 }
201 
202 void erdma_qp_get(struct erdma_qp *qp)
203 {
204 	kref_get(&qp->ref);
205 }
206 
207 static int fill_inline_data(struct erdma_qp *qp,
208 			    const struct ib_send_wr *send_wr, u16 wqe_idx,
209 			    u32 sgl_offset, __le32 *length_field)
210 {
211 	u32 remain_size, copy_size, data_off, bytes = 0;
212 	char *data;
213 	int i = 0;
214 
215 	wqe_idx += (sgl_offset >> SQEBB_SHIFT);
216 	sgl_offset &= (SQEBB_SIZE - 1);
217 	data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx, qp->attrs.sq_size,
218 			       SQEBB_SHIFT);
219 
220 	while (i < send_wr->num_sge) {
221 		bytes += send_wr->sg_list[i].length;
222 		if (bytes > (int)ERDMA_MAX_INLINE)
223 			return -EINVAL;
224 
225 		remain_size = send_wr->sg_list[i].length;
226 		data_off = 0;
227 
228 		while (1) {
229 			copy_size = min(remain_size, SQEBB_SIZE - sgl_offset);
230 
231 			memcpy(data + sgl_offset,
232 			       (void *)(uintptr_t)send_wr->sg_list[i].addr +
233 				       data_off,
234 			       copy_size);
235 			remain_size -= copy_size;
236 			data_off += copy_size;
237 			sgl_offset += copy_size;
238 			wqe_idx += (sgl_offset >> SQEBB_SHIFT);
239 			sgl_offset &= (SQEBB_SIZE - 1);
240 
241 			data = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
242 					       qp->attrs.sq_size, SQEBB_SHIFT);
243 			if (!remain_size)
244 				break;
245 		}
246 
247 		i++;
248 	}
249 	*length_field = cpu_to_le32(bytes);
250 
251 	return bytes;
252 }
253 
254 static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
255 		    u16 wqe_idx, u32 sgl_offset, __le32 *length_field)
256 {
257 	int i = 0;
258 	u32 bytes = 0;
259 	char *sgl;
260 
261 	if (send_wr->num_sge > qp->dev->attrs.max_send_sge)
262 		return -EINVAL;
263 
264 	if (sgl_offset & 0xF)
265 		return -EINVAL;
266 
267 	while (i < send_wr->num_sge) {
268 		wqe_idx += (sgl_offset >> SQEBB_SHIFT);
269 		sgl_offset &= (SQEBB_SIZE - 1);
270 		sgl = get_queue_entry(qp->kern_qp.sq_buf, wqe_idx,
271 				      qp->attrs.sq_size, SQEBB_SHIFT);
272 
273 		bytes += send_wr->sg_list[i].length;
274 		memcpy(sgl + sgl_offset, &send_wr->sg_list[i],
275 		       sizeof(struct ib_sge));
276 
277 		sgl_offset += sizeof(struct ib_sge);
278 		i++;
279 	}
280 
281 	*length_field = cpu_to_le32(bytes);
282 	return 0;
283 }
284 
285 static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
286 			      const struct ib_send_wr *send_wr)
287 {
288 	u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
289 	u32 idx = *pi & (qp->attrs.sq_size - 1);
290 	enum ib_wr_opcode op = send_wr->opcode;
291 	struct erdma_atomic_sqe *atomic_sqe;
292 	struct erdma_readreq_sqe *read_sqe;
293 	struct erdma_reg_mr_sqe *regmr_sge;
294 	struct erdma_write_sqe *write_sqe;
295 	struct erdma_send_sqe *send_sqe;
296 	struct ib_rdma_wr *rdma_wr;
297 	struct erdma_sge *sge;
298 	__le32 *length_field;
299 	struct erdma_mr *mr;
300 	u64 wqe_hdr, *entry;
301 	u32 attrs;
302 	int ret;
303 
304 	entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
305 				SQEBB_SHIFT);
306 
307 	/* Clear the SQE header section. */
308 	*entry = 0;
309 
310 	qp->kern_qp.swr_tbl[idx] = send_wr->wr_id;
311 	flags = send_wr->send_flags;
312 	wqe_hdr = FIELD_PREP(
313 		ERDMA_SQE_HDR_CE_MASK,
314 		((flags & IB_SEND_SIGNALED) || qp->kern_qp.sig_all) ? 1 : 0);
315 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SE_MASK,
316 			      flags & IB_SEND_SOLICITED ? 1 : 0);
317 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_FENCE_MASK,
318 			      flags & IB_SEND_FENCE ? 1 : 0);
319 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_INLINE_MASK,
320 			      flags & IB_SEND_INLINE ? 1 : 0);
321 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp));
322 
323 	switch (op) {
324 	case IB_WR_RDMA_WRITE:
325 	case IB_WR_RDMA_WRITE_WITH_IMM:
326 		hw_op = ERDMA_OP_WRITE;
327 		if (op == IB_WR_RDMA_WRITE_WITH_IMM)
328 			hw_op = ERDMA_OP_WRITE_WITH_IMM;
329 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
330 		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
331 		write_sqe = (struct erdma_write_sqe *)entry;
332 
333 		write_sqe->imm_data = send_wr->ex.imm_data;
334 		write_sqe->sink_stag = cpu_to_le32(rdma_wr->rkey);
335 		write_sqe->sink_to_h =
336 			cpu_to_le32(upper_32_bits(rdma_wr->remote_addr));
337 		write_sqe->sink_to_l =
338 			cpu_to_le32(lower_32_bits(rdma_wr->remote_addr));
339 
340 		length_field = &write_sqe->length;
341 		wqe_size = sizeof(struct erdma_write_sqe);
342 		sgl_offset = wqe_size;
343 		break;
344 	case IB_WR_RDMA_READ:
345 	case IB_WR_RDMA_READ_WITH_INV:
346 		read_sqe = (struct erdma_readreq_sqe *)entry;
347 		if (unlikely(send_wr->num_sge != 1))
348 			return -EINVAL;
349 		hw_op = ERDMA_OP_READ;
350 		if (op == IB_WR_RDMA_READ_WITH_INV) {
351 			hw_op = ERDMA_OP_READ_WITH_INV;
352 			read_sqe->invalid_stag =
353 				cpu_to_le32(send_wr->ex.invalidate_rkey);
354 		}
355 
356 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
357 		rdma_wr = container_of(send_wr, struct ib_rdma_wr, wr);
358 		read_sqe->length = cpu_to_le32(send_wr->sg_list[0].length);
359 		read_sqe->sink_stag = cpu_to_le32(send_wr->sg_list[0].lkey);
360 		read_sqe->sink_to_l =
361 			cpu_to_le32(lower_32_bits(send_wr->sg_list[0].addr));
362 		read_sqe->sink_to_h =
363 			cpu_to_le32(upper_32_bits(send_wr->sg_list[0].addr));
364 
365 		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
366 				      qp->attrs.sq_size, SQEBB_SHIFT);
367 		sge->addr = cpu_to_le64(rdma_wr->remote_addr);
368 		sge->key = cpu_to_le32(rdma_wr->rkey);
369 		sge->length = cpu_to_le32(send_wr->sg_list[0].length);
370 		wqe_size = sizeof(struct erdma_readreq_sqe) +
371 			   send_wr->num_sge * sizeof(struct ib_sge);
372 
373 		goto out;
374 	case IB_WR_SEND:
375 	case IB_WR_SEND_WITH_IMM:
376 	case IB_WR_SEND_WITH_INV:
377 		send_sqe = (struct erdma_send_sqe *)entry;
378 		hw_op = ERDMA_OP_SEND;
379 		if (op == IB_WR_SEND_WITH_IMM) {
380 			hw_op = ERDMA_OP_SEND_WITH_IMM;
381 			send_sqe->imm_data = send_wr->ex.imm_data;
382 		} else if (op == IB_WR_SEND_WITH_INV) {
383 			hw_op = ERDMA_OP_SEND_WITH_INV;
384 			send_sqe->invalid_stag =
385 				cpu_to_le32(send_wr->ex.invalidate_rkey);
386 		}
387 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
388 		length_field = &send_sqe->length;
389 		wqe_size = sizeof(struct erdma_send_sqe);
390 		sgl_offset = wqe_size;
391 
392 		break;
393 	case IB_WR_REG_MR:
394 		wqe_hdr |=
395 			FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, ERDMA_OP_REG_MR);
396 		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
397 		mr = to_emr(reg_wr(send_wr)->mr);
398 
399 		mr->access = ERDMA_MR_ACC_LR |
400 			     to_erdma_access_flags(reg_wr(send_wr)->access);
401 		regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
402 		regmr_sge->length = cpu_to_le32(mr->ibmr.length);
403 		regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
404 		attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
405 			FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
406 				   mr->mem.mtt_nents);
407 
408 		if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) {
409 			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0);
410 			/* Copy SGLs to SQE content to accelerate */
411 			memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
412 					       qp->attrs.sq_size, SQEBB_SHIFT),
413 			       mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
414 			wqe_size = sizeof(struct erdma_reg_mr_sqe) +
415 				   MTT_SIZE(mr->mem.mtt_nents);
416 		} else {
417 			attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 1);
418 			wqe_size = sizeof(struct erdma_reg_mr_sqe);
419 		}
420 
421 		regmr_sge->attrs = cpu_to_le32(attrs);
422 		goto out;
423 	case IB_WR_LOCAL_INV:
424 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
425 				      ERDMA_OP_LOCAL_INV);
426 		regmr_sge = (struct erdma_reg_mr_sqe *)entry;
427 		regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
428 		wqe_size = sizeof(struct erdma_reg_mr_sqe);
429 		goto out;
430 	case IB_WR_ATOMIC_CMP_AND_SWP:
431 	case IB_WR_ATOMIC_FETCH_AND_ADD:
432 		atomic_sqe = (struct erdma_atomic_sqe *)entry;
433 		if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
434 			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
435 					      ERDMA_OP_ATOMIC_CAS);
436 			atomic_sqe->fetchadd_swap_data =
437 				cpu_to_le64(atomic_wr(send_wr)->swap);
438 			atomic_sqe->cmp_data =
439 				cpu_to_le64(atomic_wr(send_wr)->compare_add);
440 		} else {
441 			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
442 					      ERDMA_OP_ATOMIC_FAA);
443 			atomic_sqe->fetchadd_swap_data =
444 				cpu_to_le64(atomic_wr(send_wr)->compare_add);
445 		}
446 
447 		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
448 				      qp->attrs.sq_size, SQEBB_SHIFT);
449 		sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
450 		sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
451 		sge++;
452 
453 		sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
454 		sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
455 		sge->length = cpu_to_le32(send_wr->sg_list[0].length);
456 
457 		wqe_size = sizeof(*atomic_sqe);
458 		goto out;
459 	default:
460 		return -EOPNOTSUPP;
461 	}
462 
463 	if (flags & IB_SEND_INLINE) {
464 		ret = fill_inline_data(qp, send_wr, idx, sgl_offset,
465 				       length_field);
466 		if (ret < 0)
467 			return -EINVAL;
468 		wqe_size += ret;
469 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK, ret);
470 	} else {
471 		ret = fill_sgl(qp, send_wr, idx, sgl_offset, length_field);
472 		if (ret)
473 			return -EINVAL;
474 		wqe_size += send_wr->num_sge * sizeof(struct ib_sge);
475 		wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_SGL_LEN_MASK,
476 				      send_wr->num_sge);
477 	}
478 
479 out:
480 	wqebb_cnt = SQEBB_COUNT(wqe_size);
481 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_CNT_MASK, wqebb_cnt - 1);
482 	*pi += wqebb_cnt;
483 	wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, *pi);
484 
485 	*entry = wqe_hdr;
486 
487 	return 0;
488 }
489 
490 static void kick_sq_db(struct erdma_qp *qp, u16 pi)
491 {
492 	u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
493 		      FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
494 
495 	*(u64 *)qp->kern_qp.sq_dbrec = db_data;
496 	writeq(db_data, qp->kern_qp.hw_sq_db);
497 }
498 
499 int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
500 		    const struct ib_send_wr **bad_send_wr)
501 {
502 	struct erdma_qp *qp = to_eqp(ibqp);
503 	int ret = 0;
504 	const struct ib_send_wr *wr = send_wr;
505 	unsigned long flags;
506 	u16 sq_pi;
507 
508 	if (!send_wr)
509 		return -EINVAL;
510 
511 	spin_lock_irqsave(&qp->lock, flags);
512 	sq_pi = qp->kern_qp.sq_pi;
513 
514 	while (wr) {
515 		if ((u16)(sq_pi - qp->kern_qp.sq_ci) >= qp->attrs.sq_size) {
516 			ret = -ENOMEM;
517 			*bad_send_wr = send_wr;
518 			break;
519 		}
520 
521 		ret = erdma_push_one_sqe(qp, &sq_pi, wr);
522 		if (ret) {
523 			*bad_send_wr = wr;
524 			break;
525 		}
526 		qp->kern_qp.sq_pi = sq_pi;
527 		kick_sq_db(qp, sq_pi);
528 
529 		wr = wr->next;
530 	}
531 	spin_unlock_irqrestore(&qp->lock, flags);
532 
533 	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
534 		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
535 				 usecs_to_jiffies(100));
536 
537 	return ret;
538 }
539 
540 static int erdma_post_recv_one(struct erdma_qp *qp,
541 			       const struct ib_recv_wr *recv_wr)
542 {
543 	struct erdma_rqe *rqe =
544 		get_queue_entry(qp->kern_qp.rq_buf, qp->kern_qp.rq_pi,
545 				qp->attrs.rq_size, RQE_SHIFT);
546 
547 	rqe->qe_idx = cpu_to_le16(qp->kern_qp.rq_pi + 1);
548 	rqe->qpn = cpu_to_le32(QP_ID(qp));
549 
550 	if (recv_wr->num_sge == 0) {
551 		rqe->length = 0;
552 	} else if (recv_wr->num_sge == 1) {
553 		rqe->stag = cpu_to_le32(recv_wr->sg_list[0].lkey);
554 		rqe->to = cpu_to_le64(recv_wr->sg_list[0].addr);
555 		rqe->length = cpu_to_le32(recv_wr->sg_list[0].length);
556 	} else {
557 		return -EINVAL;
558 	}
559 
560 	*(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe;
561 	writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);
562 
563 	qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
564 		recv_wr->wr_id;
565 	qp->kern_qp.rq_pi++;
566 
567 	return 0;
568 }
569 
570 int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
571 		    const struct ib_recv_wr **bad_recv_wr)
572 {
573 	const struct ib_recv_wr *wr = recv_wr;
574 	struct erdma_qp *qp = to_eqp(ibqp);
575 	unsigned long flags;
576 	int ret;
577 
578 	spin_lock_irqsave(&qp->lock, flags);
579 
580 	while (wr) {
581 		ret = erdma_post_recv_one(qp, wr);
582 		if (ret) {
583 			*bad_recv_wr = wr;
584 			break;
585 		}
586 		wr = wr->next;
587 	}
588 
589 	spin_unlock_irqrestore(&qp->lock, flags);
590 
591 	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
592 		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
593 				 usecs_to_jiffies(100));
594 
595 	return ret;
596 }
597