xref: /linux/drivers/infiniband/hw/cxgb4/qp.c (revision dc0d1c4519095a6c6bbd9ec4a808674aba502741)
1 /*
2  * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/module.h>
34 
35 #include "iw_cxgb4.h"
36 
37 static int db_delay_usecs = 1;
38 module_param(db_delay_usecs, int, 0644);
39 MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
40 
41 static int ocqp_support = 1;
42 module_param(ocqp_support, int, 0644);
43 MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
44 
45 int db_fc_threshold = 1000;
46 module_param(db_fc_threshold, int, 0644);
47 MODULE_PARM_DESC(db_fc_threshold,
48 		 "QP count/threshold that triggers"
49 		 " automatic db flow control mode (default = 1000)");
50 
51 int db_coalescing_threshold;
52 module_param(db_coalescing_threshold, int, 0644);
53 MODULE_PARM_DESC(db_coalescing_threshold,
54 		 "QP count/threshold that triggers"
55 		 " disabling db coalescing (default = 0)");
56 
57 static int max_fr_immd = T4_MAX_FR_IMMD;
58 module_param(max_fr_immd, int, 0644);
59 MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immediate");
60 
61 static int alloc_ird(struct c4iw_dev *dev, u32 ird)
62 {
63 	int ret = 0;
64 
65 	spin_lock_irq(&dev->lock);
66 	if (ird <= dev->avail_ird)
67 		dev->avail_ird -= ird;
68 	else
69 		ret = -ENOMEM;
70 	spin_unlock_irq(&dev->lock);
71 
72 	if (ret)
73 		dev_warn(&dev->rdev.lldi.pdev->dev,
74 			 "device IRD resources exhausted\n");
75 
76 	return ret;
77 }
78 
79 static void free_ird(struct c4iw_dev *dev, int ird)
80 {
81 	spin_lock_irq(&dev->lock);
82 	dev->avail_ird += ird;
83 	spin_unlock_irq(&dev->lock);
84 }
85 
86 static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
87 {
88 	unsigned long flag;
89 	spin_lock_irqsave(&qhp->lock, flag);
90 	qhp->attr.state = state;
91 	spin_unlock_irqrestore(&qhp->lock, flag);
92 }
93 
94 static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
95 {
96 	c4iw_ocqp_pool_free(rdev, sq->dma_addr, sq->memsize);
97 }
98 
99 static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
100 {
101 	dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
102 			  dma_unmap_addr(sq, mapping));
103 }
104 
105 static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
106 {
107 	if (t4_sq_onchip(sq))
108 		dealloc_oc_sq(rdev, sq);
109 	else
110 		dealloc_host_sq(rdev, sq);
111 }
112 
113 static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
114 {
115 	if (!ocqp_support || !ocqp_supported(&rdev->lldi))
116 		return -ENOSYS;
117 	sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize);
118 	if (!sq->dma_addr)
119 		return -ENOMEM;
120 	sq->phys_addr = rdev->oc_mw_pa + sq->dma_addr -
121 			rdev->lldi.vr->ocq.start;
122 	sq->queue = (__force union t4_wr *)(rdev->oc_mw_kva + sq->dma_addr -
123 					    rdev->lldi.vr->ocq.start);
124 	sq->flags |= T4_SQ_ONCHIP;
125 	return 0;
126 }
127 
128 static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
129 {
130 	sq->queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), sq->memsize,
131 				       &(sq->dma_addr), GFP_KERNEL);
132 	if (!sq->queue)
133 		return -ENOMEM;
134 	sq->phys_addr = virt_to_phys(sq->queue);
135 	dma_unmap_addr_set(sq, mapping, sq->dma_addr);
136 	return 0;
137 }
138 
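/*
 * Allocate the SQ memory.  An on-chip SQ is only attempted for user QPs
 * (and only when ocqp_support and the adapter allow it); otherwise, or if
 * the on-chip pool allocation fails, fall back to coherent host memory.
 */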
139 static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
140 {
141 	int ret = -ENOSYS;
142 	if (user)
143 		ret = alloc_oc_sq(rdev, sq);
144 	if (ret)
145 		ret = alloc_host_sq(rdev, sq);
146 	return ret;
147 }
148 
149 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
150 		      struct c4iw_dev_ucontext *uctx, int has_rq)
151 {
152 	/*
153 	 * uP clears EQ contexts when the connection exits rdma mode,
154 	 * so no need to post a RESET WR for these EQs.
155 	 */
156 	dealloc_sq(rdev, &wq->sq);
157 	kfree(wq->sq.sw_sq);
158 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
159 
160 	if (has_rq) {
161 		dma_free_coherent(&rdev->lldi.pdev->dev,
162 				  wq->rq.memsize, wq->rq.queue,
163 				  dma_unmap_addr(&wq->rq, mapping));
164 		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
165 		kfree(wq->rq.sw_rq);
166 		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
167 	}
168 	return 0;
169 }
170 
171 /*
172  * Determine the BAR2 virtual address and qid. If pbar2_pa is not NULL,
173  * then this is a user mapping so compute the page-aligned physical address
174  * for mapping.
175  */
176 void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
177 			      enum cxgb4_bar2_qtype qtype,
178 			      unsigned int *pbar2_qid, u64 *pbar2_pa)
179 {
180 	u64 bar2_qoffset;
181 	int ret;
182 
183 	ret = cxgb4_bar2_sge_qregs(rdev->lldi.ports[0], qid, qtype,
184 				   pbar2_pa ? 1 : 0,
185 				   &bar2_qoffset, pbar2_qid);
186 	if (ret)
187 		return NULL;
188 
189 	if (pbar2_pa)
190 		*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
191 
192 	if (is_t4(rdev->lldi.adapter_type))
193 		return NULL;
194 
195 	return rdev->bar2_kva + bar2_qoffset;
196 }
197 
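/*
 * Allocate everything a QP needs: SQ/RQ qids, the software shadow queues
 * (kernel QPs only), the RQT entries, the DMA queue memory and the BAR2
 * doorbell mappings, then post a FW_RI_RES_WR so the firmware writes the
 * EQ contexts, waiting for its completion via wr_waitp.
 */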
198 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
199 		     struct t4_cq *rcq, struct t4_cq *scq,
200 		     struct c4iw_dev_ucontext *uctx,
201 		     struct c4iw_wr_wait *wr_waitp,
202 		     int need_rq)
203 {
204 	int user = (uctx != &rdev->uctx);
205 	struct fw_ri_res_wr *res_wr;
206 	struct fw_ri_res *res;
207 	int wr_len;
208 	struct sk_buff *skb;
209 	int ret = 0;
210 	int eqsize;
211 
212 	wq->sq.qid = c4iw_get_qpid(rdev, uctx);
213 	if (!wq->sq.qid)
214 		return -ENOMEM;
215 
216 	if (need_rq) {
217 		wq->rq.qid = c4iw_get_qpid(rdev, uctx);
218 		if (!wq->rq.qid) {
219 			ret = -ENOMEM;
220 			goto free_sq_qid;
221 		}
222 	}
223 
224 	if (!user) {
225 		wq->sq.sw_sq = kcalloc(wq->sq.size, sizeof(*wq->sq.sw_sq),
226 				       GFP_KERNEL);
227 		if (!wq->sq.sw_sq) {
228 			ret = -ENOMEM;
229 			goto free_rq_qid;//FIXME
230 		}
231 
232 		if (need_rq) {
233 			wq->rq.sw_rq = kcalloc(wq->rq.size,
234 					       sizeof(*wq->rq.sw_rq),
235 					       GFP_KERNEL);
236 			if (!wq->rq.sw_rq) {
237 				ret = -ENOMEM;
238 				goto free_sw_sq;
239 			}
240 		}
241 	}
242 
243 	if (need_rq) {
244 		/*
245 		 * RQT must be a power of 2 and at least 16 deep.
246 		 */
247 		wq->rq.rqt_size =
248 			roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
249 		wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
250 		if (!wq->rq.rqt_hwaddr) {
251 			ret = -ENOMEM;
252 			goto free_sw_rq;
253 		}
254 	}
255 
256 	ret = alloc_sq(rdev, &wq->sq, user);
257 	if (ret)
258 		goto free_hwaddr;
259 	memset(wq->sq.queue, 0, wq->sq.memsize);
260 	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
261 
262 	if (need_rq) {
263 		wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
264 						  wq->rq.memsize,
265 						  &wq->rq.dma_addr,
266 						  GFP_KERNEL);
267 		if (!wq->rq.queue) {
268 			ret = -ENOMEM;
269 			goto free_sq;
270 		}
271 		pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
272 			 wq->sq.queue,
273 			 (unsigned long long)virt_to_phys(wq->sq.queue),
274 			 wq->rq.queue,
275 			 (unsigned long long)virt_to_phys(wq->rq.queue));
276 		memset(wq->rq.queue, 0, wq->rq.memsize);
277 		dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
278 	}
279 
280 	wq->db = rdev->lldi.db_reg;
281 
282 	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
283 					 &wq->sq.bar2_qid,
284 					 user ? &wq->sq.bar2_pa : NULL);
285 	if (need_rq)
286 		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
287 						 T4_BAR2_QTYPE_EGRESS,
288 						 &wq->rq.bar2_qid,
289 						 user ? &wq->rq.bar2_pa : NULL);
290 
291 	/*
292 	 * User mode must have bar2 access.
293 	 */
294 	if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
295 		pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
296 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
297 		goto free_dma;
298 	}
299 
300 	wq->rdev = rdev;
301 	wq->rq.msn = 1;
302 
303 	/* build fw_ri_res_wr */
304 	wr_len = sizeof(*res_wr) + sizeof(*res);
305 	if (need_rq)
306 		wr_len += sizeof(*res);
307 	skb = alloc_skb(wr_len, GFP_KERNEL);
308 	if (!skb) {
309 		ret = -ENOMEM;
310 		goto free_dma;
311 	}
312 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
313 
314 	res_wr = __skb_put_zero(skb, wr_len);
315 	res_wr->op_nres = cpu_to_be32(
316 			FW_WR_OP_V(FW_RI_RES_WR) |
317 			FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
318 			FW_WR_COMPL_F);
319 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
320 	res_wr->cookie = (uintptr_t)wr_waitp;
321 	res = res_wr->res;
322 	res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
323 	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
324 
325 	/*
326 	 * eqsize is the number of 64B entries plus the status page size.
327 	 */
328 	eqsize = wq->sq.size * T4_SQ_NUM_SLOTS +
329 		rdev->hw_queue.t4_eq_status_entries;
330 
331 	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
332 		FW_RI_RES_WR_HOSTFCMODE_V(0) |	/* no host cidx updates */
333 		FW_RI_RES_WR_CPRIO_V(0) |	/* don't keep in chip cache */
334 		FW_RI_RES_WR_PCIECHN_V(0) |	/* set by uP at ri_init time */
335 		(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_ONCHIP_F : 0) |
336 		FW_RI_RES_WR_IQID_V(scq->cqid));
337 	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
338 		FW_RI_RES_WR_DCAEN_V(0) |
339 		FW_RI_RES_WR_DCACPU_V(0) |
340 		FW_RI_RES_WR_FBMIN_V(2) |
341 		(t4_sq_onchip(&wq->sq) ? FW_RI_RES_WR_FBMAX_V(2) :
342 					 FW_RI_RES_WR_FBMAX_V(3)) |
343 		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
344 		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
345 		FW_RI_RES_WR_EQSIZE_V(eqsize));
346 	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
347 	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
348 
349 	if (need_rq) {
350 		res++;
351 		res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
352 		res->u.sqrq.op = FW_RI_RES_OP_WRITE;
353 
354 		/*
355 		 * eqsize is the number of 64B entries plus the status page size
356 		 */
357 		eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
358 			rdev->hw_queue.t4_eq_status_entries;
359 		res->u.sqrq.fetchszm_to_iqid =
360 			/* no host cidx updates */
361 			cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
362 			/* don't keep in chip cache */
363 			FW_RI_RES_WR_CPRIO_V(0) |
364 			/* set by uP at ri_init time */
365 			FW_RI_RES_WR_PCIECHN_V(0) |
366 			FW_RI_RES_WR_IQID_V(rcq->cqid));
367 		res->u.sqrq.dcaen_to_eqsize =
368 			cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
369 			FW_RI_RES_WR_DCACPU_V(0) |
370 			FW_RI_RES_WR_FBMIN_V(2) |
371 			FW_RI_RES_WR_FBMAX_V(3) |
372 			FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
373 			FW_RI_RES_WR_CIDXFTHRESH_V(0) |
374 			FW_RI_RES_WR_EQSIZE_V(eqsize));
375 		res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
376 		res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
377 	}
378 
379 	c4iw_init_wr_wait(wr_waitp);
380 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
381 	if (ret)
382 		goto free_dma;
383 
384 	pr_debug("sqid 0x%x rqid 0x%x kdb 0x%p sq_bar2_addr %p rq_bar2_addr %p\n",
385 		 wq->sq.qid, wq->rq.qid, wq->db,
386 		 wq->sq.bar2_va, wq->rq.bar2_va);
387 
388 	return 0;
389 free_dma:
390 	if (need_rq)
391 		dma_free_coherent(&rdev->lldi.pdev->dev,
392 				  wq->rq.memsize, wq->rq.queue,
393 				  dma_unmap_addr(&wq->rq, mapping));
394 free_sq:
395 	dealloc_sq(rdev, &wq->sq);
396 free_hwaddr:
397 	if (need_rq)
398 		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
399 free_sw_rq:
400 	if (need_rq)
401 		kfree(wq->rq.sw_rq);
402 free_sw_sq:
403 	kfree(wq->sq.sw_sq);
404 free_rq_qid:
405 	if (need_rq)
406 		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
407 free_sq_qid:
408 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
409 	return ret;
410 }
411 
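/*
 * Copy the source SGEs as immediate data directly after the fw_ri_immd
 * header in the WQE, handling wrap-around at the end of the SQ and
 * padding the result out to a 16B boundary.  Fails with -EMSGSIZE if the
 * total length exceeds 'max'.
 */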
412 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
413 		      const struct ib_send_wr *wr, int max, u32 *plenp)
414 {
415 	u8 *dstp, *srcp;
416 	u32 plen = 0;
417 	int i;
418 	int rem, len;
419 
420 	dstp = (u8 *)immdp->data;
421 	for (i = 0; i < wr->num_sge; i++) {
422 		if ((plen + wr->sg_list[i].length) > max)
423 			return -EMSGSIZE;
424 		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
425 		plen += wr->sg_list[i].length;
426 		rem = wr->sg_list[i].length;
427 		while (rem) {
428 			if (dstp == (u8 *)&sq->queue[sq->size])
429 				dstp = (u8 *)sq->queue;
430 			if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
431 				len = rem;
432 			else
433 				len = (u8 *)&sq->queue[sq->size] - dstp;
434 			memcpy(dstp, srcp, len);
435 			dstp += len;
436 			srcp += len;
437 			rem -= len;
438 		}
439 	}
440 	len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
441 	if (len)
442 		memset(dstp, 0, len);
443 	immdp->op = FW_RI_DATA_IMMD;
444 	immdp->r1 = 0;
445 	immdp->r2 = 0;
446 	immdp->immdlen = cpu_to_be32(plen);
447 	*plenp = plen;
448 	return 0;
449 }
450 
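/*
 * Build an ISGL (immediate SGL) of {lkey, length, address} entries in the
 * descriptor queue, wrapping back to queue_start when queue_end is hit.
 * The running length check guards against u32 overflow of plen.
 */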
451 static int build_isgl(__be64 *queue_start, __be64 *queue_end,
452 		      struct fw_ri_isgl *isglp, struct ib_sge *sg_list,
453 		      int num_sge, u32 *plenp)
454 
455 {
456 	int i;
457 	u32 plen = 0;
458 	__be64 *flitp;
459 
460 	if ((__be64 *)isglp == queue_end)
461 		isglp = (struct fw_ri_isgl *)queue_start;
462 
463 	flitp = (__be64 *)isglp->sge;
464 
465 	for (i = 0; i < num_sge; i++) {
466 		if ((plen + sg_list[i].length) < plen)
467 			return -EMSGSIZE;
468 		plen += sg_list[i].length;
469 		*flitp = cpu_to_be64(((u64)sg_list[i].lkey << 32) |
470 				     sg_list[i].length);
471 		if (++flitp == queue_end)
472 			flitp = queue_start;
473 		*flitp = cpu_to_be64(sg_list[i].addr);
474 		if (++flitp == queue_end)
475 			flitp = queue_start;
476 	}
477 	*flitp = (__force __be64)0;
478 	isglp->op = FW_RI_DATA_ISGL;
479 	isglp->r1 = 0;
480 	isglp->nsge = cpu_to_be16(num_sge);
481 	isglp->r2 = 0;
482 	if (plenp)
483 		*plenp = plen;
484 	return 0;
485 }
486 
487 static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
488 			   const struct ib_send_wr *wr, u8 *len16)
489 {
490 	u32 plen;
491 	int size;
492 	int ret;
493 
494 	if (wr->num_sge > T4_MAX_SEND_SGE)
495 		return -EINVAL;
496 	switch (wr->opcode) {
497 	case IB_WR_SEND:
498 		if (wr->send_flags & IB_SEND_SOLICITED)
499 			wqe->send.sendop_pkd = cpu_to_be32(
500 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE));
501 		else
502 			wqe->send.sendop_pkd = cpu_to_be32(
503 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND));
504 		wqe->send.stag_inv = 0;
505 		break;
506 	case IB_WR_SEND_WITH_INV:
507 		if (wr->send_flags & IB_SEND_SOLICITED)
508 			wqe->send.sendop_pkd = cpu_to_be32(
509 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_SE_INV));
510 		else
511 			wqe->send.sendop_pkd = cpu_to_be32(
512 				FW_RI_SEND_WR_SENDOP_V(FW_RI_SEND_WITH_INV));
513 		wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
514 		break;
515 
516 	default:
517 		return -EINVAL;
518 	}
519 	wqe->send.r3 = 0;
520 	wqe->send.r4 = 0;
521 
522 	plen = 0;
523 	if (wr->num_sge) {
524 		if (wr->send_flags & IB_SEND_INLINE) {
525 			ret = build_immd(sq, wqe->send.u.immd_src, wr,
526 					 T4_MAX_SEND_INLINE, &plen);
527 			if (ret)
528 				return ret;
529 			size = sizeof wqe->send + sizeof(struct fw_ri_immd) +
530 			       plen;
531 		} else {
532 			ret = build_isgl((__be64 *)sq->queue,
533 					 (__be64 *)&sq->queue[sq->size],
534 					 wqe->send.u.isgl_src,
535 					 wr->sg_list, wr->num_sge, &plen);
536 			if (ret)
537 				return ret;
538 			size = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
539 			       wr->num_sge * sizeof(struct fw_ri_sge);
540 		}
541 	} else {
542 		wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
543 		wqe->send.u.immd_src[0].r1 = 0;
544 		wqe->send.u.immd_src[0].r2 = 0;
545 		wqe->send.u.immd_src[0].immdlen = 0;
546 		size = sizeof wqe->send + sizeof(struct fw_ri_immd);
547 		plen = 0;
548 	}
549 	*len16 = DIV_ROUND_UP(size, 16);
550 	wqe->send.plen = cpu_to_be32(plen);
551 	return 0;
552 }
553 
554 static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
555 			    const struct ib_send_wr *wr, u8 *len16)
556 {
557 	u32 plen;
558 	int size;
559 	int ret;
560 
561 	if (wr->num_sge > T4_MAX_SEND_SGE)
562 		return -EINVAL;
563 
564 	/*
565 	 * The iWARP protocol supports 64-bit immediate data, but the RDMA API
566 	 * limits it to 32 bits.
567 	 */
568 	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
569 		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
570 	else
571 		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
572 	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
573 	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
574 	if (wr->num_sge) {
575 		if (wr->send_flags & IB_SEND_INLINE) {
576 			ret = build_immd(sq, wqe->write.u.immd_src, wr,
577 					 T4_MAX_WRITE_INLINE, &plen);
578 			if (ret)
579 				return ret;
580 			size = sizeof wqe->write + sizeof(struct fw_ri_immd) +
581 			       plen;
582 		} else {
583 			ret = build_isgl((__be64 *)sq->queue,
584 					 (__be64 *)&sq->queue[sq->size],
585 					 wqe->write.u.isgl_src,
586 					 wr->sg_list, wr->num_sge, &plen);
587 			if (ret)
588 				return ret;
589 			size = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
590 			       wr->num_sge * sizeof(struct fw_ri_sge);
591 		}
592 	} else {
593 		wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
594 		wqe->write.u.immd_src[0].r1 = 0;
595 		wqe->write.u.immd_src[0].r2 = 0;
596 		wqe->write.u.immd_src[0].immdlen = 0;
597 		size = sizeof wqe->write + sizeof(struct fw_ri_immd);
598 		plen = 0;
599 	}
600 	*len16 = DIV_ROUND_UP(size, 16);
601 	wqe->write.plen = cpu_to_be32(plen);
602 	return 0;
603 }
604 
605 static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp,
606 			    struct ib_send_wr *wr)
607 {
608 	memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16);
609 	memset(immdp->r1, 0, 6);
610 	immdp->op = FW_RI_DATA_IMMD;
611 	immdp->immdlen = 16;
612 }
613 
614 static void build_rdma_write_cmpl(struct t4_sq *sq,
615 				  struct fw_ri_rdma_write_cmpl_wr *wcwr,
616 				  const struct ib_send_wr *wr, u8 *len16)
617 {
618 	u32 plen;
619 	int size;
620 
621 	/*
622 	 * This code assumes the struct fields preceding the write isgl
623 	 * fit in one 64B WR slot.  This is because the WQE is built
624 	 * directly in the DMA queue, and wrapping is only handled
625 	 * by the code building the SGLs, i.e. the "fixed part" of the WR
626 	 * structs must all fit in 64B.  The WQE build code should probably be
627 	 * redesigned to avoid this restriction, but for now just add
628 	 * the BUILD_BUG_ON() to catch if this WQE struct gets too big.
629 	 */
630 	BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64);
631 
632 	wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
633 	wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
634 	wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
635 	wcwr->r2 = 0;
636 	wcwr->r3 = 0;
637 
638 	/* SEND_INV SGL */
639 	if (wr->next->send_flags & IB_SEND_INLINE)
640 		build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next);
641 	else
642 		build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
643 			   &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL);
644 
645 	/* WRITE SGL */
646 	build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
647 		   wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen);
648 
649 	size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) +
650 		wr->num_sge * sizeof(struct fw_ri_sge);
651 	wcwr->plen = cpu_to_be32(plen);
652 	*len16 = DIV_ROUND_UP(size, 16);
653 }
654 
655 static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
656 			   u8 *len16)
657 {
658 	if (wr->num_sge > 1)
659 		return -EINVAL;
660 	if (wr->num_sge && wr->sg_list[0].length) {
661 		wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
662 		wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
663 							>> 32));
664 		wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
665 		wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
666 		wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
667 		wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
668 							 >> 32));
669 		wqe->read.to_sink_lo = cpu_to_be32((u32)(wr->sg_list[0].addr));
670 	} else {
671 		wqe->read.stag_src = cpu_to_be32(2);
672 		wqe->read.to_src_hi = 0;
673 		wqe->read.to_src_lo = 0;
674 		wqe->read.stag_sink = cpu_to_be32(2);
675 		wqe->read.plen = 0;
676 		wqe->read.to_sink_hi = 0;
677 		wqe->read.to_sink_lo = 0;
678 	}
679 	wqe->read.r2 = 0;
680 	wqe->read.r5 = 0;
681 	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
682 	return 0;
683 }
684 
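/*
 * Post a WRITE + SEND_WITH_INV chain as a single FW_RI_RDMA_WRITE_CMPL_WR.
 * Two sw_sq entries are still consumed (one per IB WR) so that completion
 * processing can report both work requests to the ULP.
 */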
685 static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
686 {
687 	bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) ||
688 			     qhp->sq_sig_all;
689 	bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
690 			      qhp->sq_sig_all;
691 	struct t4_swsqe *swsqe;
692 	union t4_wr *wqe;
693 	u16 write_wrid;
694 	u8 len16;
695 	u16 idx;
696 
697 	/*
698 	 * The sw_sq entries still look like a WRITE and a SEND and consume
699 	 * 2 slots. The FW WR, however, will be a single uber-WR.
700 	 */
701 	wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
702 	       qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
703 	build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16);
704 
705 	/* WRITE swsqe */
706 	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
707 	swsqe->opcode = FW_RI_RDMA_WRITE;
708 	swsqe->idx = qhp->wq.sq.pidx;
709 	swsqe->complete = 0;
710 	swsqe->signaled = write_signaled;
711 	swsqe->flushed = 0;
712 	swsqe->wr_id = wr->wr_id;
713 	if (c4iw_wr_log) {
714 		swsqe->sge_ts =
715 			cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
716 		swsqe->host_time = ktime_get();
717 	}
718 
719 	write_wrid = qhp->wq.sq.pidx;
720 
721 	/* just bump the sw_sq */
722 	qhp->wq.sq.in_use++;
723 	if (++qhp->wq.sq.pidx == qhp->wq.sq.size)
724 		qhp->wq.sq.pidx = 0;
725 
726 	/* SEND_WITH_INV swsqe */
727 	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
728 	swsqe->opcode = FW_RI_SEND_WITH_INV;
729 	swsqe->idx = qhp->wq.sq.pidx;
730 	swsqe->complete = 0;
731 	swsqe->signaled = send_signaled;
732 	swsqe->flushed = 0;
733 	swsqe->wr_id = wr->next->wr_id;
734 	if (c4iw_wr_log) {
735 		swsqe->sge_ts =
736 			cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
737 		swsqe->host_time = ktime_get();
738 	}
739 
740 	wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0;
741 	wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx;
742 
743 	init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR,
744 		    write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16);
745 	t4_sq_produce(&qhp->wq, len16);
746 	idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
747 
748 	t4_ring_sq_db(&qhp->wq, idx, wqe);
749 }
750 
751 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
752 			   const struct ib_recv_wr *wr, u8 *len16)
753 {
754 	int ret;
755 
756 	ret = build_isgl((__be64 *)qhp->wq.rq.queue,
757 			 (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size],
758 			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
759 	if (ret)
760 		return ret;
761 	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
762 			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
763 	return 0;
764 }
765 
766 static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
767 			  u8 *len16)
768 {
769 	int ret;
770 
771 	ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
772 			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
773 	if (ret)
774 		return ret;
775 	*len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
776 			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
777 	return 0;
778 }
779 
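/*
 * Build a FW_RI_FR_NSMR_TPTE_WR, where the TPT entry itself is carried in
 * the WR.  Only used when the adapter advertises fr_nsmr_tpte_wr_support
 * and the page list fits in the two PBL slots of the WR.
 */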
780 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
781 			      const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
782 			      u8 *len16)
783 {
784 	__be64 *p = (__be64 *)fr->pbl;
785 
786 	fr->r2 = cpu_to_be32(0);
787 	fr->stag = cpu_to_be32(mhp->ibmr.rkey);
788 
789 	fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
790 		FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) |
791 		FW_RI_TPTE_STAGSTATE_V(1) |
792 		FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) |
793 		FW_RI_TPTE_PDID_V(mhp->attr.pdid));
794 	fr->tpte.locread_to_qpid = cpu_to_be32(
795 		FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) |
796 		FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) |
797 		FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12));
798 	fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V(
799 		PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3));
800 	fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0);
801 	fr->tpte.len_hi = cpu_to_be32(0);
802 	fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length);
803 	fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
804 	fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
805 
806 	p[0] = cpu_to_be64((u64)mhp->mpl[0]);
807 	p[1] = cpu_to_be64((u64)mhp->mpl[1]);
808 
809 	*len16 = DIV_ROUND_UP(sizeof(*fr), 16);
810 }
811 
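/*
 * Build a FW_RI_FR_NSMR_WR fast-register WR.  The PBL is attached either
 * as a DSGL that the hardware DMAs from host memory (when DSGL is usable
 * and the PBL is larger than max_fr_immd) or as immediate data copied
 * into the WQE, with wrap-around handling and zero padding.
 */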
812 static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
813 			const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
814 			u8 *len16, bool dsgl_supported)
815 {
816 	struct fw_ri_immd *imdp;
817 	__be64 *p;
818 	int i;
819 	int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
820 	int rem;
821 
822 	if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl))
823 		return -EINVAL;
824 
825 	wqe->fr.qpbinde_to_dcacpu = 0;
826 	wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
827 	wqe->fr.addr_type = FW_RI_VA_BASED_TO;
828 	wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
829 	wqe->fr.len_hi = 0;
830 	wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
831 	wqe->fr.stag = cpu_to_be32(wr->key);
832 	wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
833 	wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
834 					0xffffffff);
835 
836 	if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) {
837 		struct fw_ri_dsgl *sglp;
838 
839 		for (i = 0; i < mhp->mpl_len; i++)
840 			mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
841 
842 		sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
843 		sglp->op = FW_RI_DATA_DSGL;
844 		sglp->r1 = 0;
845 		sglp->nsge = cpu_to_be16(1);
846 		sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
847 		sglp->len0 = cpu_to_be32(pbllen);
848 
849 		*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
850 	} else {
851 		imdp = (struct fw_ri_immd *)(&wqe->fr + 1);
852 		imdp->op = FW_RI_DATA_IMMD;
853 		imdp->r1 = 0;
854 		imdp->r2 = 0;
855 		imdp->immdlen = cpu_to_be32(pbllen);
856 		p = (__be64 *)(imdp + 1);
857 		rem = pbllen;
858 		for (i = 0; i < mhp->mpl_len; i++) {
859 			*p = cpu_to_be64((u64)mhp->mpl[i]);
860 			rem -= sizeof(*p);
861 			if (++p == (__be64 *)&sq->queue[sq->size])
862 				p = (__be64 *)sq->queue;
863 		}
864 		while (rem) {
865 			*p = 0;
866 			rem -= sizeof(*p);
867 			if (++p == (__be64 *)&sq->queue[sq->size])
868 				p = (__be64 *)sq->queue;
869 		}
870 		*len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp)
871 				      + pbllen, 16);
872 	}
873 	return 0;
874 }
875 
876 static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
877 			  u8 *len16)
878 {
879 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
880 	wqe->inv.r2 = 0;
881 	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
882 	return 0;
883 }
884 
885 static void free_qp_work(struct work_struct *work)
886 {
887 	struct c4iw_ucontext *ucontext;
888 	struct c4iw_qp *qhp;
889 	struct c4iw_dev *rhp;
890 
891 	qhp = container_of(work, struct c4iw_qp, free_work);
892 	ucontext = qhp->ucontext;
893 	rhp = qhp->rhp;
894 
895 	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
896 	destroy_qp(&rhp->rdev, &qhp->wq,
897 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
898 
899 	if (ucontext)
900 		c4iw_put_ucontext(ucontext);
901 	c4iw_put_wr_wait(qhp->wr_waitp);
902 	kfree(qhp);
903 }
904 
905 static void queue_qp_free(struct kref *kref)
906 {
907 	struct c4iw_qp *qhp;
908 
909 	qhp = container_of(kref, struct c4iw_qp, kref);
910 	pr_debug("qhp %p\n", qhp);
911 	queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
912 }
913 
914 void c4iw_qp_add_ref(struct ib_qp *qp)
915 {
916 	pr_debug("ib_qp %p\n", qp);
917 	kref_get(&to_c4iw_qp(qp)->kref);
918 }
919 
920 void c4iw_qp_rem_ref(struct ib_qp *qp)
921 {
922 	pr_debug("ib_qp %p\n", qp);
923 	kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
924 }
925 
926 static void add_to_fc_list(struct list_head *head, struct list_head *entry)
927 {
928 	if (list_empty(entry))
929 		list_add_tail(entry, head);
930 }
931 
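/*
 * Ring the SQ doorbell for a kernel QP.  If the device is not in the
 * NORMAL doorbell state, defer the ring by accumulating the pidx
 * increment and queueing the QP on the db flow-control list instead.
 */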
932 static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
933 {
934 	unsigned long flags;
935 
936 	spin_lock_irqsave(&qhp->rhp->lock, flags);
937 	spin_lock(&qhp->lock);
938 	if (qhp->rhp->db_state == NORMAL)
939 		t4_ring_sq_db(&qhp->wq, inc, NULL);
940 	else {
941 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
942 		qhp->wq.sq.wq_pidx_inc += inc;
943 	}
944 	spin_unlock(&qhp->lock);
945 	spin_unlock_irqrestore(&qhp->rhp->lock, flags);
946 	return 0;
947 }
948 
949 static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
950 {
951 	unsigned long flags;
952 
953 	spin_lock_irqsave(&qhp->rhp->lock, flags);
954 	spin_lock(&qhp->lock);
955 	if (qhp->rhp->db_state == NORMAL)
956 		t4_ring_rq_db(&qhp->wq, inc, NULL);
957 	else {
958 		add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
959 		qhp->wq.rq.wq_pidx_inc += inc;
960 	}
961 	spin_unlock(&qhp->lock);
962 	spin_unlock_irqrestore(&qhp->rhp->lock, flags);
963 	return 0;
964 }
965 
966 static int ib_to_fw_opcode(int ib_opcode)
967 {
968 	int opcode;
969 
970 	switch (ib_opcode) {
971 	case IB_WR_SEND_WITH_INV:
972 		opcode = FW_RI_SEND_WITH_INV;
973 		break;
974 	case IB_WR_SEND:
975 		opcode = FW_RI_SEND;
976 		break;
977 	case IB_WR_RDMA_WRITE:
978 		opcode = FW_RI_RDMA_WRITE;
979 		break;
980 	case IB_WR_RDMA_WRITE_WITH_IMM:
981 		opcode = FW_RI_WRITE_IMMEDIATE;
982 		break;
983 	case IB_WR_RDMA_READ:
984 	case IB_WR_RDMA_READ_WITH_INV:
985 		opcode = FW_RI_READ_REQ;
986 		break;
987 	case IB_WR_REG_MR:
988 		opcode = FW_RI_FAST_REGISTER;
989 		break;
990 	case IB_WR_LOCAL_INV:
991 		opcode = FW_RI_LOCAL_INV;
992 		break;
993 	default:
994 		opcode = -EINVAL;
995 	}
996 	return opcode;
997 }
998 
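/*
 * Complete an SQ WR in software after the QP has been flushed: synthesize
 * a T4_ERR_SWFLUSH drain CQE carrying the caller's wr_id, insert it into
 * the software CQ and, if the CQ was armed, invoke its completion handler.
 */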
999 static int complete_sq_drain_wr(struct c4iw_qp *qhp,
1000 				const struct ib_send_wr *wr)
1001 {
1002 	struct t4_cqe cqe = {};
1003 	struct c4iw_cq *schp;
1004 	unsigned long flag;
1005 	struct t4_cq *cq;
1006 	int opcode;
1007 
1008 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
1009 	cq = &schp->cq;
1010 
1011 	opcode = ib_to_fw_opcode(wr->opcode);
1012 	if (opcode < 0)
1013 		return opcode;
1014 
1015 	cqe.u.drain_cookie = wr->wr_id;
1016 	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
1017 				 CQE_OPCODE_V(opcode) |
1018 				 CQE_TYPE_V(1) |
1019 				 CQE_SWCQE_V(1) |
1020 				 CQE_DRAIN_V(1) |
1021 				 CQE_QPID_V(qhp->wq.sq.qid));
1022 
1023 	spin_lock_irqsave(&schp->lock, flag);
1024 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
1025 	cq->sw_queue[cq->sw_pidx] = cqe;
1026 	t4_swcq_produce(cq);
1027 	spin_unlock_irqrestore(&schp->lock, flag);
1028 
1029 	if (t4_clear_cq_armed(&schp->cq)) {
1030 		spin_lock_irqsave(&schp->comp_handler_lock, flag);
1031 		(*schp->ibcq.comp_handler)(&schp->ibcq,
1032 					   schp->ibcq.cq_context);
1033 		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1034 	}
1035 	return 0;
1036 }
1037 
1038 static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
1039 				 const struct ib_send_wr *wr,
1040 				 const struct ib_send_wr **bad_wr)
1041 {
1042 	int ret = 0;
1043 
1044 	while (wr) {
1045 		ret = complete_sq_drain_wr(qhp, wr);
1046 		if (ret) {
1047 			*bad_wr = wr;
1048 			break;
1049 		}
1050 		wr = wr->next;
1051 	}
1052 	return ret;
1053 }
1054 
1055 static void complete_rq_drain_wr(struct c4iw_qp *qhp,
1056 				 const struct ib_recv_wr *wr)
1057 {
1058 	struct t4_cqe cqe = {};
1059 	struct c4iw_cq *rchp;
1060 	unsigned long flag;
1061 	struct t4_cq *cq;
1062 
1063 	rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1064 	cq = &rchp->cq;
1065 
1066 	cqe.u.drain_cookie = wr->wr_id;
1067 	cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
1068 				 CQE_OPCODE_V(FW_RI_SEND) |
1069 				 CQE_TYPE_V(0) |
1070 				 CQE_SWCQE_V(1) |
1071 				 CQE_DRAIN_V(1) |
1072 				 CQE_QPID_V(qhp->wq.sq.qid));
1073 
1074 	spin_lock_irqsave(&rchp->lock, flag);
1075 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
1076 	cq->sw_queue[cq->sw_pidx] = cqe;
1077 	t4_swcq_produce(cq);
1078 	spin_unlock_irqrestore(&rchp->lock, flag);
1079 
1080 	if (t4_clear_cq_armed(&rchp->cq)) {
1081 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1082 		(*rchp->ibcq.comp_handler)(&rchp->ibcq,
1083 					   rchp->ibcq.cq_context);
1084 		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1085 	}
1086 }
1087 
1088 static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
1089 				  const struct ib_recv_wr *wr)
1090 {
1091 	while (wr) {
1092 		complete_rq_drain_wr(qhp, wr);
1093 		wr = wr->next;
1094 	}
1095 }
1096 
1097 int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
1098 		   const struct ib_send_wr **bad_wr)
1099 {
1100 	int err = 0;
1101 	u8 len16 = 0;
1102 	enum fw_wr_opcodes fw_opcode = 0;
1103 	enum fw_ri_wr_flags fw_flags;
1104 	struct c4iw_qp *qhp;
1105 	struct c4iw_dev *rhp;
1106 	union t4_wr *wqe = NULL;
1107 	u32 num_wrs;
1108 	struct t4_swsqe *swsqe;
1109 	unsigned long flag;
1110 	u16 idx = 0;
1111 
1112 	qhp = to_c4iw_qp(ibqp);
1113 	rhp = qhp->rhp;
1114 	spin_lock_irqsave(&qhp->lock, flag);
1115 
1116 	/*
1117 	 * If the qp has been flushed, then just insert a special
1118 	 * drain cqe.
1119 	 */
1120 	if (qhp->wq.flushed) {
1121 		spin_unlock_irqrestore(&qhp->lock, flag);
1122 		err = complete_sq_drain_wrs(qhp, wr, bad_wr);
1123 		return err;
1124 	}
1125 	num_wrs = t4_sq_avail(&qhp->wq);
1126 	if (num_wrs == 0) {
1127 		spin_unlock_irqrestore(&qhp->lock, flag);
1128 		*bad_wr = wr;
1129 		return -ENOMEM;
1130 	}
1131 
1132 	/*
1133 	 * Fastpath for the NVMe-oF target WRITE + SEND_WITH_INV WR chain, which
1134 	 * is the response for small NVMe-oF READ requests.  If the chain is
1135 	 * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
1136 	 * meet the requirements of the fw_ri_write_cmpl_wr work request,
1137 	 * then build and post the write_cmpl WR.  If any of the tests
1138 	 * below are not true, then we continue on with the traditional WRITE
1139 	 * and SEND WRs.
1140 	 */
1141 	if (qhp->rhp->rdev.lldi.write_cmpl_support &&
1142 	    CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >=
1143 	    CHELSIO_T5 &&
1144 	    wr && wr->next && !wr->next->next &&
1145 	    wr->opcode == IB_WR_RDMA_WRITE &&
1146 	    wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
1147 	    wr->next->opcode == IB_WR_SEND_WITH_INV &&
1148 	    wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
1149 	    wr->next->num_sge == 1 && num_wrs >= 2) {
1150 		post_write_cmpl(qhp, wr);
1151 		spin_unlock_irqrestore(&qhp->lock, flag);
1152 		return 0;
1153 	}
1154 
1155 	while (wr) {
1156 		if (num_wrs == 0) {
1157 			err = -ENOMEM;
1158 			*bad_wr = wr;
1159 			break;
1160 		}
1161 		wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
1162 		      qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
1163 
1164 		fw_flags = 0;
1165 		if (wr->send_flags & IB_SEND_SOLICITED)
1166 			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
1167 		if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all)
1168 			fw_flags |= FW_RI_COMPLETION_FLAG;
1169 		swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
1170 		switch (wr->opcode) {
1171 		case IB_WR_SEND_WITH_INV:
1172 		case IB_WR_SEND:
1173 			if (wr->send_flags & IB_SEND_FENCE)
1174 				fw_flags |= FW_RI_READ_FENCE_FLAG;
1175 			fw_opcode = FW_RI_SEND_WR;
1176 			if (wr->opcode == IB_WR_SEND)
1177 				swsqe->opcode = FW_RI_SEND;
1178 			else
1179 				swsqe->opcode = FW_RI_SEND_WITH_INV;
1180 			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
1181 			break;
1182 		case IB_WR_RDMA_WRITE_WITH_IMM:
1183 			if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
1184 				err = -EINVAL;
1185 				break;
1186 			}
1187 			fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
1188 			/*FALLTHROUGH*/
1189 		case IB_WR_RDMA_WRITE:
1190 			fw_opcode = FW_RI_RDMA_WRITE_WR;
1191 			swsqe->opcode = FW_RI_RDMA_WRITE;
1192 			err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
1193 			break;
1194 		case IB_WR_RDMA_READ:
1195 		case IB_WR_RDMA_READ_WITH_INV:
1196 			fw_opcode = FW_RI_RDMA_READ_WR;
1197 			swsqe->opcode = FW_RI_READ_REQ;
1198 			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
1199 				c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
1200 				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
1201 			} else {
1202 				fw_flags = 0;
1203 			}
1204 			err = build_rdma_read(wqe, wr, &len16);
1205 			if (err)
1206 				break;
1207 			swsqe->read_len = wr->sg_list[0].length;
1208 			if (!qhp->wq.sq.oldest_read)
1209 				qhp->wq.sq.oldest_read = swsqe;
1210 			break;
1211 		case IB_WR_REG_MR: {
1212 			struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
1213 
1214 			swsqe->opcode = FW_RI_FAST_REGISTER;
1215 			if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
1216 			    !mhp->attr.state && mhp->mpl_len <= 2) {
1217 				fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
1218 				build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
1219 						  mhp, &len16);
1220 			} else {
1221 				fw_opcode = FW_RI_FR_NSMR_WR;
1222 				err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
1223 				       mhp, &len16,
1224 				       rhp->rdev.lldi.ulptx_memwrite_dsgl);
1225 				if (err)
1226 					break;
1227 			}
1228 			mhp->attr.state = 1;
1229 			break;
1230 		}
1231 		case IB_WR_LOCAL_INV:
1232 			if (wr->send_flags & IB_SEND_FENCE)
1233 				fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
1234 			fw_opcode = FW_RI_INV_LSTAG_WR;
1235 			swsqe->opcode = FW_RI_LOCAL_INV;
1236 			err = build_inv_stag(wqe, wr, &len16);
1237 			c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
1238 			break;
1239 		default:
1240 			pr_warn("%s post of type=%d TBD!\n", __func__,
1241 				wr->opcode);
1242 			err = -EINVAL;
1243 		}
1244 		if (err) {
1245 			*bad_wr = wr;
1246 			break;
1247 		}
1248 		swsqe->idx = qhp->wq.sq.pidx;
1249 		swsqe->complete = 0;
1250 		swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
1251 				  qhp->sq_sig_all;
1252 		swsqe->flushed = 0;
1253 		swsqe->wr_id = wr->wr_id;
1254 		if (c4iw_wr_log) {
1255 			swsqe->sge_ts = cxgb4_read_sge_timestamp(
1256 					rhp->rdev.lldi.ports[0]);
1257 			swsqe->host_time = ktime_get();
1258 		}
1259 
1260 		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
1261 
1262 		pr_debug("cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u\n",
1263 			 (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
1264 			 swsqe->opcode, swsqe->read_len);
1265 		wr = wr->next;
1266 		num_wrs--;
1267 		t4_sq_produce(&qhp->wq, len16);
1268 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1269 	}
1270 	if (!rhp->rdev.status_page->db_off) {
1271 		t4_ring_sq_db(&qhp->wq, idx, wqe);
1272 		spin_unlock_irqrestore(&qhp->lock, flag);
1273 	} else {
1274 		spin_unlock_irqrestore(&qhp->lock, flag);
1275 		ring_kernel_sq_db(qhp, idx);
1276 	}
1277 	return err;
1278 }
1279 
1280 int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
1281 		      const struct ib_recv_wr **bad_wr)
1282 {
1283 	int err = 0;
1284 	struct c4iw_qp *qhp;
1285 	union t4_recv_wr *wqe = NULL;
1286 	u32 num_wrs;
1287 	u8 len16 = 0;
1288 	unsigned long flag;
1289 	u16 idx = 0;
1290 
1291 	qhp = to_c4iw_qp(ibqp);
1292 	spin_lock_irqsave(&qhp->lock, flag);
1293 
1294 	/*
1295 	 * If the qp has been flushed, then just insert a special
1296 	 * drain cqe.
1297 	 */
1298 	if (qhp->wq.flushed) {
1299 		spin_unlock_irqrestore(&qhp->lock, flag);
1300 		complete_rq_drain_wrs(qhp, wr);
1301 		return err;
1302 	}
1303 	num_wrs = t4_rq_avail(&qhp->wq);
1304 	if (num_wrs == 0) {
1305 		spin_unlock_irqrestore(&qhp->lock, flag);
1306 		*bad_wr = wr;
1307 		return -ENOMEM;
1308 	}
1309 	while (wr) {
1310 		if (wr->num_sge > T4_MAX_RECV_SGE) {
1311 			err = -EINVAL;
1312 			*bad_wr = wr;
1313 			break;
1314 		}
1315 		wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
1316 					   qhp->wq.rq.wq_pidx *
1317 					   T4_EQ_ENTRY_SIZE);
1318 		if (num_wrs)
1319 			err = build_rdma_recv(qhp, wqe, wr, &len16);
1320 		else
1321 			err = -ENOMEM;
1322 		if (err) {
1323 			*bad_wr = wr;
1324 			break;
1325 		}
1326 
1327 		qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
1328 		if (c4iw_wr_log) {
1329 			qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].sge_ts =
1330 				cxgb4_read_sge_timestamp(
1331 						qhp->rhp->rdev.lldi.ports[0]);
1332 			qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].host_time =
1333 				ktime_get();
1334 		}
1335 
1336 		wqe->recv.opcode = FW_RI_RECV_WR;
1337 		wqe->recv.r1 = 0;
1338 		wqe->recv.wrid = qhp->wq.rq.pidx;
1339 		wqe->recv.r2[0] = 0;
1340 		wqe->recv.r2[1] = 0;
1341 		wqe->recv.r2[2] = 0;
1342 		wqe->recv.len16 = len16;
1343 		pr_debug("cookie 0x%llx pidx %u\n",
1344 			 (unsigned long long)wr->wr_id, qhp->wq.rq.pidx);
1345 		t4_rq_produce(&qhp->wq, len16);
1346 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
1347 		wr = wr->next;
1348 		num_wrs--;
1349 	}
1350 	if (!qhp->rhp->rdev.status_page->db_off) {
1351 		t4_ring_rq_db(&qhp->wq, idx, wqe);
1352 		spin_unlock_irqrestore(&qhp->lock, flag);
1353 	} else {
1354 		spin_unlock_irqrestore(&qhp->lock, flag);
1355 		ring_kernel_rq_db(qhp, idx);
1356 	}
1357 	return err;
1358 }
1359 
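/*
 * Stash an SRQ recv WR in the pending_wrs ring.  Used when the SRQ has
 * out-of-order completions or earlier deferred WRs, so that WRs are
 * handed to the hardware in order once the backlog clears.
 */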
1360 static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
1361 			 u64 wr_id, u8 len16)
1362 {
1363 	struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
1364 
1365 	pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
1366 		 __func__, srq->cidx, srq->pidx, srq->wq_pidx,
1367 		 srq->in_use, srq->ooo_count,
1368 		 (unsigned long long)wr_id, srq->pending_cidx,
1369 		 srq->pending_pidx, srq->pending_in_use);
1370 	pwr->wr_id = wr_id;
1371 	pwr->len16 = len16;
1372 	memcpy(&pwr->wqe, wqe, len16 * 16);
1373 	t4_srq_produce_pending_wr(srq);
1374 }
1375 
1376 int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
1377 		       const struct ib_recv_wr **bad_wr)
1378 {
1379 	union t4_recv_wr *wqe, lwqe;
1380 	struct c4iw_srq *srq;
1381 	unsigned long flag;
1382 	u8 len16 = 0;
1383 	u16 idx = 0;
1384 	int err = 0;
1385 	u32 num_wrs;
1386 
1387 	srq = to_c4iw_srq(ibsrq);
1388 	spin_lock_irqsave(&srq->lock, flag);
1389 	num_wrs = t4_srq_avail(&srq->wq);
1390 	if (num_wrs == 0) {
1391 		spin_unlock_irqrestore(&srq->lock, flag);
1392 		return -ENOMEM;
1393 	}
1394 	while (wr) {
1395 		if (wr->num_sge > T4_MAX_RECV_SGE) {
1396 			err = -EINVAL;
1397 			*bad_wr = wr;
1398 			break;
1399 		}
1400 		wqe = &lwqe;
1401 		if (num_wrs)
1402 			err = build_srq_recv(wqe, wr, &len16);
1403 		else
1404 			err = -ENOMEM;
1405 		if (err) {
1406 			*bad_wr = wr;
1407 			break;
1408 		}
1409 
1410 		wqe->recv.opcode = FW_RI_RECV_WR;
1411 		wqe->recv.r1 = 0;
1412 		wqe->recv.wrid = srq->wq.pidx;
1413 		wqe->recv.r2[0] = 0;
1414 		wqe->recv.r2[1] = 0;
1415 		wqe->recv.r2[2] = 0;
1416 		wqe->recv.len16 = len16;
1417 
1418 		if (srq->wq.ooo_count ||
1419 		    srq->wq.pending_in_use ||
1420 		    srq->wq.sw_rq[srq->wq.pidx].valid) {
1421 			defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
1422 		} else {
1423 			srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
1424 			srq->wq.sw_rq[srq->wq.pidx].valid = 1;
1425 			c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
1426 			pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
1427 				 __func__, srq->wq.cidx,
1428 				 srq->wq.pidx, srq->wq.wq_pidx,
1429 				 srq->wq.in_use,
1430 				 (unsigned long long)wr->wr_id);
1431 			t4_srq_produce(&srq->wq, len16);
1432 			idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
1433 		}
1434 		wr = wr->next;
1435 		num_wrs--;
1436 	}
1437 	if (idx)
1438 		t4_ring_srq_db(&srq->wq, idx, len16, wqe);
1439 	spin_unlock_irqrestore(&srq->lock, flag);
1440 	return err;
1441 }
1442 
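/*
 * Map the status of an error CQE onto the TERMINATE layer/etype and error
 * code fields defined by the iWARP RDMAP/DDP/MPA specifications.  With no
 * error CQE, report a local catastrophic RDMAP error.
 */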
1443 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
1444 				    u8 *ecode)
1445 {
1446 	int status;
1447 	int tagged;
1448 	int opcode;
1449 	int rqtype;
1450 	int send_inv;
1451 
1452 	if (!err_cqe) {
1453 		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
1454 		*ecode = 0;
1455 		return;
1456 	}
1457 
1458 	status = CQE_STATUS(err_cqe);
1459 	opcode = CQE_OPCODE(err_cqe);
1460 	rqtype = RQ_TYPE(err_cqe);
1461 	send_inv = (opcode == FW_RI_SEND_WITH_INV) ||
1462 		   (opcode == FW_RI_SEND_WITH_SE_INV);
1463 	tagged = (opcode == FW_RI_RDMA_WRITE) ||
1464 		 (rqtype && (opcode == FW_RI_READ_RESP));
1465 
1466 	switch (status) {
1467 	case T4_ERR_STAG:
1468 		if (send_inv) {
1469 			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1470 			*ecode = RDMAP_CANT_INV_STAG;
1471 		} else {
1472 			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1473 			*ecode = RDMAP_INV_STAG;
1474 		}
1475 		break;
1476 	case T4_ERR_PDID:
1477 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1478 		if ((opcode == FW_RI_SEND_WITH_INV) ||
1479 		    (opcode == FW_RI_SEND_WITH_SE_INV))
1480 			*ecode = RDMAP_CANT_INV_STAG;
1481 		else
1482 			*ecode = RDMAP_STAG_NOT_ASSOC;
1483 		break;
1484 	case T4_ERR_QPID:
1485 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1486 		*ecode = RDMAP_STAG_NOT_ASSOC;
1487 		break;
1488 	case T4_ERR_ACCESS:
1489 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1490 		*ecode = RDMAP_ACC_VIOL;
1491 		break;
1492 	case T4_ERR_WRAP:
1493 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1494 		*ecode = RDMAP_TO_WRAP;
1495 		break;
1496 	case T4_ERR_BOUND:
1497 		if (tagged) {
1498 			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1499 			*ecode = DDPT_BASE_BOUNDS;
1500 		} else {
1501 			*layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
1502 			*ecode = RDMAP_BASE_BOUNDS;
1503 		}
1504 		break;
1505 	case T4_ERR_INVALIDATE_SHARED_MR:
1506 	case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
1507 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1508 		*ecode = RDMAP_CANT_INV_STAG;
1509 		break;
1510 	case T4_ERR_ECC:
1511 	case T4_ERR_ECC_PSTAG:
1512 	case T4_ERR_INTERNAL_ERR:
1513 		*layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
1514 		*ecode = 0;
1515 		break;
1516 	case T4_ERR_OUT_OF_RQE:
1517 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1518 		*ecode = DDPU_INV_MSN_NOBUF;
1519 		break;
1520 	case T4_ERR_PBL_ADDR_BOUND:
1521 		*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1522 		*ecode = DDPT_BASE_BOUNDS;
1523 		break;
1524 	case T4_ERR_CRC:
1525 		*layer_type = LAYER_MPA|DDP_LLP;
1526 		*ecode = MPA_CRC_ERR;
1527 		break;
1528 	case T4_ERR_MARKER:
1529 		*layer_type = LAYER_MPA|DDP_LLP;
1530 		*ecode = MPA_MARKER_ERR;
1531 		break;
1532 	case T4_ERR_PDU_LEN_ERR:
1533 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1534 		*ecode = DDPU_MSG_TOOBIG;
1535 		break;
1536 	case T4_ERR_DDP_VERSION:
1537 		if (tagged) {
1538 			*layer_type = LAYER_DDP|DDP_TAGGED_ERR;
1539 			*ecode = DDPT_INV_VERS;
1540 		} else {
1541 			*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1542 			*ecode = DDPU_INV_VERS;
1543 		}
1544 		break;
1545 	case T4_ERR_RDMA_VERSION:
1546 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1547 		*ecode = RDMAP_INV_VERS;
1548 		break;
1549 	case T4_ERR_OPCODE:
1550 		*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
1551 		*ecode = RDMAP_INV_OPCODE;
1552 		break;
1553 	case T4_ERR_DDP_QUEUE_NUM:
1554 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1555 		*ecode = DDPU_INV_QN;
1556 		break;
1557 	case T4_ERR_MSN:
1558 	case T4_ERR_MSN_GAP:
1559 	case T4_ERR_MSN_RANGE:
1560 	case T4_ERR_IRD_OVERFLOW:
1561 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1562 		*ecode = DDPU_INV_MSN_RANGE;
1563 		break;
1564 	case T4_ERR_TBIT:
1565 		*layer_type = LAYER_DDP|DDP_LOCAL_CATA;
1566 		*ecode = 0;
1567 		break;
1568 	case T4_ERR_MO:
1569 		*layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
1570 		*ecode = DDPU_INV_MO;
1571 		break;
1572 	default:
1573 		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
1574 		*ecode = 0;
1575 		break;
1576 	}
1577 }
1578 
1579 static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
1580 			   gfp_t gfp)
1581 {
1582 	struct fw_ri_wr *wqe;
1583 	struct sk_buff *skb;
1584 	struct terminate_message *term;
1585 
1586 	pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid,
1587 		 qhp->ep->hwtid);
1588 
1589 	skb = skb_dequeue(&qhp->ep->com.ep_skb_list);
1590 	if (WARN_ON(!skb))
1591 		return;
1592 
1593 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1594 
1595 	wqe = __skb_put_zero(skb, sizeof(*wqe));
1596 	wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
1597 	wqe->flowid_len16 = cpu_to_be32(
1598 		FW_WR_FLOWID_V(qhp->ep->hwtid) |
1599 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1600 
1601 	wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
1602 	wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
1603 	term = (struct terminate_message *)wqe->u.terminate.termmsg;
1604 	if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) {
1605 		term->layer_etype = qhp->attr.layer_etype;
1606 		term->ecode = qhp->attr.ecode;
1607 	} else
1608 		build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
1609 	c4iw_ofld_send(&qhp->rhp->rdev, skb);
1610 }
1611 
1612 /*
1613  * Caller must hold the qhp mutex; the CQ and QP spinlocks are taken here.
1614  */
1615 static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1616 		       struct c4iw_cq *schp)
1617 {
1618 	int count;
1619 	int rq_flushed = 0, sq_flushed;
1620 	unsigned long flag;
1621 
1622 	pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
1623 
1624 	/* locking hierarchy: cqs lock first, then qp lock. */
1625 	spin_lock_irqsave(&rchp->lock, flag);
1626 	if (schp != rchp)
1627 		spin_lock(&schp->lock);
1628 	spin_lock(&qhp->lock);
1629 
1630 	if (qhp->wq.flushed) {
1631 		spin_unlock(&qhp->lock);
1632 		if (schp != rchp)
1633 			spin_unlock(&schp->lock);
1634 		spin_unlock_irqrestore(&rchp->lock, flag);
1635 		return;
1636 	}
1637 	qhp->wq.flushed = 1;
1638 	t4_set_wq_in_error(&qhp->wq, 0);
1639 
1640 	c4iw_flush_hw_cq(rchp, qhp);
1641 	if (!qhp->srq) {
1642 		c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
1643 		rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
1644 	}
1645 
1646 	if (schp != rchp)
1647 		c4iw_flush_hw_cq(schp, qhp);
1648 	sq_flushed = c4iw_flush_sq(qhp);
1649 
1650 	spin_unlock(&qhp->lock);
1651 	if (schp != rchp)
1652 		spin_unlock(&schp->lock);
1653 	spin_unlock_irqrestore(&rchp->lock, flag);
1654 
1655 	if (schp == rchp) {
1656 		if ((rq_flushed || sq_flushed) &&
1657 		    t4_clear_cq_armed(&rchp->cq)) {
1658 			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1659 			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
1660 						   rchp->ibcq.cq_context);
1661 			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1662 		}
1663 	} else {
1664 		if (rq_flushed && t4_clear_cq_armed(&rchp->cq)) {
1665 			spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1666 			(*rchp->ibcq.comp_handler)(&rchp->ibcq,
1667 						   rchp->ibcq.cq_context);
1668 			spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1669 		}
1670 		if (sq_flushed && t4_clear_cq_armed(&schp->cq)) {
1671 			spin_lock_irqsave(&schp->comp_handler_lock, flag);
1672 			(*schp->ibcq.comp_handler)(&schp->ibcq,
1673 						   schp->ibcq.cq_context);
1674 			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1675 		}
1676 	}
1677 }
1678 
1679 static void flush_qp(struct c4iw_qp *qhp)
1680 {
1681 	struct c4iw_cq *rchp, *schp;
1682 	unsigned long flag;
1683 
1684 	rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
1685 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
1686 
1687 	if (qhp->ibqp.uobject) {
1688 		t4_set_wq_in_error(&qhp->wq, 0);
1689 		t4_set_cq_in_error(&rchp->cq);
1690 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1691 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
1692 		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1693 		if (schp != rchp) {
1694 			t4_set_cq_in_error(&schp->cq);
1695 			spin_lock_irqsave(&schp->comp_handler_lock, flag);
1696 			(*schp->ibcq.comp_handler)(&schp->ibcq,
1697 					schp->ibcq.cq_context);
1698 			spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1699 		}
1700 		return;
1701 	}
1702 	__flush_qp(qhp, rchp, schp);
1703 }
1704 
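/*
 * Post a FW_RI_INIT_WR of type FINI to take the QP out of RDMA mode, and
 * wait for the firmware completion via the endpoint's wr_waitp.
 */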
1705 static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1706 		     struct c4iw_ep *ep)
1707 {
1708 	struct fw_ri_wr *wqe;
1709 	int ret;
1710 	struct sk_buff *skb;
1711 
1712 	pr_debug("qhp %p qid 0x%x tid %u\n", qhp, qhp->wq.sq.qid, ep->hwtid);
1713 
1714 	skb = skb_dequeue(&ep->com.ep_skb_list);
1715 	if (WARN_ON(!skb))
1716 		return -ENOMEM;
1717 
1718 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1719 
1720 	wqe = __skb_put_zero(skb, sizeof(*wqe));
1721 	wqe->op_compl = cpu_to_be32(
1722 		FW_WR_OP_V(FW_RI_INIT_WR) |
1723 		FW_WR_COMPL_F);
1724 	wqe->flowid_len16 = cpu_to_be32(
1725 		FW_WR_FLOWID_V(ep->hwtid) |
1726 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1727 	wqe->cookie = (uintptr_t)ep->com.wr_waitp;
1728 
1729 	wqe->u.fini.type = FW_RI_TYPE_FINI;
1730 
1731 	ret = c4iw_ref_send_wait(&rhp->rdev, skb, ep->com.wr_waitp,
1732 				 qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
1733 
1734 	pr_debug("ret %d\n", ret);
1735 	return ret;
1736 }
1737 
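/*
 * For MPA peer-to-peer connection setup, describe the 0B RTR message
 * (a zero-length RDMA WRITE or a dummy READ REQ, depending on p2p_type)
 * in the fw_ri_init payload.
 */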
1738 static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
1739 {
1740 	pr_debug("p2p_type = %d\n", p2p_type);
1741 	memset(&init->u, 0, sizeof init->u);
1742 	switch (p2p_type) {
1743 	case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
1744 		init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
1745 		init->u.write.stag_sink = cpu_to_be32(1);
1746 		init->u.write.to_sink = cpu_to_be64(1);
1747 		init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
1748 		init->u.write.len16 = DIV_ROUND_UP(sizeof init->u.write +
1749 						   sizeof(struct fw_ri_immd),
1750 						   16);
1751 		break;
1752 	case FW_RI_INIT_P2PTYPE_READ_REQ:
1753 		init->u.write.opcode = FW_RI_RDMA_READ_WR;
1754 		init->u.read.stag_src = cpu_to_be32(1);
1755 		init->u.read.to_src_lo = cpu_to_be32(1);
1756 		init->u.read.stag_sink = cpu_to_be32(1);
1757 		init->u.read.to_sink_lo = cpu_to_be32(1);
1758 		init->u.read.len16 = DIV_ROUND_UP(sizeof init->u.read, 16);
1759 		break;
1760 	}
1761 }
1762 
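/*
 * Post a FW_RI_INIT_WR of type INIT carrying the MPA attributes, QP
 * capabilities, queue ids and IRD/ORD limits, to move the QP into RDMA
 * mode.  IRD resources are reserved first and released again on failure.
 */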
1763 static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
1764 {
1765 	struct fw_ri_wr *wqe;
1766 	int ret;
1767 	struct sk_buff *skb;
1768 
1769 	pr_debug("qhp %p qid 0x%x tid %u ird %u ord %u\n", qhp,
1770 		 qhp->wq.sq.qid, qhp->ep->hwtid, qhp->ep->ird, qhp->ep->ord);
1771 
1772 	skb = alloc_skb(sizeof *wqe, GFP_KERNEL);
1773 	if (!skb) {
1774 		ret = -ENOMEM;
1775 		goto out;
1776 	}
1777 	ret = alloc_ird(rhp, qhp->attr.max_ird);
1778 	if (ret) {
1779 		qhp->attr.max_ird = 0;
1780 		kfree_skb(skb);
1781 		goto out;
1782 	}
1783 	set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
1784 
1785 	wqe = __skb_put_zero(skb, sizeof(*wqe));
1786 	wqe->op_compl = cpu_to_be32(
1787 		FW_WR_OP_V(FW_RI_INIT_WR) |
1788 		FW_WR_COMPL_F);
1789 	wqe->flowid_len16 = cpu_to_be32(
1790 		FW_WR_FLOWID_V(qhp->ep->hwtid) |
1791 		FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*wqe), 16)));
1792 
1793 	wqe->cookie = (uintptr_t)qhp->ep->com.wr_waitp;
1794 
1795 	wqe->u.init.type = FW_RI_TYPE_INIT;
1796 	wqe->u.init.mpareqbit_p2ptype =
1797 		FW_RI_WR_MPAREQBIT_V(qhp->attr.mpa_attr.initiator) |
1798 		FW_RI_WR_P2PTYPE_V(qhp->attr.mpa_attr.p2p_type);
1799 	wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
1800 	if (qhp->attr.mpa_attr.recv_marker_enabled)
1801 		wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
1802 	if (qhp->attr.mpa_attr.xmit_marker_enabled)
1803 		wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
1804 	if (qhp->attr.mpa_attr.crc_enabled)
1805 		wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;
1806 
1807 	wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
1808 			    FW_RI_QP_RDMA_WRITE_ENABLE |
1809 			    FW_RI_QP_BIND_ENABLE;
1810 	if (!qhp->ibqp.uobject)
1811 		wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
1812 				     FW_RI_QP_STAG0_ENABLE;
1813 	wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
1814 	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
1815 	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
1816 	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
1817 	if (qhp->srq) {
1818 		wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
1819 						  qhp->srq->idx);
1820 	} else {
1821 		wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
1822 		wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
1823 		wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
1824 						   rhp->rdev.lldi.vr->rq.start);
1825 	}
1826 	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
1827 	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
1828 	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
1829 	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
1830 	wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
1831 	wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
1832 	if (qhp->attr.mpa_attr.initiator)
1833 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
1834 
1835 	ret = c4iw_ref_send_wait(&rhp->rdev, skb, qhp->ep->com.wr_waitp,
1836 				 qhp->ep->hwtid, qhp->wq.sq.qid, __func__);
1837 	if (!ret)
1838 		goto out;
1839 
1840 	free_ird(rhp, qhp->attr.max_ird);
1841 out:
1842 	pr_debug("ret %d\n", ret);
1843 	return ret;
1844 }
1845 
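/*
 * c4iw_modify_qp - the iw_cxgb4 QP state machine.
 *
 * With the QP mutex held this applies attribute changes (IDLE only), rings
 * the kernel SQ/RQ doorbells when requested (doorbell flow control), and
 * drives the state transitions: IDLE->RTS (rdma_init), RTS->CLOSING/TERMINATE
 * (rdma_fini), *->ERROR (flush), CLOSING->IDLE and ERROR->IDLE.  The err
 * label disassociates the LLP endpoint and flushes the QP; TERMINATE posting,
 * endpoint disconnect/abort and the final endpoint dereferences are done
 * after the mutex is dropped.
 */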
1846 int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1847 		   enum c4iw_qp_attr_mask mask,
1848 		   struct c4iw_qp_attributes *attrs,
1849 		   int internal)
1850 {
1851 	int ret = 0;
1852 	struct c4iw_qp_attributes newattr = qhp->attr;
1853 	int disconnect = 0;
1854 	int terminate = 0;
1855 	int abort = 0;
1856 	int free = 0;
1857 	struct c4iw_ep *ep = NULL;
1858 
1859 	pr_debug("qhp %p sqid 0x%x rqid 0x%x ep %p state %d -> %d\n",
1860 		 qhp, qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep, qhp->attr.state,
1861 		 (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
1862 
1863 	mutex_lock(&qhp->mutex);
1864 
1865 	/* Process attr changes if in IDLE */
1866 	if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
1867 		if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
1868 			ret = -EIO;
1869 			goto out;
1870 		}
1871 		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
1872 			newattr.enable_rdma_read = attrs->enable_rdma_read;
1873 		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
1874 			newattr.enable_rdma_write = attrs->enable_rdma_write;
1875 		if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
1876 			newattr.enable_bind = attrs->enable_bind;
1877 		if (mask & C4IW_QP_ATTR_MAX_ORD) {
1878 			if (attrs->max_ord > c4iw_max_read_depth) {
1879 				ret = -EINVAL;
1880 				goto out;
1881 			}
1882 			newattr.max_ord = attrs->max_ord;
1883 		}
1884 		if (mask & C4IW_QP_ATTR_MAX_IRD) {
1885 			if (attrs->max_ird > cur_max_read_depth(rhp)) {
1886 				ret = -EINVAL;
1887 				goto out;
1888 			}
1889 			newattr.max_ird = attrs->max_ird;
1890 		}
1891 		qhp->attr = newattr;
1892 	}
1893 
1894 	if (mask & C4IW_QP_ATTR_SQ_DB) {
1895 		ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc);
1896 		goto out;
1897 	}
1898 	if (mask & C4IW_QP_ATTR_RQ_DB) {
1899 		ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc);
1900 		goto out;
1901 	}
1902 
1903 	if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
1904 		goto out;
1905 	if (qhp->attr.state == attrs->next_state)
1906 		goto out;
1907 
1908 	switch (qhp->attr.state) {
1909 	case C4IW_QP_STATE_IDLE:
1910 		switch (attrs->next_state) {
1911 		case C4IW_QP_STATE_RTS:
1912 			if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
1913 				ret = -EINVAL;
1914 				goto out;
1915 			}
1916 			if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
1917 				ret = -EINVAL;
1918 				goto out;
1919 			}
1920 			qhp->attr.mpa_attr = attrs->mpa_attr;
1921 			qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
1922 			qhp->ep = qhp->attr.llp_stream_handle;
1923 			set_state(qhp, C4IW_QP_STATE_RTS);
1924 
1925 			/*
1926 			 * Ref the endpoint here and deref when we
1927 			 * disassociate the endpoint from the QP.  This
1928 			 * happens in CLOSING->IDLE transition or *->ERROR
1929 			 * transition.
1930 			 */
1931 			c4iw_get_ep(&qhp->ep->com);
1932 			ret = rdma_init(rhp, qhp);
1933 			if (ret)
1934 				goto err;
1935 			break;
1936 		case C4IW_QP_STATE_ERROR:
1937 			set_state(qhp, C4IW_QP_STATE_ERROR);
1938 			flush_qp(qhp);
1939 			break;
1940 		default:
1941 			ret = -EINVAL;
1942 			goto out;
1943 		}
1944 		break;
1945 	case C4IW_QP_STATE_RTS:
1946 		switch (attrs->next_state) {
1947 		case C4IW_QP_STATE_CLOSING:
1948 			t4_set_wq_in_error(&qhp->wq, 0);
1949 			set_state(qhp, C4IW_QP_STATE_CLOSING);
1950 			ep = qhp->ep;
1951 			if (!internal) {
1952 				abort = 0;
1953 				disconnect = 1;
1954 				c4iw_get_ep(&qhp->ep->com);
1955 			}
1956 			ret = rdma_fini(rhp, qhp, ep);
1957 			if (ret)
1958 				goto err;
1959 			break;
1960 		case C4IW_QP_STATE_TERMINATE:
1961 			t4_set_wq_in_error(&qhp->wq, 0);
1962 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
1963 			qhp->attr.layer_etype = attrs->layer_etype;
1964 			qhp->attr.ecode = attrs->ecode;
1965 			ep = qhp->ep;
1966 			if (!internal) {
1967 				c4iw_get_ep(&qhp->ep->com);
1968 				terminate = 1;
1969 				disconnect = 1;
1970 			} else {
1971 				terminate = qhp->attr.send_term;
1972 				ret = rdma_fini(rhp, qhp, ep);
1973 				if (ret)
1974 					goto err;
1975 			}
1976 			break;
1977 		case C4IW_QP_STATE_ERROR:
1978 			t4_set_wq_in_error(&qhp->wq, 0);
1979 			set_state(qhp, C4IW_QP_STATE_ERROR);
1980 			if (!internal) {
1981 				abort = 1;
1982 				disconnect = 1;
1983 				ep = qhp->ep;
1984 				c4iw_get_ep(&qhp->ep->com);
1985 			}
1986 			goto err;
1987 			break;
1988 		default:
1989 			ret = -EINVAL;
1990 			goto out;
1991 		}
1992 		break;
1993 	case C4IW_QP_STATE_CLOSING:
1994 
1995 		/*
1996 		 * Allow kernel users to move to ERROR for qp draining.
1997 		 */
1998 		if (!internal && (qhp->ibqp.uobject || attrs->next_state !=
1999 				  C4IW_QP_STATE_ERROR)) {
2000 			ret = -EINVAL;
2001 			goto out;
2002 		}
2003 		switch (attrs->next_state) {
2004 		case C4IW_QP_STATE_IDLE:
2005 			flush_qp(qhp);
2006 			set_state(qhp, C4IW_QP_STATE_IDLE);
2007 			qhp->attr.llp_stream_handle = NULL;
2008 			c4iw_put_ep(&qhp->ep->com);
2009 			qhp->ep = NULL;
2010 			wake_up(&qhp->wait);
2011 			break;
2012 		case C4IW_QP_STATE_ERROR:
2013 			goto err;
2014 		default:
2015 			ret = -EINVAL;
2016 			goto err;
2017 		}
2018 		break;
2019 	case C4IW_QP_STATE_ERROR:
2020 		if (attrs->next_state != C4IW_QP_STATE_IDLE) {
2021 			ret = -EINVAL;
2022 			goto out;
2023 		}
2024 		if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
2025 			ret = -EINVAL;
2026 			goto out;
2027 		}
2028 		set_state(qhp, C4IW_QP_STATE_IDLE);
2029 		break;
2030 	case C4IW_QP_STATE_TERMINATE:
2031 		if (!internal) {
2032 			ret = -EINVAL;
2033 			goto out;
2034 		}
2035 		goto err;
2036 		break;
2037 	default:
2038 		pr_err("%s in a bad state %d\n", __func__, qhp->attr.state);
2039 		ret = -EINVAL;
2040 		goto err;
2041 		break;
2042 	}
2043 	goto out;
2044 err:
2045 	pr_debug("disassociating ep %p qpid 0x%x\n", qhp->ep,
2046 		 qhp->wq.sq.qid);
2047 
2048 	/* disassociate the LLP connection */
2049 	qhp->attr.llp_stream_handle = NULL;
2050 	if (!ep)
2051 		ep = qhp->ep;
2052 	qhp->ep = NULL;
2053 	set_state(qhp, C4IW_QP_STATE_ERROR);
2054 	free = 1;
2055 	abort = 1;
2056 	flush_qp(qhp);
2057 	wake_up(&qhp->wait);
2058 out:
2059 	mutex_unlock(&qhp->mutex);
2060 
2061 	if (terminate)
2062 		post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);
2063 
2064 	/*
2065 	 * If disconnect is 1, then we need to initiate a disconnect
2066 	 * on the EP.  This can be a normal close (RTS->CLOSING) or
2067 	 * an abnormal close (RTS/CLOSING->ERROR).
2068 	 */
2069 	if (disconnect) {
2070 		c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
2071 							 GFP_KERNEL);
2072 		c4iw_put_ep(&ep->com);
2073 	}
2074 
2075 	/*
2076 	 * If free is 1, then we've disassociated the EP from the QP
2077 	 * and we need to dereference the EP.
2078 	 */
2079 	if (free)
2080 		c4iw_put_ep(&ep->com);
2081 	pr_debug("exit state %d\n", qhp->attr.state);
2082 	return ret;
2083 }
2084 
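/*
 * c4iw_destroy_qp - move the QP to ERROR and release its resources.
 *
 * Drives the QP to ERROR (internally if it is already in TERMINATE), waits
 * until the endpoint has been disassociated, removes the QP from the qpid
 * table and the db flow-control list, returns its IRD reservation and drops
 * the reference; the final teardown runs from free_qp_work once the last
 * reference is gone.
 */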
2085 int c4iw_destroy_qp(struct ib_qp *ib_qp)
2086 {
2087 	struct c4iw_dev *rhp;
2088 	struct c4iw_qp *qhp;
2089 	struct c4iw_qp_attributes attrs;
2090 
2091 	qhp = to_c4iw_qp(ib_qp);
2092 	rhp = qhp->rhp;
2093 
2094 	attrs.next_state = C4IW_QP_STATE_ERROR;
2095 	if (qhp->attr.state == C4IW_QP_STATE_TERMINATE)
2096 		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2097 	else
2098 		c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
2099 	wait_event(qhp->wait, !qhp->ep);
2100 
2101 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
2102 
2103 	spin_lock_irq(&rhp->lock);
2104 	if (!list_empty(&qhp->db_fc_entry))
2105 		list_del_init(&qhp->db_fc_entry);
2106 	spin_unlock_irq(&rhp->lock);
2107 	free_ird(rhp, qhp->attr.max_ird);
2108 
2109 	c4iw_qp_rem_ref(ib_qp);
2110 
2111 	pr_debug("ib_qp %p qpid 0x%0x\n", ib_qp, qhp->wq.sq.qid);
2112 	return 0;
2113 }
2114 
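/*
 * c4iw_create_qp - allocate and initialize an RC QP.
 *
 * Validates the requested capabilities against the adapter limits, sizes the
 * SQ and (when no SRQ is used) the RQ including the status page entries,
 * creates the hardware queues and registers the QP by its SQ qid.  For user
 * QPs the response carries mmap keys for the queue memory, the BAR2
 * doorbell/GTS pages and, when the SQ is on-chip, the MA sync page.
 */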
2115 struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
2116 			     struct ib_udata *udata)
2117 {
2118 	struct c4iw_dev *rhp;
2119 	struct c4iw_qp *qhp;
2120 	struct c4iw_pd *php;
2121 	struct c4iw_cq *schp;
2122 	struct c4iw_cq *rchp;
2123 	struct c4iw_create_qp_resp uresp;
2124 	unsigned int sqsize, rqsize = 0;
2125 	struct c4iw_ucontext *ucontext;
2126 	int ret;
2127 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
2128 	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
2129 
2130 	pr_debug("ib_pd %p\n", pd);
2131 
2132 	if (attrs->qp_type != IB_QPT_RC)
2133 		return ERR_PTR(-EINVAL);
2134 
2135 	php = to_c4iw_pd(pd);
2136 	rhp = php->rhp;
2137 	schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
2138 	rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
2139 	if (!schp || !rchp)
2140 		return ERR_PTR(-EINVAL);
2141 
2142 	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
2143 		return ERR_PTR(-EINVAL);
2144 
2145 	if (!attrs->srq) {
2146 		if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2147 			return ERR_PTR(-E2BIG);
2148 		rqsize = attrs->cap.max_recv_wr + 1;
2149 		if (rqsize < 8)
2150 			rqsize = 8;
2151 	}
2152 
2153 	if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
2154 		return ERR_PTR(-E2BIG);
2155 	sqsize = attrs->cap.max_send_wr + 1;
2156 	if (sqsize < 8)
2157 		sqsize = 8;
2158 
2159 	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
2160 
2161 	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
2162 	if (!qhp)
2163 		return ERR_PTR(-ENOMEM);
2164 
2165 	qhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2166 	if (!qhp->wr_waitp) {
2167 		ret = -ENOMEM;
2168 		goto err_free_qhp;
2169 	}
2170 
2171 	qhp->wq.sq.size = sqsize;
2172 	qhp->wq.sq.memsize =
2173 		(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2174 		sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
2175 	qhp->wq.sq.flush_cidx = -1;
2176 	if (!attrs->srq) {
2177 		qhp->wq.rq.size = rqsize;
2178 		qhp->wq.rq.memsize =
2179 			(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2180 			sizeof(*qhp->wq.rq.queue);
2181 	}
2182 
2183 	if (ucontext) {
2184 		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
2185 		if (!attrs->srq)
2186 			qhp->wq.rq.memsize =
2187 				roundup(qhp->wq.rq.memsize, PAGE_SIZE);
2188 	}
2189 
2190 	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
2191 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2192 			qhp->wr_waitp, !attrs->srq);
2193 	if (ret)
2194 		goto err_free_wr_wait;
2195 
2196 	attrs->cap.max_recv_wr = rqsize - 1;
2197 	attrs->cap.max_send_wr = sqsize - 1;
2198 	attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;
2199 
2200 	qhp->rhp = rhp;
2201 	qhp->attr.pd = php->pdid;
2202 	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
2203 	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
2204 	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
2205 	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
2206 	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
2207 	if (!attrs->srq) {
2208 		qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
2209 		qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
2210 	}
2211 	qhp->attr.state = C4IW_QP_STATE_IDLE;
2212 	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
2213 	qhp->attr.enable_rdma_read = 1;
2214 	qhp->attr.enable_rdma_write = 1;
2215 	qhp->attr.enable_bind = 1;
2216 	qhp->attr.max_ord = 0;
2217 	qhp->attr.max_ird = 0;
2218 	qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
2219 	spin_lock_init(&qhp->lock);
2220 	mutex_init(&qhp->mutex);
2221 	init_waitqueue_head(&qhp->wait);
2222 	kref_init(&qhp->kref);
2223 	INIT_WORK(&qhp->free_work, free_qp_work);
2224 
2225 	ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
2226 	if (ret)
2227 		goto err_destroy_qp;
2228 
2229 	if (udata && ucontext) {
2230 		sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
2231 		if (!sq_key_mm) {
2232 			ret = -ENOMEM;
2233 			goto err_remove_handle;
2234 		}
2235 		if (!attrs->srq) {
2236 			rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
2237 			if (!rq_key_mm) {
2238 				ret = -ENOMEM;
2239 				goto err_free_sq_key;
2240 			}
2241 		}
2242 		sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
2243 		if (!sq_db_key_mm) {
2244 			ret = -ENOMEM;
2245 			goto err_free_rq_key;
2246 		}
2247 		if (!attrs->srq) {
2248 			rq_db_key_mm =
2249 				kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
2250 			if (!rq_db_key_mm) {
2251 				ret = -ENOMEM;
2252 				goto err_free_sq_db_key;
2253 			}
2254 		}
2255 		memset(&uresp, 0, sizeof(uresp));
2256 		if (t4_sq_onchip(&qhp->wq.sq)) {
2257 			ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
2258 						 GFP_KERNEL);
2259 			if (!ma_sync_key_mm) {
2260 				ret = -ENOMEM;
2261 				goto err_free_rq_db_key;
2262 			}
2263 			uresp.flags = C4IW_QPF_ONCHIP;
2264 		}
2265 		if (rhp->rdev.lldi.write_w_imm_support)
2266 			uresp.flags |= C4IW_QPF_WRITE_W_IMM;
2267 		uresp.qid_mask = rhp->rdev.qpmask;
2268 		uresp.sqid = qhp->wq.sq.qid;
2269 		uresp.sq_size = qhp->wq.sq.size;
2270 		uresp.sq_memsize = qhp->wq.sq.memsize;
2271 		if (!attrs->srq) {
2272 			uresp.rqid = qhp->wq.rq.qid;
2273 			uresp.rq_size = qhp->wq.rq.size;
2274 			uresp.rq_memsize = qhp->wq.rq.memsize;
2275 		}
2276 		spin_lock(&ucontext->mmap_lock);
2277 		if (ma_sync_key_mm) {
2278 			uresp.ma_sync_key = ucontext->key;
2279 			ucontext->key += PAGE_SIZE;
2280 		}
2281 		uresp.sq_key = ucontext->key;
2282 		ucontext->key += PAGE_SIZE;
2283 		if (!attrs->srq) {
2284 			uresp.rq_key = ucontext->key;
2285 			ucontext->key += PAGE_SIZE;
2286 		}
2287 		uresp.sq_db_gts_key = ucontext->key;
2288 		ucontext->key += PAGE_SIZE;
2289 		if (!attrs->srq) {
2290 			uresp.rq_db_gts_key = ucontext->key;
2291 			ucontext->key += PAGE_SIZE;
2292 		}
2293 		spin_unlock(&ucontext->mmap_lock);
2294 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2295 		if (ret)
2296 			goto err_free_ma_sync_key;
2297 		sq_key_mm->key = uresp.sq_key;
2298 		sq_key_mm->addr = qhp->wq.sq.phys_addr;
2299 		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
2300 		insert_mmap(ucontext, sq_key_mm);
2301 		if (!attrs->srq) {
2302 			rq_key_mm->key = uresp.rq_key;
2303 			rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
2304 			rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
2305 			insert_mmap(ucontext, rq_key_mm);
2306 		}
2307 		sq_db_key_mm->key = uresp.sq_db_gts_key;
2308 		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
2309 		sq_db_key_mm->len = PAGE_SIZE;
2310 		insert_mmap(ucontext, sq_db_key_mm);
2311 		if (!attrs->srq) {
2312 			rq_db_key_mm->key = uresp.rq_db_gts_key;
2313 			rq_db_key_mm->addr =
2314 				(u64)(unsigned long)qhp->wq.rq.bar2_pa;
2315 			rq_db_key_mm->len = PAGE_SIZE;
2316 			insert_mmap(ucontext, rq_db_key_mm);
2317 		}
2318 		if (ma_sync_key_mm) {
2319 			ma_sync_key_mm->key = uresp.ma_sync_key;
2320 			ma_sync_key_mm->addr =
2321 				(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
2322 				PCIE_MA_SYNC_A) & PAGE_MASK;
2323 			ma_sync_key_mm->len = PAGE_SIZE;
2324 			insert_mmap(ucontext, ma_sync_key_mm);
2325 		}
2326 
2327 		c4iw_get_ucontext(ucontext);
2328 		qhp->ucontext = ucontext;
2329 	}
2330 	if (!attrs->srq) {
2331 		qhp->wq.qp_errp =
2332 			&qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
2333 	} else {
2334 		qhp->wq.qp_errp =
2335 			&qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
2336 		qhp->wq.srqidxp =
2337 			&qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
2338 	}
2339 
2340 	qhp->ibqp.qp_num = qhp->wq.sq.qid;
2341 	if (attrs->srq)
2342 		qhp->srq = to_c4iw_srq(attrs->srq);
2343 	INIT_LIST_HEAD(&qhp->db_fc_entry);
2344 	pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
2345 		 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
2346 		 attrs->cap.max_send_wr, qhp->wq.rq.qid, qhp->wq.rq.size,
2347 		 qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
2348 	return &qhp->ibqp;
2349 err_free_ma_sync_key:
2350 	kfree(ma_sync_key_mm);
2351 err_free_rq_db_key:
2352 	if (!attrs->srq)
2353 		kfree(rq_db_key_mm);
2354 err_free_sq_db_key:
2355 	kfree(sq_db_key_mm);
2356 err_free_rq_key:
2357 	if (!attrs->srq)
2358 		kfree(rq_key_mm);
2359 err_free_sq_key:
2360 	kfree(sq_key_mm);
2361 err_remove_handle:
2362 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
2363 err_destroy_qp:
2364 	destroy_qp(&rhp->rdev, &qhp->wq,
2365 		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
2366 err_free_wr_wait:
2367 	c4iw_put_wr_wait(qhp->wr_waitp);
2368 err_free_qhp:
2369 	kfree(qhp);
2370 	return ERR_PTR(ret);
2371 }
2372 
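/*
 * c4iw_ib_modify_qp - translate an ib_qp_attr modify into c4iw attributes.
 *
 * The RTR state is dropped (iWARP has no RTR), the access flags map to the
 * RDMA read/write/bind enables, and SQ_PSN/RQ_PSN are overloaded to carry
 * doorbell index-increment values for db-full recovery, which is only
 * permitted on T4 devices.
 */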
2373 int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2374 		      int attr_mask, struct ib_udata *udata)
2375 {
2376 	struct c4iw_dev *rhp;
2377 	struct c4iw_qp *qhp;
2378 	enum c4iw_qp_attr_mask mask = 0;
2379 	struct c4iw_qp_attributes attrs;
2380 
2381 	pr_debug("ib_qp %p\n", ibqp);
2382 
2383 	/* iWARP does not support the RTR state */
2384 	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
2385 		attr_mask &= ~IB_QP_STATE;
2386 
2387 	/* Make sure we still have something left to do */
2388 	if (!attr_mask)
2389 		return 0;
2390 
2391 	memset(&attrs, 0, sizeof(attrs));
2392 	qhp = to_c4iw_qp(ibqp);
2393 	rhp = qhp->rhp;
2394 
2395 	attrs.next_state = c4iw_convert_state(attr->qp_state);
2396 	attrs.enable_rdma_read = (attr->qp_access_flags &
2397 			       IB_ACCESS_REMOTE_READ) ?  1 : 0;
2398 	attrs.enable_rdma_write = (attr->qp_access_flags &
2399 				IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2400 	attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
2401 
2402 
2403 	mask |= (attr_mask & IB_QP_STATE) ? C4IW_QP_ATTR_NEXT_STATE : 0;
2404 	mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
2405 			(C4IW_QP_ATTR_ENABLE_RDMA_READ |
2406 			 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
2407 			 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
2408 
2409 	/*
2410 	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
2411 	 * ringing the queue db when we're in DB_FULL mode.
2412 	 * Only allow this on T4 devices.
2413 	 */
2414 	attrs.sq_db_inc = attr->sq_psn;
2415 	attrs.rq_db_inc = attr->rq_psn;
2416 	mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
2417 	mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
2418 	if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
2419 	    (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
2420 		return -EINVAL;
2421 
2422 	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
2423 }
2424 
2425 struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
2426 {
2427 	pr_debug("ib_dev %p qpn 0x%x\n", dev, qpn);
2428 	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
2429 }
2430 
2431 void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
2432 {
2433 	struct ib_event event = {};
2434 
2435 	event.device = &srq->rhp->ibdev;
2436 	event.element.srq = &srq->ibsrq;
2437 	event.event = IB_EVENT_SRQ_LIMIT_REACHED;
2438 	ib_dispatch_event(&event);
2439 }
2440 
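/*
 * c4iw_modify_srq - limited SRQ modify support.
 *
 * A zero attribute mask from user space is used as a software trigger to
 * dispatch the SRQ limit-reached event; resizing (IB_SRQ_MAX_WR) is not
 * supported, and kernel callers may arm the SRQ limit via IB_SRQ_LIMIT.
 */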
2441 int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
2442 		    enum ib_srq_attr_mask srq_attr_mask,
2443 		    struct ib_udata *udata)
2444 {
2445 	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
2446 	int ret = 0;
2447 
2448 	/*
2449 	 * XXX A zero mask from user space is a SW signal that the SRQ
2450 	 * limit has been reached.
2451 	 */
2451 	if (udata && !srq_attr_mask) {
2452 		c4iw_dispatch_srq_limit_reached_event(srq);
2453 		goto out;
2454 	}
2455 
2456 	/* no support for this yet */
2457 	if (srq_attr_mask & IB_SRQ_MAX_WR) {
2458 		ret = -EINVAL;
2459 		goto out;
2460 	}
2461 
2462 	if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
2463 		srq->armed = true;
2464 		srq->srq_limit = attr->srq_limit;
2465 	}
2466 out:
2467 	return ret;
2468 }
2469 
2470 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
2471 		     int attr_mask, struct ib_qp_init_attr *init_attr)
2472 {
2473 	struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
2474 
2475 	memset(attr, 0, sizeof(*attr));
2476 	memset(init_attr, 0, sizeof(*init_attr));
2477 	attr->qp_state = to_ib_qp_state(qhp->attr.state);
2478 	init_attr->cap.max_send_wr = qhp->attr.sq_num_entries;
2479 	init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries;
2480 	init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
2481 	init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges;
2482 	init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
2483 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
2484 	return 0;
2485 }
2486 
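/*
 * free_srq_queue - destroy the hardware SRQ.
 *
 * Uses the pre-allocated destroy_skb to post a FW_RI_RES_WR with op RESET,
 * waits for the firmware, then frees the DMA queue memory, the RQT range,
 * the software RQ array and the qid.
 */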
2487 static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2488 			   struct c4iw_wr_wait *wr_waitp)
2489 {
2490 	struct c4iw_rdev *rdev = &srq->rhp->rdev;
2491 	struct sk_buff *skb = srq->destroy_skb;
2492 	struct t4_srq *wq = &srq->wq;
2493 	struct fw_ri_res_wr *res_wr;
2494 	struct fw_ri_res *res;
2495 	int wr_len;
2496 
2497 	wr_len = sizeof(*res_wr) + sizeof(*res);
2498 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2499 
2500 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2501 	memset(res_wr, 0, wr_len);
2502 	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2503 			FW_RI_RES_WR_NRES_V(1) |
2504 			FW_WR_COMPL_F);
2505 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2506 	res_wr->cookie = (uintptr_t)wr_waitp;
2507 	res = res_wr->res;
2508 	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2509 	res->u.srq.op = FW_RI_RES_OP_RESET;
2510 	res->u.srq.srqid = cpu_to_be32(srq->idx);
2511 	res->u.srq.eqid = cpu_to_be32(wq->qid);
2512 
2513 	c4iw_init_wr_wait(wr_waitp);
2514 	c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
2515 
2516 	dma_free_coherent(&rdev->lldi.pdev->dev,
2517 			  wq->memsize, wq->queue,
2518 			pci_unmap_addr(wq, mapping));
2519 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2520 	kfree(wq->sw_rq);
2521 	c4iw_put_qpid(rdev, wq->qid, uctx);
2522 }
2523 
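/*
 * alloc_srq_queue - allocate the hardware SRQ resources.
 *
 * Allocates a qid, the software tracking arrays (kernel SRQs only), an RQT
 * range and the DMA-coherent queue memory, and maps the BAR2 doorbell
 * region (mandatory for user SRQs).  It then posts a FW_RI_RES_WR with op
 * WRITE describing the egress queue (including the status page entries) and
 * the RQT, and waits for the firmware to complete it.
 */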
2524 static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
2525 			   struct c4iw_wr_wait *wr_waitp)
2526 {
2527 	struct c4iw_rdev *rdev = &srq->rhp->rdev;
2528 	int user = (uctx != &rdev->uctx);
2529 	struct t4_srq *wq = &srq->wq;
2530 	struct fw_ri_res_wr *res_wr;
2531 	struct fw_ri_res *res;
2532 	struct sk_buff *skb;
2533 	int wr_len;
2534 	int eqsize;
2535 	int ret = -ENOMEM;
2536 
2537 	wq->qid = c4iw_get_qpid(rdev, uctx);
2538 	if (!wq->qid)
2539 		goto err;
2540 
2541 	if (!user) {
2542 		wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
2543 				    GFP_KERNEL);
2544 		if (!wq->sw_rq)
2545 			goto err_put_qpid;
2546 		wq->pending_wrs = kcalloc(srq->wq.size,
2547 					  sizeof(*srq->wq.pending_wrs),
2548 					  GFP_KERNEL);
2549 		if (!wq->pending_wrs)
2550 			goto err_free_sw_rq;
2551 	}
2552 
2553 	wq->rqt_size = wq->size;
2554 	wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
2555 	if (!wq->rqt_hwaddr)
2556 		goto err_free_pending_wrs;
2557 	wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
2558 		T4_RQT_ENTRY_SHIFT;
2559 
2560 	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
2561 				       wq->memsize, &wq->dma_addr,
2562 			GFP_KERNEL);
2563 	if (!wq->queue)
2564 		goto err_free_rqtpool;
2565 
2566 	memset(wq->queue, 0, wq->memsize);
2567 	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
2568 
2569 	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
2570 				      &wq->bar2_qid,
2571 			user ? &wq->bar2_pa : NULL);
2572 
2573 	/*
2574 	 * User mode must have bar2 access.
2575 	 */
2576 
2577 	if (user && !wq->bar2_va) {
2578 		pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
2579 			pci_name(rdev->lldi.pdev), wq->qid);
2580 		ret = -EINVAL;
2581 		goto err_free_queue;
2582 	}
2583 
2584 	/* build fw_ri_res_wr */
2585 	wr_len = sizeof(*res_wr) + sizeof(*res);
2586 
2587 	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
2588 	if (!skb)
2589 		goto err_free_queue;
2590 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
2591 
2592 	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
2593 	memset(res_wr, 0, wr_len);
2594 	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
2595 			FW_RI_RES_WR_NRES_V(1) |
2596 			FW_WR_COMPL_F);
2597 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
2598 	res_wr->cookie = (uintptr_t)wr_waitp;
2599 	res = res_wr->res;
2600 	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
2601 	res->u.srq.op = FW_RI_RES_OP_WRITE;
2602 
2603 	/*
2604 	 * eqsize is the number of 64B entries plus the status page size.
2605 	 */
2606 	eqsize = wq->size * T4_RQ_NUM_SLOTS +
2607 		rdev->hw_queue.t4_eq_status_entries;
2608 	res->u.srq.eqid = cpu_to_be32(wq->qid);
2609 	res->u.srq.fetchszm_to_iqid =
2610 						/* no host cidx updates */
2611 		cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
2612 		FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
2613 		FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
2614 		FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
2615 	res->u.srq.dcaen_to_eqsize =
2616 		cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
2617 		FW_RI_RES_WR_DCACPU_V(0) |
2618 		FW_RI_RES_WR_FBMIN_V(2) |
2619 		FW_RI_RES_WR_FBMAX_V(3) |
2620 		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
2621 		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
2622 		FW_RI_RES_WR_EQSIZE_V(eqsize));
2623 	res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
2624 	res->u.srq.srqid = cpu_to_be32(srq->idx);
2625 	res->u.srq.pdid = cpu_to_be32(srq->pdid);
2626 	res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
2627 	res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
2628 			rdev->lldi.vr->rq.start);
2629 
2630 	c4iw_init_wr_wait(wr_waitp);
2631 
2632 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
2633 	if (ret)
2634 		goto err_free_queue;
2635 
2636 	pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx"
2637 		 " bar2_addr %p rqt addr 0x%x size %d\n",
2638 		 __func__, srq->idx, wq->qid, srq->pdid, wq->queue,
2639 		 (u64)virt_to_phys(wq->queue), wq->bar2_va,
2640 		 wq->rqt_hwaddr, wq->rqt_size);
2641 
2642 	return 0;
2643 err_free_queue:
2644 	dma_free_coherent(&rdev->lldi.pdev->dev,
2645 			  wq->memsize, wq->queue,
2646 			pci_unmap_addr(wq, mapping));
2647 err_free_rqtpool:
2648 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
2649 err_free_pending_wrs:
2650 	if (!user)
2651 		kfree(wq->pending_wrs);
2652 err_free_sw_rq:
2653 	if (!user)
2654 		kfree(wq->sw_rq);
2655 err_put_qpid:
2656 	c4iw_put_qpid(rdev, wq->qid, uctx);
2657 err:
2658 	return ret;
2659 }
2660 
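/*
 * c4iw_copy_wr_to_srq - copy a receive WR into the SRQ queue memory.
 *
 * Copies len16 16-byte units (two u64s per iteration) into the queue at the
 * current wq_pidx, wrapping back to the start of the queue when the end is
 * reached.
 */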
2661 void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
2662 {
2663 	u64 *src, *dst;
2664 
2665 	src = (u64 *)wqe;
2666 	dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
2667 	while (len16) {
2668 		*dst++ = *src++;
2669 		if (dst >= (u64 *)&srq->queue[srq->size])
2670 			dst = (u64 *)srq->queue;
2671 		*dst++ = *src++;
2672 		if (dst >= (u64 *)&srq->queue[srq->size])
2673 			dst = (u64 *)srq->queue;
2674 		len16--;
2675 	}
2676 }
2677 
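/*
 * c4iw_create_srq - allocate and initialize a shared receive queue.
 *
 * Validates the adapter SRQ support and the requested limits, rounds the
 * queue size up to a power of two of at least 16, and allocates the wait
 * object, SRQ index and destroy skb up front so that queue teardown does not
 * need to allocate memory.  After the hardware queue is created the qid is
 * registered and, for user SRQs, mmap keys for the queue memory and the BAR2
 * doorbell/GTS page are returned in the uresp.
 */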
2678 struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
2679 			       struct ib_udata *udata)
2680 {
2681 	struct c4iw_dev *rhp;
2682 	struct c4iw_srq *srq;
2683 	struct c4iw_pd *php;
2684 	struct c4iw_create_srq_resp uresp;
2685 	struct c4iw_ucontext *ucontext;
2686 	struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
2687 	int rqsize;
2688 	int ret;
2689 	int wr_len;
2690 
2691 	pr_debug("%s ib_pd %p\n", __func__, pd);
2692 
2693 	php = to_c4iw_pd(pd);
2694 	rhp = php->rhp;
2695 
2696 	if (!rhp->rdev.lldi.vr->srq.size)
2697 		return ERR_PTR(-EINVAL);
2698 	if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
2699 		return ERR_PTR(-E2BIG);
2700 	if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
2701 		return ERR_PTR(-E2BIG);
2702 
2703 	/*
2704 	 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
2705 	 */
2706 	rqsize = attrs->attr.max_wr + 1;
2707 	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
2708 
2709 	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
2710 
2711 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
2712 	if (!srq)
2713 		return ERR_PTR(-ENOMEM);
2714 
2715 	srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
2716 	if (!srq->wr_waitp) {
2717 		ret = -ENOMEM;
2718 		goto err_free_srq;
2719 	}
2720 
2721 	srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
2722 	if (srq->idx < 0) {
2723 		ret = -ENOMEM;
2724 		goto err_free_wr_wait;
2725 	}
2726 
2727 	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
2728 	srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
2729 	if (!srq->destroy_skb) {
2730 		ret = -ENOMEM;
2731 		goto err_free_srq_idx;
2732 	}
2733 
2734 	srq->rhp = rhp;
2735 	srq->pdid = php->pdid;
2736 
2737 	srq->wq.size = rqsize;
2738 	srq->wq.memsize =
2739 		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
2740 		sizeof(*srq->wq.queue);
2741 	if (ucontext)
2742 		srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
2743 
2744 	ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
2745 			&rhp->rdev.uctx, srq->wr_waitp);
2746 	if (ret)
2747 		goto err_free_skb;
2748 	attrs->attr.max_wr = rqsize - 1;
2749 
2750 	if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
2751 		srq->flags = T4_SRQ_LIMIT_SUPPORT;
2752 
2753 	ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
2754 	if (ret)
2755 		goto err_free_queue;
2756 
2757 	if (udata) {
2758 		srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
2759 		if (!srq_key_mm) {
2760 			ret = -ENOMEM;
2761 			goto err_remove_handle;
2762 		}
2763 		srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
2764 		if (!srq_db_key_mm) {
2765 			ret = -ENOMEM;
2766 			goto err_free_srq_key_mm;
2767 		}
2768 		memset(&uresp, 0, sizeof(uresp));
2769 		uresp.flags = srq->flags;
2770 		uresp.qid_mask = rhp->rdev.qpmask;
2771 		uresp.srqid = srq->wq.qid;
2772 		uresp.srq_size = srq->wq.size;
2773 		uresp.srq_memsize = srq->wq.memsize;
2774 		uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
2775 		spin_lock(&ucontext->mmap_lock);
2776 		uresp.srq_key = ucontext->key;
2777 		ucontext->key += PAGE_SIZE;
2778 		uresp.srq_db_gts_key = ucontext->key;
2779 		ucontext->key += PAGE_SIZE;
2780 		spin_unlock(&ucontext->mmap_lock);
2781 		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
2782 		if (ret)
2783 			goto err_free_srq_db_key_mm;
2784 		srq_key_mm->key = uresp.srq_key;
2785 		srq_key_mm->addr = virt_to_phys(srq->wq.queue);
2786 		srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
2787 		insert_mmap(ucontext, srq_key_mm);
2788 		srq_db_key_mm->key = uresp.srq_db_gts_key;
2789 		srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
2790 		srq_db_key_mm->len = PAGE_SIZE;
2791 		insert_mmap(ucontext, srq_db_key_mm);
2792 	}
2793 
2794 	pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
2795 		 __func__, srq->wq.qid, srq->idx, srq->wq.size,
2796 			(unsigned long)srq->wq.memsize, attrs->attr.max_wr);
2797 
2798 	spin_lock_init(&srq->lock);
2799 	return &srq->ibsrq;
2800 err_free_srq_db_key_mm:
2801 	kfree(srq_db_key_mm);
2802 err_free_srq_key_mm:
2803 	kfree(srq_key_mm);
2804 err_remove_handle:
2805 	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
2806 err_free_queue:
2807 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2808 		       srq->wr_waitp);
2809 err_free_skb:
2810 	if (srq->destroy_skb)
2811 		kfree_skb(srq->destroy_skb);
2812 err_free_srq_idx:
2813 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2814 err_free_wr_wait:
2815 	c4iw_put_wr_wait(srq->wr_waitp);
2816 err_free_srq:
2817 	kfree(srq);
2818 	return ERR_PTR(ret);
2819 }
2820 
2821 int c4iw_destroy_srq(struct ib_srq *ibsrq)
2822 {
2823 	struct c4iw_dev *rhp;
2824 	struct c4iw_srq *srq;
2825 	struct c4iw_ucontext *ucontext;
2826 
2827 	srq = to_c4iw_srq(ibsrq);
2828 	rhp = srq->rhp;
2829 
2830 	pr_debug("%s id %d\n", __func__, srq->wq.qid);
2831 
2832 	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
2833 	ucontext = ibsrq->uobject ?
2834 		to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
2835 	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
2836 		       srq->wr_waitp);
2837 	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
2838 	c4iw_put_wr_wait(srq->wr_waitp);
2839 	kfree(srq);
2840 	return 0;
2841 }
2842