xref: /linux/drivers/infiniband/hw/qib/qib_ruc.c (revision 9a379e77033f02c4a071891afdf0f0a01eff8ccb)
1 /*
2  * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/spinlock.h>
35 #include <rdma/ib_smi.h>
36 
37 #include "qib.h"
38 #include "qib_mad.h"
39 
40 /*
41  * Validate an RWQE and fill in the SGE state.
42  * Return 1 if OK, 0 on error (an IB_WC_LOC_PROT_ERR completion is queued).
43  */
44 static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
45 {
46 	int i, j, ret;
47 	struct ib_wc wc;
48 	struct rvt_lkey_table *rkt;
49 	struct rvt_pd *pd;
50 	struct rvt_sge_state *ss;
51 
52 	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
53 	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
54 	ss = &qp->r_sge;
55 	ss->sg_list = qp->r_sg_list;
56 	qp->r_len = 0;
57 	for (i = j = 0; i < wqe->num_sge; i++) {
58 		if (wqe->sg_list[i].length == 0)
59 			continue;
60 		/* Check LKEY */
61 		ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
62 				  NULL, &wqe->sg_list[i],
63 				  IB_ACCESS_LOCAL_WRITE);
64 		if (unlikely(ret <= 0))
65 			goto bad_lkey;
66 		qp->r_len += wqe->sg_list[i].length;
67 		j++;
68 	}
69 	ss->num_sge = j;
70 	ss->total_len = qp->r_len;
71 	ret = 1;
72 	goto bail;
73 
74 bad_lkey:
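	/* Drop the MR references taken on the SGEs validated so far. */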
75 	while (j) {
76 		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
77 
78 		rvt_put_mr(sge->mr);
79 	}
80 	ss->num_sge = 0;
81 	memset(&wc, 0, sizeof(wc));
82 	wc.wr_id = wqe->wr_id;
83 	wc.status = IB_WC_LOC_PROT_ERR;
84 	wc.opcode = IB_WC_RECV;
85 	wc.qp = &qp->ibqp;
86 	/* Queue the error completion; flag it solicited so an event fires. */
87 	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
88 	ret = 0;
89 bail:
90 	return ret;
91 }
92 
93 /**
94  * qib_get_rwqe - fetch the next RWQE from the RQ or SRQ for the QP
95  * @qp: the QP
96  * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
97  *
98  * Return -1 if there is a local error, 0 if no RWQE is available,
99  * otherwise return 1.
100  *
101  * Can be called from interrupt level.
102  */
103 int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
104 {
105 	unsigned long flags;
106 	struct rvt_rq *rq;
107 	struct rvt_rwq *wq;
108 	struct rvt_srq *srq;
109 	struct rvt_rwqe *wqe;
110 	void (*handler)(struct ib_event *, void *);
111 	u32 tail;
112 	int ret;
113 
114 	if (qp->ibqp.srq) {
115 		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
116 		handler = srq->ibsrq.event_handler;
117 		rq = &srq->rq;
118 	} else {
119 		srq = NULL;
120 		handler = NULL;
121 		rq = &qp->r_rq;
122 	}
123 
124 	spin_lock_irqsave(&rq->lock, flags);
125 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
126 		ret = 0;
127 		goto unlock;
128 	}
129 
130 	wq = rq->wq;
131 	tail = wq->tail;
132 	/* Validate tail before using it since it is user writable. */
133 	if (tail >= rq->size)
134 		tail = 0;
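	/* The ring is empty when tail has caught up with head. */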
135 	if (unlikely(tail == wq->head)) {
136 		ret = 0;
137 		goto unlock;
138 	}
139 	/* Make sure entry is read after head index is read. */
140 	smp_rmb();
141 	wqe = rvt_get_rwqe_ptr(rq, tail);
142 	/*
143 	 * Even though we update the tail index in memory, the verbs
144 	 * consumer is not supposed to post more entries until a
145 	 * completion is generated.
146 	 */
147 	if (++tail >= rq->size)
148 		tail = 0;
149 	wq->tail = tail;
150 	if (!wr_id_only && !qib_init_sge(qp, wqe)) {
151 		ret = -1;
152 		goto unlock;
153 	}
154 	qp->r_wr_id = wqe->wr_id;
155 
156 	ret = 1;
157 	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
158 	if (handler) {
159 		u32 n;
160 
161 		/*
162 		 * Validate head pointer value and compute
163 		 * the number of remaining WQEs.
164 		 */
165 		n = wq->head;
166 		if (n >= rq->size)
167 			n = 0;
168 		if (n < tail)
169 			n += rq->size - tail;
170 		else
171 			n -= tail;
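		/*
		 * n is now the number of RWQEs still posted between tail and
		 * head, allowing for wrap; e.g. size = 8, tail = 6, head = 2
		 * leaves 2 + (8 - 6) = 4 entries.
		 */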
172 		if (n < srq->limit) {
173 			struct ib_event ev;
174 
175 			srq->limit = 0;
176 			spin_unlock_irqrestore(&rq->lock, flags);
177 			ev.device = qp->ibqp.device;
178 			ev.element.srq = qp->ibqp.srq;
179 			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
180 			handler(&ev, srq->ibsrq.srq_context);
181 			goto bail;
182 		}
183 	}
184 unlock:
185 	spin_unlock_irqrestore(&rq->lock, flags);
186 bail:
187 	return ret;
188 }
189 
190 /*
191  * Switch to alternate path.
192  * The QP s_lock should be held and interrupts disabled.
193  */
194 void qib_migrate_qp(struct rvt_qp *qp)
195 {
196 	struct ib_event ev;
197 
198 	qp->s_mig_state = IB_MIG_MIGRATED;
199 	qp->remote_ah_attr = qp->alt_ah_attr;
200 	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
201 	qp->s_pkey_index = qp->s_alt_pkey_index;
202 
203 	ev.device = qp->ibqp.device;
204 	ev.element.qp = &qp->ibqp;
205 	ev.event = IB_EVENT_PATH_MIG;
206 	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
207 }
208 
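/*
 * Return the source GUID for a given index: index 0 is the port GUID,
 * larger indices come from the port's GUID table.
 */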
209 static __be64 get_sguid(struct qib_ibport *ibp, unsigned index)
210 {
211 	if (!index) {
212 		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
213 
214 		return ppd->guid;
215 	}
216 	return ibp->guids[index - 1];
217 }
218 
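/*
 * A GID matches if the interface ID is identical and the subnet prefix
 * is either the one we were given or the default prefix.
 */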
219 static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
220 {
221 	return (gid->global.interface_id == id &&
222 		(gid->global.subnet_prefix == gid_prefix ||
223 		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
224 }
225 
226 /*
227  * qib_ruc_check_hdr - validate an incoming header; a nonzero return means drop.
228  * This should be called with the QP r_lock held.
229  *
230  * The s_lock will be acquired around the qib_migrate_qp() call.
231  */
232 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
233 		      int has_grh, struct rvt_qp *qp, u32 bth0)
234 {
235 	__be64 guid;
236 	unsigned long flags;
237 
238 	if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
239 		if (!has_grh) {
240 			if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
241 			    IB_AH_GRH)
242 				goto err;
243 		} else {
244 			const struct ib_global_route *grh;
245 
246 			if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
247 			      IB_AH_GRH))
248 				goto err;
249 			grh = rdma_ah_read_grh(&qp->alt_ah_attr);
250 			guid = get_sguid(ibp, grh->sgid_index);
251 			if (!gid_ok(&hdr->u.l.grh.dgid,
252 				    ibp->rvp.gid_prefix, guid))
253 				goto err;
254 			if (!gid_ok(&hdr->u.l.grh.sgid,
255 			    grh->dgid.global.subnet_prefix,
256 			    grh->dgid.global.interface_id))
257 				goto err;
258 		}
259 		if (!qib_pkey_ok((u16)bth0,
260 				 qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
261 			qib_bad_pkey(ibp,
262 				     (u16)bth0,
263 				     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
264 				     0, qp->ibqp.qp_num,
265 				     hdr->lrh[3], hdr->lrh[1]);
266 			goto err;
267 		}
268 		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
269 		if ((be16_to_cpu(hdr->lrh[3]) !=
270 		     rdma_ah_get_dlid(&qp->alt_ah_attr)) ||
271 		    ppd_from_ibp(ibp)->port !=
272 			    rdma_ah_get_port_num(&qp->alt_ah_attr))
273 			goto err;
274 		spin_lock_irqsave(&qp->s_lock, flags);
275 		qib_migrate_qp(qp);
276 		spin_unlock_irqrestore(&qp->s_lock, flags);
277 	} else {
278 		if (!has_grh) {
279 			if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
280 			    IB_AH_GRH)
281 				goto err;
282 		} else {
283 			const struct ib_global_route *grh;
284 
285 			if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
286 			      IB_AH_GRH))
287 				goto err;
288 			grh = rdma_ah_read_grh(&qp->remote_ah_attr);
289 			guid = get_sguid(ibp, grh->sgid_index);
290 			if (!gid_ok(&hdr->u.l.grh.dgid,
291 				    ibp->rvp.gid_prefix, guid))
292 				goto err;
293 			if (!gid_ok(&hdr->u.l.grh.sgid,
294 			    grh->dgid.global.subnet_prefix,
295 			    grh->dgid.global.interface_id))
296 				goto err;
297 		}
298 		if (!qib_pkey_ok((u16)bth0,
299 				 qib_get_pkey(ibp, qp->s_pkey_index))) {
300 			qib_bad_pkey(ibp,
301 				     (u16)bth0,
302 				     (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
303 				     0, qp->ibqp.qp_num,
304 				     hdr->lrh[3], hdr->lrh[1]);
305 			goto err;
306 		}
307 		/* Validate the SLID. See Ch. 9.6.1.5 */
308 		if (be16_to_cpu(hdr->lrh[3]) !=
309 		    rdma_ah_get_dlid(&qp->remote_ah_attr) ||
310 		    ppd_from_ibp(ibp)->port != qp->port_num)
311 			goto err;
312 		if (qp->s_mig_state == IB_MIG_REARM &&
313 		    !(bth0 & IB_BTH_MIG_REQ))
314 			qp->s_mig_state = IB_MIG_ARMED;
315 	}
316 
317 	return 0;
318 
319 err:
320 	return 1;
321 }
322 
323 /**
324  * qib_ruc_loopback - handle UC and RC loopback requests
325  * @sqp: the sending QP
326  *
327  * This is called from qib_do_send() to
328  * forward a WQE addressed to the same HCA.
329  * Note that although we are single threaded due to the tasklet, we still
330  * have to protect against post_send().  We don't have to worry about
331  * receive interrupts since this is a connected protocol and all packets
332  * will pass through here.
333  */
334 static void qib_ruc_loopback(struct rvt_qp *sqp)
335 {
336 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
337 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
338 	struct qib_devdata *dd = ppd->dd;
339 	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
340 	struct rvt_qp *qp;
341 	struct rvt_swqe *wqe;
342 	struct rvt_sge *sge;
343 	unsigned long flags;
344 	struct ib_wc wc;
345 	u64 sdata;
346 	atomic64_t *maddr;
347 	enum ib_wc_status send_status;
348 	int release;
349 	int ret;
350 
351 	rcu_read_lock();
352 	/*
353 	 * Note that we check the responder QP state after
354 	 * checking the requester's state.
355 	 */
356 	qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
357 	if (!qp)
358 		goto done;
359 
360 	spin_lock_irqsave(&sqp->s_lock, flags);
361 
362 	/* Return if we are already busy processing a work request. */
363 	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
364 	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
365 		goto unlock;
366 
367 	sqp->s_flags |= RVT_S_BUSY;
368 
369 again:
370 	if (sqp->s_last == READ_ONCE(sqp->s_head))
371 		goto clr_busy;
372 	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
373 
374 	/* Return if it is not OK to start a new work request. */
375 	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
376 		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
377 			goto clr_busy;
378 		/* We are in the error state, flush the work request. */
379 		send_status = IB_WC_WR_FLUSH_ERR;
380 		goto flush_send;
381 	}
382 
383 	/*
384 	 * We can rely on the entry not changing without the s_lock
385 	 * being held until we update s_last.
386 	 * We increment s_cur to indicate s_last is in progress.
387 	 */
388 	if (sqp->s_last == sqp->s_cur) {
389 		if (++sqp->s_cur >= sqp->s_size)
390 			sqp->s_cur = 0;
391 	}
392 	spin_unlock_irqrestore(&sqp->s_lock, flags);
393 
394 	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
395 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
396 		ibp->rvp.n_pkt_drops++;
397 		/*
398 		 * For RC, the requester would time out and retry so
399 		 * shortcut the timeouts and just signal too many retries.
400 		 */
401 		if (sqp->ibqp.qp_type == IB_QPT_RC)
402 			send_status = IB_WC_RETRY_EXC_ERR;
403 		else
404 			send_status = IB_WC_SUCCESS;
405 		goto serr;
406 	}
407 
408 	memset(&wc, 0, sizeof(wc));
409 	send_status = IB_WC_SUCCESS;
410 
411 	release = 1;
412 	sqp->s_sge.sge = wqe->sg_list[0];
413 	sqp->s_sge.sg_list = wqe->sg_list + 1;
414 	sqp->s_sge.num_sge = wqe->wr.num_sge;
415 	sqp->s_len = wqe->length;
416 	switch (wqe->wr.opcode) {
417 	case IB_WR_SEND_WITH_IMM:
418 		wc.wc_flags = IB_WC_WITH_IMM;
419 		wc.ex.imm_data = wqe->wr.ex.imm_data;
420 		/* FALLTHROUGH */
421 	case IB_WR_SEND:
422 		ret = qib_get_rwqe(qp, 0);
423 		if (ret < 0)
424 			goto op_err;
425 		if (!ret)
426 			goto rnr_nak;
427 		break;
428 
429 	case IB_WR_RDMA_WRITE_WITH_IMM:
430 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
431 			goto inv_err;
432 		wc.wc_flags = IB_WC_WITH_IMM;
433 		wc.ex.imm_data = wqe->wr.ex.imm_data;
434 		ret = qib_get_rwqe(qp, 1);
435 		if (ret < 0)
436 			goto op_err;
437 		if (!ret)
438 			goto rnr_nak;
439 		/* FALLTHROUGH */
440 	case IB_WR_RDMA_WRITE:
441 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
442 			goto inv_err;
443 		if (wqe->length == 0)
444 			break;
445 		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
446 					  wqe->rdma_wr.remote_addr,
447 					  wqe->rdma_wr.rkey,
448 					  IB_ACCESS_REMOTE_WRITE)))
449 			goto acc_err;
450 		qp->r_sge.sg_list = NULL;
451 		qp->r_sge.num_sge = 1;
452 		qp->r_sge.total_len = wqe->length;
453 		break;
454 
455 	case IB_WR_RDMA_READ:
456 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
457 			goto inv_err;
458 		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
459 					  wqe->rdma_wr.remote_addr,
460 					  wqe->rdma_wr.rkey,
461 					  IB_ACCESS_REMOTE_READ)))
462 			goto acc_err;
463 		release = 0;
464 		sqp->s_sge.sg_list = NULL;
465 		sqp->s_sge.num_sge = 1;
466 		qp->r_sge.sge = wqe->sg_list[0];
467 		qp->r_sge.sg_list = wqe->sg_list + 1;
468 		qp->r_sge.num_sge = wqe->wr.num_sge;
469 		qp->r_sge.total_len = wqe->length;
470 		break;
471 
472 	case IB_WR_ATOMIC_CMP_AND_SWP:
473 	case IB_WR_ATOMIC_FETCH_AND_ADD:
474 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
475 			goto inv_err;
476 		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
477 					  wqe->atomic_wr.remote_addr,
478 					  wqe->atomic_wr.rkey,
479 					  IB_ACCESS_REMOTE_ATOMIC)))
480 			goto acc_err;
481 		/* Perform atomic OP and save result. */
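		/*
		 * Fetch-and-add stores the pre-add value (add_return minus
		 * sdata); compare-and-swap stores the prior contents returned
		 * by cmpxchg().  Either way the old value lands in the
		 * requester's first SGE, just as a wire atomic response would.
		 */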
482 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
483 		sdata = wqe->atomic_wr.compare_add;
484 		*(u64 *) sqp->s_sge.sge.vaddr =
485 			(wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
486 			(u64) atomic64_add_return(sdata, maddr) - sdata :
487 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
488 				      sdata, wqe->atomic_wr.swap);
489 		rvt_put_mr(qp->r_sge.sge.mr);
490 		qp->r_sge.num_sge = 0;
491 		goto send_comp;
492 
493 	default:
494 		send_status = IB_WC_LOC_QP_OP_ERR;
495 		goto serr;
496 	}
497 
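	/*
	 * Copy the payload from the requester's SGE list into the responder's
	 * destination, advancing through SGEs and MR segments as each one is
	 * exhausted.
	 */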
498 	sge = &sqp->s_sge.sge;
499 	while (sqp->s_len) {
500 		u32 len = sqp->s_len;
501 
502 		if (len > sge->length)
503 			len = sge->length;
504 		if (len > sge->sge_length)
505 			len = sge->sge_length;
506 		BUG_ON(len == 0);
507 		qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
508 		sge->vaddr += len;
509 		sge->length -= len;
510 		sge->sge_length -= len;
511 		if (sge->sge_length == 0) {
512 			if (!release)
513 				rvt_put_mr(sge->mr);
514 			if (--sqp->s_sge.num_sge)
515 				*sge = *sqp->s_sge.sg_list++;
516 		} else if (sge->length == 0 && sge->mr->lkey) {
517 			if (++sge->n >= RVT_SEGSZ) {
518 				if (++sge->m >= sge->mr->mapsz)
519 					break;
520 				sge->n = 0;
521 			}
522 			sge->vaddr =
523 				sge->mr->map[sge->m]->segs[sge->n].vaddr;
524 			sge->length =
525 				sge->mr->map[sge->m]->segs[sge->n].length;
526 		}
527 		sqp->s_len -= len;
528 	}
529 	if (release)
530 		rvt_put_ss(&qp->r_sge);
531 
532 	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
533 		goto send_comp;
534 
535 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
536 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
537 	else
538 		wc.opcode = IB_WC_RECV;
539 	wc.wr_id = qp->r_wr_id;
540 	wc.status = IB_WC_SUCCESS;
541 	wc.byte_len = wqe->length;
542 	wc.qp = &qp->ibqp;
543 	wc.src_qp = qp->remote_qpn;
544 	wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr);
545 	wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
546 	wc.port_num = 1;
547 	/* Signal completion event if the solicited bit is set. */
548 	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
549 		     wqe->wr.send_flags & IB_SEND_SOLICITED);
550 
551 send_comp:
552 	spin_lock_irqsave(&sqp->s_lock, flags);
553 	ibp->rvp.n_loop_pkts++;
554 flush_send:
555 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
556 	qib_send_complete(sqp, wqe, send_status);
557 	goto again;
558 
559 rnr_nak:
560 	/* Handle RNR NAK */
561 	if (qp->ibqp.qp_type == IB_QPT_UC)
562 		goto send_comp;
563 	ibp->rvp.n_rnr_naks++;
564 	/*
565 	 * Note: we don't need the s_lock held since the BUSY flag
566 	 * makes this single threaded.
567 	 */
568 	if (sqp->s_rnr_retry == 0) {
569 		send_status = IB_WC_RNR_RETRY_EXC_ERR;
570 		goto serr;
571 	}
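	/* An RNR retry count of 7 means retry forever; never decrement it. */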
572 	if (sqp->s_rnr_retry_cnt < 7)
573 		sqp->s_rnr_retry--;
574 	spin_lock_irqsave(&sqp->s_lock, flags);
575 	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
576 		goto clr_busy;
577 	rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
578 				IB_AETH_CREDIT_SHIFT);
579 	goto clr_busy;
580 
581 op_err:
582 	send_status = IB_WC_REM_OP_ERR;
583 	wc.status = IB_WC_LOC_QP_OP_ERR;
584 	goto err;
585 
586 inv_err:
587 	send_status = IB_WC_REM_INV_REQ_ERR;
588 	wc.status = IB_WC_LOC_QP_OP_ERR;
589 	goto err;
590 
591 acc_err:
592 	send_status = IB_WC_REM_ACCESS_ERR;
593 	wc.status = IB_WC_LOC_PROT_ERR;
594 err:
595 	/* responder goes to error state */
596 	rvt_rc_error(qp, wc.status);
597 
598 serr:
599 	spin_lock_irqsave(&sqp->s_lock, flags);
600 	qib_send_complete(sqp, wqe, send_status);
601 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
602 		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
603 
604 		sqp->s_flags &= ~RVT_S_BUSY;
605 		spin_unlock_irqrestore(&sqp->s_lock, flags);
606 		if (lastwqe) {
607 			struct ib_event ev;
608 
609 			ev.device = sqp->ibqp.device;
610 			ev.element.qp = &sqp->ibqp;
611 			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
612 			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
613 		}
614 		goto done;
615 	}
616 clr_busy:
617 	sqp->s_flags &= ~RVT_S_BUSY;
618 unlock:
619 	spin_unlock_irqrestore(&sqp->s_lock, flags);
620 done:
621 	rcu_read_unlock();
622 }
623 
624 /**
625  * qib_make_grh - construct a GRH header
626  * @ibp: a pointer to the IB port
627  * @hdr: a pointer to the GRH header being constructed
628  * @grh: the global route address to send to
629  * @hwords: the number of 32 bit words of header being sent
630  * @nwords: the number of 32 bit words of data being sent
631  *
632  * Return the size of the header in 32 bit words.
633  */
634 u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
635 		 const struct ib_global_route *grh, u32 hwords, u32 nwords)
636 {
637 	hdr->version_tclass_flow =
638 		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
639 			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
640 			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
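	/*
	 * The GRH payload length counts everything after the GRH: the header
	 * dwords minus the two LRH dwords, plus the padded data dwords and
	 * the ICRC dword, converted to bytes.
	 */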
641 	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
642 	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
643 	hdr->next_hdr = IB_GRH_NEXT_HDR;
644 	hdr->hop_limit = grh->hop_limit;
645 	/* The SGID is 32-bit aligned. */
646 	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
647 	if (!grh->sgid_index)
648 		hdr->sgid.global.interface_id = ppd_from_ibp(ibp)->guid;
649 	else if (grh->sgid_index < QIB_GUIDS_PER_PORT)
650 		hdr->sgid.global.interface_id = ibp->guids[grh->sgid_index - 1];
651 	hdr->dgid = grh->dgid;
652 
653 	/* GRH header size in 32-bit words. */
654 	return sizeof(struct ib_grh) / sizeof(u32);
655 }
656 
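/*
 * qib_make_ruc_header - build the LRH, optional GRH, and BTH for a request
 * @qp: the QP sending the packet
 * @ohdr: location of the BTH and other transport headers
 * @bth0: opcode and flag bits for BTH dword 0 (pkey and pad are ORed in here)
 * @bth2: the PSN word for BTH dword 2
 */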
657 void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
658 			 u32 bth0, u32 bth2)
659 {
660 	struct qib_qp_priv *priv = qp->priv;
661 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
662 	u16 lrh0;
663 	u32 nwords;
664 	u32 extra_bytes;
665 
666 	/* Construct the header. */
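	/*
	 * -size & 3 is the pad needed to round the payload up to a 32-bit
	 * boundary (e.g. a 5-byte payload needs -5 & 3 = 3 pad bytes);
	 * nwords is then the padded payload length in dwords.
	 */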
667 	extra_bytes = -qp->s_cur_size & 3;
668 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
669 	lrh0 = QIB_LRH_BTH;
670 	if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) {
671 		qp->s_hdrwords +=
672 			qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
673 				     rdma_ah_read_grh(&qp->remote_ah_attr),
674 				     qp->s_hdrwords, nwords);
675 		lrh0 = QIB_LRH_GRH;
676 	}
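	/* LRH dword 0: VL in bits 15:12, SL in bits 7:4, LNH in bits 1:0. */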
677 	lrh0 |= ibp->sl_to_vl[rdma_ah_get_sl(&qp->remote_ah_attr)] << 12 |
678 		rdma_ah_get_sl(&qp->remote_ah_attr) << 4;
679 	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
680 	priv->s_hdr->lrh[1] =
681 			cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr));
682 	priv->s_hdr->lrh[2] =
683 			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
684 	priv->s_hdr->lrh[3] =
685 		cpu_to_be16(ppd_from_ibp(ibp)->lid |
686 			    rdma_ah_get_path_bits(&qp->remote_ah_attr));
687 	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
688 	bth0 |= extra_bytes << 20;
689 	if (qp->s_mig_state == IB_MIG_MIGRATED)
690 		bth0 |= IB_BTH_MIG_REQ;
691 	ohdr->bth[0] = cpu_to_be32(bth0);
692 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
693 	ohdr->bth[2] = cpu_to_be32(bth2);
694 	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
695 }
696 
697 void _qib_do_send(struct work_struct *work)
698 {
699 	struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
700 						s_work);
701 	struct rvt_qp *qp = priv->owner;
702 
703 	qib_do_send(qp);
704 }
705 
706 /**
707  * qib_do_send - perform a send on a QP
708  * @qp: pointer to the QP
709  *
710  * Process entries in the send work queue until credit or queue is
711  * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
712  * Otherwise, two threads could send packets out of order.
713  */
714 void qib_do_send(struct rvt_qp *qp)
715 {
716 	struct qib_qp_priv *priv = qp->priv;
717 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
718 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
719 	int (*make_req)(struct rvt_qp *qp, unsigned long *flags);
720 	unsigned long flags;
721 
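	/*
	 * RC/UC requests addressed to our own LID (ignoring the low LMC
	 * path bits) never touch the wire; handle them in software.
	 */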
722 	if ((qp->ibqp.qp_type == IB_QPT_RC ||
723 	     qp->ibqp.qp_type == IB_QPT_UC) &&
724 	    (rdma_ah_get_dlid(&qp->remote_ah_attr) &
725 	     ~((1 << ppd->lmc) - 1)) == ppd->lid) {
726 		qib_ruc_loopback(qp);
727 		return;
728 	}
729 
730 	if (qp->ibqp.qp_type == IB_QPT_RC)
731 		make_req = qib_make_rc_req;
732 	else if (qp->ibqp.qp_type == IB_QPT_UC)
733 		make_req = qib_make_uc_req;
734 	else
735 		make_req = qib_make_ud_req;
736 
737 	spin_lock_irqsave(&qp->s_lock, flags);
738 
739 	/* Return if we are already busy processing a work request. */
740 	if (!qib_send_ok(qp)) {
741 		spin_unlock_irqrestore(&qp->s_lock, flags);
742 		return;
743 	}
744 
745 	qp->s_flags |= RVT_S_BUSY;
746 
747 	do {
748 		/* Check for a constructed packet to be sent. */
749 		if (qp->s_hdrwords != 0) {
750 			spin_unlock_irqrestore(&qp->s_lock, flags);
751 			/*
752 			 * If the packet cannot be sent now, return and
753 			 * the send tasklet will be woken up later.
754 			 */
755 			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
756 					   qp->s_cur_sge, qp->s_cur_size))
757 				return;
758 			/* Record that s_hdr is empty. */
759 			qp->s_hdrwords = 0;
760 			spin_lock_irqsave(&qp->s_lock, flags);
761 		}
762 	} while (make_req(qp, &flags));
763 
764 	spin_unlock_irqrestore(&qp->s_lock, flags);
765 }
766 
767 /*
768  * This should be called with s_lock held.
769  */
770 void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
771 		       enum ib_wc_status status)
772 {
773 	u32 old_last, last;
774 
775 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
776 		return;
777 
778 	last = qp->s_last;
779 	old_last = last;
780 	if (++last >= qp->s_size)
781 		last = 0;
782 	qp->s_last = last;
783 	/* See post_send() */
784 	barrier();
785 	rvt_put_swqe(wqe);
786 	if (qp->ibqp.qp_type == IB_QPT_UD ||
787 	    qp->ibqp.qp_type == IB_QPT_SMI ||
788 	    qp->ibqp.qp_type == IB_QPT_GSI)
789 		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
790 
791 	rvt_qp_swqe_complete(qp,
792 			     wqe,
793 			     ib_qib_wc_opcode[wqe->wr.opcode],
794 			     status);
795 
796 	if (qp->s_acked == old_last)
797 		qp->s_acked = last;
798 	if (qp->s_cur == old_last)
799 		qp->s_cur = last;
800 	if (qp->s_tail == old_last)
801 		qp->s_tail = last;
802 	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
803 		qp->s_draining = 0;
804 }
805