xref: /linux/drivers/infiniband/hw/qib/qib_rc.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/io.h>
35 
36 #include "qib.h"
37 
38 /*
 * Shorthand for the RC opcode constants: OP(x) pastes x onto the
 * IB_OPCODE_RC_ prefix, so OP(SEND_ONLY) is IB_OPCODE_RC_SEND_ONLY.
 */
39 #define OP(x) IB_OPCODE_RC_##x
40 
/* Declared ahead of its definition: start_timer() installs it as the s_timer callback. */
41 static void rc_timeout(unsigned long arg);
42 
43 static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
44 		       u32 psn, u32 pmtu)
45 {
46 	u32 len;
47 
48 	len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
49 	ss->sge = wqe->sg_list[0];
50 	ss->sg_list = wqe->sg_list + 1;
51 	ss->num_sge = wqe->wr.num_sge;
52 	ss->total_len = wqe->length;
53 	qib_skip_sge(ss, len, 0);
54 	return wqe->length - len;
55 }
56 
57 static void start_timer(struct rvt_qp *qp)
58 {
59 	qp->s_flags |= RVT_S_TIMER;
60 	qp->s_timer.function = rc_timeout;
61 	/* 4.096 usec. * (1 << qp->timeout) */
62 	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
63 	add_timer(&qp->s_timer);
64 }
65 
66 /**
67  * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
68  * @dev: the device for this QP (not referenced in the function body)
69  * @qp: a pointer to the QP
70  * @ohdr: a pointer to the IB header being constructed
71  * @pmtu: the path MTU
72  *
 * The packet built depends on qp->s_ack_state, which holds the state left
 * by the previous call:
 *  - after a completed response (read LAST/ONLY, atomic ACK) the tail of
 *    s_ack_queue is advanced and, if another entry is queued, a response
 *    to that RDMA read or atomic request is started;
 *  - RDMA_READ_RESPONSE_FIRST/MIDDLE continue the read response in
 *    progress, one @pmtu sized packet at a time;
 *  - any other state, or an empty queue with RVT_S_ACK_PENDING set,
 *    produces a plain ACK/NAK.
 *
 * On success s_hdrwords, s_cur_size and s_cur_sge describe the packet and
 * s_rdma_ack_cnt is incremented.  On failure s_ack_state is reset to
 * ACKNOWLEDGE and RVT_S_RESP_PENDING | RVT_S_ACK_PENDING are cleared.
 *
73  * Return 1 if constructed; otherwise, return 0.
74  * Note that we are in the responder's side of the QP context.
75  * Note the QP s_lock must be held.
76  */
77 static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
78 			   struct qib_other_headers *ohdr, u32 pmtu)
79 {
80 	struct rvt_ack_entry *e;
81 	u32 hwords;
82 	u32 len;
83 	u32 bth0;
84 	u32 bth2;
85 
86 	/* Don't send an ACK if we aren't supposed to. */
87 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
88 		goto bail;
89 
90 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
91 	hwords = 5;
92 
93 	switch (qp->s_ack_state) {
94 	case OP(RDMA_READ_RESPONSE_LAST):
95 	case OP(RDMA_READ_RESPONSE_ONLY):
		/* The read response is finished: release the entry's MR. */
96 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
97 		if (e->rdma_sge.mr) {
98 			rvt_put_mr(e->rdma_sge.mr);
99 			e->rdma_sge.mr = NULL;
100 		}
101 		/* FALLTHROUGH */
102 	case OP(ATOMIC_ACKNOWLEDGE):
103 		/*
104 		 * We can increment the tail pointer now that the last
105 		 * response has been sent instead of only being
106 		 * constructed.
		 * The index wraps to 0 once it exceeds QIB_MAX_RDMA_ATOMIC.
107 		 */
108 		if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
109 			qp->s_tail_ack_queue = 0;
110 		/* FALLTHROUGH */
111 	case OP(SEND_ONLY):
112 	case OP(ACKNOWLEDGE):
113 		/* Check for no next entry in the queue. */
114 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
			/* Nothing queued: send a plain ACK only if one is owed. */
115 			if (qp->s_flags & RVT_S_ACK_PENDING)
116 				goto normal;
117 			goto bail;
118 		}
119 
120 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
121 		if (e->opcode == OP(RDMA_READ_REQUEST)) {
122 			/*
123 			 * If a RDMA read response is being resent and
124 			 * we haven't seen the duplicate request yet,
125 			 * then stop sending the remaining responses the
126 			 * responder has seen until the requester resends it.
127 			 */
128 			len = e->rdma_sge.sge_length;
129 			if (len && !e->rdma_sge.mr) {
130 				qp->s_tail_ack_queue = qp->r_head_ack_queue;
131 				goto bail;
132 			}
133 			/* Copy SGE state in case we need to resend */
134 			qp->s_rdma_mr = e->rdma_sge.mr;
135 			if (qp->s_rdma_mr)
136 				rvt_get_mr(qp->s_rdma_mr);
137 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
138 			qp->s_ack_rdma_sge.num_sge = 1;
139 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
			/* More than one packet needed: start with FIRST. */
140 			if (len > pmtu) {
141 				len = pmtu;
142 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
143 			} else {
144 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
145 				e->sent = 1;
146 			}
			/* One extra header word for the AETH. */
147 			ohdr->u.aeth = qib_compute_aeth(qp);
148 			hwords++;
149 			qp->s_ack_rdma_psn = e->psn;
150 			bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
151 		} else {
152 			/* COMPARE_SWAP or FETCH_ADD */
153 			qp->s_cur_sge = NULL;
154 			len = 0;
155 			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
156 			ohdr->u.at.aeth = qib_compute_aeth(qp);
			/* 64-bit atomic result, high word first. */
157 			ohdr->u.at.atomic_ack_eth[0] =
158 				cpu_to_be32(e->atomic_data >> 32);
159 			ohdr->u.at.atomic_ack_eth[1] =
160 				cpu_to_be32(e->atomic_data);
161 			hwords += sizeof(ohdr->u.at) / sizeof(u32);
162 			bth2 = e->psn & QIB_PSN_MASK;
163 			e->sent = 1;
164 		}
165 		bth0 = qp->s_ack_state << 24;
166 		break;
167 
168 	case OP(RDMA_READ_RESPONSE_FIRST):
169 		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
170 		/* FALLTHROUGH */
171 	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* Continue the read response held in s_ack_rdma_sge. */
172 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
173 		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
174 		if (qp->s_rdma_mr)
175 			rvt_get_mr(qp->s_rdma_mr);
176 		len = qp->s_ack_rdma_sge.sge.sge_length;
177 		if (len > pmtu)
178 			len = pmtu;
179 		else {
			/* Remainder fits in one packet: this is the LAST one. */
180 			ohdr->u.aeth = qib_compute_aeth(qp);
181 			hwords++;
182 			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
183 			e = &qp->s_ack_queue[qp->s_tail_ack_queue];
184 			e->sent = 1;
185 		}
186 		bth0 = qp->s_ack_state << 24;
187 		bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
188 		break;
189 
190 	default:
191 normal:
192 		/*
193 		 * Send a regular ACK.
194 		 * Set the s_ack_state so we wait until after sending
195 		 * the ACK before setting s_ack_state to ACKNOWLEDGE
196 		 * (see above).
197 		 */
198 		qp->s_ack_state = OP(SEND_ONLY);
199 		qp->s_flags &= ~RVT_S_ACK_PENDING;
200 		qp->s_cur_sge = NULL;
		/* s_nak_state/s_ack_psn were latched by qib_send_rc_ack(). */
201 		if (qp->s_nak_state)
202 			ohdr->u.aeth =
203 				cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
204 					    (qp->s_nak_state <<
205 					     QIB_AETH_CREDIT_SHIFT));
206 		else
207 			ohdr->u.aeth = qib_compute_aeth(qp);
208 		hwords++;
209 		len = 0;
210 		bth0 = OP(ACKNOWLEDGE) << 24;
211 		bth2 = qp->s_ack_psn & QIB_PSN_MASK;
212 	}
	/* Count the response; qib_rc_send_complete() decrements it again. */
213 	qp->s_rdma_ack_cnt++;
214 	qp->s_hdrwords = hwords;
215 	qp->s_cur_size = len;
216 	qib_make_ruc_header(qp, ohdr, bth0, bth2);
217 	return 1;
218 
219 bail:
	/* Nothing to send: return to the idle state and drop pending flags. */
220 	qp->s_ack_state = OP(ACKNOWLEDGE);
221 	qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
222 	return 0;
223 }
224 
225 /**
226  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
227  * @qp: a pointer to the QP
 * @flags: not referenced in the function body
228  *
229  * Assumes the s_lock is held.
230  *
 * Pending responses are built first via qib_make_rc_ack().  Otherwise the
 * next request packet is chosen from qp->s_state (the opcode of the last
 * packet built, or one of the RDMA_READ_RESPONSE_* values that reset_psn()
 * uses as restart markers) and the send WQE at qp->s_cur.
 *
231  * Return 1 if constructed; otherwise, return 0.
 * 1 is also returned after a WQE has been flushed in the error state.
 * When 0 is returned RVT_S_BUSY is cleared from qp->s_flags.
232  */
233 int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
234 {
235 	struct qib_qp_priv *priv = qp->priv;
236 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
237 	struct qib_other_headers *ohdr;
238 	struct rvt_sge_state *ss;
239 	struct rvt_swqe *wqe;
240 	u32 hwords;
241 	u32 len;
242 	u32 bth0;
243 	u32 bth2;
244 	u32 pmtu = qp->pmtu;
245 	char newreq;
246 	int ret = 0;	/* never modified: the bail path always returns 0 */
247 	int delta;
248 
	/* Locate the BTH: it follows the GRH when the address handle has one. */
249 	ohdr = &priv->s_hdr->u.oth;
250 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
251 		ohdr = &priv->s_hdr->u.l.oth;
252 
253 	/* Sending responses has higher priority over sending requests. */
254 	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
255 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
256 		goto done;
257 
258 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
259 		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
260 			goto bail;
261 		/* We are in the error state, flush the work request. */
262 		smp_read_barrier_depends(); /* see post_one_send() */
263 		if (qp->s_last == ACCESS_ONCE(qp->s_head))
264 			goto bail;
265 		/* If DMAs are in progress, we can't flush immediately. */
266 		if (atomic_read(&priv->s_dma_busy)) {
267 			qp->s_flags |= RVT_S_WAIT_DMA;
268 			goto bail;
269 		}
		/* A WQE behind s_acked completes successfully, others flush. */
270 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
271 		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
272 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
273 		/* will get called again */
274 		goto done;
275 	}
276 
277 	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
278 		goto bail;
279 
	/*
	 * s_psn at or below s_sending_hpsn means we are about to resend.
	 * Wait (RVT_S_WAIT_PSN) while s_sending_psn..s_sending_hpsn is a
	 * non-empty range; otherwise mark the range empty and go on.
	 */
280 	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
281 		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
282 			qp->s_flags |= RVT_S_WAIT_PSN;
283 			goto bail;
284 		}
285 		qp->s_sending_psn = qp->s_psn;
286 		qp->s_sending_hpsn = qp->s_psn - 1;
287 	}
288 
289 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
290 	hwords = 5;
291 	bth0 = 0;
292 
293 	/* Send a request. */
294 	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
295 	switch (qp->s_state) {
296 	default:
297 		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
298 			goto bail;
299 		/*
300 		 * Resend an old request or start a new one.
301 		 *
302 		 * We keep track of the current SWQE so that
303 		 * we don't reset the "furthest progress" state
304 		 * if we need to back up.
305 		 */
306 		newreq = 0;
		/* s_cur == s_tail: this WQE has not been started before. */
307 		if (qp->s_cur == qp->s_tail) {
308 			/* Check if send work queue is empty. */
309 			if (qp->s_tail == qp->s_head)
310 				goto bail;
311 			/*
312 			 * If a fence is requested, wait for previous
313 			 * RDMA read and atomic operations to finish.
314 			 */
315 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
316 			    qp->s_num_rd_atomic) {
317 				qp->s_flags |= RVT_S_WAIT_FENCE;
318 				goto bail;
319 			}
320 			newreq = 1;
321 			qp->s_psn = wqe->psn;
322 		}
323 		/*
324 		 * Note that we have to be careful not to modify the
325 		 * original work request since we may need to resend
326 		 * it.
327 		 */
328 		len = wqe->length;
329 		ss = &qp->s_sge;
330 		bth2 = qp->s_psn & QIB_PSN_MASK;
331 		switch (wqe->wr.opcode) {
332 		case IB_WR_SEND:
333 		case IB_WR_SEND_WITH_IMM:
334 			/* If no credit, return. */
335 			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
336 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
337 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
338 				goto bail;
339 			}
			/* Multi-packet message: s_cur stays on this WQE. */
340 			if (len > pmtu) {
341 				qp->s_state = OP(SEND_FIRST);
342 				len = pmtu;
343 				break;
344 			}
345 			if (wqe->wr.opcode == IB_WR_SEND)
346 				qp->s_state = OP(SEND_ONLY);
347 			else {
348 				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
349 				/* Immediate data comes after the BTH */
350 				ohdr->u.imm_data = wqe->wr.ex.imm_data;
351 				hwords += 1;
352 			}
353 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
354 				bth0 |= IB_BTH_SOLICITED;
355 			bth2 |= IB_BTH_REQ_ACK;
356 			if (++qp->s_cur == qp->s_size)
357 				qp->s_cur = 0;
358 			break;
359 
360 		case IB_WR_RDMA_WRITE:
361 			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
362 				qp->s_lsn++;
363 			/* FALLTHROUGH */
364 		case IB_WR_RDMA_WRITE_WITH_IMM:
365 			/* If no credit, return. */
366 			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
367 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
368 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
369 				goto bail;
370 			}
371 
			/* The RETH carries the full length of the write. */
372 			ohdr->u.rc.reth.vaddr =
373 				cpu_to_be64(wqe->rdma_wr.remote_addr);
374 			ohdr->u.rc.reth.rkey =
375 				cpu_to_be32(wqe->rdma_wr.rkey);
376 			ohdr->u.rc.reth.length = cpu_to_be32(len);
377 			hwords += sizeof(struct ib_reth) / sizeof(u32);
378 			if (len > pmtu) {
379 				qp->s_state = OP(RDMA_WRITE_FIRST);
380 				len = pmtu;
381 				break;
382 			}
383 			if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
384 				qp->s_state = OP(RDMA_WRITE_ONLY);
385 			else {
386 				qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
387 				/* Immediate data comes after RETH */
388 				ohdr->u.rc.imm_data =
389 					wqe->rdma_wr.wr.ex.imm_data;
390 				hwords += 1;
391 				if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
392 					bth0 |= IB_BTH_SOLICITED;
393 			}
394 			bth2 |= IB_BTH_REQ_ACK;
395 			if (++qp->s_cur == qp->s_size)
396 				qp->s_cur = 0;
397 			break;
398 
399 		case IB_WR_RDMA_READ:
400 			/*
401 			 * Don't allow more operations to be started
402 			 * than the QP limits allow.
403 			 */
404 			if (newreq) {
405 				if (qp->s_num_rd_atomic >=
406 				    qp->s_max_rd_atomic) {
407 					qp->s_flags |= RVT_S_WAIT_RDMAR;
408 					goto bail;
409 				}
410 				qp->s_num_rd_atomic++;
411 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
412 					qp->s_lsn++;
413 			}
414 
415 			ohdr->u.rc.reth.vaddr =
416 				cpu_to_be64(wqe->rdma_wr.remote_addr);
417 			ohdr->u.rc.reth.rkey =
418 				cpu_to_be32(wqe->rdma_wr.rkey);
419 			ohdr->u.rc.reth.length = cpu_to_be32(len);
420 			qp->s_state = OP(RDMA_READ_REQUEST);
421 			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
			/* A read request is header only: no payload SGE. */
422 			ss = NULL;
423 			len = 0;
424 			bth2 |= IB_BTH_REQ_ACK;
425 			if (++qp->s_cur == qp->s_size)
426 				qp->s_cur = 0;
427 			break;
428 
429 		case IB_WR_ATOMIC_CMP_AND_SWP:
430 		case IB_WR_ATOMIC_FETCH_AND_ADD:
431 			/*
432 			 * Don't allow more operations to be started
433 			 * than the QP limits allow.
434 			 */
435 			if (newreq) {
436 				if (qp->s_num_rd_atomic >=
437 				    qp->s_max_rd_atomic) {
438 					qp->s_flags |= RVT_S_WAIT_RDMAR;
439 					goto bail;
440 				}
441 				qp->s_num_rd_atomic++;
442 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
443 					qp->s_lsn++;
444 			}
445 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
446 				qp->s_state = OP(COMPARE_SWAP);
447 				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
448 					wqe->atomic_wr.swap);
449 				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
450 					wqe->atomic_wr.compare_add);
451 			} else {
				/* FETCH_ADD: the addend goes in swap_data. */
452 				qp->s_state = OP(FETCH_ADD);
453 				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
454 					wqe->atomic_wr.compare_add);
455 				ohdr->u.atomic_eth.compare_data = 0;
456 			}
457 			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
458 				wqe->atomic_wr.remote_addr >> 32);
459 			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
460 				wqe->atomic_wr.remote_addr);
461 			ohdr->u.atomic_eth.rkey = cpu_to_be32(
462 				wqe->atomic_wr.rkey);
463 			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
464 			ss = NULL;
465 			len = 0;
466 			bth2 |= IB_BTH_REQ_ACK;
467 			if (++qp->s_cur == qp->s_size)
468 				qp->s_cur = 0;
469 			break;
470 
471 		default:
472 			goto bail;
473 		}
		/* Load the send SGE state from the start of the WQE. */
474 		qp->s_sge.sge = wqe->sg_list[0];
475 		qp->s_sge.sg_list = wqe->sg_list + 1;
476 		qp->s_sge.num_sge = wqe->wr.num_sge;
477 		qp->s_sge.total_len = wqe->length;
478 		qp->s_len = wqe->length;
479 		if (newreq) {
480 			qp->s_tail++;
481 			if (qp->s_tail >= qp->s_size)
482 				qp->s_tail = 0;
483 		}
		/* For a read, continue after the WQE's last PSN. */
484 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
485 			qp->s_psn = wqe->lpsn + 1;
486 		else
487 			qp->s_psn++;
488 		break;
489 
490 	case OP(RDMA_READ_RESPONSE_FIRST):
491 		/*
492 		 * qp->s_state is normally set to the opcode of the
493 		 * last packet constructed for new requests and therefore
494 		 * is never set to RDMA read response.
495 		 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
496 		 * thread to indicate a SEND needs to be restarted from an
497 		 * earlier PSN without interfering with the sending thread.
498 		 * See qib_restart_rc().
499 		 */
500 		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
501 		/* FALLTHROUGH */
502 	case OP(SEND_FIRST):
503 		qp->s_state = OP(SEND_MIDDLE);
504 		/* FALLTHROUGH */
505 	case OP(SEND_MIDDLE):
506 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
507 		ss = &qp->s_sge;
508 		len = qp->s_len;
509 		if (len > pmtu) {
510 			len = pmtu;
511 			break;
512 		}
		/* The remainder fits in one packet: finish the message. */
513 		if (wqe->wr.opcode == IB_WR_SEND)
514 			qp->s_state = OP(SEND_LAST);
515 		else {
516 			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
517 			/* Immediate data comes after the BTH */
518 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
519 			hwords += 1;
520 		}
521 		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
522 			bth0 |= IB_BTH_SOLICITED;
523 		bth2 |= IB_BTH_REQ_ACK;
524 		qp->s_cur++;
525 		if (qp->s_cur >= qp->s_size)
526 			qp->s_cur = 0;
527 		break;
528 
529 	case OP(RDMA_READ_RESPONSE_LAST):
530 		/*
531 		 * qp->s_state is normally set to the opcode of the
532 		 * last packet constructed for new requests and therefore
533 		 * is never set to RDMA read response.
534 		 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
535 		 * thread to indicate a RDMA write needs to be restarted from
536 		 * an earlier PSN without interfering with the sending thread.
537 		 * See qib_restart_rc().
538 		 */
539 		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
540 		/* FALLTHROUGH */
541 	case OP(RDMA_WRITE_FIRST):
542 		qp->s_state = OP(RDMA_WRITE_MIDDLE);
543 		/* FALLTHROUGH */
544 	case OP(RDMA_WRITE_MIDDLE):
545 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
546 		ss = &qp->s_sge;
547 		len = qp->s_len;
548 		if (len > pmtu) {
549 			len = pmtu;
550 			break;
551 		}
552 		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
553 			qp->s_state = OP(RDMA_WRITE_LAST);
554 		else {
555 			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
556 			/* Immediate data comes after the BTH */
557 			ohdr->u.imm_data = wqe->wr.ex.imm_data;
558 			hwords += 1;
559 			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
560 				bth0 |= IB_BTH_SOLICITED;
561 		}
562 		bth2 |= IB_BTH_REQ_ACK;
563 		qp->s_cur++;
564 		if (qp->s_cur >= qp->s_size)
565 			qp->s_cur = 0;
566 		break;
567 
568 	case OP(RDMA_READ_RESPONSE_MIDDLE):
569 		/*
570 		 * qp->s_state is normally set to the opcode of the
571 		 * last packet constructed for new requests and therefore
572 		 * is never set to RDMA read response.
573 		 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
574 		 * thread to indicate a RDMA read needs to be restarted from
575 		 * an earlier PSN without interfering with the sending thread.
576 		 * See qib_restart_rc().
577 		 */
		/* Re-request only the part of the read from s_psn onwards. */
578 		len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
579 		ohdr->u.rc.reth.vaddr =
580 			cpu_to_be64(wqe->rdma_wr.remote_addr + len);
581 		ohdr->u.rc.reth.rkey =
582 			cpu_to_be32(wqe->rdma_wr.rkey);
583 		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
584 		qp->s_state = OP(RDMA_READ_REQUEST);
585 		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
586 		bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
587 		qp->s_psn = wqe->lpsn + 1;
588 		ss = NULL;
589 		len = 0;
590 		qp->s_cur++;
591 		if (qp->s_cur == qp->s_size)
592 			qp->s_cur = 0;
593 		break;
594 	}
595 	qp->s_sending_hpsn = bth2;
	/*
	 * PSN distance from the start of the WQE; the shift pair keeps the
	 * low 24 bits and sign-extends them.  Request an ACK every
	 * QIB_PSN_CREDIT packets.
	 */
596 	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
597 	if (delta && delta % QIB_PSN_CREDIT == 0)
598 		bth2 |= IB_BTH_REQ_ACK;
	/* RVT_S_SEND_ONE: send just this packet, then wait for its ACK. */
599 	if (qp->s_flags & RVT_S_SEND_ONE) {
600 		qp->s_flags &= ~RVT_S_SEND_ONE;
601 		qp->s_flags |= RVT_S_WAIT_ACK;
602 		bth2 |= IB_BTH_REQ_ACK;
603 	}
604 	qp->s_len -= len;
605 	qp->s_hdrwords = hwords;
606 	qp->s_cur_sge = ss;
607 	qp->s_cur_size = len;
608 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
609 done:
610 	return 1;
611 bail:
612 	qp->s_flags &= ~RVT_S_BUSY;
613 	return ret;
614 }
615 
616 /**
617  * qib_send_rc_ack - Construct an ACK packet and send it
618  * @qp: a pointer to the QP
619  *
620  * This is called from qib_rc_rcv() and qib_kreceive().
621  * Note that RDMA reads and atomics are handled in the
622  * send side QP state and tasklet.
 *
 * The header is built on the stack while qp->s_lock is held; the lock is
 * dropped before the packet is copied into a PIO send buffer.  If other
 * responses are pending or outstanding, or no PIO buffer is available,
 * the ACK is handed to the send tasklet instead by latching the NAK state
 * and PSN and setting RVT_S_ACK_PENDING | RVT_S_RESP_PENDING.
 * Nothing is sent when the link is not active.
623  */
624 void qib_send_rc_ack(struct rvt_qp *qp)
625 {
626 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
627 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
628 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
629 	u64 pbc;
630 	u16 lrh0;
631 	u32 bth0;
632 	u32 hwords;
633 	u32 pbufn;
634 	u32 __iomem *piobuf;
635 	struct qib_ib_header hdr;
636 	struct qib_other_headers *ohdr;
637 	u32 control;
638 	unsigned long flags;
639 
640 	spin_lock_irqsave(&qp->s_lock, flags);
641 
642 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
643 		goto unlock;
644 
645 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
646 	if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
647 		goto queue_ack;
648 
649 	/* Construct the header with s_lock held so APM doesn't change it. */
650 	ohdr = &hdr.u.oth;
651 	lrh0 = QIB_LRH_BTH;
652 	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
653 	hwords = 6;
654 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		/* With a GRH the BTH follows it and the header grows. */
655 		hwords += qib_make_grh(ibp, &hdr.u.l.grh,
656 				       &qp->remote_ah_attr.grh, hwords, 0);
657 		ohdr = &hdr.u.l.oth;
658 		lrh0 = QIB_LRH_GRH;
659 	}
660 	/* read pkey_index w/o lock (it's atomic) */
661 	bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
662 	if (qp->s_mig_state == IB_MIG_MIGRATED)
663 		bth0 |= IB_BTH_MIG_REQ;
	/* A pending NAK code replaces the credit field of the AETH. */
664 	if (qp->r_nak_state)
665 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
666 					    (qp->r_nak_state <<
667 					     QIB_AETH_CREDIT_SHIFT));
668 	else
669 		ohdr->u.aeth = qib_compute_aeth(qp);
670 	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
671 		qp->remote_ah_attr.sl << 4;
672 	hdr.lrh[0] = cpu_to_be16(lrh0);
673 	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
674 	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
675 	hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
676 	ohdr->bth[0] = cpu_to_be32(bth0);
677 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
678 	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
679 
680 	spin_unlock_irqrestore(&qp->s_lock, flags);
681 
682 	/* Don't try to send ACKs if the link isn't ACTIVE */
683 	if (!(ppd->lflags & QIBL_LINKACTIVE))
684 		goto done;
685 
686 	control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
687 				       qp->s_srate, lrh0 >> 12);
688 	/* length is + 1 for the control dword */
689 	pbc = ((u64) control << 32) | (hwords + 1);
690 
691 	piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
692 	if (!piobuf) {
693 		/*
694 		 * We are out of PIO buffers at the moment.
695 		 * Pass responsibility for sending the ACK to the
696 		 * send tasklet so that when a PIO buffer becomes
697 		 * available, the ACK is sent ahead of other outgoing
698 		 * packets.
		 * queue_ack expects s_lock to be held, so retake it here.
699 		 */
700 		spin_lock_irqsave(&qp->s_lock, flags);
701 		goto queue_ack;
702 	}
703 
704 	/*
705 	 * Write the pbc.
706 	 * We have to flush after the PBC for correctness
707 	 * on some cpus or WC buffer can be written out of order.
708 	 */
709 	writeq(pbc, piobuf);
710 
711 	if (dd->flags & QIB_PIO_FLUSH_WC) {
712 		u32 *hdrp = (u32 *) &hdr;
713 
		/*
		 * Copy all but the last header word, flush, then write the
		 * last word on its own.
		 */
714 		qib_flush_wc();
715 		qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
716 		qib_flush_wc();
717 		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
718 	} else
719 		qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
720 
721 	if (dd->flags & QIB_USE_SPCL_TRIG) {
		/* Trigger word offset depends on which buffer pool pbufn is in. */
722 		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
723 
724 		qib_flush_wc();
725 		__raw_writel(0xaebecede, piobuf + spcl_off);
726 	}
727 
728 	qib_flush_wc();
729 	qib_sendbuf_done(dd, pbufn);
730 
731 	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
732 	goto done;
733 
734 queue_ack:
	/* Reached with s_lock held; the state may have changed meanwhile. */
735 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
736 		this_cpu_inc(*ibp->rvp.rc_qacks);
737 		qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
		/* Latch the values qib_make_rc_ack() will put in the ACK. */
738 		qp->s_nak_state = qp->r_nak_state;
739 		qp->s_ack_psn = qp->r_ack_psn;
740 
741 		/* Schedule the send tasklet. */
742 		qib_schedule_send(qp);
743 	}
744 unlock:
745 	spin_unlock_irqrestore(&qp->s_lock, flags);
746 done:
747 	return;
748 }
749 
750 /**
751  * reset_psn - reset the QP state to send starting from PSN
752  * @qp: the QP
753  * @psn: the packet sequence number to restart at
754  *
755  * This is called from qib_rc_rcv() to process an incoming RC ACK
756  * for the given QP.
757  * Called at interrupt level with the QP s_lock held.
758  */
759 static void reset_psn(struct rvt_qp *qp, u32 psn)
760 {
761 	u32 n = qp->s_acked;
762 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
763 	u32 opcode;
764 
765 	qp->s_cur = n;
766 
767 	/*
768 	 * If we are starting the request from the beginning,
769 	 * let the normal send code handle initialization.
770 	 */
771 	if (qib_cmp24(psn, wqe->psn) <= 0) {
772 		qp->s_state = OP(SEND_LAST);
773 		goto done;
774 	}
775 
776 	/* Find the work request opcode corresponding to the given PSN. */
777 	opcode = wqe->wr.opcode;
778 	for (;;) {
779 		int diff;
780 
781 		if (++n == qp->s_size)
782 			n = 0;
783 		if (n == qp->s_tail)
784 			break;
785 		wqe = rvt_get_swqe_ptr(qp, n);
786 		diff = qib_cmp24(psn, wqe->psn);
787 		if (diff < 0)
788 			break;
789 		qp->s_cur = n;
790 		/*
791 		 * If we are starting the request from the beginning,
792 		 * let the normal send code handle initialization.
793 		 */
794 		if (diff == 0) {
795 			qp->s_state = OP(SEND_LAST);
796 			goto done;
797 		}
798 		opcode = wqe->wr.opcode;
799 	}
800 
801 	/*
802 	 * Set the state to restart in the middle of a request.
803 	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
804 	 * See qib_make_rc_req().
805 	 */
806 	switch (opcode) {
807 	case IB_WR_SEND:
808 	case IB_WR_SEND_WITH_IMM:
809 		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
810 		break;
811 
812 	case IB_WR_RDMA_WRITE:
813 	case IB_WR_RDMA_WRITE_WITH_IMM:
814 		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
815 		break;
816 
817 	case IB_WR_RDMA_READ:
818 		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
819 		break;
820 
821 	default:
822 		/*
823 		 * This case shouldn't happen since its only
824 		 * one PSN per req.
825 		 */
826 		qp->s_state = OP(SEND_LAST);
827 	}
828 done:
829 	qp->s_psn = psn;
830 	/*
831 	 * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
832 	 * asynchronously before the send tasklet can get scheduled.
833 	 * Doing it in qib_make_rc_req() is too late.
834 	 */
835 	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
836 	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
837 		qp->s_flags |= RVT_S_WAIT_PSN;
838 }
839 
840 /*
841  * Back up requester to resend the last un-ACKed request.
842  * The QP r_lock and s_lock should be held and interrupts disabled.
843  */
844 static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
845 {
846 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
847 	struct qib_ibport *ibp;
848 
849 	if (qp->s_retry == 0) {
850 		if (qp->s_mig_state == IB_MIG_ARMED) {
851 			qib_migrate_qp(qp);
852 			qp->s_retry = qp->s_retry_cnt;
853 		} else if (qp->s_last == qp->s_acked) {
854 			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
855 			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
856 			return;
857 		} else /* XXX need to handle delayed completion */
858 			return;
859 	} else
860 		qp->s_retry--;
861 
862 	ibp = to_iport(qp->ibqp.device, qp->port_num);
863 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
864 		ibp->rvp.n_rc_resends++;
865 	else
866 		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
867 
868 	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
869 			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
870 			 RVT_S_WAIT_ACK);
871 	if (wait)
872 		qp->s_flags |= RVT_S_SEND_ONE;
873 	reset_psn(qp, psn);
874 }
875 
876 /*
877  * This is called from s_timer for missing responses.
878  */
879 static void rc_timeout(unsigned long arg)
880 {
881 	struct rvt_qp *qp = (struct rvt_qp *)arg;
882 	struct qib_ibport *ibp;
883 	unsigned long flags;
884 
885 	spin_lock_irqsave(&qp->r_lock, flags);
886 	spin_lock(&qp->s_lock);
887 	if (qp->s_flags & RVT_S_TIMER) {
888 		ibp = to_iport(qp->ibqp.device, qp->port_num);
889 		ibp->rvp.n_rc_timeouts++;
890 		qp->s_flags &= ~RVT_S_TIMER;
891 		del_timer(&qp->s_timer);
892 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
893 		qib_schedule_send(qp);
894 	}
895 	spin_unlock(&qp->s_lock);
896 	spin_unlock_irqrestore(&qp->r_lock, flags);
897 }
898 
899 /*
900  * This is called from s_timer for RNR timeouts.
901  */
902 void qib_rc_rnr_retry(unsigned long arg)
903 {
904 	struct rvt_qp *qp = (struct rvt_qp *)arg;
905 	unsigned long flags;
906 
907 	spin_lock_irqsave(&qp->s_lock, flags);
908 	if (qp->s_flags & RVT_S_WAIT_RNR) {
909 		qp->s_flags &= ~RVT_S_WAIT_RNR;
910 		del_timer(&qp->s_timer);
911 		qib_schedule_send(qp);
912 	}
913 	spin_unlock_irqrestore(&qp->s_lock, flags);
914 }
915 
916 /*
917  * Set qp->s_sending_psn to the next PSN after the given one.
918  * This would be psn+1 except when RDMA reads are present.
919  */
920 static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
921 {
922 	struct rvt_swqe *wqe;
923 	u32 n = qp->s_last;
924 
925 	/* Find the work request corresponding to the given PSN. */
926 	for (;;) {
927 		wqe = rvt_get_swqe_ptr(qp, n);
928 		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
929 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
930 				qp->s_sending_psn = wqe->lpsn + 1;
931 			else
932 				qp->s_sending_psn = psn + 1;
933 			break;
934 		}
935 		if (++n == qp->s_size)
936 			n = 0;
937 		if (n == qp->s_tail)
938 			break;
939 	}
940 }
941 
942 /*
943  * This should be called with the QP s_lock held and interrupts disabled.
944  */
945 void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
946 {
947 	struct qib_other_headers *ohdr;
948 	struct rvt_swqe *wqe;
949 	struct ib_wc wc;
950 	unsigned i;
951 	u32 opcode;
952 	u32 psn;
953 
954 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
955 		return;
956 
957 	/* Find out where the BTH is */
958 	if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
959 		ohdr = &hdr->u.oth;
960 	else
961 		ohdr = &hdr->u.l.oth;
962 
963 	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
964 	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
965 	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
966 		WARN_ON(!qp->s_rdma_ack_cnt);
967 		qp->s_rdma_ack_cnt--;
968 		return;
969 	}
970 
971 	psn = be32_to_cpu(ohdr->bth[2]);
972 	reset_sending_psn(qp, psn);
973 
974 	/*
975 	 * Start timer after a packet requesting an ACK has been sent and
976 	 * there are still requests that haven't been acked.
977 	 */
978 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
979 	    !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
980 	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
981 		start_timer(qp);
982 
983 	while (qp->s_last != qp->s_acked) {
984 		u32 s_last;
985 
986 		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
987 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
988 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
989 			break;
990 		s_last = qp->s_last;
991 		if (++s_last >= qp->s_size)
992 			s_last = 0;
993 		qp->s_last = s_last;
994 		/* see post_send() */
995 		barrier();
996 		for (i = 0; i < wqe->wr.num_sge; i++) {
997 			struct rvt_sge *sge = &wqe->sg_list[i];
998 
999 			rvt_put_mr(sge->mr);
1000 		}
1001 		/* Post a send completion queue entry if requested. */
1002 		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
1003 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
1004 			memset(&wc, 0, sizeof(wc));
1005 			wc.wr_id = wqe->wr.wr_id;
1006 			wc.status = IB_WC_SUCCESS;
1007 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1008 			wc.byte_len = wqe->length;
1009 			wc.qp = &qp->ibqp;
1010 			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
1011 		}
1012 	}
1013 	/*
1014 	 * If we were waiting for sends to complete before resending,
1015 	 * and they are now complete, restart sending.
1016 	 */
1017 	if (qp->s_flags & RVT_S_WAIT_PSN &&
1018 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1019 		qp->s_flags &= ~RVT_S_WAIT_PSN;
1020 		qp->s_sending_psn = qp->s_psn;
1021 		qp->s_sending_hpsn = qp->s_psn - 1;
1022 		qib_schedule_send(qp);
1023 	}
1024 }
1025 
1026 static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
1027 {
1028 	qp->s_last_psn = psn;
1029 }
1030 
1031 /*
1032  * Generate a SWQE completion.
1033  * This is similar to qib_send_complete but has to check to be sure
1034  * that the SGEs are not being referenced if the SWQE is being resent.
1035  */
1036 static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
1037 					 struct rvt_swqe *wqe,
1038 					 struct qib_ibport *ibp)
1039 {
1040 	struct ib_wc wc;
1041 	unsigned i;
1042 
1043 	/*
1044 	 * Don't decrement refcount and don't generate a
1045 	 * completion if the SWQE is being resent until the send
1046 	 * is finished.
1047 	 */
1048 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
1049 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
1050 		u32 s_last;
1051 
1052 		for (i = 0; i < wqe->wr.num_sge; i++) {
1053 			struct rvt_sge *sge = &wqe->sg_list[i];
1054 
1055 			rvt_put_mr(sge->mr);
1056 		}
1057 		s_last = qp->s_last;
1058 		if (++s_last >= qp->s_size)
1059 			s_last = 0;
1060 		qp->s_last = s_last;
1061 		/* see post_send() */
1062 		barrier();
1063 		/* Post a send completion queue entry if requested. */
1064 		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
1065 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
1066 			memset(&wc, 0, sizeof(wc));
1067 			wc.wr_id = wqe->wr.wr_id;
1068 			wc.status = IB_WC_SUCCESS;
1069 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
1070 			wc.byte_len = wqe->length;
1071 			wc.qp = &qp->ibqp;
1072 			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
1073 		}
1074 	} else
1075 		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
1076 
1077 	qp->s_retry = qp->s_retry_cnt;
1078 	update_last_psn(qp, wqe->lpsn);
1079 
1080 	/*
1081 	 * If we are completing a request which is in the process of
1082 	 * being resent, we can stop resending it since we know the
1083 	 * responder has already seen it.
1084 	 */
1085 	if (qp->s_acked == qp->s_cur) {
1086 		if (++qp->s_cur >= qp->s_size)
1087 			qp->s_cur = 0;
1088 		qp->s_acked = qp->s_cur;
1089 		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
1090 		if (qp->s_acked != qp->s_tail) {
1091 			qp->s_state = OP(SEND_LAST);
1092 			qp->s_psn = wqe->psn;
1093 		}
1094 	} else {
1095 		if (++qp->s_acked >= qp->s_size)
1096 			qp->s_acked = 0;
1097 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
1098 			qp->s_draining = 0;
1099 		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1100 	}
1101 	return wqe;
1102 }
1103 
/**
 * do_rc_ack - process an incoming RC ACK
 * @qp: the QP the ACK came in on
 * @aeth: the AETH word of the response; bits 31:29 select ACK (0),
 *	  RNR NAK (1) or NAK (3), any other value is ignored as reserved
 * @psn: the packet sequence number of the ACK
 * @opcode: the opcode of the response packet being processed
 * @val: the value stored through the first SGE of an atomic request
 *	 that this response completes
 * @rcd: the receive context whose qp_wait_list the QP is added to when
 *	 an RDMA read or atomic request has to be restarted
 *
 * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
 * for the given QP.
 * Called at interrupt level with the QP s_lock held.
 * Returns 1 if OK, 0 if current operation should be aborted (NAK).
 */
static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
		     u64 val, struct qib_ctxtdata *rcd)
{
	struct qib_ibport *ibp;
	enum ib_wc_status status;
	struct rvt_swqe *wqe;
	int ret = 0;
	u32 ack_psn;
	int diff;

	/* Remove QP from retry timer */
	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
		del_timer(&qp->s_timer);
	}

	/*
	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
	 * requests and implicitly NAK RDMA read and atomic requests issued
	 * before the NAK'ed request.  The MSN won't include the NAK'ed
	 * request but will include an ACK'ed request(s).
	 */
	ack_psn = psn;
	/* Any non-zero AETH code: @psn itself is not acked, only earlier PSNs. */
	if (aeth >> 29)
		ack_psn--;
	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	ibp = to_iport(qp->ibqp.device, qp->port_num);

	/*
	 * The MSN might be for a later WQE than the PSN indicates so
	 * only complete WQEs that the PSN finishes.
	 */
	while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
		/*
		 * RDMA_READ_RESPONSE_ONLY is a special case since
		 * we want to generate completion events for everything
		 * before the RDMA read, copy the data, then generate
		 * the completion for the read.
		 */
		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
		    diff == 0) {
			/*
			 * Leave the read itself uncompleted: the caller
			 * copies the data and then calls back in with
			 * RDMA_READ_RESPONSE_LAST to complete it.
			 */
			ret = 1;
			goto bail;
		}
		/*
		 * If this request is a RDMA read or atomic, and the ACK is
		 * for a later operation, this ACK NAKs the RDMA read or
		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
		 * can ACK a RDMA read and likewise for atomic ops.  Note
		 * that the NAK case can only happen if relaxed ordering is
		 * used and requests are sent after an RDMA read or atomic
		 * is sent but before the response is received.
		 */
		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
			/* Retry this request. */
			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
				qp->r_flags |= RVT_R_RDMAR_SEQ;
				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
				/*
				 * Put the QP on the context's wait list,
				 * taking a reference for the list entry.
				 * NOTE(review): presumably the list consumer
				 * kicks the send and drops the reference;
				 * confirm against the code that drains
				 * qp_wait_list.
				 */
				if (list_empty(&qp->rspwait)) {
					qp->r_flags |= RVT_R_RSP_SEND;
					atomic_inc(&qp->refcount);
					list_add_tail(&qp->rspwait,
						      &rcd->qp_wait_list);
				}
			}
			/*
			 * No need to process the ACK/NAK since we are
			 * restarting an earlier request.
			 */
			goto bail;
		}
		/* Hand the atomic result back through the request's first SGE. */
		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			u64 *vaddr = wqe->sg_list[0].vaddr;
			*vaddr = val;
		}
		/* One fewer RDMA read/atomic outstanding; wake a waiting sender. */
		if (qp->s_num_rd_atomic &&
		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
			qp->s_num_rd_atomic--;
			/* Restart sending task if fence is complete */
			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
			    !qp->s_num_rd_atomic) {
				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
						 RVT_S_WAIT_ACK);
				qib_schedule_send(qp);
			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
						 RVT_S_WAIT_ACK);
				qib_schedule_send(qp);
			}
		}
		wqe = do_rc_completion(qp, wqe, ibp);
		/* Nothing left outstanding on the send queue. */
		if (qp->s_acked == qp->s_tail)
			break;
	}

	/* Dispatch on the AETH code in the top three bits. */
	switch (aeth >> 29) {
	case 0:         /* ACK */
		this_cpu_inc(*ibp->rvp.rc_acks);
		if (qp->s_acked != qp->s_tail) {
			/*
			 * We are expecting more ACKs so
			 * reset the retransmit timer.
			 */
			start_timer(qp);
			/*
			 * We can stop resending the earlier packets and
			 * continue with the next packet the receiver wants.
			 */
			if (qib_cmp24(qp->s_psn, psn) <= 0)
				reset_psn(qp, psn + 1);
		} else if (qib_cmp24(qp->s_psn, psn) <= 0) {
			qp->s_state = OP(SEND_LAST);
			qp->s_psn = psn + 1;
		}
		if (qp->s_flags & RVT_S_WAIT_ACK) {
			qp->s_flags &= ~RVT_S_WAIT_ACK;
			qib_schedule_send(qp);
		}
		qib_get_credit(qp, aeth);
		/* A good ACK restores both retry budgets. */
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		qp->s_retry = qp->s_retry_cnt;
		update_last_psn(qp, psn);
		ret = 1;
		goto bail;

	case 1:         /* RNR NAK */
		ibp->rvp.n_rnr_naks++;
		if (qp->s_acked == qp->s_tail)
			goto bail;
		if (qp->s_flags & RVT_S_WAIT_RNR)
			goto bail;
		if (qp->s_rnr_retry == 0) {
			status = IB_WC_RNR_RETRY_EXC_ERR;
			goto class_b;
		}
		/*
		 * The budget is only counted down when the configured
		 * count is below 7.
		 * NOTE(review): 7 presumably means "retry forever" per the
		 * IB spec; confirm.
		 */
		if (qp->s_rnr_retry_cnt < 7)
			qp->s_rnr_retry--;

		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);

		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;

		reset_psn(qp, psn);

		/*
		 * Arm s_timer to call qib_rc_rnr_retry after the delay
		 * looked up in ib_qib_rnr_table from the AETH credit field.
		 */
		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
		qp->s_flags |= RVT_S_WAIT_RNR;
		qp->s_timer.function = qib_rc_rnr_retry;
		qp->s_timer.expires = jiffies + usecs_to_jiffies(
			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
					   QIB_AETH_CREDIT_MASK]);
		add_timer(&qp->s_timer);
		goto bail;

	case 3:         /* NAK */
		if (qp->s_acked == qp->s_tail)
			goto bail;
		/* The last valid PSN is the previous PSN. */
		update_last_psn(qp, psn - 1);
		/* The NAK code is carried in the AETH credit field. */
		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
			QIB_AETH_CREDIT_MASK) {
		case 0: /* PSN sequence error */
			ibp->rvp.n_seq_naks++;
			/*
			 * Back up to the responder's expected PSN.
			 * Note that we might get a NAK in the middle of an
			 * RDMA READ response which terminates the RDMA
			 * READ.
			 */
			qib_restart_rc(qp, psn, 0);
			qib_schedule_send(qp);
			break;

		case 1: /* Invalid Request */
			status = IB_WC_REM_INV_REQ_ERR;
			ibp->rvp.n_other_naks++;
			goto class_b;

		case 2: /* Remote Access Error */
			status = IB_WC_REM_ACCESS_ERR;
			ibp->rvp.n_other_naks++;
			goto class_b;

		case 3: /* Remote Operation Error */
			status = IB_WC_REM_OP_ERR;
			ibp->rvp.n_other_naks++;
			/*
			 * class_b is also entered from the RNR NAK case when
			 * the RNR retry budget is exhausted.  The request is
			 * only failed (and the QP errored) once s_last has
			 * caught up with s_acked.
			 */
class_b:
			if (qp->s_last == qp->s_acked) {
				qib_send_complete(qp, wqe, status);
				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
			}
			break;

		default:
			/* Ignore other reserved NAK error codes */
			goto reserved;
		}
		qp->s_retry = qp->s_retry_cnt;
		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
		goto bail;

	default:                /* 2: reserved */
reserved:
		/* Ignore reserved NAK codes. */
		goto bail;
	}

bail:
	return ret;
}
1333 
1334 /*
1335  * We have seen an out of sequence RDMA read middle or last packet.
1336  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
1337  */
1338 static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
1339 			 struct qib_ctxtdata *rcd)
1340 {
1341 	struct rvt_swqe *wqe;
1342 
1343 	/* Remove QP from retry timer */
1344 	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1345 		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1346 		del_timer(&qp->s_timer);
1347 	}
1348 
1349 	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
1350 
1351 	while (qib_cmp24(psn, wqe->lpsn) > 0) {
1352 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
1353 		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
1354 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
1355 			break;
1356 		wqe = do_rc_completion(qp, wqe, ibp);
1357 	}
1358 
1359 	ibp->rvp.n_rdma_seq++;
1360 	qp->r_flags |= RVT_R_RDMAR_SEQ;
1361 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
1362 	if (list_empty(&qp->rspwait)) {
1363 		qp->r_flags |= RVT_R_RSP_SEND;
1364 		atomic_inc(&qp->refcount);
1365 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
1366 	}
1367 }
1368 
/**
 * qib_rc_rcv_resp - process an incoming RC response packet
 * @ibp: the port this packet came in on
 * @ohdr: the other headers for this packet
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @hdrsize: the header length
 * @pmtu: the path MTU
 * @rcd: the receive context, passed on to do_rc_ack() and rdma_seq_err()
 *
 * This is called from qib_rc_rcv() to process an incoming RC response
 * packet for the given QP.
 * Called at interrupt level.
 *
 * Takes the QP s_lock itself; every exit path releases it before
 * returning.
 */
static void qib_rc_rcv_resp(struct qib_ibport *ibp,
			    struct qib_other_headers *ohdr,
			    void *data, u32 tlen,
			    struct rvt_qp *qp,
			    u32 opcode,
			    u32 psn, u32 hdrsize, u32 pmtu,
			    struct qib_ctxtdata *rcd)
{
	struct rvt_swqe *wqe;
	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
	enum ib_wc_status status;
	unsigned long flags;
	int diff;
	u32 pad;
	u32 aeth;
	u64 val;

	if (opcode != OP(RDMA_READ_RESPONSE_MIDDLE)) {
		/*
		 * If ACK'd PSN on SDMA busy list try to make progress to
		 * reclaim SDMA credits.
		 */
		if ((qib_cmp24(psn, qp->s_sending_psn) >= 0) &&
		    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) {

			/*
			 * If send tasklet not running attempt to progress
			 * SDMA queue.
			 */
			if (!(qp->s_flags & RVT_S_BUSY)) {
				/* Acquire SDMA Lock */
				spin_lock_irqsave(&ppd->sdma_lock, flags);
				/* Invoke sdma make progress */
				qib_sdma_make_progress(ppd);
				/* Release SDMA Lock */
				spin_unlock_irqrestore(&ppd->sdma_lock, flags);
			}
		}
	}

	spin_lock_irqsave(&qp->s_lock, flags);
	/* Drop the response if the QP state does not allow receive processing. */
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
		goto ack_done;

	/* Ignore invalid responses. */
	smp_read_barrier_depends(); /* see post_one_send */
	/* A PSN at or beyond s_next_psn is treated as invalid. */
	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
		goto ack_done;

	/* Ignore duplicate responses. */
	diff = qib_cmp24(psn, qp->s_last_psn);
	if (unlikely(diff <= 0)) {
		/* Update credits for "ghost" ACKs */
		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
			aeth = be32_to_cpu(ohdr->u.aeth);
			if ((aeth >> 29) == 0)
				qib_get_credit(qp, aeth);
		}
		goto ack_done;
	}

	/*
	 * Skip everything other than the PSN we expect, if we are waiting
	 * for a reply to a restarted RDMA read or atomic op.
	 */
	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
		if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
			goto ack_done;
		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
	}

	/* Nothing is outstanding on the send queue, so there is nothing to ack. */
	if (unlikely(qp->s_acked == qp->s_tail))
		goto ack_done;
	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
	status = IB_WC_SUCCESS;

	switch (opcode) {
	case OP(ACKNOWLEDGE):
	case OP(ATOMIC_ACKNOWLEDGE):
	case OP(RDMA_READ_RESPONSE_FIRST):
		aeth = be32_to_cpu(ohdr->u.aeth);
		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
			__be32 *p = ohdr->u.at.atomic_ack_eth;

			/* Assemble the 64-bit value from two big-endian words. */
			val = ((u64) be32_to_cpu(p[0]) << 32) |
				be32_to_cpu(p[1]);
		} else
			val = 0;
		/* Only RDMA_READ_RESPONSE_FIRST has data to handle beyond the ACK. */
		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
			goto ack_done;
		/* Count the 4-byte AETH read above as part of the header. */
		hdrsize += 4;
		/* do_rc_ack() may have advanced s_acked, so fetch the SWQE again. */
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_middle;

	case OP(RDMA_READ_RESPONSE_MIDDLE):
		/* no AETH, no ACK */
		if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
			goto ack_seq_err;
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
read_middle:
		/* A first/middle packet must carry exactly one pmtu of data. */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto ack_len_err;
		/* There must still be data left for a later packet. */
		if (unlikely(pmtu >= qp->s_rdma_read_len))
			goto ack_len_err;

		/*
		 * We got a response so update the timeout.
		 * 4.096 usec. * (1 << qp->timeout)
		 */
		qp->s_flags |= RVT_S_TIMER;
		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
		if (qp->s_flags & RVT_S_WAIT_ACK) {
			qp->s_flags &= ~RVT_S_WAIT_ACK;
			qib_schedule_send(qp);
		}

		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
			qp->s_retry = qp->s_retry_cnt;

		/*
		 * Update the RDMA receive state but do the copy w/o
		 * holding the locks and blocking interrupts.
		 */
		qp->s_rdma_read_len -= pmtu;
		update_last_psn(qp, psn);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
		/* s_lock is already released, so skip the unlock at ack_done. */
		goto bail;

	case OP(RDMA_READ_RESPONSE_ONLY):
		aeth = be32_to_cpu(ohdr->u.aeth);
		if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
			goto ack_done;
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 0 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen < (hdrsize + pad + 8)))
			goto ack_len_err;
		/*
		 * If this is a response to a resent RDMA read, we
		 * have to be careful to copy the data to the right
		 * location.
		 */
		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
						  wqe, psn, pmtu);
		goto read_last;

	case OP(RDMA_READ_RESPONSE_LAST):
		/* ACKs READ req. */
		if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
			goto ack_seq_err;
		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
			goto ack_op_err;
		/* Get the number of bytes the message was padded by. */
		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
		/*
		 * Check that the data size is >= 1 && <= pmtu.
		 * Remember to account for the AETH header (4) and
		 * ICRC (4).
		 */
		if (unlikely(tlen <= (hdrsize + pad + 8)))
			goto ack_len_err;
read_last:
		/* From here on tlen is the payload length only. */
		tlen -= hdrsize + pad + 8;
		/* The final packet must deliver exactly the remaining bytes. */
		if (unlikely(tlen != qp->s_rdma_read_len))
			goto ack_len_err;
		aeth = be32_to_cpu(ohdr->u.aeth);
		qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
		WARN_ON(qp->s_rdma_read_sge.num_sge);
		/* Data is in place: now complete the read request itself. */
		(void) do_rc_ack(qp, aeth, psn,
				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
		goto ack_done;
	}

	/* Opcodes not handled by the switch fall through to ack_op_err. */
ack_op_err:
	status = IB_WC_LOC_QP_OP_ERR;
	goto ack_err;

ack_seq_err:
	rdma_seq_err(qp, ibp, psn, rcd);
	goto ack_done;

ack_len_err:
	status = IB_WC_LOC_LEN_ERR;
ack_err:
	/* Fail the request and error the QP only once s_last has reached s_acked. */
	if (qp->s_last == qp->s_acked) {
		qib_send_complete(qp, wqe, status);
		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
	}
ack_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
bail:
	return;
}
1595 
/**
 * qib_rc_rcv_error - process an incoming duplicate or error RC packet
 * @ohdr: the other headers for this packet
 * @data: the packet data (not referenced in this function)
 * @qp: the QP for this packet
 * @opcode: the opcode for this packet
 * @psn: the packet sequence number for this packet
 * @diff: the difference between the PSN and the expected PSN
 * @rcd: the receive context whose qp_wait_list the QP is added to when
 *	 a sequence NAK is deferred
 *
 * This is called from qib_rc_rcv() to process an unexpected
 * incoming RC packet for the given QP.
 * Called at interrupt level.
 * Return 1 if no more processing is needed; otherwise return 0 to
 * schedule a response to be sent.
 */
static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
			    void *data,
			    struct rvt_qp *qp,
			    u32 opcode,
			    u32 psn,
			    int diff,
			    struct qib_ctxtdata *rcd)
{
	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct rvt_ack_entry *e;
	unsigned long flags;
	u8 i, prev;
	int old_req;

	/* diff > 0: the PSN is ahead of the expected one. */
	if (diff > 0) {
		/*
		 * Packet sequence error.
		 * A NAK will ACK earlier sends and RDMA writes.
		 * Don't queue the NAK if we already sent one.
		 */
		if (!qp->r_nak_state) {
			ibp->rvp.n_rc_seqnak++;
			qp->r_nak_state = IB_NAK_PSN_ERROR;
			/* Use the expected PSN. */
			qp->r_ack_psn = qp->r_psn;
			/*
			 * Wait to send the sequence NAK until all packets
			 * in the receive queue have been processed.
			 * Otherwise, we end up propagating congestion.
			 */
			if (list_empty(&qp->rspwait)) {
				qp->r_flags |= RVT_R_RSP_NAK;
				atomic_inc(&qp->refcount);
				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
			}
		}
		goto done;
	}

	/*
	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
	 * write or atomic op.  Don't NAK errors, just silently drop
	 * the duplicate request.  Note that r_sge, r_len, and
	 * r_rcv_len may be in use so don't modify them.
	 *
	 * We are supposed to ACK the earliest duplicate PSN but we
	 * can coalesce an outstanding duplicate ACK.  We have to
	 * send the earliest so that RDMA reads can be restarted at
	 * the requester's expected PSN.
	 *
	 * First, find where this duplicate PSN falls within the
	 * ACKs previously sent.
	 * old_req is true if there is an older response that is scheduled
	 * to be sent before sending this one.
	 */
	e = NULL;
	old_req = 1;
	ibp->rvp.n_rc_dupreq++;

	spin_lock_irqsave(&qp->s_lock, flags);

	/*
	 * Walk the ack queue backwards from r_head_ack_queue.  Indices run
	 * from 0 to QIB_MAX_RDMA_ATOMIC, so stepping back from 0 wraps to
	 * QIB_MAX_RDMA_ATOMIC.  On exit, e is the matching entry (or NULL)
	 * and prev is its index.
	 */
	for (i = qp->r_head_ack_queue; ; i = prev) {
		if (i == qp->s_tail_ack_queue)
			old_req = 0;
		if (i)
			prev = i - 1;
		else
			prev = QIB_MAX_RDMA_ATOMIC;
		/* Wrapped all the way around without finding a match. */
		if (prev == qp->r_head_ack_queue) {
			e = NULL;
			break;
		}
		e = &qp->s_ack_queue[prev];
		/* An entry with opcode 0 ends the search (presumably an unused slot). */
		if (!e->opcode) {
			e = NULL;
			break;
		}
		/* First entry (going backwards) that starts at or before @psn. */
		if (qib_cmp24(psn, e->psn) >= 0) {
			if (prev == qp->s_tail_ack_queue &&
			    qib_cmp24(psn, e->lpsn) <= 0)
				old_req = 0;
			break;
		}
	}
	switch (opcode) {
	case OP(RDMA_READ_REQUEST): {
		struct ib_reth *reth;
		u32 offset;
		u32 len;

		/*
		 * If we didn't find the RDMA read request in the ack queue,
		 * we can ignore this request.
		 */
		if (!e || e->opcode != OP(RDMA_READ_REQUEST))
			goto unlock_done;
		/* RETH comes after BTH */
		reth = &ohdr->u.rc.reth;
		/*
		 * Address range must be a subset of the original
		 * request and start on pmtu boundaries.
		 * We reuse the old ack_queue slot since the requester
		 * should not back up and request an earlier PSN for the
		 * same request.
		 */
		offset = ((psn - e->psn) & QIB_PSN_MASK) *
			qp->pmtu;
		len = be32_to_cpu(reth->length);
		/* The resent range must end exactly where the original did. */
		if (unlikely(offset + len != e->rdma_sge.sge_length))
			goto unlock_done;
		/* Release the MR held by the slot before re-validating the rkey. */
		if (e->rdma_sge.mr) {
			rvt_put_mr(e->rdma_sge.mr);
			e->rdma_sge.mr = NULL;
		}
		if (len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
					 IB_ACCESS_REMOTE_READ);
			if (unlikely(!ok))
				goto unlock_done;
		} else {
			e->rdma_sge.vaddr = NULL;
			e->rdma_sge.length = 0;
			e->rdma_sge.sge_length = 0;
		}
		e->psn = psn;
		/* An older response is already scheduled; do not move the tail. */
		if (old_req)
			goto unlock_done;
		qp->s_tail_ack_queue = prev;
		break;
	}

	case OP(COMPARE_SWAP):
	case OP(FETCH_ADD): {
		/*
		 * If we didn't find the atomic request in the ack queue
		 * or the send tasklet is already backed up to send an
		 * earlier entry, we can ignore this request.
		 */
		if (!e || e->opcode != (u8) opcode || old_req)
			goto unlock_done;
		qp->s_tail_ack_queue = prev;
		break;
	}

	default:
		/*
		 * Ignore this operation if it doesn't request an ACK
		 * or an earlier RDMA read or atomic is going to be resent.
		 */
		if (!(psn & IB_BTH_REQ_ACK) || old_req)
			goto unlock_done;
		/*
		 * Resend the most recent ACK if this request is
		 * after all the previous RDMA reads and atomics.
		 */
		if (i == qp->r_head_ack_queue) {
			/* Note: s_lock is dropped before the r_* fields are written. */
			spin_unlock_irqrestore(&qp->s_lock, flags);
			qp->r_nak_state = 0;
			qp->r_ack_psn = qp->r_psn - 1;
			goto send_ack;
		}
		/*
		 * Try to send a simple ACK to work around a Mellanox bug
		 * which doesn't accept a RDMA read response or atomic
		 * response as an ACK for earlier SENDs or RDMA writes.
		 */
		if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
			spin_unlock_irqrestore(&qp->s_lock, flags);
			qp->r_nak_state = 0;
			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
			goto send_ack;
		}
		/*
		 * Resend the RDMA read or atomic op which
		 * ACKs this duplicate request.
		 */
		qp->s_tail_ack_queue = i;
		break;
	}
	/* Restart response generation from the (possibly moved) ack queue tail. */
	qp->s_ack_state = OP(ACKNOWLEDGE);
	qp->s_flags |= RVT_S_RESP_PENDING;
	qp->r_nak_state = 0;
	qib_schedule_send(qp);

unlock_done:
	spin_unlock_irqrestore(&qp->s_lock, flags);
done:
	return 1;

	/* Reached with s_lock already released. */
send_ack:
	return 0;
}
1807 
1808 void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
1809 {
1810 	unsigned long flags;
1811 	int lastwqe;
1812 
1813 	spin_lock_irqsave(&qp->s_lock, flags);
1814 	lastwqe = rvt_error_qp(qp, err);
1815 	spin_unlock_irqrestore(&qp->s_lock, flags);
1816 
1817 	if (lastwqe) {
1818 		struct ib_event ev;
1819 
1820 		ev.device = qp->ibqp.device;
1821 		ev.element.qp = &qp->ibqp;
1822 		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1823 		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1824 	}
1825 }
1826 
1827 static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
1828 {
1829 	unsigned next;
1830 
1831 	next = n + 1;
1832 	if (next > QIB_MAX_RDMA_ATOMIC)
1833 		next = 0;
1834 	qp->s_tail_ack_queue = next;
1835 	qp->s_ack_state = OP(ACKNOWLEDGE);
1836 }
1837 
1838 /**
1839  * qib_rc_rcv - process an incoming RC packet
1840  * @rcd: the context pointer
1841  * @hdr: the header of this packet
1842  * @has_grh: true if the header has a GRH
1843  * @data: the packet data
1844  * @tlen: the packet length
1845  * @qp: the QP for this packet
1846  *
1847  * This is called from qib_qp_rcv() to process an incoming RC packet
1848  * for the given QP.
1849  * Called at interrupt level.
1850  */
1851 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
1852 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
1853 {
1854 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
1855 	struct qib_other_headers *ohdr;
1856 	u32 opcode;
1857 	u32 hdrsize;
1858 	u32 psn;
1859 	u32 pad;
1860 	struct ib_wc wc;
1861 	u32 pmtu = qp->pmtu;
1862 	int diff;
1863 	struct ib_reth *reth;
1864 	unsigned long flags;
1865 	int ret;
1866 
1867 	/* Check for GRH */
1868 	if (!has_grh) {
1869 		ohdr = &hdr->u.oth;
1870 		hdrsize = 8 + 12;       /* LRH + BTH */
1871 	} else {
1872 		ohdr = &hdr->u.l.oth;
1873 		hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
1874 	}
1875 
1876 	opcode = be32_to_cpu(ohdr->bth[0]);
1877 	if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
1878 		return;
1879 
1880 	psn = be32_to_cpu(ohdr->bth[2]);
1881 	opcode >>= 24;
1882 
1883 	/*
1884 	 * Process responses (ACKs) before anything else.  Note that the
1885 	 * packet sequence number will be for something in the send work
1886 	 * queue rather than the expected receive packet sequence number.
1887 	 * In other words, this QP is the requester.
1888 	 */
1889 	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
1890 	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
1891 		qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
1892 				hdrsize, pmtu, rcd);
1893 		return;
1894 	}
1895 
1896 	/* Compute 24 bits worth of difference. */
1897 	diff = qib_cmp24(psn, qp->r_psn);
1898 	if (unlikely(diff)) {
1899 		if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
1900 			return;
1901 		goto send_ack;
1902 	}
1903 
1904 	/* Check for opcode sequence errors. */
1905 	switch (qp->r_state) {
1906 	case OP(SEND_FIRST):
1907 	case OP(SEND_MIDDLE):
1908 		if (opcode == OP(SEND_MIDDLE) ||
1909 		    opcode == OP(SEND_LAST) ||
1910 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1911 			break;
1912 		goto nack_inv;
1913 
1914 	case OP(RDMA_WRITE_FIRST):
1915 	case OP(RDMA_WRITE_MIDDLE):
1916 		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
1917 		    opcode == OP(RDMA_WRITE_LAST) ||
1918 		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1919 			break;
1920 		goto nack_inv;
1921 
1922 	default:
1923 		if (opcode == OP(SEND_MIDDLE) ||
1924 		    opcode == OP(SEND_LAST) ||
1925 		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
1926 		    opcode == OP(RDMA_WRITE_MIDDLE) ||
1927 		    opcode == OP(RDMA_WRITE_LAST) ||
1928 		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
1929 			goto nack_inv;
1930 		/*
1931 		 * Note that it is up to the requester to not send a new
1932 		 * RDMA read or atomic operation before receiving an ACK
1933 		 * for the previous operation.
1934 		 */
1935 		break;
1936 	}
1937 
1938 	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
1939 		qp->r_flags |= RVT_R_COMM_EST;
1940 		if (qp->ibqp.event_handler) {
1941 			struct ib_event ev;
1942 
1943 			ev.device = qp->ibqp.device;
1944 			ev.element.qp = &qp->ibqp;
1945 			ev.event = IB_EVENT_COMM_EST;
1946 			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1947 		}
1948 	}
1949 
1950 	/* OK, process the packet. */
1951 	switch (opcode) {
1952 	case OP(SEND_FIRST):
1953 		ret = qib_get_rwqe(qp, 0);
1954 		if (ret < 0)
1955 			goto nack_op_err;
1956 		if (!ret)
1957 			goto rnr_nak;
1958 		qp->r_rcv_len = 0;
1959 		/* FALLTHROUGH */
1960 	case OP(SEND_MIDDLE):
1961 	case OP(RDMA_WRITE_MIDDLE):
1962 send_middle:
1963 		/* Check for invalid length PMTU or posted rwqe len. */
1964 		if (unlikely(tlen != (hdrsize + pmtu + 4)))
1965 			goto nack_inv;
1966 		qp->r_rcv_len += pmtu;
1967 		if (unlikely(qp->r_rcv_len > qp->r_len))
1968 			goto nack_inv;
1969 		qib_copy_sge(&qp->r_sge, data, pmtu, 1);
1970 		break;
1971 
1972 	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
1973 		/* consume RWQE */
1974 		ret = qib_get_rwqe(qp, 1);
1975 		if (ret < 0)
1976 			goto nack_op_err;
1977 		if (!ret)
1978 			goto rnr_nak;
1979 		goto send_last_imm;
1980 
1981 	case OP(SEND_ONLY):
1982 	case OP(SEND_ONLY_WITH_IMMEDIATE):
1983 		ret = qib_get_rwqe(qp, 0);
1984 		if (ret < 0)
1985 			goto nack_op_err;
1986 		if (!ret)
1987 			goto rnr_nak;
1988 		qp->r_rcv_len = 0;
1989 		if (opcode == OP(SEND_ONLY))
1990 			goto no_immediate_data;
1991 		/* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */
1992 	case OP(SEND_LAST_WITH_IMMEDIATE):
1993 send_last_imm:
1994 		wc.ex.imm_data = ohdr->u.imm_data;
1995 		hdrsize += 4;
1996 		wc.wc_flags = IB_WC_WITH_IMM;
1997 		goto send_last;
1998 	case OP(SEND_LAST):
1999 	case OP(RDMA_WRITE_LAST):
2000 no_immediate_data:
2001 		wc.wc_flags = 0;
2002 		wc.ex.imm_data = 0;
2003 send_last:
2004 		/* Get the number of bytes the message was padded by. */
2005 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
2006 		/* Check for invalid length. */
2007 		/* XXX LAST len should be >= 1 */
2008 		if (unlikely(tlen < (hdrsize + pad + 4)))
2009 			goto nack_inv;
2010 		/* Don't count the CRC. */
2011 		tlen -= (hdrsize + pad + 4);
2012 		wc.byte_len = tlen + qp->r_rcv_len;
2013 		if (unlikely(wc.byte_len > qp->r_len))
2014 			goto nack_inv;
2015 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
2016 		rvt_put_ss(&qp->r_sge);
2017 		qp->r_msn++;
2018 		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
2019 			break;
2020 		wc.wr_id = qp->r_wr_id;
2021 		wc.status = IB_WC_SUCCESS;
2022 		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
2023 		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
2024 			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
2025 		else
2026 			wc.opcode = IB_WC_RECV;
2027 		wc.qp = &qp->ibqp;
2028 		wc.src_qp = qp->remote_qpn;
2029 		wc.slid = qp->remote_ah_attr.dlid;
2030 		wc.sl = qp->remote_ah_attr.sl;
2031 		/* zero fields that are N/A */
2032 		wc.vendor_err = 0;
2033 		wc.pkey_index = 0;
2034 		wc.dlid_path_bits = 0;
2035 		wc.port_num = 0;
2036 		/* Signal completion event if the solicited bit is set. */
2037 		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
2038 			     (ohdr->bth[0] &
2039 			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
2040 		break;
2041 
2042 	case OP(RDMA_WRITE_FIRST):
2043 	case OP(RDMA_WRITE_ONLY):
2044 	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
2045 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
2046 			goto nack_inv;
2047 		/* consume RWQE */
2048 		reth = &ohdr->u.rc.reth;
2049 		hdrsize += sizeof(*reth);
2050 		qp->r_len = be32_to_cpu(reth->length);
2051 		qp->r_rcv_len = 0;
2052 		qp->r_sge.sg_list = NULL;
2053 		if (qp->r_len != 0) {
2054 			u32 rkey = be32_to_cpu(reth->rkey);
2055 			u64 vaddr = be64_to_cpu(reth->vaddr);
2056 			int ok;
2057 
2058 			/* Check rkey & NAK */
2059 			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
2060 					 rkey, IB_ACCESS_REMOTE_WRITE);
2061 			if (unlikely(!ok))
2062 				goto nack_acc;
2063 			qp->r_sge.num_sge = 1;
2064 		} else {
2065 			qp->r_sge.num_sge = 0;
2066 			qp->r_sge.sge.mr = NULL;
2067 			qp->r_sge.sge.vaddr = NULL;
2068 			qp->r_sge.sge.length = 0;
2069 			qp->r_sge.sge.sge_length = 0;
2070 		}
2071 		if (opcode == OP(RDMA_WRITE_FIRST))
2072 			goto send_middle;
2073 		else if (opcode == OP(RDMA_WRITE_ONLY))
2074 			goto no_immediate_data;
2075 		ret = qib_get_rwqe(qp, 1);
2076 		if (ret < 0)
2077 			goto nack_op_err;
2078 		if (!ret)
2079 			goto rnr_nak;
2080 		wc.ex.imm_data = ohdr->u.rc.imm_data;
2081 		hdrsize += 4;
2082 		wc.wc_flags = IB_WC_WITH_IMM;
2083 		goto send_last;
2084 
2085 	case OP(RDMA_READ_REQUEST): {
2086 		struct rvt_ack_entry *e;
2087 		u32 len;
2088 		u8 next;
2089 
2090 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
2091 			goto nack_inv;
2092 		next = qp->r_head_ack_queue + 1;
2093 		/* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
2094 		if (next > QIB_MAX_RDMA_ATOMIC)
2095 			next = 0;
2096 		spin_lock_irqsave(&qp->s_lock, flags);
2097 		if (unlikely(next == qp->s_tail_ack_queue)) {
2098 			if (!qp->s_ack_queue[next].sent)
2099 				goto nack_inv_unlck;
2100 			qib_update_ack_queue(qp, next);
2101 		}
2102 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
2103 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2104 			rvt_put_mr(e->rdma_sge.mr);
2105 			e->rdma_sge.mr = NULL;
2106 		}
2107 		reth = &ohdr->u.rc.reth;
2108 		len = be32_to_cpu(reth->length);
2109 		if (len) {
2110 			u32 rkey = be32_to_cpu(reth->rkey);
2111 			u64 vaddr = be64_to_cpu(reth->vaddr);
2112 			int ok;
2113 
2114 			/* Check rkey & NAK */
2115 			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
2116 					 rkey, IB_ACCESS_REMOTE_READ);
2117 			if (unlikely(!ok))
2118 				goto nack_acc_unlck;
2119 			/*
2120 			 * Update the next expected PSN.  We add 1 later
2121 			 * below, so only add the remainder here.
2122 			 */
2123 			if (len > pmtu)
2124 				qp->r_psn += (len - 1) / pmtu;
2125 		} else {
2126 			e->rdma_sge.mr = NULL;
2127 			e->rdma_sge.vaddr = NULL;
2128 			e->rdma_sge.length = 0;
2129 			e->rdma_sge.sge_length = 0;
2130 		}
2131 		e->opcode = opcode;
2132 		e->sent = 0;
2133 		e->psn = psn;
2134 		e->lpsn = qp->r_psn;
2135 		/*
2136 		 * We need to increment the MSN here instead of when we
2137 		 * finish sending the result since a duplicate request would
2138 		 * increment it more than once.
2139 		 */
2140 		qp->r_msn++;
2141 		qp->r_psn++;
2142 		qp->r_state = opcode;
2143 		qp->r_nak_state = 0;
2144 		qp->r_head_ack_queue = next;
2145 
2146 		/* Schedule the send tasklet. */
2147 		qp->s_flags |= RVT_S_RESP_PENDING;
2148 		qib_schedule_send(qp);
2149 
2150 		goto sunlock;
2151 	}
2152 
2153 	case OP(COMPARE_SWAP):
2154 	case OP(FETCH_ADD): {
2155 		struct ib_atomic_eth *ateth;
2156 		struct rvt_ack_entry *e;
2157 		u64 vaddr;
2158 		atomic64_t *maddr;
2159 		u64 sdata;
2160 		u32 rkey;
2161 		u8 next;
2162 
2163 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
2164 			goto nack_inv;
2165 		next = qp->r_head_ack_queue + 1;
2166 		if (next > QIB_MAX_RDMA_ATOMIC)
2167 			next = 0;
2168 		spin_lock_irqsave(&qp->s_lock, flags);
2169 		if (unlikely(next == qp->s_tail_ack_queue)) {
2170 			if (!qp->s_ack_queue[next].sent)
2171 				goto nack_inv_unlck;
2172 			qib_update_ack_queue(qp, next);
2173 		}
2174 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
2175 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2176 			rvt_put_mr(e->rdma_sge.mr);
2177 			e->rdma_sge.mr = NULL;
2178 		}
2179 		ateth = &ohdr->u.atomic_eth;
2180 		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
2181 			be32_to_cpu(ateth->vaddr[1]);
2182 		if (unlikely(vaddr & (sizeof(u64) - 1)))
2183 			goto nack_inv_unlck;
2184 		rkey = be32_to_cpu(ateth->rkey);
2185 		/* Check rkey & NAK */
2186 		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
2187 					  vaddr, rkey,
2188 					  IB_ACCESS_REMOTE_ATOMIC)))
2189 			goto nack_acc_unlck;
2190 		/* Perform atomic OP and save result. */
2191 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
2192 		sdata = be64_to_cpu(ateth->swap_data);
2193 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
2194 			(u64) atomic64_add_return(sdata, maddr) - sdata :
2195 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
2196 				      be64_to_cpu(ateth->compare_data),
2197 				      sdata);
2198 		rvt_put_mr(qp->r_sge.sge.mr);
2199 		qp->r_sge.num_sge = 0;
2200 		e->opcode = opcode;
2201 		e->sent = 0;
2202 		e->psn = psn;
2203 		e->lpsn = psn;
2204 		qp->r_msn++;
2205 		qp->r_psn++;
2206 		qp->r_state = opcode;
2207 		qp->r_nak_state = 0;
2208 		qp->r_head_ack_queue = next;
2209 
2210 		/* Schedule the send tasklet. */
2211 		qp->s_flags |= RVT_S_RESP_PENDING;
2212 		qib_schedule_send(qp);
2213 
2214 		goto sunlock;
2215 	}
2216 
2217 	default:
2218 		/* NAK unknown opcodes. */
2219 		goto nack_inv;
2220 	}
2221 	qp->r_psn++;
2222 	qp->r_state = opcode;
2223 	qp->r_ack_psn = psn;
2224 	qp->r_nak_state = 0;
2225 	/* Send an ACK if requested or required. */
2226 	if (psn & (1 << 31))
2227 		goto send_ack;
2228 	return;
2229 
2230 rnr_nak:
2231 	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
2232 	qp->r_ack_psn = qp->r_psn;
2233 	/* Queue RNR NAK for later */
2234 	if (list_empty(&qp->rspwait)) {
2235 		qp->r_flags |= RVT_R_RSP_NAK;
2236 		atomic_inc(&qp->refcount);
2237 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2238 	}
2239 	return;
2240 
2241 nack_op_err:
2242 	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2243 	qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
2244 	qp->r_ack_psn = qp->r_psn;
2245 	/* Queue NAK for later */
2246 	if (list_empty(&qp->rspwait)) {
2247 		qp->r_flags |= RVT_R_RSP_NAK;
2248 		atomic_inc(&qp->refcount);
2249 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2250 	}
2251 	return;
2252 
2253 nack_inv_unlck:
2254 	spin_unlock_irqrestore(&qp->s_lock, flags);
2255 nack_inv:
2256 	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
2257 	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
2258 	qp->r_ack_psn = qp->r_psn;
2259 	/* Queue NAK for later */
2260 	if (list_empty(&qp->rspwait)) {
2261 		qp->r_flags |= RVT_R_RSP_NAK;
2262 		atomic_inc(&qp->refcount);
2263 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
2264 	}
2265 	return;
2266 
2267 nack_acc_unlck:
2268 	spin_unlock_irqrestore(&qp->s_lock, flags);
2269 nack_acc:
2270 	qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
2271 	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
2272 	qp->r_ack_psn = qp->r_psn;
2273 send_ack:
2274 	qib_send_rc_ack(qp);
2275 	return;
2276 
2277 sunlock:
2278 	spin_unlock_irqrestore(&qp->s_lock, flags);
2279 }
2280