1 /* 2 * Copyright(c) 2015, 2016 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 #include <linux/io.h> 49 #include <rdma/rdma_vt.h> 50 #include <rdma/rdmavt_qp.h> 51 52 #include "hfi.h" 53 #include "qp.h" 54 #include "verbs_txreq.h" 55 #include "trace.h" 56 57 /* cut down ridiculously long IB macro names */ 58 #define OP(x) RC_OP(x) 59 60 static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, 61 u32 psn, u32 pmtu) 62 { 63 u32 len; 64 65 len = delta_psn(psn, wqe->psn) * pmtu; 66 ss->sge = wqe->sg_list[0]; 67 ss->sg_list = wqe->sg_list + 1; 68 ss->num_sge = wqe->wr.num_sge; 69 ss->total_len = wqe->length; 70 rvt_skip_sge(ss, len, false); 71 return wqe->length - len; 72 } 73 74 /** 75 * make_rc_ack - construct a response packet (ACK, NAK, or RDMA read) 76 * @dev: the device for this QP 77 * @qp: a pointer to the QP 78 * @ohdr: a pointer to the IB header being constructed 79 * @ps: the xmit packet state 80 * 81 * Return 1 if constructed; otherwise, return 0. 82 * Note that we are in the responder's side of the QP context. 83 * Note the QP s_lock must be held. 84 */ 85 static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, 86 struct ib_other_headers *ohdr, 87 struct hfi1_pkt_state *ps) 88 { 89 struct rvt_ack_entry *e; 90 u32 hwords; 91 u32 len; 92 u32 bth0; 93 u32 bth2; 94 int middle = 0; 95 u32 pmtu = qp->pmtu; 96 struct hfi1_qp_priv *priv = qp->priv; 97 98 lockdep_assert_held(&qp->s_lock); 99 /* Don't send an ACK if we aren't supposed to. */ 100 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) 101 goto bail; 102 103 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 104 hwords = 5; 105 106 switch (qp->s_ack_state) { 107 case OP(RDMA_READ_RESPONSE_LAST): 108 case OP(RDMA_READ_RESPONSE_ONLY): 109 e = &qp->s_ack_queue[qp->s_tail_ack_queue]; 110 if (e->rdma_sge.mr) { 111 rvt_put_mr(e->rdma_sge.mr); 112 e->rdma_sge.mr = NULL; 113 } 114 /* FALLTHROUGH */ 115 case OP(ATOMIC_ACKNOWLEDGE): 116 /* 117 * We can increment the tail pointer now that the last 118 * response has been sent instead of only being 119 * constructed. 120 */ 121 if (++qp->s_tail_ack_queue > HFI1_MAX_RDMA_ATOMIC) 122 qp->s_tail_ack_queue = 0; 123 /* FALLTHROUGH */ 124 case OP(SEND_ONLY): 125 case OP(ACKNOWLEDGE): 126 /* Check for no next entry in the queue. */ 127 if (qp->r_head_ack_queue == qp->s_tail_ack_queue) { 128 if (qp->s_flags & RVT_S_ACK_PENDING) 129 goto normal; 130 goto bail; 131 } 132 133 e = &qp->s_ack_queue[qp->s_tail_ack_queue]; 134 if (e->opcode == OP(RDMA_READ_REQUEST)) { 135 /* 136 * If a RDMA read response is being resent and 137 * we haven't seen the duplicate request yet, 138 * then stop sending the remaining responses the 139 * responder has seen until the requester re-sends it. 140 */ 141 len = e->rdma_sge.sge_length; 142 if (len && !e->rdma_sge.mr) { 143 qp->s_tail_ack_queue = qp->r_head_ack_queue; 144 goto bail; 145 } 146 /* Copy SGE state in case we need to resend */ 147 ps->s_txreq->mr = e->rdma_sge.mr; 148 if (ps->s_txreq->mr) 149 rvt_get_mr(ps->s_txreq->mr); 150 qp->s_ack_rdma_sge.sge = e->rdma_sge; 151 qp->s_ack_rdma_sge.num_sge = 1; 152 ps->s_txreq->ss = &qp->s_ack_rdma_sge; 153 if (len > pmtu) { 154 len = pmtu; 155 qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST); 156 } else { 157 qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY); 158 e->sent = 1; 159 } 160 ohdr->u.aeth = rvt_compute_aeth(qp); 161 hwords++; 162 qp->s_ack_rdma_psn = e->psn; 163 bth2 = mask_psn(qp->s_ack_rdma_psn++); 164 } else { 165 /* COMPARE_SWAP or FETCH_ADD */ 166 ps->s_txreq->ss = NULL; 167 len = 0; 168 qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); 169 ohdr->u.at.aeth = rvt_compute_aeth(qp); 170 ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); 171 hwords += sizeof(ohdr->u.at) / sizeof(u32); 172 bth2 = mask_psn(e->psn); 173 e->sent = 1; 174 } 175 bth0 = qp->s_ack_state << 24; 176 break; 177 178 case OP(RDMA_READ_RESPONSE_FIRST): 179 qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE); 180 /* FALLTHROUGH */ 181 case OP(RDMA_READ_RESPONSE_MIDDLE): 182 ps->s_txreq->ss = &qp->s_ack_rdma_sge; 183 ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr; 184 if (ps->s_txreq->mr) 185 rvt_get_mr(ps->s_txreq->mr); 186 len = qp->s_ack_rdma_sge.sge.sge_length; 187 if (len > pmtu) { 188 len = pmtu; 189 middle = HFI1_CAP_IS_KSET(SDMA_AHG); 190 } else { 191 ohdr->u.aeth = rvt_compute_aeth(qp); 192 hwords++; 193 qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST); 194 e = &qp->s_ack_queue[qp->s_tail_ack_queue]; 195 e->sent = 1; 196 } 197 bth0 = qp->s_ack_state << 24; 198 bth2 = mask_psn(qp->s_ack_rdma_psn++); 199 break; 200 201 default: 202 normal: 203 /* 204 * Send a regular ACK. 205 * Set the s_ack_state so we wait until after sending 206 * the ACK before setting s_ack_state to ACKNOWLEDGE 207 * (see above). 208 */ 209 qp->s_ack_state = OP(SEND_ONLY); 210 qp->s_flags &= ~RVT_S_ACK_PENDING; 211 ps->s_txreq->ss = NULL; 212 if (qp->s_nak_state) 213 ohdr->u.aeth = 214 cpu_to_be32((qp->r_msn & IB_MSN_MASK) | 215 (qp->s_nak_state << 216 IB_AETH_CREDIT_SHIFT)); 217 else 218 ohdr->u.aeth = rvt_compute_aeth(qp); 219 hwords++; 220 len = 0; 221 bth0 = OP(ACKNOWLEDGE) << 24; 222 bth2 = mask_psn(qp->s_ack_psn); 223 } 224 qp->s_rdma_ack_cnt++; 225 qp->s_hdrwords = hwords; 226 ps->s_txreq->sde = priv->s_sde; 227 ps->s_txreq->s_cur_size = len; 228 hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps); 229 /* pbc */ 230 ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; 231 return 1; 232 233 bail: 234 qp->s_ack_state = OP(ACKNOWLEDGE); 235 /* 236 * Ensure s_rdma_ack_cnt changes are committed prior to resetting 237 * RVT_S_RESP_PENDING 238 */ 239 smp_wmb(); 240 qp->s_flags &= ~(RVT_S_RESP_PENDING 241 | RVT_S_ACK_PENDING 242 | RVT_S_AHG_VALID); 243 return 0; 244 } 245 246 /** 247 * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) 248 * @qp: a pointer to the QP 249 * 250 * Assumes s_lock is held. 251 * 252 * Return 1 if constructed; otherwise, return 0. 253 */ 254 int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) 255 { 256 struct hfi1_qp_priv *priv = qp->priv; 257 struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); 258 struct ib_other_headers *ohdr; 259 struct rvt_sge_state *ss; 260 struct rvt_swqe *wqe; 261 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 262 u32 hwords = 5; 263 u32 len; 264 u32 bth0 = 0; 265 u32 bth2; 266 u32 pmtu = qp->pmtu; 267 char newreq; 268 int middle = 0; 269 int delta; 270 271 lockdep_assert_held(&qp->s_lock); 272 ps->s_txreq = get_txreq(ps->dev, qp); 273 if (IS_ERR(ps->s_txreq)) 274 goto bail_no_tx; 275 276 ohdr = &ps->s_txreq->phdr.hdr.u.oth; 277 if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) 278 ohdr = &ps->s_txreq->phdr.hdr.u.l.oth; 279 280 /* Sending responses has higher priority over sending requests. */ 281 if ((qp->s_flags & RVT_S_RESP_PENDING) && 282 make_rc_ack(dev, qp, ohdr, ps)) 283 return 1; 284 285 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { 286 if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) 287 goto bail; 288 /* We are in the error state, flush the work request. */ 289 smp_read_barrier_depends(); /* see post_one_send() */ 290 if (qp->s_last == READ_ONCE(qp->s_head)) 291 goto bail; 292 /* If DMAs are in progress, we can't flush immediately. */ 293 if (iowait_sdma_pending(&priv->s_iowait)) { 294 qp->s_flags |= RVT_S_WAIT_DMA; 295 goto bail; 296 } 297 clear_ahg(qp); 298 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 299 hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 300 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); 301 /* will get called again */ 302 goto done_free_tx; 303 } 304 305 if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK)) 306 goto bail; 307 308 if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) { 309 if (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) { 310 qp->s_flags |= RVT_S_WAIT_PSN; 311 goto bail; 312 } 313 qp->s_sending_psn = qp->s_psn; 314 qp->s_sending_hpsn = qp->s_psn - 1; 315 } 316 317 /* Send a request. */ 318 wqe = rvt_get_swqe_ptr(qp, qp->s_cur); 319 switch (qp->s_state) { 320 default: 321 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) 322 goto bail; 323 /* 324 * Resend an old request or start a new one. 325 * 326 * We keep track of the current SWQE so that 327 * we don't reset the "furthest progress" state 328 * if we need to back up. 329 */ 330 newreq = 0; 331 if (qp->s_cur == qp->s_tail) { 332 /* Check if send work queue is empty. */ 333 smp_read_barrier_depends(); /* see post_one_send() */ 334 if (qp->s_tail == READ_ONCE(qp->s_head)) { 335 clear_ahg(qp); 336 goto bail; 337 } 338 /* 339 * If a fence is requested, wait for previous 340 * RDMA read and atomic operations to finish. 341 */ 342 if ((wqe->wr.send_flags & IB_SEND_FENCE) && 343 qp->s_num_rd_atomic) { 344 qp->s_flags |= RVT_S_WAIT_FENCE; 345 goto bail; 346 } 347 /* 348 * Local operations are processed immediately 349 * after all prior requests have completed 350 */ 351 if (wqe->wr.opcode == IB_WR_REG_MR || 352 wqe->wr.opcode == IB_WR_LOCAL_INV) { 353 int local_ops = 0; 354 int err = 0; 355 356 if (qp->s_last != qp->s_cur) 357 goto bail; 358 if (++qp->s_cur == qp->s_size) 359 qp->s_cur = 0; 360 if (++qp->s_tail == qp->s_size) 361 qp->s_tail = 0; 362 if (!(wqe->wr.send_flags & 363 RVT_SEND_COMPLETION_ONLY)) { 364 err = rvt_invalidate_rkey( 365 qp, 366 wqe->wr.ex.invalidate_rkey); 367 local_ops = 1; 368 } 369 hfi1_send_complete(qp, wqe, 370 err ? IB_WC_LOC_PROT_ERR 371 : IB_WC_SUCCESS); 372 if (local_ops) 373 atomic_dec(&qp->local_ops_pending); 374 qp->s_hdrwords = 0; 375 goto done_free_tx; 376 } 377 378 newreq = 1; 379 qp->s_psn = wqe->psn; 380 } 381 /* 382 * Note that we have to be careful not to modify the 383 * original work request since we may need to resend 384 * it. 385 */ 386 len = wqe->length; 387 ss = &qp->s_sge; 388 bth2 = mask_psn(qp->s_psn); 389 switch (wqe->wr.opcode) { 390 case IB_WR_SEND: 391 case IB_WR_SEND_WITH_IMM: 392 case IB_WR_SEND_WITH_INV: 393 /* If no credit, return. */ 394 if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && 395 rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { 396 qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; 397 goto bail; 398 } 399 if (len > pmtu) { 400 qp->s_state = OP(SEND_FIRST); 401 len = pmtu; 402 break; 403 } 404 if (wqe->wr.opcode == IB_WR_SEND) { 405 qp->s_state = OP(SEND_ONLY); 406 } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 407 qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE); 408 /* Immediate data comes after the BTH */ 409 ohdr->u.imm_data = wqe->wr.ex.imm_data; 410 hwords += 1; 411 } else { 412 qp->s_state = OP(SEND_ONLY_WITH_INVALIDATE); 413 /* Invalidate rkey comes after the BTH */ 414 ohdr->u.ieth = cpu_to_be32( 415 wqe->wr.ex.invalidate_rkey); 416 hwords += 1; 417 } 418 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 419 bth0 |= IB_BTH_SOLICITED; 420 bth2 |= IB_BTH_REQ_ACK; 421 if (++qp->s_cur == qp->s_size) 422 qp->s_cur = 0; 423 break; 424 425 case IB_WR_RDMA_WRITE: 426 if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) 427 qp->s_lsn++; 428 /* FALLTHROUGH */ 429 case IB_WR_RDMA_WRITE_WITH_IMM: 430 /* If no credit, return. */ 431 if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && 432 rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { 433 qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; 434 goto bail; 435 } 436 put_ib_reth_vaddr( 437 wqe->rdma_wr.remote_addr, 438 &ohdr->u.rc.reth); 439 ohdr->u.rc.reth.rkey = 440 cpu_to_be32(wqe->rdma_wr.rkey); 441 ohdr->u.rc.reth.length = cpu_to_be32(len); 442 hwords += sizeof(struct ib_reth) / sizeof(u32); 443 if (len > pmtu) { 444 qp->s_state = OP(RDMA_WRITE_FIRST); 445 len = pmtu; 446 break; 447 } 448 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) { 449 qp->s_state = OP(RDMA_WRITE_ONLY); 450 } else { 451 qp->s_state = 452 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE); 453 /* Immediate data comes after RETH */ 454 ohdr->u.rc.imm_data = wqe->wr.ex.imm_data; 455 hwords += 1; 456 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 457 bth0 |= IB_BTH_SOLICITED; 458 } 459 bth2 |= IB_BTH_REQ_ACK; 460 if (++qp->s_cur == qp->s_size) 461 qp->s_cur = 0; 462 break; 463 464 case IB_WR_RDMA_READ: 465 /* 466 * Don't allow more operations to be started 467 * than the QP limits allow. 468 */ 469 if (newreq) { 470 if (qp->s_num_rd_atomic >= 471 qp->s_max_rd_atomic) { 472 qp->s_flags |= RVT_S_WAIT_RDMAR; 473 goto bail; 474 } 475 qp->s_num_rd_atomic++; 476 if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) 477 qp->s_lsn++; 478 } 479 put_ib_reth_vaddr( 480 wqe->rdma_wr.remote_addr, 481 &ohdr->u.rc.reth); 482 ohdr->u.rc.reth.rkey = 483 cpu_to_be32(wqe->rdma_wr.rkey); 484 ohdr->u.rc.reth.length = cpu_to_be32(len); 485 qp->s_state = OP(RDMA_READ_REQUEST); 486 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 487 ss = NULL; 488 len = 0; 489 bth2 |= IB_BTH_REQ_ACK; 490 if (++qp->s_cur == qp->s_size) 491 qp->s_cur = 0; 492 break; 493 494 case IB_WR_ATOMIC_CMP_AND_SWP: 495 case IB_WR_ATOMIC_FETCH_AND_ADD: 496 /* 497 * Don't allow more operations to be started 498 * than the QP limits allow. 499 */ 500 if (newreq) { 501 if (qp->s_num_rd_atomic >= 502 qp->s_max_rd_atomic) { 503 qp->s_flags |= RVT_S_WAIT_RDMAR; 504 goto bail; 505 } 506 qp->s_num_rd_atomic++; 507 if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) 508 qp->s_lsn++; 509 } 510 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 511 qp->s_state = OP(COMPARE_SWAP); 512 put_ib_ateth_swap(wqe->atomic_wr.swap, 513 &ohdr->u.atomic_eth); 514 put_ib_ateth_compare(wqe->atomic_wr.compare_add, 515 &ohdr->u.atomic_eth); 516 } else { 517 qp->s_state = OP(FETCH_ADD); 518 put_ib_ateth_swap(wqe->atomic_wr.compare_add, 519 &ohdr->u.atomic_eth); 520 put_ib_ateth_compare(0, &ohdr->u.atomic_eth); 521 } 522 put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, 523 &ohdr->u.atomic_eth); 524 ohdr->u.atomic_eth.rkey = cpu_to_be32( 525 wqe->atomic_wr.rkey); 526 hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); 527 ss = NULL; 528 len = 0; 529 bth2 |= IB_BTH_REQ_ACK; 530 if (++qp->s_cur == qp->s_size) 531 qp->s_cur = 0; 532 break; 533 534 default: 535 goto bail; 536 } 537 qp->s_sge.sge = wqe->sg_list[0]; 538 qp->s_sge.sg_list = wqe->sg_list + 1; 539 qp->s_sge.num_sge = wqe->wr.num_sge; 540 qp->s_sge.total_len = wqe->length; 541 qp->s_len = wqe->length; 542 if (newreq) { 543 qp->s_tail++; 544 if (qp->s_tail >= qp->s_size) 545 qp->s_tail = 0; 546 } 547 if (wqe->wr.opcode == IB_WR_RDMA_READ) 548 qp->s_psn = wqe->lpsn + 1; 549 else 550 qp->s_psn++; 551 break; 552 553 case OP(RDMA_READ_RESPONSE_FIRST): 554 /* 555 * qp->s_state is normally set to the opcode of the 556 * last packet constructed for new requests and therefore 557 * is never set to RDMA read response. 558 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing 559 * thread to indicate a SEND needs to be restarted from an 560 * earlier PSN without interfering with the sending thread. 561 * See restart_rc(). 562 */ 563 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); 564 /* FALLTHROUGH */ 565 case OP(SEND_FIRST): 566 qp->s_state = OP(SEND_MIDDLE); 567 /* FALLTHROUGH */ 568 case OP(SEND_MIDDLE): 569 bth2 = mask_psn(qp->s_psn++); 570 ss = &qp->s_sge; 571 len = qp->s_len; 572 if (len > pmtu) { 573 len = pmtu; 574 middle = HFI1_CAP_IS_KSET(SDMA_AHG); 575 break; 576 } 577 if (wqe->wr.opcode == IB_WR_SEND) { 578 qp->s_state = OP(SEND_LAST); 579 } else if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 580 qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE); 581 /* Immediate data comes after the BTH */ 582 ohdr->u.imm_data = wqe->wr.ex.imm_data; 583 hwords += 1; 584 } else { 585 qp->s_state = OP(SEND_LAST_WITH_INVALIDATE); 586 /* invalidate data comes after the BTH */ 587 ohdr->u.ieth = cpu_to_be32(wqe->wr.ex.invalidate_rkey); 588 hwords += 1; 589 } 590 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 591 bth0 |= IB_BTH_SOLICITED; 592 bth2 |= IB_BTH_REQ_ACK; 593 qp->s_cur++; 594 if (qp->s_cur >= qp->s_size) 595 qp->s_cur = 0; 596 break; 597 598 case OP(RDMA_READ_RESPONSE_LAST): 599 /* 600 * qp->s_state is normally set to the opcode of the 601 * last packet constructed for new requests and therefore 602 * is never set to RDMA read response. 603 * RDMA_READ_RESPONSE_LAST is used by the ACK processing 604 * thread to indicate a RDMA write needs to be restarted from 605 * an earlier PSN without interfering with the sending thread. 606 * See restart_rc(). 607 */ 608 qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu); 609 /* FALLTHROUGH */ 610 case OP(RDMA_WRITE_FIRST): 611 qp->s_state = OP(RDMA_WRITE_MIDDLE); 612 /* FALLTHROUGH */ 613 case OP(RDMA_WRITE_MIDDLE): 614 bth2 = mask_psn(qp->s_psn++); 615 ss = &qp->s_sge; 616 len = qp->s_len; 617 if (len > pmtu) { 618 len = pmtu; 619 middle = HFI1_CAP_IS_KSET(SDMA_AHG); 620 break; 621 } 622 if (wqe->wr.opcode == IB_WR_RDMA_WRITE) { 623 qp->s_state = OP(RDMA_WRITE_LAST); 624 } else { 625 qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE); 626 /* Immediate data comes after the BTH */ 627 ohdr->u.imm_data = wqe->wr.ex.imm_data; 628 hwords += 1; 629 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 630 bth0 |= IB_BTH_SOLICITED; 631 } 632 bth2 |= IB_BTH_REQ_ACK; 633 qp->s_cur++; 634 if (qp->s_cur >= qp->s_size) 635 qp->s_cur = 0; 636 break; 637 638 case OP(RDMA_READ_RESPONSE_MIDDLE): 639 /* 640 * qp->s_state is normally set to the opcode of the 641 * last packet constructed for new requests and therefore 642 * is never set to RDMA read response. 643 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing 644 * thread to indicate a RDMA read needs to be restarted from 645 * an earlier PSN without interfering with the sending thread. 646 * See restart_rc(). 647 */ 648 len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; 649 put_ib_reth_vaddr( 650 wqe->rdma_wr.remote_addr + len, 651 &ohdr->u.rc.reth); 652 ohdr->u.rc.reth.rkey = 653 cpu_to_be32(wqe->rdma_wr.rkey); 654 ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); 655 qp->s_state = OP(RDMA_READ_REQUEST); 656 hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32); 657 bth2 = mask_psn(qp->s_psn) | IB_BTH_REQ_ACK; 658 qp->s_psn = wqe->lpsn + 1; 659 ss = NULL; 660 len = 0; 661 qp->s_cur++; 662 if (qp->s_cur == qp->s_size) 663 qp->s_cur = 0; 664 break; 665 } 666 qp->s_sending_hpsn = bth2; 667 delta = delta_psn(bth2, wqe->psn); 668 if (delta && delta % HFI1_PSN_CREDIT == 0) 669 bth2 |= IB_BTH_REQ_ACK; 670 if (qp->s_flags & RVT_S_SEND_ONE) { 671 qp->s_flags &= ~RVT_S_SEND_ONE; 672 qp->s_flags |= RVT_S_WAIT_ACK; 673 bth2 |= IB_BTH_REQ_ACK; 674 } 675 qp->s_len -= len; 676 qp->s_hdrwords = hwords; 677 ps->s_txreq->sde = priv->s_sde; 678 ps->s_txreq->ss = ss; 679 ps->s_txreq->s_cur_size = len; 680 hfi1_make_ruc_header( 681 qp, 682 ohdr, 683 bth0 | (qp->s_state << 24), 684 bth2, 685 middle, 686 ps); 687 /* pbc */ 688 ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2; 689 return 1; 690 691 done_free_tx: 692 hfi1_put_txreq(ps->s_txreq); 693 ps->s_txreq = NULL; 694 return 1; 695 696 bail: 697 hfi1_put_txreq(ps->s_txreq); 698 699 bail_no_tx: 700 ps->s_txreq = NULL; 701 qp->s_flags &= ~RVT_S_BUSY; 702 qp->s_hdrwords = 0; 703 return 0; 704 } 705 706 /** 707 * hfi1_send_rc_ack - Construct an ACK packet and send it 708 * @qp: a pointer to the QP 709 * 710 * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). 711 * Note that RDMA reads and atomics are handled in the 712 * send side QP state and send engine. 713 */ 714 void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, 715 int is_fecn) 716 { 717 struct hfi1_ibport *ibp = rcd_to_iport(rcd); 718 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 719 u64 pbc, pbc_flags = 0; 720 u16 lrh0; 721 u16 sc5; 722 u32 bth0; 723 u32 hwords; 724 u32 vl, plen; 725 struct send_context *sc; 726 struct pio_buf *pbuf; 727 struct ib_header hdr; 728 struct ib_other_headers *ohdr; 729 unsigned long flags; 730 731 /* clear the defer count */ 732 qp->r_adefered = 0; 733 734 /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ 735 if (qp->s_flags & RVT_S_RESP_PENDING) 736 goto queue_ack; 737 738 /* Ensure s_rdma_ack_cnt changes are committed */ 739 smp_read_barrier_depends(); 740 if (qp->s_rdma_ack_cnt) 741 goto queue_ack; 742 743 /* Construct the header */ 744 /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4 */ 745 hwords = 6; 746 if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { 747 hwords += hfi1_make_grh(ibp, &hdr.u.l.grh, 748 rdma_ah_read_grh(&qp->remote_ah_attr), 749 hwords, 0); 750 ohdr = &hdr.u.l.oth; 751 lrh0 = HFI1_LRH_GRH; 752 } else { 753 ohdr = &hdr.u.oth; 754 lrh0 = HFI1_LRH_BTH; 755 } 756 /* read pkey_index w/o lock (its atomic) */ 757 bth0 = hfi1_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24); 758 if (qp->s_mig_state == IB_MIG_MIGRATED) 759 bth0 |= IB_BTH_MIG_REQ; 760 if (qp->r_nak_state) 761 ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) | 762 (qp->r_nak_state << 763 IB_AETH_CREDIT_SHIFT)); 764 else 765 ohdr->u.aeth = rvt_compute_aeth(qp); 766 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; 767 /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ 768 pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT); 769 lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr) 770 & 0xf) << 4; 771 hdr.lrh[0] = cpu_to_be16(lrh0); 772 hdr.lrh[1] = cpu_to_be16(rdma_ah_get_dlid(&qp->remote_ah_attr)); 773 hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC); 774 hdr.lrh[3] = cpu_to_be16(ppd->lid | 775 rdma_ah_get_path_bits(&qp->remote_ah_attr)); 776 ohdr->bth[0] = cpu_to_be32(bth0); 777 ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); 778 ohdr->bth[1] |= cpu_to_be32((!!is_fecn) << IB_BECN_SHIFT); 779 ohdr->bth[2] = cpu_to_be32(mask_psn(qp->r_ack_psn)); 780 781 /* Don't try to send ACKs if the link isn't ACTIVE */ 782 if (driver_lstate(ppd) != IB_PORT_ACTIVE) 783 return; 784 785 sc = rcd->sc; 786 plen = 2 /* PBC */ + hwords; 787 vl = sc_to_vlt(ppd->dd, sc5); 788 pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); 789 790 pbuf = sc_buffer_alloc(sc, plen, NULL, NULL); 791 if (!pbuf) { 792 /* 793 * We have no room to send at the moment. Pass 794 * responsibility for sending the ACK to the send engine 795 * so that when enough buffer space becomes available, 796 * the ACK is sent ahead of other outgoing packets. 797 */ 798 goto queue_ack; 799 } 800 801 trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr); 802 803 /* write the pbc and data */ 804 ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); 805 806 return; 807 808 queue_ack: 809 spin_lock_irqsave(&qp->s_lock, flags); 810 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) 811 goto unlock; 812 this_cpu_inc(*ibp->rvp.rc_qacks); 813 qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; 814 qp->s_nak_state = qp->r_nak_state; 815 qp->s_ack_psn = qp->r_ack_psn; 816 if (is_fecn) 817 qp->s_flags |= RVT_S_ECN; 818 819 /* Schedule the send engine. */ 820 hfi1_schedule_send(qp); 821 unlock: 822 spin_unlock_irqrestore(&qp->s_lock, flags); 823 } 824 825 /** 826 * reset_psn - reset the QP state to send starting from PSN 827 * @qp: the QP 828 * @psn: the packet sequence number to restart at 829 * 830 * This is called from hfi1_rc_rcv() to process an incoming RC ACK 831 * for the given QP. 832 * Called at interrupt level with the QP s_lock held. 833 */ 834 static void reset_psn(struct rvt_qp *qp, u32 psn) 835 { 836 u32 n = qp->s_acked; 837 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); 838 u32 opcode; 839 840 lockdep_assert_held(&qp->s_lock); 841 qp->s_cur = n; 842 843 /* 844 * If we are starting the request from the beginning, 845 * let the normal send code handle initialization. 846 */ 847 if (cmp_psn(psn, wqe->psn) <= 0) { 848 qp->s_state = OP(SEND_LAST); 849 goto done; 850 } 851 852 /* Find the work request opcode corresponding to the given PSN. */ 853 opcode = wqe->wr.opcode; 854 for (;;) { 855 int diff; 856 857 if (++n == qp->s_size) 858 n = 0; 859 if (n == qp->s_tail) 860 break; 861 wqe = rvt_get_swqe_ptr(qp, n); 862 diff = cmp_psn(psn, wqe->psn); 863 if (diff < 0) 864 break; 865 qp->s_cur = n; 866 /* 867 * If we are starting the request from the beginning, 868 * let the normal send code handle initialization. 869 */ 870 if (diff == 0) { 871 qp->s_state = OP(SEND_LAST); 872 goto done; 873 } 874 opcode = wqe->wr.opcode; 875 } 876 877 /* 878 * Set the state to restart in the middle of a request. 879 * Don't change the s_sge, s_cur_sge, or s_cur_size. 880 * See hfi1_make_rc_req(). 881 */ 882 switch (opcode) { 883 case IB_WR_SEND: 884 case IB_WR_SEND_WITH_IMM: 885 qp->s_state = OP(RDMA_READ_RESPONSE_FIRST); 886 break; 887 888 case IB_WR_RDMA_WRITE: 889 case IB_WR_RDMA_WRITE_WITH_IMM: 890 qp->s_state = OP(RDMA_READ_RESPONSE_LAST); 891 break; 892 893 case IB_WR_RDMA_READ: 894 qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE); 895 break; 896 897 default: 898 /* 899 * This case shouldn't happen since its only 900 * one PSN per req. 901 */ 902 qp->s_state = OP(SEND_LAST); 903 } 904 done: 905 qp->s_psn = psn; 906 /* 907 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer 908 * asynchronously before the send engine can get scheduled. 909 * Doing it in hfi1_make_rc_req() is too late. 910 */ 911 if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && 912 (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)) 913 qp->s_flags |= RVT_S_WAIT_PSN; 914 qp->s_flags &= ~RVT_S_AHG_VALID; 915 } 916 917 /* 918 * Back up requester to resend the last un-ACKed request. 919 * The QP r_lock and s_lock should be held and interrupts disabled. 920 */ 921 void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait) 922 { 923 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 924 struct hfi1_ibport *ibp; 925 926 lockdep_assert_held(&qp->r_lock); 927 lockdep_assert_held(&qp->s_lock); 928 if (qp->s_retry == 0) { 929 if (qp->s_mig_state == IB_MIG_ARMED) { 930 hfi1_migrate_qp(qp); 931 qp->s_retry = qp->s_retry_cnt; 932 } else if (qp->s_last == qp->s_acked) { 933 hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); 934 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 935 return; 936 } else { /* need to handle delayed completion */ 937 return; 938 } 939 } else { 940 qp->s_retry--; 941 } 942 943 ibp = to_iport(qp->ibqp.device, qp->port_num); 944 if (wqe->wr.opcode == IB_WR_RDMA_READ) 945 ibp->rvp.n_rc_resends++; 946 else 947 ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn); 948 949 qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | 950 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN | 951 RVT_S_WAIT_ACK); 952 if (wait) 953 qp->s_flags |= RVT_S_SEND_ONE; 954 reset_psn(qp, psn); 955 } 956 957 /* 958 * Set qp->s_sending_psn to the next PSN after the given one. 959 * This would be psn+1 except when RDMA reads are present. 960 */ 961 static void reset_sending_psn(struct rvt_qp *qp, u32 psn) 962 { 963 struct rvt_swqe *wqe; 964 u32 n = qp->s_last; 965 966 lockdep_assert_held(&qp->s_lock); 967 /* Find the work request corresponding to the given PSN. */ 968 for (;;) { 969 wqe = rvt_get_swqe_ptr(qp, n); 970 if (cmp_psn(psn, wqe->lpsn) <= 0) { 971 if (wqe->wr.opcode == IB_WR_RDMA_READ) 972 qp->s_sending_psn = wqe->lpsn + 1; 973 else 974 qp->s_sending_psn = psn + 1; 975 break; 976 } 977 if (++n == qp->s_size) 978 n = 0; 979 if (n == qp->s_tail) 980 break; 981 } 982 } 983 984 /* 985 * This should be called with the QP s_lock held and interrupts disabled. 986 */ 987 void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) 988 { 989 struct ib_other_headers *ohdr; 990 struct rvt_swqe *wqe; 991 u32 opcode; 992 u32 psn; 993 994 lockdep_assert_held(&qp->s_lock); 995 if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK)) 996 return; 997 998 /* Find out where the BTH is */ 999 if (ib_get_lnh(hdr) == HFI1_LRH_BTH) 1000 ohdr = &hdr->u.oth; 1001 else 1002 ohdr = &hdr->u.l.oth; 1003 1004 opcode = ib_bth_get_opcode(ohdr); 1005 if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && 1006 opcode <= OP(ATOMIC_ACKNOWLEDGE)) { 1007 WARN_ON(!qp->s_rdma_ack_cnt); 1008 qp->s_rdma_ack_cnt--; 1009 return; 1010 } 1011 1012 psn = be32_to_cpu(ohdr->bth[2]); 1013 reset_sending_psn(qp, psn); 1014 1015 /* 1016 * Start timer after a packet requesting an ACK has been sent and 1017 * there are still requests that haven't been acked. 1018 */ 1019 if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail && 1020 !(qp->s_flags & 1021 (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) && 1022 (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) 1023 rvt_add_retry_timer(qp); 1024 1025 while (qp->s_last != qp->s_acked) { 1026 u32 s_last; 1027 1028 wqe = rvt_get_swqe_ptr(qp, qp->s_last); 1029 if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && 1030 cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) 1031 break; 1032 s_last = qp->s_last; 1033 trace_hfi1_qp_send_completion(qp, wqe, s_last); 1034 if (++s_last >= qp->s_size) 1035 s_last = 0; 1036 qp->s_last = s_last; 1037 /* see post_send() */ 1038 barrier(); 1039 rvt_put_swqe(wqe); 1040 rvt_qp_swqe_complete(qp, 1041 wqe, 1042 ib_hfi1_wc_opcode[wqe->wr.opcode], 1043 IB_WC_SUCCESS); 1044 } 1045 /* 1046 * If we were waiting for sends to complete before re-sending, 1047 * and they are now complete, restart sending. 1048 */ 1049 trace_hfi1_sendcomplete(qp, psn); 1050 if (qp->s_flags & RVT_S_WAIT_PSN && 1051 cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { 1052 qp->s_flags &= ~RVT_S_WAIT_PSN; 1053 qp->s_sending_psn = qp->s_psn; 1054 qp->s_sending_hpsn = qp->s_psn - 1; 1055 hfi1_schedule_send(qp); 1056 } 1057 } 1058 1059 static inline void update_last_psn(struct rvt_qp *qp, u32 psn) 1060 { 1061 qp->s_last_psn = psn; 1062 } 1063 1064 /* 1065 * Generate a SWQE completion. 1066 * This is similar to hfi1_send_complete but has to check to be sure 1067 * that the SGEs are not being referenced if the SWQE is being resent. 1068 */ 1069 static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, 1070 struct rvt_swqe *wqe, 1071 struct hfi1_ibport *ibp) 1072 { 1073 lockdep_assert_held(&qp->s_lock); 1074 /* 1075 * Don't decrement refcount and don't generate a 1076 * completion if the SWQE is being resent until the send 1077 * is finished. 1078 */ 1079 if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 || 1080 cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) { 1081 u32 s_last; 1082 1083 rvt_put_swqe(wqe); 1084 s_last = qp->s_last; 1085 trace_hfi1_qp_send_completion(qp, wqe, s_last); 1086 if (++s_last >= qp->s_size) 1087 s_last = 0; 1088 qp->s_last = s_last; 1089 /* see post_send() */ 1090 barrier(); 1091 rvt_qp_swqe_complete(qp, 1092 wqe, 1093 ib_hfi1_wc_opcode[wqe->wr.opcode], 1094 IB_WC_SUCCESS); 1095 } else { 1096 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 1097 1098 this_cpu_inc(*ibp->rvp.rc_delayed_comp); 1099 /* 1100 * If send progress not running attempt to progress 1101 * SDMA queue. 1102 */ 1103 if (ppd->dd->flags & HFI1_HAS_SEND_DMA) { 1104 struct sdma_engine *engine; 1105 u8 sl = rdma_ah_get_sl(&qp->remote_ah_attr); 1106 u8 sc5; 1107 1108 /* For now use sc to find engine */ 1109 sc5 = ibp->sl_to_sc[sl]; 1110 engine = qp_to_sdma_engine(qp, sc5); 1111 sdma_engine_progress_schedule(engine); 1112 } 1113 } 1114 1115 qp->s_retry = qp->s_retry_cnt; 1116 update_last_psn(qp, wqe->lpsn); 1117 1118 /* 1119 * If we are completing a request which is in the process of 1120 * being resent, we can stop re-sending it since we know the 1121 * responder has already seen it. 1122 */ 1123 if (qp->s_acked == qp->s_cur) { 1124 if (++qp->s_cur >= qp->s_size) 1125 qp->s_cur = 0; 1126 qp->s_acked = qp->s_cur; 1127 wqe = rvt_get_swqe_ptr(qp, qp->s_cur); 1128 if (qp->s_acked != qp->s_tail) { 1129 qp->s_state = OP(SEND_LAST); 1130 qp->s_psn = wqe->psn; 1131 } 1132 } else { 1133 if (++qp->s_acked >= qp->s_size) 1134 qp->s_acked = 0; 1135 if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur) 1136 qp->s_draining = 0; 1137 wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 1138 } 1139 return wqe; 1140 } 1141 1142 /** 1143 * do_rc_ack - process an incoming RC ACK 1144 * @qp: the QP the ACK came in on 1145 * @psn: the packet sequence number of the ACK 1146 * @opcode: the opcode of the request that resulted in the ACK 1147 * 1148 * This is called from rc_rcv_resp() to process an incoming RC ACK 1149 * for the given QP. 1150 * May be called at interrupt level, with the QP s_lock held. 1151 * Returns 1 if OK, 0 if current operation should be aborted (NAK). 1152 */ 1153 static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, 1154 u64 val, struct hfi1_ctxtdata *rcd) 1155 { 1156 struct hfi1_ibport *ibp; 1157 enum ib_wc_status status; 1158 struct rvt_swqe *wqe; 1159 int ret = 0; 1160 u32 ack_psn; 1161 int diff; 1162 1163 lockdep_assert_held(&qp->s_lock); 1164 /* 1165 * Note that NAKs implicitly ACK outstanding SEND and RDMA write 1166 * requests and implicitly NAK RDMA read and atomic requests issued 1167 * before the NAK'ed request. The MSN won't include the NAK'ed 1168 * request but will include an ACK'ed request(s). 1169 */ 1170 ack_psn = psn; 1171 if (aeth >> IB_AETH_NAK_SHIFT) 1172 ack_psn--; 1173 wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 1174 ibp = rcd_to_iport(rcd); 1175 1176 /* 1177 * The MSN might be for a later WQE than the PSN indicates so 1178 * only complete WQEs that the PSN finishes. 1179 */ 1180 while ((diff = delta_psn(ack_psn, wqe->lpsn)) >= 0) { 1181 /* 1182 * RDMA_READ_RESPONSE_ONLY is a special case since 1183 * we want to generate completion events for everything 1184 * before the RDMA read, copy the data, then generate 1185 * the completion for the read. 1186 */ 1187 if (wqe->wr.opcode == IB_WR_RDMA_READ && 1188 opcode == OP(RDMA_READ_RESPONSE_ONLY) && 1189 diff == 0) { 1190 ret = 1; 1191 goto bail_stop; 1192 } 1193 /* 1194 * If this request is a RDMA read or atomic, and the ACK is 1195 * for a later operation, this ACK NAKs the RDMA read or 1196 * atomic. In other words, only a RDMA_READ_LAST or ONLY 1197 * can ACK a RDMA read and likewise for atomic ops. Note 1198 * that the NAK case can only happen if relaxed ordering is 1199 * used and requests are sent after an RDMA read or atomic 1200 * is sent but before the response is received. 1201 */ 1202 if ((wqe->wr.opcode == IB_WR_RDMA_READ && 1203 (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) || 1204 ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 1205 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) && 1206 (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) { 1207 /* Retry this request. */ 1208 if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) { 1209 qp->r_flags |= RVT_R_RDMAR_SEQ; 1210 hfi1_restart_rc(qp, qp->s_last_psn + 1, 0); 1211 if (list_empty(&qp->rspwait)) { 1212 qp->r_flags |= RVT_R_RSP_SEND; 1213 rvt_get_qp(qp); 1214 list_add_tail(&qp->rspwait, 1215 &rcd->qp_wait_list); 1216 } 1217 } 1218 /* 1219 * No need to process the ACK/NAK since we are 1220 * restarting an earlier request. 1221 */ 1222 goto bail_stop; 1223 } 1224 if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 1225 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { 1226 u64 *vaddr = wqe->sg_list[0].vaddr; 1227 *vaddr = val; 1228 } 1229 if (qp->s_num_rd_atomic && 1230 (wqe->wr.opcode == IB_WR_RDMA_READ || 1231 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 1232 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { 1233 qp->s_num_rd_atomic--; 1234 /* Restart sending task if fence is complete */ 1235 if ((qp->s_flags & RVT_S_WAIT_FENCE) && 1236 !qp->s_num_rd_atomic) { 1237 qp->s_flags &= ~(RVT_S_WAIT_FENCE | 1238 RVT_S_WAIT_ACK); 1239 hfi1_schedule_send(qp); 1240 } else if (qp->s_flags & RVT_S_WAIT_RDMAR) { 1241 qp->s_flags &= ~(RVT_S_WAIT_RDMAR | 1242 RVT_S_WAIT_ACK); 1243 hfi1_schedule_send(qp); 1244 } 1245 } 1246 wqe = do_rc_completion(qp, wqe, ibp); 1247 if (qp->s_acked == qp->s_tail) 1248 break; 1249 } 1250 1251 switch (aeth >> IB_AETH_NAK_SHIFT) { 1252 case 0: /* ACK */ 1253 this_cpu_inc(*ibp->rvp.rc_acks); 1254 if (qp->s_acked != qp->s_tail) { 1255 /* 1256 * We are expecting more ACKs so 1257 * mod the retry timer. 1258 */ 1259 rvt_mod_retry_timer(qp); 1260 /* 1261 * We can stop re-sending the earlier packets and 1262 * continue with the next packet the receiver wants. 1263 */ 1264 if (cmp_psn(qp->s_psn, psn) <= 0) 1265 reset_psn(qp, psn + 1); 1266 } else { 1267 /* No more acks - kill all timers */ 1268 rvt_stop_rc_timers(qp); 1269 if (cmp_psn(qp->s_psn, psn) <= 0) { 1270 qp->s_state = OP(SEND_LAST); 1271 qp->s_psn = psn + 1; 1272 } 1273 } 1274 if (qp->s_flags & RVT_S_WAIT_ACK) { 1275 qp->s_flags &= ~RVT_S_WAIT_ACK; 1276 hfi1_schedule_send(qp); 1277 } 1278 rvt_get_credit(qp, aeth); 1279 qp->s_rnr_retry = qp->s_rnr_retry_cnt; 1280 qp->s_retry = qp->s_retry_cnt; 1281 update_last_psn(qp, psn); 1282 return 1; 1283 1284 case 1: /* RNR NAK */ 1285 ibp->rvp.n_rnr_naks++; 1286 if (qp->s_acked == qp->s_tail) 1287 goto bail_stop; 1288 if (qp->s_flags & RVT_S_WAIT_RNR) 1289 goto bail_stop; 1290 if (qp->s_rnr_retry == 0) { 1291 status = IB_WC_RNR_RETRY_EXC_ERR; 1292 goto class_b; 1293 } 1294 if (qp->s_rnr_retry_cnt < 7) 1295 qp->s_rnr_retry--; 1296 1297 /* The last valid PSN is the previous PSN. */ 1298 update_last_psn(qp, psn - 1); 1299 1300 ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn); 1301 1302 reset_psn(qp, psn); 1303 1304 qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK); 1305 rvt_stop_rc_timers(qp); 1306 rvt_add_rnr_timer(qp, aeth); 1307 return 0; 1308 1309 case 3: /* NAK */ 1310 if (qp->s_acked == qp->s_tail) 1311 goto bail_stop; 1312 /* The last valid PSN is the previous PSN. */ 1313 update_last_psn(qp, psn - 1); 1314 switch ((aeth >> IB_AETH_CREDIT_SHIFT) & 1315 IB_AETH_CREDIT_MASK) { 1316 case 0: /* PSN sequence error */ 1317 ibp->rvp.n_seq_naks++; 1318 /* 1319 * Back up to the responder's expected PSN. 1320 * Note that we might get a NAK in the middle of an 1321 * RDMA READ response which terminates the RDMA 1322 * READ. 1323 */ 1324 hfi1_restart_rc(qp, psn, 0); 1325 hfi1_schedule_send(qp); 1326 break; 1327 1328 case 1: /* Invalid Request */ 1329 status = IB_WC_REM_INV_REQ_ERR; 1330 ibp->rvp.n_other_naks++; 1331 goto class_b; 1332 1333 case 2: /* Remote Access Error */ 1334 status = IB_WC_REM_ACCESS_ERR; 1335 ibp->rvp.n_other_naks++; 1336 goto class_b; 1337 1338 case 3: /* Remote Operation Error */ 1339 status = IB_WC_REM_OP_ERR; 1340 ibp->rvp.n_other_naks++; 1341 class_b: 1342 if (qp->s_last == qp->s_acked) { 1343 hfi1_send_complete(qp, wqe, status); 1344 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1345 } 1346 break; 1347 1348 default: 1349 /* Ignore other reserved NAK error codes */ 1350 goto reserved; 1351 } 1352 qp->s_retry = qp->s_retry_cnt; 1353 qp->s_rnr_retry = qp->s_rnr_retry_cnt; 1354 goto bail_stop; 1355 1356 default: /* 2: reserved */ 1357 reserved: 1358 /* Ignore reserved NAK codes. */ 1359 goto bail_stop; 1360 } 1361 /* cannot be reached */ 1362 bail_stop: 1363 rvt_stop_rc_timers(qp); 1364 return ret; 1365 } 1366 1367 /* 1368 * We have seen an out of sequence RDMA read middle or last packet. 1369 * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE. 1370 */ 1371 static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, 1372 struct hfi1_ctxtdata *rcd) 1373 { 1374 struct rvt_swqe *wqe; 1375 1376 lockdep_assert_held(&qp->s_lock); 1377 /* Remove QP from retry timer */ 1378 rvt_stop_rc_timers(qp); 1379 1380 wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 1381 1382 while (cmp_psn(psn, wqe->lpsn) > 0) { 1383 if (wqe->wr.opcode == IB_WR_RDMA_READ || 1384 wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP || 1385 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) 1386 break; 1387 wqe = do_rc_completion(qp, wqe, ibp); 1388 } 1389 1390 ibp->rvp.n_rdma_seq++; 1391 qp->r_flags |= RVT_R_RDMAR_SEQ; 1392 hfi1_restart_rc(qp, qp->s_last_psn + 1, 0); 1393 if (list_empty(&qp->rspwait)) { 1394 qp->r_flags |= RVT_R_RSP_SEND; 1395 rvt_get_qp(qp); 1396 list_add_tail(&qp->rspwait, &rcd->qp_wait_list); 1397 } 1398 } 1399 1400 /** 1401 * rc_rcv_resp - process an incoming RC response packet 1402 * @ibp: the port this packet came in on 1403 * @ohdr: the other headers for this packet 1404 * @data: the packet data 1405 * @tlen: the packet length 1406 * @qp: the QP for this packet 1407 * @opcode: the opcode for this packet 1408 * @psn: the packet sequence number for this packet 1409 * @hdrsize: the header length 1410 * @pmtu: the path MTU 1411 * 1412 * This is called from hfi1_rc_rcv() to process an incoming RC response 1413 * packet for the given QP. 1414 * Called at interrupt level. 1415 */ 1416 static void rc_rcv_resp(struct hfi1_ibport *ibp, 1417 struct ib_other_headers *ohdr, 1418 void *data, u32 tlen, struct rvt_qp *qp, 1419 u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, 1420 struct hfi1_ctxtdata *rcd) 1421 { 1422 struct rvt_swqe *wqe; 1423 enum ib_wc_status status; 1424 unsigned long flags; 1425 int diff; 1426 u32 pad; 1427 u32 aeth; 1428 u64 val; 1429 1430 spin_lock_irqsave(&qp->s_lock, flags); 1431 1432 trace_hfi1_ack(qp, psn); 1433 1434 /* Ignore invalid responses. */ 1435 smp_read_barrier_depends(); /* see post_one_send */ 1436 if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0) 1437 goto ack_done; 1438 1439 /* Ignore duplicate responses. */ 1440 diff = cmp_psn(psn, qp->s_last_psn); 1441 if (unlikely(diff <= 0)) { 1442 /* Update credits for "ghost" ACKs */ 1443 if (diff == 0 && opcode == OP(ACKNOWLEDGE)) { 1444 aeth = be32_to_cpu(ohdr->u.aeth); 1445 if ((aeth >> IB_AETH_NAK_SHIFT) == 0) 1446 rvt_get_credit(qp, aeth); 1447 } 1448 goto ack_done; 1449 } 1450 1451 /* 1452 * Skip everything other than the PSN we expect, if we are waiting 1453 * for a reply to a restarted RDMA read or atomic op. 1454 */ 1455 if (qp->r_flags & RVT_R_RDMAR_SEQ) { 1456 if (cmp_psn(psn, qp->s_last_psn + 1) != 0) 1457 goto ack_done; 1458 qp->r_flags &= ~RVT_R_RDMAR_SEQ; 1459 } 1460 1461 if (unlikely(qp->s_acked == qp->s_tail)) 1462 goto ack_done; 1463 wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 1464 status = IB_WC_SUCCESS; 1465 1466 switch (opcode) { 1467 case OP(ACKNOWLEDGE): 1468 case OP(ATOMIC_ACKNOWLEDGE): 1469 case OP(RDMA_READ_RESPONSE_FIRST): 1470 aeth = be32_to_cpu(ohdr->u.aeth); 1471 if (opcode == OP(ATOMIC_ACKNOWLEDGE)) 1472 val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); 1473 else 1474 val = 0; 1475 if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || 1476 opcode != OP(RDMA_READ_RESPONSE_FIRST)) 1477 goto ack_done; 1478 wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 1479 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1480 goto ack_op_err; 1481 /* 1482 * If this is a response to a resent RDMA read, we 1483 * have to be careful to copy the data to the right 1484 * location. 1485 */ 1486 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, 1487 wqe, psn, pmtu); 1488 goto read_middle; 1489 1490 case OP(RDMA_READ_RESPONSE_MIDDLE): 1491 /* no AETH, no ACK */ 1492 if (unlikely(cmp_psn(psn, qp->s_last_psn + 1))) 1493 goto ack_seq_err; 1494 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1495 goto ack_op_err; 1496 read_middle: 1497 if (unlikely(tlen != (hdrsize + pmtu + 4))) 1498 goto ack_len_err; 1499 if (unlikely(pmtu >= qp->s_rdma_read_len)) 1500 goto ack_len_err; 1501 1502 /* 1503 * We got a response so update the timeout. 1504 * 4.096 usec. * (1 << qp->timeout) 1505 */ 1506 rvt_mod_retry_timer(qp); 1507 if (qp->s_flags & RVT_S_WAIT_ACK) { 1508 qp->s_flags &= ~RVT_S_WAIT_ACK; 1509 hfi1_schedule_send(qp); 1510 } 1511 1512 if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE)) 1513 qp->s_retry = qp->s_retry_cnt; 1514 1515 /* 1516 * Update the RDMA receive state but do the copy w/o 1517 * holding the locks and blocking interrupts. 1518 */ 1519 qp->s_rdma_read_len -= pmtu; 1520 update_last_psn(qp, psn); 1521 spin_unlock_irqrestore(&qp->s_lock, flags); 1522 hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false); 1523 goto bail; 1524 1525 case OP(RDMA_READ_RESPONSE_ONLY): 1526 aeth = be32_to_cpu(ohdr->u.aeth); 1527 if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd)) 1528 goto ack_done; 1529 /* Get the number of bytes the message was padded by. */ 1530 pad = ib_bth_get_pad(ohdr); 1531 /* 1532 * Check that the data size is >= 0 && <= pmtu. 1533 * Remember to account for ICRC (4). 1534 */ 1535 if (unlikely(tlen < (hdrsize + pad + 4))) 1536 goto ack_len_err; 1537 /* 1538 * If this is a response to a resent RDMA read, we 1539 * have to be careful to copy the data to the right 1540 * location. 1541 */ 1542 wqe = rvt_get_swqe_ptr(qp, qp->s_acked); 1543 qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge, 1544 wqe, psn, pmtu); 1545 goto read_last; 1546 1547 case OP(RDMA_READ_RESPONSE_LAST): 1548 /* ACKs READ req. */ 1549 if (unlikely(cmp_psn(psn, qp->s_last_psn + 1))) 1550 goto ack_seq_err; 1551 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1552 goto ack_op_err; 1553 /* Get the number of bytes the message was padded by. */ 1554 pad = ib_bth_get_pad(ohdr); 1555 /* 1556 * Check that the data size is >= 1 && <= pmtu. 1557 * Remember to account for ICRC (4). 1558 */ 1559 if (unlikely(tlen <= (hdrsize + pad + 4))) 1560 goto ack_len_err; 1561 read_last: 1562 tlen -= hdrsize + pad + 4; 1563 if (unlikely(tlen != qp->s_rdma_read_len)) 1564 goto ack_len_err; 1565 aeth = be32_to_cpu(ohdr->u.aeth); 1566 hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false); 1567 WARN_ON(qp->s_rdma_read_sge.num_sge); 1568 (void)do_rc_ack(qp, aeth, psn, 1569 OP(RDMA_READ_RESPONSE_LAST), 0, rcd); 1570 goto ack_done; 1571 } 1572 1573 ack_op_err: 1574 status = IB_WC_LOC_QP_OP_ERR; 1575 goto ack_err; 1576 1577 ack_seq_err: 1578 rdma_seq_err(qp, ibp, psn, rcd); 1579 goto ack_done; 1580 1581 ack_len_err: 1582 status = IB_WC_LOC_LEN_ERR; 1583 ack_err: 1584 if (qp->s_last == qp->s_acked) { 1585 hfi1_send_complete(qp, wqe, status); 1586 rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); 1587 } 1588 ack_done: 1589 spin_unlock_irqrestore(&qp->s_lock, flags); 1590 bail: 1591 return; 1592 } 1593 1594 static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, 1595 struct rvt_qp *qp) 1596 { 1597 if (list_empty(&qp->rspwait)) { 1598 qp->r_flags |= RVT_R_RSP_NAK; 1599 rvt_get_qp(qp); 1600 list_add_tail(&qp->rspwait, &rcd->qp_wait_list); 1601 } 1602 } 1603 1604 static inline void rc_cancel_ack(struct rvt_qp *qp) 1605 { 1606 qp->r_adefered = 0; 1607 if (list_empty(&qp->rspwait)) 1608 return; 1609 list_del_init(&qp->rspwait); 1610 qp->r_flags &= ~RVT_R_RSP_NAK; 1611 rvt_put_qp(qp); 1612 } 1613 1614 /** 1615 * rc_rcv_error - process an incoming duplicate or error RC packet 1616 * @ohdr: the other headers for this packet 1617 * @data: the packet data 1618 * @qp: the QP for this packet 1619 * @opcode: the opcode for this packet 1620 * @psn: the packet sequence number for this packet 1621 * @diff: the difference between the PSN and the expected PSN 1622 * 1623 * This is called from hfi1_rc_rcv() to process an unexpected 1624 * incoming RC packet for the given QP. 1625 * Called at interrupt level. 1626 * Return 1 if no more processing is needed; otherwise return 0 to 1627 * schedule a response to be sent. 1628 */ 1629 static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, 1630 struct rvt_qp *qp, u32 opcode, u32 psn, 1631 int diff, struct hfi1_ctxtdata *rcd) 1632 { 1633 struct hfi1_ibport *ibp = rcd_to_iport(rcd); 1634 struct rvt_ack_entry *e; 1635 unsigned long flags; 1636 u8 i, prev; 1637 int old_req; 1638 1639 trace_hfi1_rcv_error(qp, psn); 1640 if (diff > 0) { 1641 /* 1642 * Packet sequence error. 1643 * A NAK will ACK earlier sends and RDMA writes. 1644 * Don't queue the NAK if we already sent one. 1645 */ 1646 if (!qp->r_nak_state) { 1647 ibp->rvp.n_rc_seqnak++; 1648 qp->r_nak_state = IB_NAK_PSN_ERROR; 1649 /* Use the expected PSN. */ 1650 qp->r_ack_psn = qp->r_psn; 1651 /* 1652 * Wait to send the sequence NAK until all packets 1653 * in the receive queue have been processed. 1654 * Otherwise, we end up propagating congestion. 1655 */ 1656 rc_defered_ack(rcd, qp); 1657 } 1658 goto done; 1659 } 1660 1661 /* 1662 * Handle a duplicate request. Don't re-execute SEND, RDMA 1663 * write or atomic op. Don't NAK errors, just silently drop 1664 * the duplicate request. Note that r_sge, r_len, and 1665 * r_rcv_len may be in use so don't modify them. 1666 * 1667 * We are supposed to ACK the earliest duplicate PSN but we 1668 * can coalesce an outstanding duplicate ACK. We have to 1669 * send the earliest so that RDMA reads can be restarted at 1670 * the requester's expected PSN. 1671 * 1672 * First, find where this duplicate PSN falls within the 1673 * ACKs previously sent. 1674 * old_req is true if there is an older response that is scheduled 1675 * to be sent before sending this one. 1676 */ 1677 e = NULL; 1678 old_req = 1; 1679 ibp->rvp.n_rc_dupreq++; 1680 1681 spin_lock_irqsave(&qp->s_lock, flags); 1682 1683 for (i = qp->r_head_ack_queue; ; i = prev) { 1684 if (i == qp->s_tail_ack_queue) 1685 old_req = 0; 1686 if (i) 1687 prev = i - 1; 1688 else 1689 prev = HFI1_MAX_RDMA_ATOMIC; 1690 if (prev == qp->r_head_ack_queue) { 1691 e = NULL; 1692 break; 1693 } 1694 e = &qp->s_ack_queue[prev]; 1695 if (!e->opcode) { 1696 e = NULL; 1697 break; 1698 } 1699 if (cmp_psn(psn, e->psn) >= 0) { 1700 if (prev == qp->s_tail_ack_queue && 1701 cmp_psn(psn, e->lpsn) <= 0) 1702 old_req = 0; 1703 break; 1704 } 1705 } 1706 switch (opcode) { 1707 case OP(RDMA_READ_REQUEST): { 1708 struct ib_reth *reth; 1709 u32 offset; 1710 u32 len; 1711 1712 /* 1713 * If we didn't find the RDMA read request in the ack queue, 1714 * we can ignore this request. 1715 */ 1716 if (!e || e->opcode != OP(RDMA_READ_REQUEST)) 1717 goto unlock_done; 1718 /* RETH comes after BTH */ 1719 reth = &ohdr->u.rc.reth; 1720 /* 1721 * Address range must be a subset of the original 1722 * request and start on pmtu boundaries. 1723 * We reuse the old ack_queue slot since the requester 1724 * should not back up and request an earlier PSN for the 1725 * same request. 1726 */ 1727 offset = delta_psn(psn, e->psn) * qp->pmtu; 1728 len = be32_to_cpu(reth->length); 1729 if (unlikely(offset + len != e->rdma_sge.sge_length)) 1730 goto unlock_done; 1731 if (e->rdma_sge.mr) { 1732 rvt_put_mr(e->rdma_sge.mr); 1733 e->rdma_sge.mr = NULL; 1734 } 1735 if (len != 0) { 1736 u32 rkey = be32_to_cpu(reth->rkey); 1737 u64 vaddr = get_ib_reth_vaddr(reth); 1738 int ok; 1739 1740 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, 1741 IB_ACCESS_REMOTE_READ); 1742 if (unlikely(!ok)) 1743 goto unlock_done; 1744 } else { 1745 e->rdma_sge.vaddr = NULL; 1746 e->rdma_sge.length = 0; 1747 e->rdma_sge.sge_length = 0; 1748 } 1749 e->psn = psn; 1750 if (old_req) 1751 goto unlock_done; 1752 qp->s_tail_ack_queue = prev; 1753 break; 1754 } 1755 1756 case OP(COMPARE_SWAP): 1757 case OP(FETCH_ADD): { 1758 /* 1759 * If we didn't find the atomic request in the ack queue 1760 * or the send engine is already backed up to send an 1761 * earlier entry, we can ignore this request. 1762 */ 1763 if (!e || e->opcode != (u8)opcode || old_req) 1764 goto unlock_done; 1765 qp->s_tail_ack_queue = prev; 1766 break; 1767 } 1768 1769 default: 1770 /* 1771 * Ignore this operation if it doesn't request an ACK 1772 * or an earlier RDMA read or atomic is going to be resent. 1773 */ 1774 if (!(psn & IB_BTH_REQ_ACK) || old_req) 1775 goto unlock_done; 1776 /* 1777 * Resend the most recent ACK if this request is 1778 * after all the previous RDMA reads and atomics. 1779 */ 1780 if (i == qp->r_head_ack_queue) { 1781 spin_unlock_irqrestore(&qp->s_lock, flags); 1782 qp->r_nak_state = 0; 1783 qp->r_ack_psn = qp->r_psn - 1; 1784 goto send_ack; 1785 } 1786 1787 /* 1788 * Resend the RDMA read or atomic op which 1789 * ACKs this duplicate request. 1790 */ 1791 qp->s_tail_ack_queue = i; 1792 break; 1793 } 1794 qp->s_ack_state = OP(ACKNOWLEDGE); 1795 qp->s_flags |= RVT_S_RESP_PENDING; 1796 qp->r_nak_state = 0; 1797 hfi1_schedule_send(qp); 1798 1799 unlock_done: 1800 spin_unlock_irqrestore(&qp->s_lock, flags); 1801 done: 1802 return 1; 1803 1804 send_ack: 1805 return 0; 1806 } 1807 1808 static inline void update_ack_queue(struct rvt_qp *qp, unsigned n) 1809 { 1810 unsigned next; 1811 1812 next = n + 1; 1813 if (next > HFI1_MAX_RDMA_ATOMIC) 1814 next = 0; 1815 qp->s_tail_ack_queue = next; 1816 qp->s_ack_state = OP(ACKNOWLEDGE); 1817 } 1818 1819 static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, 1820 u32 lqpn, u32 rqpn, u8 svc_type) 1821 { 1822 struct opa_hfi1_cong_log_event_internal *cc_event; 1823 unsigned long flags; 1824 1825 if (sl >= OPA_MAX_SLS) 1826 return; 1827 1828 spin_lock_irqsave(&ppd->cc_log_lock, flags); 1829 1830 ppd->threshold_cong_event_map[sl / 8] |= 1 << (sl % 8); 1831 ppd->threshold_event_counter++; 1832 1833 cc_event = &ppd->cc_events[ppd->cc_log_idx++]; 1834 if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS) 1835 ppd->cc_log_idx = 0; 1836 cc_event->lqpn = lqpn & RVT_QPN_MASK; 1837 cc_event->rqpn = rqpn & RVT_QPN_MASK; 1838 cc_event->sl = sl; 1839 cc_event->svc_type = svc_type; 1840 cc_event->rlid = rlid; 1841 /* keep timestamp in units of 1.024 usec */ 1842 cc_event->timestamp = ktime_to_ns(ktime_get()) / 1024; 1843 1844 spin_unlock_irqrestore(&ppd->cc_log_lock, flags); 1845 } 1846 1847 void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, 1848 u32 rqpn, u8 svc_type) 1849 { 1850 struct cca_timer *cca_timer; 1851 u16 ccti, ccti_incr, ccti_timer, ccti_limit; 1852 u8 trigger_threshold; 1853 struct cc_state *cc_state; 1854 unsigned long flags; 1855 1856 if (sl >= OPA_MAX_SLS) 1857 return; 1858 1859 cc_state = get_cc_state(ppd); 1860 1861 if (!cc_state) 1862 return; 1863 1864 /* 1865 * 1) increase CCTI (for this SL) 1866 * 2) select IPG (i.e., call set_link_ipg()) 1867 * 3) start timer 1868 */ 1869 ccti_limit = cc_state->cct.ccti_limit; 1870 ccti_incr = cc_state->cong_setting.entries[sl].ccti_increase; 1871 ccti_timer = cc_state->cong_setting.entries[sl].ccti_timer; 1872 trigger_threshold = 1873 cc_state->cong_setting.entries[sl].trigger_threshold; 1874 1875 spin_lock_irqsave(&ppd->cca_timer_lock, flags); 1876 1877 cca_timer = &ppd->cca_timer[sl]; 1878 if (cca_timer->ccti < ccti_limit) { 1879 if (cca_timer->ccti + ccti_incr <= ccti_limit) 1880 cca_timer->ccti += ccti_incr; 1881 else 1882 cca_timer->ccti = ccti_limit; 1883 set_link_ipg(ppd); 1884 } 1885 1886 ccti = cca_timer->ccti; 1887 1888 if (!hrtimer_active(&cca_timer->hrtimer)) { 1889 /* ccti_timer is in units of 1.024 usec */ 1890 unsigned long nsec = 1024 * ccti_timer; 1891 1892 hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec), 1893 HRTIMER_MODE_REL); 1894 } 1895 1896 spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); 1897 1898 if ((trigger_threshold != 0) && (ccti >= trigger_threshold)) 1899 log_cca_event(ppd, sl, rlid, lqpn, rqpn, svc_type); 1900 } 1901 1902 /** 1903 * hfi1_rc_rcv - process an incoming RC packet 1904 * @rcd: the context pointer 1905 * @hdr: the header of this packet 1906 * @rcv_flags: flags relevant to rcv processing 1907 * @data: the packet data 1908 * @tlen: the packet length 1909 * @qp: the QP for this packet 1910 * 1911 * This is called from qp_rcv() to process an incoming RC packet 1912 * for the given QP. 1913 * May be called at interrupt level. 1914 */ 1915 void hfi1_rc_rcv(struct hfi1_packet *packet) 1916 { 1917 struct hfi1_ctxtdata *rcd = packet->rcd; 1918 struct ib_header *hdr = packet->hdr; 1919 u32 rcv_flags = packet->rcv_flags; 1920 void *data = packet->ebuf; 1921 u32 tlen = packet->tlen; 1922 struct rvt_qp *qp = packet->qp; 1923 struct hfi1_ibport *ibp = rcd_to_iport(rcd); 1924 struct ib_other_headers *ohdr = packet->ohdr; 1925 u32 bth0, opcode; 1926 u32 hdrsize = packet->hlen; 1927 u32 psn; 1928 u32 pad; 1929 struct ib_wc wc; 1930 u32 pmtu = qp->pmtu; 1931 int diff; 1932 struct ib_reth *reth; 1933 unsigned long flags; 1934 int ret; 1935 bool is_fecn = false; 1936 bool copy_last = false; 1937 u32 rkey; 1938 1939 lockdep_assert_held(&qp->r_lock); 1940 bth0 = be32_to_cpu(ohdr->bth[0]); 1941 if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) 1942 return; 1943 1944 is_fecn = process_ecn(qp, packet, false); 1945 1946 psn = be32_to_cpu(ohdr->bth[2]); 1947 opcode = ib_bth_get_opcode(ohdr); 1948 1949 /* 1950 * Process responses (ACKs) before anything else. Note that the 1951 * packet sequence number will be for something in the send work 1952 * queue rather than the expected receive packet sequence number. 1953 * In other words, this QP is the requester. 1954 */ 1955 if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && 1956 opcode <= OP(ATOMIC_ACKNOWLEDGE)) { 1957 rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn, 1958 hdrsize, pmtu, rcd); 1959 if (is_fecn) 1960 goto send_ack; 1961 return; 1962 } 1963 1964 /* Compute 24 bits worth of difference. */ 1965 diff = delta_psn(psn, qp->r_psn); 1966 if (unlikely(diff)) { 1967 if (rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd)) 1968 return; 1969 goto send_ack; 1970 } 1971 1972 /* Check for opcode sequence errors. */ 1973 switch (qp->r_state) { 1974 case OP(SEND_FIRST): 1975 case OP(SEND_MIDDLE): 1976 if (opcode == OP(SEND_MIDDLE) || 1977 opcode == OP(SEND_LAST) || 1978 opcode == OP(SEND_LAST_WITH_IMMEDIATE) || 1979 opcode == OP(SEND_LAST_WITH_INVALIDATE)) 1980 break; 1981 goto nack_inv; 1982 1983 case OP(RDMA_WRITE_FIRST): 1984 case OP(RDMA_WRITE_MIDDLE): 1985 if (opcode == OP(RDMA_WRITE_MIDDLE) || 1986 opcode == OP(RDMA_WRITE_LAST) || 1987 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) 1988 break; 1989 goto nack_inv; 1990 1991 default: 1992 if (opcode == OP(SEND_MIDDLE) || 1993 opcode == OP(SEND_LAST) || 1994 opcode == OP(SEND_LAST_WITH_IMMEDIATE) || 1995 opcode == OP(SEND_LAST_WITH_INVALIDATE) || 1996 opcode == OP(RDMA_WRITE_MIDDLE) || 1997 opcode == OP(RDMA_WRITE_LAST) || 1998 opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE)) 1999 goto nack_inv; 2000 /* 2001 * Note that it is up to the requester to not send a new 2002 * RDMA read or atomic operation before receiving an ACK 2003 * for the previous operation. 2004 */ 2005 break; 2006 } 2007 2008 if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) 2009 rvt_comm_est(qp); 2010 2011 /* OK, process the packet. */ 2012 switch (opcode) { 2013 case OP(SEND_FIRST): 2014 ret = hfi1_rvt_get_rwqe(qp, 0); 2015 if (ret < 0) 2016 goto nack_op_err; 2017 if (!ret) 2018 goto rnr_nak; 2019 qp->r_rcv_len = 0; 2020 /* FALLTHROUGH */ 2021 case OP(SEND_MIDDLE): 2022 case OP(RDMA_WRITE_MIDDLE): 2023 send_middle: 2024 /* Check for invalid length PMTU or posted rwqe len. */ 2025 if (unlikely(tlen != (hdrsize + pmtu + 4))) 2026 goto nack_inv; 2027 qp->r_rcv_len += pmtu; 2028 if (unlikely(qp->r_rcv_len > qp->r_len)) 2029 goto nack_inv; 2030 hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false); 2031 break; 2032 2033 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): 2034 /* consume RWQE */ 2035 ret = hfi1_rvt_get_rwqe(qp, 1); 2036 if (ret < 0) 2037 goto nack_op_err; 2038 if (!ret) 2039 goto rnr_nak; 2040 goto send_last_imm; 2041 2042 case OP(SEND_ONLY): 2043 case OP(SEND_ONLY_WITH_IMMEDIATE): 2044 case OP(SEND_ONLY_WITH_INVALIDATE): 2045 ret = hfi1_rvt_get_rwqe(qp, 0); 2046 if (ret < 0) 2047 goto nack_op_err; 2048 if (!ret) 2049 goto rnr_nak; 2050 qp->r_rcv_len = 0; 2051 if (opcode == OP(SEND_ONLY)) 2052 goto no_immediate_data; 2053 if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) 2054 goto send_last_inv; 2055 /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ 2056 case OP(SEND_LAST_WITH_IMMEDIATE): 2057 send_last_imm: 2058 wc.ex.imm_data = ohdr->u.imm_data; 2059 wc.wc_flags = IB_WC_WITH_IMM; 2060 goto send_last; 2061 case OP(SEND_LAST_WITH_INVALIDATE): 2062 send_last_inv: 2063 rkey = be32_to_cpu(ohdr->u.ieth); 2064 if (rvt_invalidate_rkey(qp, rkey)) 2065 goto no_immediate_data; 2066 wc.ex.invalidate_rkey = rkey; 2067 wc.wc_flags = IB_WC_WITH_INVALIDATE; 2068 goto send_last; 2069 case OP(RDMA_WRITE_LAST): 2070 copy_last = rvt_is_user_qp(qp); 2071 /* fall through */ 2072 case OP(SEND_LAST): 2073 no_immediate_data: 2074 wc.wc_flags = 0; 2075 wc.ex.imm_data = 0; 2076 send_last: 2077 /* Get the number of bytes the message was padded by. */ 2078 pad = ib_bth_get_pad(ohdr); 2079 /* Check for invalid length. */ 2080 /* LAST len should be >= 1 */ 2081 if (unlikely(tlen < (hdrsize + pad + 4))) 2082 goto nack_inv; 2083 /* Don't count the CRC. */ 2084 tlen -= (hdrsize + pad + 4); 2085 wc.byte_len = tlen + qp->r_rcv_len; 2086 if (unlikely(wc.byte_len > qp->r_len)) 2087 goto nack_inv; 2088 hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last); 2089 rvt_put_ss(&qp->r_sge); 2090 qp->r_msn++; 2091 if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 2092 break; 2093 wc.wr_id = qp->r_wr_id; 2094 wc.status = IB_WC_SUCCESS; 2095 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || 2096 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) 2097 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 2098 else 2099 wc.opcode = IB_WC_RECV; 2100 wc.qp = &qp->ibqp; 2101 wc.src_qp = qp->remote_qpn; 2102 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); 2103 /* 2104 * It seems that IB mandates the presence of an SL in a 2105 * work completion only for the UD transport (see section 2106 * 11.4.2 of IBTA Vol. 1). 2107 * 2108 * However, the way the SL is chosen below is consistent 2109 * with the way that IB/qib works and is trying avoid 2110 * introducing incompatibilities. 2111 * 2112 * See also OPA Vol. 1, section 9.7.6, and table 9-17. 2113 */ 2114 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); 2115 /* zero fields that are N/A */ 2116 wc.vendor_err = 0; 2117 wc.pkey_index = 0; 2118 wc.dlid_path_bits = 0; 2119 wc.port_num = 0; 2120 /* Signal completion event if the solicited bit is set. */ 2121 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 2122 (bth0 & IB_BTH_SOLICITED) != 0); 2123 break; 2124 2125 case OP(RDMA_WRITE_ONLY): 2126 copy_last = rvt_is_user_qp(qp); 2127 /* fall through */ 2128 case OP(RDMA_WRITE_FIRST): 2129 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): 2130 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 2131 goto nack_inv; 2132 /* consume RWQE */ 2133 reth = &ohdr->u.rc.reth; 2134 qp->r_len = be32_to_cpu(reth->length); 2135 qp->r_rcv_len = 0; 2136 qp->r_sge.sg_list = NULL; 2137 if (qp->r_len != 0) { 2138 u32 rkey = be32_to_cpu(reth->rkey); 2139 u64 vaddr = get_ib_reth_vaddr(reth); 2140 int ok; 2141 2142 /* Check rkey & NAK */ 2143 ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr, 2144 rkey, IB_ACCESS_REMOTE_WRITE); 2145 if (unlikely(!ok)) 2146 goto nack_acc; 2147 qp->r_sge.num_sge = 1; 2148 } else { 2149 qp->r_sge.num_sge = 0; 2150 qp->r_sge.sge.mr = NULL; 2151 qp->r_sge.sge.vaddr = NULL; 2152 qp->r_sge.sge.length = 0; 2153 qp->r_sge.sge.sge_length = 0; 2154 } 2155 if (opcode == OP(RDMA_WRITE_FIRST)) 2156 goto send_middle; 2157 else if (opcode == OP(RDMA_WRITE_ONLY)) 2158 goto no_immediate_data; 2159 ret = hfi1_rvt_get_rwqe(qp, 1); 2160 if (ret < 0) 2161 goto nack_op_err; 2162 if (!ret) { 2163 /* peer will send again */ 2164 rvt_put_ss(&qp->r_sge); 2165 goto rnr_nak; 2166 } 2167 wc.ex.imm_data = ohdr->u.rc.imm_data; 2168 wc.wc_flags = IB_WC_WITH_IMM; 2169 goto send_last; 2170 2171 case OP(RDMA_READ_REQUEST): { 2172 struct rvt_ack_entry *e; 2173 u32 len; 2174 u8 next; 2175 2176 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 2177 goto nack_inv; 2178 next = qp->r_head_ack_queue + 1; 2179 /* s_ack_queue is size HFI1_MAX_RDMA_ATOMIC+1 so use > not >= */ 2180 if (next > HFI1_MAX_RDMA_ATOMIC) 2181 next = 0; 2182 spin_lock_irqsave(&qp->s_lock, flags); 2183 if (unlikely(next == qp->s_tail_ack_queue)) { 2184 if (!qp->s_ack_queue[next].sent) 2185 goto nack_inv_unlck; 2186 update_ack_queue(qp, next); 2187 } 2188 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 2189 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { 2190 rvt_put_mr(e->rdma_sge.mr); 2191 e->rdma_sge.mr = NULL; 2192 } 2193 reth = &ohdr->u.rc.reth; 2194 len = be32_to_cpu(reth->length); 2195 if (len) { 2196 u32 rkey = be32_to_cpu(reth->rkey); 2197 u64 vaddr = get_ib_reth_vaddr(reth); 2198 int ok; 2199 2200 /* Check rkey & NAK */ 2201 ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, 2202 rkey, IB_ACCESS_REMOTE_READ); 2203 if (unlikely(!ok)) 2204 goto nack_acc_unlck; 2205 /* 2206 * Update the next expected PSN. We add 1 later 2207 * below, so only add the remainder here. 2208 */ 2209 qp->r_psn += rvt_div_mtu(qp, len - 1); 2210 } else { 2211 e->rdma_sge.mr = NULL; 2212 e->rdma_sge.vaddr = NULL; 2213 e->rdma_sge.length = 0; 2214 e->rdma_sge.sge_length = 0; 2215 } 2216 e->opcode = opcode; 2217 e->sent = 0; 2218 e->psn = psn; 2219 e->lpsn = qp->r_psn; 2220 /* 2221 * We need to increment the MSN here instead of when we 2222 * finish sending the result since a duplicate request would 2223 * increment it more than once. 2224 */ 2225 qp->r_msn++; 2226 qp->r_psn++; 2227 qp->r_state = opcode; 2228 qp->r_nak_state = 0; 2229 qp->r_head_ack_queue = next; 2230 2231 /* Schedule the send engine. */ 2232 qp->s_flags |= RVT_S_RESP_PENDING; 2233 hfi1_schedule_send(qp); 2234 2235 spin_unlock_irqrestore(&qp->s_lock, flags); 2236 if (is_fecn) 2237 goto send_ack; 2238 return; 2239 } 2240 2241 case OP(COMPARE_SWAP): 2242 case OP(FETCH_ADD): { 2243 struct ib_atomic_eth *ateth; 2244 struct rvt_ack_entry *e; 2245 u64 vaddr; 2246 atomic64_t *maddr; 2247 u64 sdata; 2248 u32 rkey; 2249 u8 next; 2250 2251 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 2252 goto nack_inv; 2253 next = qp->r_head_ack_queue + 1; 2254 if (next > HFI1_MAX_RDMA_ATOMIC) 2255 next = 0; 2256 spin_lock_irqsave(&qp->s_lock, flags); 2257 if (unlikely(next == qp->s_tail_ack_queue)) { 2258 if (!qp->s_ack_queue[next].sent) 2259 goto nack_inv_unlck; 2260 update_ack_queue(qp, next); 2261 } 2262 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 2263 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { 2264 rvt_put_mr(e->rdma_sge.mr); 2265 e->rdma_sge.mr = NULL; 2266 } 2267 ateth = &ohdr->u.atomic_eth; 2268 vaddr = get_ib_ateth_vaddr(ateth); 2269 if (unlikely(vaddr & (sizeof(u64) - 1))) 2270 goto nack_inv_unlck; 2271 rkey = be32_to_cpu(ateth->rkey); 2272 /* Check rkey & NAK */ 2273 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 2274 vaddr, rkey, 2275 IB_ACCESS_REMOTE_ATOMIC))) 2276 goto nack_acc_unlck; 2277 /* Perform atomic OP and save result. */ 2278 maddr = (atomic64_t *)qp->r_sge.sge.vaddr; 2279 sdata = get_ib_ateth_swap(ateth); 2280 e->atomic_data = (opcode == OP(FETCH_ADD)) ? 2281 (u64)atomic64_add_return(sdata, maddr) - sdata : 2282 (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, 2283 get_ib_ateth_compare(ateth), 2284 sdata); 2285 rvt_put_mr(qp->r_sge.sge.mr); 2286 qp->r_sge.num_sge = 0; 2287 e->opcode = opcode; 2288 e->sent = 0; 2289 e->psn = psn; 2290 e->lpsn = psn; 2291 qp->r_msn++; 2292 qp->r_psn++; 2293 qp->r_state = opcode; 2294 qp->r_nak_state = 0; 2295 qp->r_head_ack_queue = next; 2296 2297 /* Schedule the send engine. */ 2298 qp->s_flags |= RVT_S_RESP_PENDING; 2299 hfi1_schedule_send(qp); 2300 2301 spin_unlock_irqrestore(&qp->s_lock, flags); 2302 if (is_fecn) 2303 goto send_ack; 2304 return; 2305 } 2306 2307 default: 2308 /* NAK unknown opcodes. */ 2309 goto nack_inv; 2310 } 2311 qp->r_psn++; 2312 qp->r_state = opcode; 2313 qp->r_ack_psn = psn; 2314 qp->r_nak_state = 0; 2315 /* Send an ACK if requested or required. */ 2316 if (psn & IB_BTH_REQ_ACK) { 2317 if (packet->numpkt == 0) { 2318 rc_cancel_ack(qp); 2319 goto send_ack; 2320 } 2321 if (qp->r_adefered >= HFI1_PSN_CREDIT) { 2322 rc_cancel_ack(qp); 2323 goto send_ack; 2324 } 2325 if (unlikely(is_fecn)) { 2326 rc_cancel_ack(qp); 2327 goto send_ack; 2328 } 2329 qp->r_adefered++; 2330 rc_defered_ack(rcd, qp); 2331 } 2332 return; 2333 2334 rnr_nak: 2335 qp->r_nak_state = qp->r_min_rnr_timer | IB_RNR_NAK; 2336 qp->r_ack_psn = qp->r_psn; 2337 /* Queue RNR NAK for later */ 2338 rc_defered_ack(rcd, qp); 2339 return; 2340 2341 nack_op_err: 2342 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); 2343 qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR; 2344 qp->r_ack_psn = qp->r_psn; 2345 /* Queue NAK for later */ 2346 rc_defered_ack(rcd, qp); 2347 return; 2348 2349 nack_inv_unlck: 2350 spin_unlock_irqrestore(&qp->s_lock, flags); 2351 nack_inv: 2352 rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); 2353 qp->r_nak_state = IB_NAK_INVALID_REQUEST; 2354 qp->r_ack_psn = qp->r_psn; 2355 /* Queue NAK for later */ 2356 rc_defered_ack(rcd, qp); 2357 return; 2358 2359 nack_acc_unlck: 2360 spin_unlock_irqrestore(&qp->s_lock, flags); 2361 nack_acc: 2362 rvt_rc_error(qp, IB_WC_LOC_PROT_ERR); 2363 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; 2364 qp->r_ack_psn = qp->r_psn; 2365 send_ack: 2366 hfi1_send_rc_ack(rcd, qp, is_fecn); 2367 } 2368 2369 void hfi1_rc_hdrerr( 2370 struct hfi1_ctxtdata *rcd, 2371 struct ib_header *hdr, 2372 u32 rcv_flags, 2373 struct rvt_qp *qp) 2374 { 2375 int has_grh = rcv_flags & HFI1_HAS_GRH; 2376 struct ib_other_headers *ohdr; 2377 struct hfi1_ibport *ibp = rcd_to_iport(rcd); 2378 int diff; 2379 u32 opcode; 2380 u32 psn, bth0; 2381 2382 /* Check for GRH */ 2383 ohdr = &hdr->u.oth; 2384 if (has_grh) 2385 ohdr = &hdr->u.l.oth; 2386 2387 bth0 = be32_to_cpu(ohdr->bth[0]); 2388 if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0)) 2389 return; 2390 2391 psn = be32_to_cpu(ohdr->bth[2]); 2392 opcode = ib_bth_get_opcode(ohdr); 2393 2394 /* Only deal with RDMA Writes for now */ 2395 if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) { 2396 diff = delta_psn(psn, qp->r_psn); 2397 if (!qp->r_nak_state && diff >= 0) { 2398 ibp->rvp.n_rc_seqnak++; 2399 qp->r_nak_state = IB_NAK_PSN_ERROR; 2400 /* Use the expected PSN. */ 2401 qp->r_ack_psn = qp->r_psn; 2402 /* 2403 * Wait to send the sequence 2404 * NAK until all packets 2405 * in the receive queue have 2406 * been processed. 2407 * Otherwise, we end up 2408 * propagating congestion. 2409 */ 2410 rc_defered_ack(rcd, qp); 2411 } /* Out of sequence NAK */ 2412 } /* QP Request NAKs */ 2413 } 2414