// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright(c) 2015 - 2018 Intel Corporation.
 */

#include "hfi.h"
#include "verbs_txreq.h"
#include "qp.h"

/* cut down ridiculously long IB macro names */
#define OP(x) UC_OP(x)

/**
 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
 * @qp: a pointer to the QP
 * @ps: the current packet state
 *
 * Assume s_lock is held.
 *
 * Builds the next UC packet header for the work request at qp->s_cur,
 * advancing the QP send state machine (FIRST/MIDDLE/LAST/ONLY) and
 * consuming up to one PMTU of payload per call.
 *
 * Return 1 if constructed; otherwise, return 0.
 */
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_other_headers *ohdr;
	struct rvt_swqe *wqe;
	u32 hwords;		/* header length in 32-bit words */
	u32 bth0 = 0;
	u32 len;		/* payload bytes carried by this packet */
	u32 pmtu = qp->pmtu;
	int middle = 0;		/* nonzero enables AHG header compression */

	ps->s_txreq = get_txreq(ps->dev, qp);
	if (!ps->s_txreq)
		goto bail_no_tx;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
			goto bail;
		/* We are in the error state, flush the work request. */
		if (qp->s_last == READ_ONCE(qp->s_head))
			goto bail;
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
			goto bail;
		}
		clear_ahg(qp);
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
		goto done_free_tx;
	}

	/*
	 * Pick the "other headers" location inside the 9B or 16B packet
	 * header depending on whether a GRH is present.
	 */
	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
		hwords = 5;
		if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.ibh.u.oth;
	} else {
		/* header size in 32-bit words 16B LRH+BTH = (16+12)/4. */
		hwords = 7;
		if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) &&
		    (hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))))
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.l.oth;
		else
			ohdr = &ps->s_txreq->phdr.hdr.opah.u.oth;
	}

	/* Get the next send request. */
	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	qp->s_wqe = NULL;
	switch (qp->s_state) {
	default:
		/* Not mid-message: try to start a new request. */
		if (!(ib_rvt_state_ops[qp->state] &
		    RVT_PROCESS_NEXT_SEND_OK))
			goto bail;
		/* Check if send work queue is empty. */
		if (qp->s_cur == READ_ONCE(qp->s_head)) {
			clear_ahg(qp);
			goto bail;
		}
		/*
		 * Local operations are processed immediately
		 * after all prior requests have completed.
		 */
		if (wqe->wr.opcode == IB_WR_REG_MR ||
		    wqe->wr.opcode == IB_WR_LOCAL_INV) {
			int local_ops = 0;
			int err = 0;

			/* Wait until all prior WQEs have completed. */
			if (qp->s_last != qp->s_cur)
				goto bail;
			if (++qp->s_cur == qp->s_size)
				qp->s_cur = 0;
			if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
				err = rvt_invalidate_rkey(
					qp, wqe->wr.ex.invalidate_rkey);
				local_ops = 1;
			}
			rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
							: IB_WC_SUCCESS);
			if (local_ops)
				atomic_dec(&qp->local_ops_pending);
			goto done_free_tx;
		}
		/*
		 * Start a new request: seed the send PSN, SGE list, and
		 * remaining length from the WQE.
		 */
		qp->s_psn = wqe->psn;
		qp->s_sge.sge = wqe->sg_list[0];
		qp->s_sge.sg_list = wqe->sg_list + 1;
		qp->s_sge.num_sge = wqe->wr.num_sge;
		qp->s_sge.total_len = wqe->length;
		len = wqe->length;
		qp->s_len = len;
		switch (wqe->wr.opcode) {
		case IB_WR_SEND:
		case IB_WR_SEND_WITH_IMM:
			if (len > pmtu) {
				/* Multi-packet message: emit FIRST now. */
				qp->s_state = OP(SEND_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_SEND) {
				qp->s_state = OP(SEND_ONLY);
			} else {
				qp->s_state =
					OP(SEND_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the BTH */
				ohdr->u.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
			}
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
			/* s_wqe set => completion generated on send done. */
			qp->s_wqe = wqe;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		case IB_WR_RDMA_WRITE:
		case IB_WR_RDMA_WRITE_WITH_IMM:
			/* RETH carries the remote buffer description. */
			ohdr->u.rc.reth.vaddr =
				cpu_to_be64(wqe->rdma_wr.remote_addr);
			ohdr->u.rc.reth.rkey =
				cpu_to_be32(wqe->rdma_wr.rkey);
			ohdr->u.rc.reth.length = cpu_to_be32(len);
			hwords += sizeof(struct ib_reth) / 4;
			if (len > pmtu) {
				qp->s_state = OP(RDMA_WRITE_FIRST);
				len = pmtu;
				break;
			}
			if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
				qp->s_state = OP(RDMA_WRITE_ONLY);
			} else {
				qp->s_state =
					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
				/* Immediate data comes after the RETH */
				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
				hwords += 1;
				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
					bth0 |= IB_BTH_SOLICITED;
			}
			qp->s_wqe = wqe;
			if (++qp->s_cur >= qp->s_size)
				qp->s_cur = 0;
			break;

		default:
			/* Opcode not supported on UC QPs. */
			goto bail;
		}
		break;

	case OP(SEND_FIRST):
		qp->s_state = OP(SEND_MIDDLE);
		fallthrough;
	case OP(SEND_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			/* MIDDLE packets may reuse the header via AHG. */
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_SEND) {
			qp->s_state = OP(SEND_LAST);
		} else {
			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
		}
		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
			bth0 |= IB_BTH_SOLICITED;
		qp->s_wqe = wqe;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;

	case OP(RDMA_WRITE_FIRST):
		qp->s_state = OP(RDMA_WRITE_MIDDLE);
		fallthrough;
	case OP(RDMA_WRITE_MIDDLE):
		len = qp->s_len;
		if (len > pmtu) {
			len = pmtu;
			middle = HFI1_CAP_IS_KSET(SDMA_AHG);
			break;
		}
		if (wqe->wr.opcode == IB_WR_RDMA_WRITE) {
			qp->s_state = OP(RDMA_WRITE_LAST);
		} else {
			qp->s_state =
				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
			/* Immediate data comes after the BTH */
			ohdr->u.imm_data = wqe->wr.ex.imm_data;
			hwords += 1;
			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
				bth0 |= IB_BTH_SOLICITED;
		}
		qp->s_wqe = wqe;
		if (++qp->s_cur >= qp->s_size)
			qp->s_cur = 0;
		break;
	}
	qp->s_len -= len;
	ps->s_txreq->hdr_dwords = hwords;
	ps->s_txreq->sde = priv->s_sde;
	ps->s_txreq->ss = &qp->s_sge;
	ps->s_txreq->s_cur_size = len;
	/* s_state is folded into BTH bits 24-31 as the packet opcode. */
	hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
			     qp->remote_qpn, mask_psn(qp->s_psn++),
			     middle, ps);
	return 1;

done_free_tx:
	hfi1_put_txreq(ps->s_txreq);
	ps->s_txreq = NULL;
	return 1;

bail:
	hfi1_put_txreq(ps->s_txreq);

bail_no_tx:
	ps->s_txreq = NULL;
	qp->s_flags &= ~RVT_S_BUSY;
	return 0;
}

/**
 * hfi1_uc_rcv - handle an incoming UC packet
 * @packet: the packet structure
 *
 * This is called from qp_rcv() to process an incoming UC packet
 * for the given QP.
 * Called at interrupt level.
 */
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
	void *data = packet->payload;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	struct ib_other_headers *ohdr = packet->ohdr;
	u32 opcode = packet->opcode;
	u32 hdrsize = packet->hlen;
	u32 psn;
	u32 pad = packet->pad;
	struct ib_wc wc;
	u32 pmtu = qp->pmtu;
	struct ib_reth *reth;
	int ret;
	/* trailing bytes: pad + extra (16B LT) + 4-byte CRC */
	u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);

	if (hfi1_ruc_check_hdr(ibp, packet))
		return;

	process_ecn(qp, packet);

	psn = ib_bth_get_psn(ohdr);
	/* Compare the PSN versus the expected PSN. */
	if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
		/*
		 * Handle a sequence error.
		 * Silently drop any current message.
		 */
		qp->r_psn = psn;
inv:
		if (qp->r_state == OP(SEND_FIRST) ||
		    qp->r_state == OP(SEND_MIDDLE)) {
			/* Keep the RWQE so a restarted SEND can reuse it. */
			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
			qp->r_sge.num_sge = 0;
		} else {
			rvt_put_ss(&qp->r_sge);
		}
		qp->r_state = OP(SEND_LAST);
		/* Only a new message can resynchronize the stream. */
		switch (opcode) {
		case OP(SEND_FIRST):
		case OP(SEND_ONLY):
		case OP(SEND_ONLY_WITH_IMMEDIATE):
			goto send_first;

		case OP(RDMA_WRITE_FIRST):
		case OP(RDMA_WRITE_ONLY):
		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
			goto rdma_first;

		default:
			goto drop;
		}
	}

	/* Check for opcode sequence errors. */
	switch (qp->r_state) {
	case OP(SEND_FIRST):
	case OP(SEND_MIDDLE):
		if (opcode == OP(SEND_MIDDLE) ||
		    opcode == OP(SEND_LAST) ||
		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_MIDDLE):
		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
		    opcode == OP(RDMA_WRITE_LAST) ||
		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
			break;
		goto inv;

	default:
		if (opcode == OP(SEND_FIRST) ||
		    opcode == OP(SEND_ONLY) ||
		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
		    opcode == OP(RDMA_WRITE_FIRST) ||
		    opcode == OP(RDMA_WRITE_ONLY) ||
		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
			break;
		goto inv;
	}

	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
		rvt_comm_est(qp);

	/* OK, process the packet. */
	switch (opcode) {
	case OP(SEND_FIRST):
	case OP(SEND_ONLY):
	case OP(SEND_ONLY_WITH_IMMEDIATE):
send_first:
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			/* Reuse the RWQE saved by a prior rewind. */
			qp->r_sge = qp->s_rdma_read_sge;
		} else {
			ret = rvt_get_rwqe(qp, false);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
			/*
			 * qp->s_rdma_read_sge will be the owner
			 * of the mr references.
			 */
			qp->s_rdma_read_sge = qp->r_sge;
		}
		qp->r_rcv_len = 0;
		if (opcode == OP(SEND_ONLY))
			goto no_immediate_data;
		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
			goto send_last_imm;
		fallthrough;
	case OP(SEND_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/*
		 * There will be no padding for 9B packet but 16B packets
		 * will come in with some padding since we always add
		 * CRC and LT bytes which will need to be flit aligned
		 */
		if (unlikely(tlen != (hdrsize + pmtu + extra_bytes)))
			goto rewind;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto rewind;
		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
		break;

	case OP(SEND_LAST_WITH_IMMEDIATE):
send_last_imm:
		wc.ex.imm_data = ohdr->u.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
		goto send_last;
	case OP(SEND_LAST):
no_immediate_data:
		wc.ex.imm_data = 0;
		wc.wc_flags = 0;
send_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + extra_bytes)))
			goto rewind;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		wc.byte_len = tlen + qp->r_rcv_len;
		if (unlikely(wc.byte_len > qp->r_len))
			goto rewind;
		wc.opcode = IB_WC_RECV;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
		/* Release the saved copy's MR references. */
		rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
		wc.wr_id = qp->r_wr_id;
		wc.status = IB_WC_SUCCESS;
		wc.qp = &qp->ibqp;
		wc.src_qp = qp->remote_qpn;
		wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
		/*
		 * It seems that IB mandates the presence of an SL in a
		 * work completion only for the UD transport (see section
		 * 11.4.2 of IBTA Vol. 1).
		 *
		 * However, the way the SL is chosen below is consistent
		 * with the way that IB/qib works and is trying avoid
		 * introducing incompatibilities.
		 *
		 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
		 */
		wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
		/* zero fields that are N/A */
		wc.vendor_err = 0;
		wc.pkey_index = 0;
		wc.dlid_path_bits = 0;
		wc.port_num = 0;
		/* Signal completion event if the solicited bit is set. */
		rvt_recv_cq(qp, &wc, ib_bth_is_solicited(ohdr));
		break;

	case OP(RDMA_WRITE_FIRST):
	case OP(RDMA_WRITE_ONLY):
	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
rdma_first:
		if (unlikely(!(qp->qp_access_flags &
			       IB_ACCESS_REMOTE_WRITE))) {
			goto drop;
		}
		reth = &ohdr->u.rc.reth;
		qp->r_len = be32_to_cpu(reth->length);
		qp->r_rcv_len = 0;
		qp->r_sge.sg_list = NULL;
		if (qp->r_len != 0) {
			u32 rkey = be32_to_cpu(reth->rkey);
			u64 vaddr = be64_to_cpu(reth->vaddr);
			int ok;

			/* Check rkey */
			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
			if (unlikely(!ok))
				goto drop;
			qp->r_sge.num_sge = 1;
		} else {
			/* Zero-length write: no SGE, no MR reference. */
			qp->r_sge.num_sge = 0;
			qp->r_sge.sge.mr = NULL;
			qp->r_sge.sge.vaddr = NULL;
			qp->r_sge.sge.length = 0;
			qp->r_sge.sge.sge_length = 0;
		}
		if (opcode == OP(RDMA_WRITE_ONLY)) {
			goto rdma_last;
		} else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE)) {
			wc.ex.imm_data = ohdr->u.rc.imm_data;
			goto rdma_last_imm;
		}
		fallthrough;
	case OP(RDMA_WRITE_MIDDLE):
		/* Check for invalid length PMTU or posted rwqe len. */
		/*
		 * NOTE(review): this MIDDLE check uses "+ 4" while the
		 * SEND_MIDDLE check above uses extra_bytes; for 16B
		 * packets these differ -- confirm intended.
		 */
		if (unlikely(tlen != (hdrsize + pmtu + 4)))
			goto drop;
		qp->r_rcv_len += pmtu;
		if (unlikely(qp->r_rcv_len > qp->r_len))
			goto drop;
		rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
		break;

	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
		wc.ex.imm_data = ohdr->u.imm_data;
rdma_last_imm:
		wc.wc_flags = IB_WC_WITH_IMM;

		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		/*
		 * NOTE(review): lower bound uses pad + 4 but the
		 * subtraction below uses extra_bytes; for 16B packets
		 * these differ -- confirm the bound is intended.
		 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
			/* Drop the RWQE saved for a rewound SEND. */
			rvt_put_ss(&qp->s_rdma_read_sge);
		} else {
			ret = rvt_get_rwqe(qp, true);
			if (ret < 0)
				goto op_err;
			if (!ret)
				goto drop;
		}
		wc.byte_len = qp->r_len;
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		goto last_imm;

	case OP(RDMA_WRITE_LAST):
rdma_last:
		/* Check for invalid length. */
		/* LAST len should be >= 1 */
		if (unlikely(tlen < (hdrsize + pad + 4)))
			goto drop;
		/* Don't count the CRC. */
		tlen -= (hdrsize + extra_bytes);
		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
			goto drop;
		rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
		rvt_put_ss(&qp->r_sge);
		break;

	default:
		/* Drop packet for unknown opcodes. */
		goto drop;
	}
	/* Packet accepted: advance expected PSN and remember the opcode. */
	qp->r_psn++;
	qp->r_state = opcode;
	return;

rewind:
	/* Preserve the RWQE for a retransmitted SEND; count the drop. */
	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
	qp->r_sge.num_sge = 0;
drop:
	ibp->rvp.n_pkt_drops++;
	return;

op_err:
	rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}