1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ 4 /* Copyright (c) 2008-2019, IBM Corporation */ 5 6 #include <linux/errno.h> 7 #include <linux/types.h> 8 #include <linux/net.h> 9 #include <linux/scatterlist.h> 10 #include <linux/highmem.h> 11 12 #include <rdma/iw_cm.h> 13 #include <rdma/ib_verbs.h> 14 15 #include "siw.h" 16 #include "siw_verbs.h" 17 #include "siw_mem.h" 18 19 /* 20 * siw_rx_umem() 21 * 22 * Receive data of @len into target referenced by @dest_addr. 23 * 24 * @srx: Receive Context 25 * @umem: siw representation of target memory 26 * @dest_addr: user virtual address 27 * @len: number of bytes to place 28 */ 29 static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem, 30 u64 dest_addr, int len) 31 { 32 int copied = 0; 33 34 while (len) { 35 struct page *p; 36 int pg_off, bytes, rv; 37 void *dest; 38 39 p = siw_get_upage(umem, dest_addr); 40 if (unlikely(!p)) { 41 pr_warn("siw: %s: [QP %u]: bogus addr: %p, %p\n", 42 __func__, qp_id(rx_qp(srx)), 43 (void *)(uintptr_t)dest_addr, 44 (void *)(uintptr_t)umem->fp_addr); 45 /* siw internal error */ 46 srx->skb_copied += copied; 47 srx->skb_new -= copied; 48 49 return -EFAULT; 50 } 51 pg_off = dest_addr & ~PAGE_MASK; 52 bytes = min(len, (int)PAGE_SIZE - pg_off); 53 54 siw_dbg_qp(rx_qp(srx), "page %p, bytes=%u\n", p, bytes); 55 56 dest = kmap_atomic(p); 57 rv = skb_copy_bits(srx->skb, srx->skb_offset, dest + pg_off, 58 bytes); 59 60 if (unlikely(rv)) { 61 kunmap_atomic(dest); 62 srx->skb_copied += copied; 63 srx->skb_new -= copied; 64 65 pr_warn("siw: [QP %u]: %s, len %d, page %p, rv %d\n", 66 qp_id(rx_qp(srx)), __func__, len, p, rv); 67 68 return -EFAULT; 69 } 70 if (srx->mpa_crc_enabled) { 71 if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) { 72 siw_crc_update(&srx->mpa_crc, dest + pg_off, 73 bytes); 74 kunmap_atomic(dest); 75 } else { 76 kunmap_atomic(dest); 77 /* 78 * Do CRC on original, not target buffer. 
79 * Some user land applications may 80 * concurrently write the target buffer, 81 * which would yield a broken CRC. 82 * Walking the skb twice is very ineffcient. 83 * Folding the CRC into skb_copy_bits() 84 * would be much better, but is currently 85 * not supported. 86 */ 87 siw_crc_skb(srx, bytes); 88 } 89 } else { 90 kunmap_atomic(dest); 91 } 92 srx->skb_offset += bytes; 93 copied += bytes; 94 len -= bytes; 95 dest_addr += bytes; 96 pg_off = 0; 97 } 98 srx->skb_copied += copied; 99 srx->skb_new -= copied; 100 101 return copied; 102 } 103 104 static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len) 105 { 106 int rv; 107 108 siw_dbg_qp(rx_qp(srx), "kva: 0x%p, len: %u\n", kva, len); 109 110 rv = skb_copy_bits(srx->skb, srx->skb_offset, kva, len); 111 if (unlikely(rv)) { 112 pr_warn("siw: [QP %u]: %s, len %d, kva 0x%p, rv %d\n", 113 qp_id(rx_qp(srx)), __func__, len, kva, rv); 114 115 return rv; 116 } 117 if (srx->mpa_crc_enabled) 118 siw_crc_update(&srx->mpa_crc, kva, len); 119 120 srx->skb_offset += len; 121 srx->skb_copied += len; 122 srx->skb_new -= len; 123 124 return len; 125 } 126 127 static int siw_rx_pbl(struct siw_rx_stream *srx, int *pbl_idx, 128 struct siw_mem *mem, u64 addr, int len) 129 { 130 struct siw_pbl *pbl = mem->pbl; 131 u64 offset = addr - mem->va; 132 int copied = 0; 133 134 while (len) { 135 int bytes; 136 dma_addr_t buf_addr = 137 siw_pbl_get_buffer(pbl, offset, &bytes, pbl_idx); 138 if (!buf_addr) 139 break; 140 141 bytes = min(bytes, len); 142 if (siw_rx_kva(srx, ib_virt_dma_to_ptr(buf_addr), bytes) == 143 bytes) { 144 copied += bytes; 145 offset += bytes; 146 len -= bytes; 147 } else { 148 break; 149 } 150 } 151 return copied; 152 } 153 154 /* 155 * siw_rresp_check_ntoh() 156 * 157 * Check incoming RRESP fragment header against expected 158 * header values and update expected values for potential next 159 * fragment. 
160 * 161 * NOTE: This function must be called only if a RRESP DDP segment 162 * starts but not for fragmented consecutive pieces of an 163 * already started DDP segment. 164 */ 165 static int siw_rresp_check_ntoh(struct siw_rx_stream *srx, 166 struct siw_rx_fpdu *frx) 167 { 168 struct iwarp_rdma_rresp *rresp = &srx->hdr.rresp; 169 struct siw_wqe *wqe = &frx->wqe_active; 170 enum ddp_ecode ecode; 171 172 u32 sink_stag = be32_to_cpu(rresp->sink_stag); 173 u64 sink_to = be64_to_cpu(rresp->sink_to); 174 175 if (frx->first_ddp_seg) { 176 srx->ddp_stag = wqe->sqe.sge[0].lkey; 177 srx->ddp_to = wqe->sqe.sge[0].laddr; 178 frx->pbl_idx = 0; 179 } 180 /* Below checks extend beyond the semantics of DDP, and 181 * into RDMAP: 182 * We check if the read response matches exactly the 183 * read request which was send to the remote peer to 184 * trigger this read response. RFC5040/5041 do not 185 * always have a proper error code for the detected 186 * error cases. We choose 'base or bounds error' for 187 * cases where the inbound STag is valid, but offset 188 * or length do not match our response receive state. 
189 */ 190 if (unlikely(srx->ddp_stag != sink_stag)) { 191 pr_warn("siw: [QP %u]: rresp stag: %08x != %08x\n", 192 qp_id(rx_qp(srx)), sink_stag, srx->ddp_stag); 193 ecode = DDP_ECODE_T_INVALID_STAG; 194 goto error; 195 } 196 if (unlikely(srx->ddp_to != sink_to)) { 197 pr_warn("siw: [QP %u]: rresp off: %016llx != %016llx\n", 198 qp_id(rx_qp(srx)), (unsigned long long)sink_to, 199 (unsigned long long)srx->ddp_to); 200 ecode = DDP_ECODE_T_BASE_BOUNDS; 201 goto error; 202 } 203 if (unlikely(!frx->more_ddp_segs && 204 (wqe->processed + srx->fpdu_part_rem != wqe->bytes))) { 205 pr_warn("siw: [QP %u]: rresp len: %d != %d\n", 206 qp_id(rx_qp(srx)), 207 wqe->processed + srx->fpdu_part_rem, wqe->bytes); 208 ecode = DDP_ECODE_T_BASE_BOUNDS; 209 goto error; 210 } 211 return 0; 212 error: 213 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, 214 DDP_ETYPE_TAGGED_BUF, ecode, 0); 215 return -EINVAL; 216 } 217 218 /* 219 * siw_write_check_ntoh() 220 * 221 * Check incoming WRITE fragment header against expected 222 * header values and update expected values for potential next 223 * fragment 224 * 225 * NOTE: This function must be called only if a WRITE DDP segment 226 * starts but not for fragmented consecutive pieces of an 227 * already started DDP segment. 
228 */ 229 static int siw_write_check_ntoh(struct siw_rx_stream *srx, 230 struct siw_rx_fpdu *frx) 231 { 232 struct iwarp_rdma_write *write = &srx->hdr.rwrite; 233 enum ddp_ecode ecode; 234 235 u32 sink_stag = be32_to_cpu(write->sink_stag); 236 u64 sink_to = be64_to_cpu(write->sink_to); 237 238 if (frx->first_ddp_seg) { 239 srx->ddp_stag = sink_stag; 240 srx->ddp_to = sink_to; 241 frx->pbl_idx = 0; 242 } else { 243 if (unlikely(srx->ddp_stag != sink_stag)) { 244 pr_warn("siw: [QP %u]: write stag: %08x != %08x\n", 245 qp_id(rx_qp(srx)), sink_stag, 246 srx->ddp_stag); 247 ecode = DDP_ECODE_T_INVALID_STAG; 248 goto error; 249 } 250 if (unlikely(srx->ddp_to != sink_to)) { 251 pr_warn("siw: [QP %u]: write off: %016llx != %016llx\n", 252 qp_id(rx_qp(srx)), 253 (unsigned long long)sink_to, 254 (unsigned long long)srx->ddp_to); 255 ecode = DDP_ECODE_T_BASE_BOUNDS; 256 goto error; 257 } 258 } 259 return 0; 260 error: 261 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, 262 DDP_ETYPE_TAGGED_BUF, ecode, 0); 263 return -EINVAL; 264 } 265 266 /* 267 * siw_send_check_ntoh() 268 * 269 * Check incoming SEND fragment header against expected 270 * header values and update expected MSN if no next 271 * fragment expected 272 * 273 * NOTE: This function must be called only if a SEND DDP segment 274 * starts but not for fragmented consecutive pieces of an 275 * already started DDP segment. 
 */
static int siw_send_check_ntoh(struct siw_rx_stream *srx,
			       struct siw_rx_fpdu *frx)
{
	struct iwarp_send_inv *send = &srx->hdr.send_inv;
	struct siw_wqe *wqe = &frx->wqe_active;
	enum ddp_ecode ecode;

	u32 ddp_msn = be32_to_cpu(send->ddp_msn);
	u32 ddp_mo = be32_to_cpu(send->ddp_mo);
	u32 ddp_qn = be32_to_cpu(send->ddp_qn);

	/* a SEND must address the untagged SEND queue */
	if (unlikely(ddp_qn != RDMAP_UNTAGGED_QN_SEND)) {
		pr_warn("siw: [QP %u]: invalid ddp qn %d for send\n",
			qp_id(rx_qp(srx)), ddp_qn);
		ecode = DDP_ECODE_UT_INVALID_QN;
		goto error;
	}
	/* message sequence number must be the one currently expected */
	if (unlikely(ddp_msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND])) {
		pr_warn("siw: [QP %u]: send msn: %u != %u\n",
			qp_id(rx_qp(srx)), ddp_msn,
			srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]);
		ecode = DDP_ECODE_UT_INVALID_MSN_RANGE;
		goto error;
	}
	/* message offset must continue where placement stopped so far */
	if (unlikely(ddp_mo != wqe->processed)) {
		pr_warn("siw: [QP %u], send mo: %u != %u\n",
			qp_id(rx_qp(srx)), ddp_mo, wqe->processed);
		ecode = DDP_ECODE_UT_INVALID_MO;
		goto error;
	}
	if (frx->first_ddp_seg) {
		/* initialize user memory write position */
		frx->sge_idx = 0;
		frx->sge_off = 0;
		frx->pbl_idx = 0;

		/* only valid for SEND_INV and SEND_SE_INV operations */
		srx->inval_stag = be32_to_cpu(send->inval_stag);
	}
	/* the data of this segment must fit into what is left of the WQE */
	if (unlikely(wqe->bytes < wqe->processed + srx->fpdu_part_rem)) {
		siw_dbg_qp(rx_qp(srx), "receive space short: %d - %d < %d\n",
			   wqe->bytes, wqe->processed, srx->fpdu_part_rem);
		wqe->wc_status = SIW_WC_LOC_LEN_ERR;
		ecode = DDP_ECODE_UT_INVALID_MSN_NOBUF;
		goto error;
	}
	return 0;
error:
	siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP,
			   DDP_ETYPE_UNTAGGED_BUF, ecode, 0);
	return -EINVAL;
}

/*
 * siw_rqe_get()
 *
 * Take the next receive queue element and copy it into the QP's
 * untagged receive WQE. The element is read from the SRQ if the QP has
 * one (under srq->lock), from the QP's own receive queue otherwise.
 *
 * Returns the initialized WQE, or NULL if there is no queue, the
 * element at the current get index is not valid, or it carries more
 * than SIW_MAX_SGE SGEs.
 */
static struct siw_wqe *siw_rqe_get(struct siw_qp *qp)
{
	struct siw_rqe *rqe;
	struct siw_srq *srq;
	struct siw_wqe *wqe = NULL;
	bool srq_event = false;
	unsigned long flags;

	srq = qp->srq;
	if (srq) {
		spin_lock_irqsave(&srq->lock, flags);
		if (unlikely(!srq->num_rqe))
			goto out;

		rqe = &srq->recvq[srq->rq_get % srq->num_rqe];
	} else {
		if (unlikely(!qp->recvq))
			goto out;

		rqe = &qp->recvq[qp->rq_get % qp->attrs.rq_size];
	}
	if (likely(rqe->flags == SIW_WQE_VALID)) {
		int num_sge = rqe->num_sge;

		if (likely(num_sge <= SIW_MAX_SGE)) {
			int i = 0;

			wqe = rx_wqe(&qp->rx_untagged);
			rx_type(wqe) = SIW_OP_RECEIVE;
			wqe->wr_status = SIW_WR_INPROGRESS;
			wqe->bytes = 0;
			wqe->processed = 0;

			wqe->rqe.id = rqe->id;
			wqe->rqe.num_sge = num_sge;

			/* copy SGEs; wqe->bytes becomes the sum of all lengths */
			while (i < num_sge) {
				wqe->rqe.sge[i].laddr = rqe->sge[i].laddr;
				wqe->rqe.sge[i].lkey = rqe->sge[i].lkey;
				wqe->rqe.sge[i].length = rqe->sge[i].length;
				wqe->bytes += wqe->rqe.sge[i].length;
				wqe->mem[i] = NULL;
				i++;
			}
			/* can be re-used by appl */
			smp_store_mb(rqe->flags, 0);
		} else {
			siw_dbg_qp(qp, "too many sge's: %d\n", rqe->num_sge);
			if (srq)
				spin_unlock_irqrestore(&srq->lock, flags);
			return NULL;
		}
		if (!srq) {
			qp->rq_get++;
		} else {
			if (srq->armed) {
				/* Test SRQ limit */
				u32 off = (srq->rq_get + srq->limit) %
					  srq->num_rqe;
				struct siw_rqe *rqe2 = &srq->recvq[off];

				/*
				 * The entry 'limit' positions ahead is not
				 * valid: disarm and report the event once
				 * the lock is dropped.
				 */
				if (!(rqe2->flags & SIW_WQE_VALID)) {
					srq->armed = false;
					srq_event = true;
				}
			}
			srq->rq_get++;
		}
	}
out:
	if (srq) {
		spin_unlock_irqrestore(&srq->lock, flags);
		if (srq_event)
			siw_srq_event(srq, IB_EVENT_SRQ_LIMIT_REACHED);
	}
	return wqe;
}

/*
 * siw_rx_data()
 *
 * Place @bytes from the receive skb at @addr, selecting the placement
 * routine by the kind of memory @mem_p describes:
 *   - no memory object:     @addr is converted with ib_virt_dma_to_ptr()
 *                           and written via siw_rx_kva()
 *   - memory without PBL:   siw_rx_umem()
 *   - memory with PBL:      siw_rx_pbl()
 *
 * Returns whatever the selected routine returns: the number of bytes
 * placed, or a negative error code.
 */
static int siw_rx_data(struct siw_mem *mem_p, struct siw_rx_stream *srx,
		       unsigned int *pbl_idx, u64 addr, int bytes)
{
	int rv;

	if (mem_p->mem_obj == NULL)
		rv = siw_rx_kva(srx, ib_virt_dma_to_ptr(addr), bytes);
	else if (!mem_p->is_pbl)
		rv = siw_rx_umem(srx, mem_p->umem, addr, bytes);
	else
		rv = siw_rx_pbl(srx, pbl_idx, mem_p, addr, bytes);
	return rv;
}

/*
 * siw_proc_send:
 *
 * Process one incoming SEND and place data into memory referenced by
 * receive wqe.
 *
 * Function supports partially received sends (suspending/resuming
 * current receive wqe processing)
 *
 * return value:
 *	0:       reached the end of a DDP segment
 *	-EAGAIN: to be called again to finish the DDP segment
 *	-ENOENT: no receive WQE could be obtained
 *	other negative value: header check, SGE check or data placement
 *	         failed; a TERMINATE has been initialized
 */
int siw_proc_send(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_untagged;
	struct siw_wqe *wqe;
	u32 data_bytes; /* all data bytes available */
	u32 rcvd_bytes; /* sum of data bytes rcvd */
	int rv = 0;

	if (frx->first_ddp_seg) {
		/* a new message needs a fresh receive WQE */
		wqe = siw_rqe_get(qp);
		if (unlikely(!wqe)) {
			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_UNTAGGED_BUF,
					   DDP_ECODE_UT_INVALID_MSN_NOBUF, 0);
			return -ENOENT;
		}
	} else {
		wqe = rx_wqe(frx);
	}
	if (srx->state == SIW_GET_DATA_START) {
		rv = siw_send_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
		if (!srx->fpdu_part_rem) /* zero length SEND */
			return 0;
	}
	/* place no more than what is both outstanding and in the skb */
	data_bytes = min(srx->fpdu_part_rem, srx->skb_new);
	rcvd_bytes = 0;

	/* A zero length SEND will skip below loop */
	while (data_bytes) {
		struct ib_pd *pd;
		struct siw_mem **mem, *mem_p;
		struct siw_sge *sge;
		u32 sge_bytes; /* data bytes avail for SGE */

		sge = &wqe->rqe.sge[frx->sge_idx];

		if (!sge->length) {
			/* just skip empty sge's */
			frx->sge_idx++;
			frx->sge_off = 0;
			frx->pbl_idx = 0;
			continue;
		}
		sge_bytes = min(data_bytes, sge->length - frx->sge_off);
		mem = &wqe->mem[frx->sge_idx];

		/*
		 * check with QP's PD if no SRQ present, SRQ's PD otherwise
		 */
		pd = qp->srq == NULL ? qp->pd : qp->srq->base_srq.pd;

		rv = siw_check_sge(pd, sge, mem, IB_ACCESS_LOCAL_WRITE,
				   frx->sge_off, sge_bytes);
		if (unlikely(rv)) {
			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_CATASTROPHIC,
					   DDP_ECODE_CATASTROPHIC, 0);

			siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);
			break;
		}
		mem_p = *mem;
		rv = siw_rx_data(mem_p, srx, &frx->pbl_idx,
				 sge->laddr + frx->sge_off, sge_bytes);
		if (unlikely(rv != sge_bytes)) {
			/* account for what earlier iterations placed */
			wqe->processed += rcvd_bytes;

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_CATASTROPHIC,
					   DDP_ECODE_CATASTROPHIC, 0);
			return -EINVAL;
		}
		frx->sge_off += rv;

		/* current SGE is full: continue with the next one */
		if (frx->sge_off == sge->length) {
			frx->sge_idx++;
			frx->sge_off = 0;
			frx->pbl_idx = 0;
		}
		data_bytes -= rv;
		rcvd_bytes += rv;

		srx->fpdu_part_rem -= rv;
		srx->fpdu_part_rcvd += rv;
	}
	wqe->processed += rcvd_bytes;

	if (!srx->fpdu_part_rem)
		return 0;

	/* rv < 0 only after a failed siw_check_sge() broke out of the loop */
	return (rv < 0) ? rv : -EAGAIN;
}

/*
 * siw_proc_write:
 *
 * Place incoming WRITE after referencing and checking target buffer
 *
 * Function supports partially received WRITEs (suspending/resuming
 * current receive processing)
 *
 * return value:
 *	0:       reached the end of a DDP segment
 *	-EAGAIN: to be called again to finish the DDP segment
 *	-EINVAL: header, STag or memory check, or data placement failed;
 *	         a TERMINATE has been initialized
 */
int siw_proc_write(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;
	struct siw_rx_fpdu *frx = &qp->rx_tagged;
	struct siw_mem *mem;
	int bytes, rv;

	if (srx->state == SIW_GET_DATA_START) {
		if (!srx->fpdu_part_rem) /* zero length WRITE */
			return 0;

		rv = siw_write_check_ntoh(srx, frx);
		if (unlikely(rv)) {
			siw_qp_event(qp, IB_EVENT_QP_FATAL);
			return rv;
		}
	}
	bytes = min(srx->fpdu_part_rem, srx->skb_new);

	if (frx->first_ddp_seg) {
		struct siw_wqe *wqe = rx_wqe(frx);

		/* resolve target memory; the lookup uses the STag without
		 * its low 8 bits
		 */
		rx_mem(frx) = siw_mem_id2obj(qp->sdev, srx->ddp_stag >> 8);
		if (unlikely(!rx_mem(frx))) {
			siw_dbg_qp(qp,
				   "sink stag not found/invalid, stag 0x%08x\n",
				   srx->ddp_stag);

			siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
					   DDP_ETYPE_TAGGED_BUF,
					   DDP_ECODE_T_INVALID_STAG, 0);
			return -EINVAL;
		}
		wqe->rqe.num_sge = 1;
		rx_type(wqe) = SIW_OP_WRITE;
		wqe->wr_status = SIW_WR_INPROGRESS;
	}
	mem = rx_mem(frx);

	/*
	 * Check if application re-registered memory with different
	 * key field of STag.
	 */
	if (unlikely(mem->stag != srx->ddp_stag)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_TAGGED_BUF,
				   DDP_ECODE_T_INVALID_STAG, 0);
		return -EINVAL;
	}
	/* target of this piece: segment offset plus bytes placed so far */
	rv = siw_check_mem(qp->pd, mem, srx->ddp_to + srx->fpdu_part_rcvd,
			   IB_ACCESS_REMOTE_WRITE, bytes);
	if (unlikely(rv)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_TAGGED_BUF, siw_tagged_error(-rv),
				   0);

		siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR);

		return -EINVAL;
	}

	rv = siw_rx_data(mem, srx, &frx->pbl_idx,
			 srx->ddp_to + srx->fpdu_part_rcvd, bytes);
	if (unlikely(rv != bytes)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_DDP,
				   DDP_ETYPE_CATASTROPHIC,
				   DDP_ECODE_CATASTROPHIC, 0);
		return -EINVAL;
	}
	srx->fpdu_part_rem -= rv;
	srx->fpdu_part_rcvd += rv;

	if (!srx->fpdu_part_rem) {
		/* segment done: advance the offset expected for the next one */
		srx->ddp_to += srx->fpdu_part_rcvd;
		return 0;
	}
	return -EAGAIN;
}

/*
 * Inbound RREQ's cannot carry user data.
 *
 * Returns 0 if no data is outstanding for the current FPDU, -EPROTO
 * (after logging the received MPA length) otherwise.
 */
int siw_proc_rreq(struct siw_qp *qp)
{
	struct siw_rx_stream *srx = &qp->rx_stream;

	if (!srx->fpdu_part_rem)
		return 0;

	pr_warn("siw: [QP %u]: rreq with mpa len %d\n", qp_id(qp),
		be16_to_cpu(srx->hdr.ctrl.mpa_len));

	return -EPROTO;
}

/*
 * siw_init_rresp:
 *
 * Process inbound RDMA READ REQ. Produce a pseudo READ RESPONSE WQE.
 * Put it at the tail of the IRQ, if there is another WQE currently in
 * transmit processing. If not, make it the current WQE to be processed
 * and schedule transmit processing.
 *
 * Can be called from softirq context and from process
 * context (RREAD socket loopback case!)
650 * 651 * return value: 652 * 0: success, 653 * failure code otherwise 654 */ 655 656 static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) 657 { 658 struct siw_wqe *tx_work = tx_wqe(qp); 659 struct siw_sqe *resp; 660 661 uint64_t raddr = be64_to_cpu(srx->hdr.rreq.sink_to), 662 laddr = be64_to_cpu(srx->hdr.rreq.source_to); 663 uint32_t length = be32_to_cpu(srx->hdr.rreq.read_size), 664 lkey = be32_to_cpu(srx->hdr.rreq.source_stag), 665 rkey = be32_to_cpu(srx->hdr.rreq.sink_stag), 666 msn = be32_to_cpu(srx->hdr.rreq.ddp_msn); 667 668 int run_sq = 1, rv = 0; 669 unsigned long flags; 670 671 if (unlikely(msn != srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ])) { 672 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, 673 DDP_ETYPE_UNTAGGED_BUF, 674 DDP_ECODE_UT_INVALID_MSN_RANGE, 0); 675 return -EPROTO; 676 } 677 spin_lock_irqsave(&qp->sq_lock, flags); 678 679 if (unlikely(!qp->attrs.irq_size)) { 680 run_sq = 0; 681 goto error_irq; 682 } 683 if (tx_work->wr_status == SIW_WR_IDLE) { 684 /* 685 * immediately schedule READ response w/o 686 * consuming IRQ entry: IRQ must be empty. 687 */ 688 tx_work->processed = 0; 689 tx_work->mem[0] = NULL; 690 tx_work->wr_status = SIW_WR_QUEUED; 691 resp = &tx_work->sqe; 692 } else { 693 resp = irq_alloc_free(qp); 694 run_sq = 0; 695 } 696 if (likely(resp)) { 697 resp->opcode = SIW_OP_READ_RESPONSE; 698 699 resp->sge[0].length = length; 700 resp->sge[0].laddr = laddr; 701 resp->sge[0].lkey = lkey; 702 703 /* Keep aside message sequence number for potential 704 * error reporting during Read Response generation. 705 */ 706 resp->sge[1].length = msn; 707 708 resp->raddr = raddr; 709 resp->rkey = rkey; 710 resp->num_sge = length ? 
1 : 0; 711 712 /* RRESP now valid as current TX wqe or placed into IRQ */ 713 smp_store_mb(resp->flags, SIW_WQE_VALID); 714 } else { 715 error_irq: 716 pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", 717 qp_id(qp), qp->attrs.irq_size); 718 719 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, 720 RDMAP_ETYPE_REMOTE_OPERATION, 721 RDMAP_ECODE_CATASTROPHIC_STREAM, 0); 722 rv = -EPROTO; 723 } 724 725 spin_unlock_irqrestore(&qp->sq_lock, flags); 726 727 if (run_sq) 728 rv = siw_sq_start(qp); 729 730 return rv; 731 } 732 733 /* 734 * Only called at start of Read.Resonse processing. 735 * Transfer pending Read from tip of ORQ into currrent rx wqe, 736 * but keep ORQ entry valid until Read.Response processing done. 737 * No Queue locking needed. 738 */ 739 static int siw_orqe_start_rx(struct siw_qp *qp) 740 { 741 struct siw_sqe *orqe; 742 struct siw_wqe *wqe = NULL; 743 744 if (unlikely(!qp->attrs.orq_size)) 745 return -EPROTO; 746 747 /* make sure ORQ indices are current */ 748 smp_mb(); 749 750 orqe = orq_get_current(qp); 751 if (READ_ONCE(orqe->flags) & SIW_WQE_VALID) { 752 /* RRESP is a TAGGED RDMAP operation */ 753 wqe = rx_wqe(&qp->rx_tagged); 754 wqe->sqe.id = orqe->id; 755 wqe->sqe.opcode = orqe->opcode; 756 wqe->sqe.sge[0].laddr = orqe->sge[0].laddr; 757 wqe->sqe.sge[0].lkey = orqe->sge[0].lkey; 758 wqe->sqe.sge[0].length = orqe->sge[0].length; 759 wqe->sqe.flags = orqe->flags; 760 wqe->sqe.num_sge = 1; 761 wqe->bytes = orqe->sge[0].length; 762 wqe->processed = 0; 763 wqe->mem[0] = NULL; 764 /* make sure WQE is completely written before valid */ 765 smp_wmb(); 766 wqe->wr_status = SIW_WR_INPROGRESS; 767 768 return 0; 769 } 770 return -EPROTO; 771 } 772 773 /* 774 * siw_proc_rresp: 775 * 776 * Place incoming RRESP data into memory referenced by RREQ WQE 777 * which is at the tip of the ORQ 778 * 779 * Function supports partially received RRESP's (suspending/resuming 780 * current receive processing) 781 */ 782 int siw_proc_rresp(struct siw_qp *qp) 783 { 784 
struct siw_rx_stream *srx = &qp->rx_stream; 785 struct siw_rx_fpdu *frx = &qp->rx_tagged; 786 struct siw_wqe *wqe = rx_wqe(frx); 787 struct siw_mem **mem, *mem_p; 788 struct siw_sge *sge; 789 int bytes, rv; 790 791 if (frx->first_ddp_seg) { 792 if (unlikely(wqe->wr_status != SIW_WR_IDLE)) { 793 pr_warn("siw: [QP %u]: proc RRESP: status %d, op %d\n", 794 qp_id(qp), wqe->wr_status, wqe->sqe.opcode); 795 rv = -EPROTO; 796 goto error_term; 797 } 798 /* 799 * fetch pending RREQ from orq 800 */ 801 rv = siw_orqe_start_rx(qp); 802 if (rv) { 803 pr_warn("siw: [QP %u]: ORQ empty, size %d\n", 804 qp_id(qp), qp->attrs.orq_size); 805 goto error_term; 806 } 807 rv = siw_rresp_check_ntoh(srx, frx); 808 if (unlikely(rv)) { 809 siw_qp_event(qp, IB_EVENT_QP_FATAL); 810 return rv; 811 } 812 } else { 813 if (unlikely(wqe->wr_status != SIW_WR_INPROGRESS)) { 814 pr_warn("siw: [QP %u]: resume RRESP: status %d\n", 815 qp_id(qp), wqe->wr_status); 816 rv = -EPROTO; 817 goto error_term; 818 } 819 } 820 if (!srx->fpdu_part_rem) /* zero length RRESPONSE */ 821 return 0; 822 823 sge = wqe->sqe.sge; /* there is only one */ 824 mem = &wqe->mem[0]; 825 826 if (!(*mem)) { 827 /* 828 * check target memory which resolves memory on first fragment 829 */ 830 rv = siw_check_sge(qp->pd, sge, mem, IB_ACCESS_LOCAL_WRITE, 0, 831 wqe->bytes); 832 if (unlikely(rv)) { 833 siw_dbg_qp(qp, "target mem check: %d\n", rv); 834 wqe->wc_status = SIW_WC_LOC_PROT_ERR; 835 836 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, 837 DDP_ETYPE_TAGGED_BUF, 838 siw_tagged_error(-rv), 0); 839 840 siw_qp_event(qp, IB_EVENT_QP_ACCESS_ERR); 841 842 return -EINVAL; 843 } 844 } 845 mem_p = *mem; 846 847 if (unlikely(wqe->processed + srx->fpdu_part_rem > wqe->bytes)) { 848 siw_dbg_qp(qp, "rresp len: %d + %d > %d\n", 849 wqe->processed, srx->fpdu_part_rem, wqe->bytes); 850 wqe->wc_status = SIW_WC_LOC_LEN_ERR; 851 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, 852 DDP_ETYPE_TAGGED_BUF, 853 DDP_ECODE_T_BASE_BOUNDS, 0); 854 return -EINVAL; 855 
} 856 bytes = min(srx->fpdu_part_rem, srx->skb_new); 857 rv = siw_rx_data(mem_p, srx, &frx->pbl_idx, 858 sge->laddr + wqe->processed, bytes); 859 if (rv != bytes) { 860 wqe->wc_status = SIW_WC_GENERAL_ERR; 861 rv = -EINVAL; 862 goto error_term; 863 } 864 srx->fpdu_part_rem -= rv; 865 srx->fpdu_part_rcvd += rv; 866 wqe->processed += rv; 867 868 if (!srx->fpdu_part_rem) { 869 srx->ddp_to += srx->fpdu_part_rcvd; 870 return 0; 871 } 872 return -EAGAIN; 873 874 error_term: 875 siw_init_terminate(qp, TERM_ERROR_LAYER_DDP, DDP_ETYPE_CATASTROPHIC, 876 DDP_ECODE_CATASTROPHIC, 0); 877 return rv; 878 } 879 880 static void siw_update_skb_rcvd(struct siw_rx_stream *srx, u16 length) 881 { 882 srx->skb_offset += length; 883 srx->skb_new -= length; 884 srx->skb_copied += length; 885 } 886 887 int siw_proc_terminate(struct siw_qp *qp) 888 { 889 struct siw_rx_stream *srx = &qp->rx_stream; 890 struct sk_buff *skb = srx->skb; 891 struct iwarp_terminate *term = &srx->hdr.terminate; 892 union iwarp_hdr term_info; 893 u8 *infop = (u8 *)&term_info; 894 enum rdma_opcode op; 895 u16 to_copy = sizeof(struct iwarp_ctrl); 896 897 pr_warn("siw: got TERMINATE. 
layer %d, type %d, code %d\n", 898 __rdmap_term_layer(term), __rdmap_term_etype(term), 899 __rdmap_term_ecode(term)); 900 901 if (be32_to_cpu(term->ddp_qn) != RDMAP_UNTAGGED_QN_TERMINATE || 902 be32_to_cpu(term->ddp_msn) != 903 qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] || 904 be32_to_cpu(term->ddp_mo) != 0) { 905 pr_warn("siw: rx bogus TERM [QN x%08x, MSN x%08x, MO x%08x]\n", 906 be32_to_cpu(term->ddp_qn), be32_to_cpu(term->ddp_msn), 907 be32_to_cpu(term->ddp_mo)); 908 return -ECONNRESET; 909 } 910 /* 911 * Receive remaining pieces of TERM if indicated 912 */ 913 if (!term->flag_m) 914 return -ECONNRESET; 915 916 /* Do not take the effort to reassemble a network fragmented 917 * TERM message 918 */ 919 if (srx->skb_new < sizeof(struct iwarp_ctrl_tagged)) 920 return -ECONNRESET; 921 922 memset(infop, 0, sizeof(term_info)); 923 924 skb_copy_bits(skb, srx->skb_offset, infop, to_copy); 925 926 op = __rdmap_get_opcode(&term_info.ctrl); 927 if (op >= RDMAP_TERMINATE) 928 goto out; 929 930 infop += to_copy; 931 siw_update_skb_rcvd(srx, to_copy); 932 srx->fpdu_part_rcvd += to_copy; 933 srx->fpdu_part_rem -= to_copy; 934 935 to_copy = iwarp_pktinfo[op].hdr_len - to_copy; 936 937 /* Again, no network fragmented TERM's */ 938 if (to_copy + MPA_CRC_SIZE > srx->skb_new) 939 return -ECONNRESET; 940 941 skb_copy_bits(skb, srx->skb_offset, infop, to_copy); 942 943 if (term->flag_r) { 944 siw_dbg_qp(qp, "TERM reports RDMAP hdr type %u, len %u (%s)\n", 945 op, be16_to_cpu(term_info.ctrl.mpa_len), 946 term->flag_m ? "valid" : "invalid"); 947 } else if (term->flag_d) { 948 siw_dbg_qp(qp, "TERM reports DDP hdr type %u, len %u (%s)\n", 949 op, be16_to_cpu(term_info.ctrl.mpa_len), 950 term->flag_m ? 
"valid" : "invalid"); 951 } 952 out: 953 siw_update_skb_rcvd(srx, to_copy); 954 srx->fpdu_part_rcvd += to_copy; 955 srx->fpdu_part_rem -= to_copy; 956 957 return -ECONNRESET; 958 } 959 960 static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) 961 { 962 struct sk_buff *skb = srx->skb; 963 int avail = min(srx->skb_new, srx->fpdu_part_rem); 964 u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; 965 __wsum crc_in, crc_own = 0; 966 967 siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", 968 srx->fpdu_part_rem, srx->skb_new, srx->pad); 969 970 skb_copy_bits(skb, srx->skb_offset, tbuf, avail); 971 972 siw_update_skb_rcvd(srx, avail); 973 srx->fpdu_part_rem -= avail; 974 975 if (srx->fpdu_part_rem) 976 return -EAGAIN; 977 978 if (!srx->mpa_crc_enabled) 979 return 0; 980 981 if (srx->pad) 982 siw_crc_update(&srx->mpa_crc, tbuf, srx->pad); 983 /* 984 * CRC32 is computed, transmitted and received directly in NBO, 985 * so there's never a reason to convert byte order. 986 */ 987 siw_crc_final(&srx->mpa_crc, (u8 *)&crc_own); 988 crc_in = (__force __wsum)srx->trailer.crc; 989 990 if (unlikely(crc_in != crc_own)) { 991 pr_warn("siw: crc error. 
in: %08x, own %08x, op %u\n", 992 crc_in, crc_own, qp->rx_stream.rdmap_op); 993 994 siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, 995 LLP_ETYPE_MPA, 996 LLP_ECODE_RECEIVED_CRC, 0); 997 return -EINVAL; 998 } 999 return 0; 1000 } 1001 1002 #define MIN_DDP_HDR sizeof(struct iwarp_ctrl_tagged) 1003 1004 static int siw_get_hdr(struct siw_rx_stream *srx) 1005 { 1006 struct sk_buff *skb = srx->skb; 1007 struct siw_qp *qp = rx_qp(srx); 1008 struct iwarp_ctrl *c_hdr = &srx->hdr.ctrl; 1009 struct siw_rx_fpdu *frx; 1010 u8 opcode; 1011 int bytes; 1012 1013 if (srx->fpdu_part_rcvd < MIN_DDP_HDR) { 1014 /* 1015 * copy a mimimum sized (tagged) DDP frame control part 1016 */ 1017 bytes = min_t(int, srx->skb_new, 1018 MIN_DDP_HDR - srx->fpdu_part_rcvd); 1019 1020 skb_copy_bits(skb, srx->skb_offset, 1021 (char *)c_hdr + srx->fpdu_part_rcvd, bytes); 1022 1023 siw_update_skb_rcvd(srx, bytes); 1024 srx->fpdu_part_rcvd += bytes; 1025 if (srx->fpdu_part_rcvd < MIN_DDP_HDR) 1026 return -EAGAIN; 1027 1028 if (unlikely(__ddp_get_version(c_hdr) != DDP_VERSION)) { 1029 enum ddp_etype etype; 1030 enum ddp_ecode ecode; 1031 1032 pr_warn("siw: received ddp version unsupported %d\n", 1033 __ddp_get_version(c_hdr)); 1034 1035 if (c_hdr->ddp_rdmap_ctrl & DDP_FLAG_TAGGED) { 1036 etype = DDP_ETYPE_TAGGED_BUF; 1037 ecode = DDP_ECODE_T_VERSION; 1038 } else { 1039 etype = DDP_ETYPE_UNTAGGED_BUF; 1040 ecode = DDP_ECODE_UT_VERSION; 1041 } 1042 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_DDP, 1043 etype, ecode, 0); 1044 return -EINVAL; 1045 } 1046 if (unlikely(__rdmap_get_version(c_hdr) != RDMAP_VERSION)) { 1047 pr_warn("siw: received rdmap version unsupported %d\n", 1048 __rdmap_get_version(c_hdr)); 1049 1050 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP, 1051 RDMAP_ETYPE_REMOTE_OPERATION, 1052 RDMAP_ECODE_VERSION, 0); 1053 return -EINVAL; 1054 } 1055 opcode = __rdmap_get_opcode(c_hdr); 1056 1057 if (opcode > RDMAP_TERMINATE) { 1058 pr_warn("siw: received unknown packet type %u\n", 1059 
opcode); 1060 1061 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_RDMAP, 1062 RDMAP_ETYPE_REMOTE_OPERATION, 1063 RDMAP_ECODE_OPCODE, 0); 1064 return -EINVAL; 1065 } 1066 siw_dbg_qp(rx_qp(srx), "new header, opcode %u\n", opcode); 1067 } else { 1068 opcode = __rdmap_get_opcode(c_hdr); 1069 } 1070 set_rx_fpdu_context(qp, opcode); 1071 frx = qp->rx_fpdu; 1072 1073 /* 1074 * Figure out len of current hdr: variable length of 1075 * iwarp hdr may force us to copy hdr information in 1076 * two steps. Only tagged DDP messages are already 1077 * completely received. 1078 */ 1079 if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { 1080 int hdrlen = iwarp_pktinfo[opcode].hdr_len; 1081 1082 bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new); 1083 1084 skb_copy_bits(skb, srx->skb_offset, 1085 (char *)c_hdr + srx->fpdu_part_rcvd, bytes); 1086 1087 siw_update_skb_rcvd(srx, bytes); 1088 srx->fpdu_part_rcvd += bytes; 1089 if (srx->fpdu_part_rcvd < hdrlen) 1090 return -EAGAIN; 1091 } 1092 1093 /* 1094 * Peer-controlled mpa_len must not underflow srx->fpdu_part_rem 1095 * in siw_tcp_rx_data(); a negative value flows as a signed copy 1096 * length into siw_check_mem() and skb_copy_bits(). 1097 */ 1098 if (unlikely(be16_to_cpu(c_hdr->mpa_len) + MPA_HDR_SIZE < 1099 iwarp_pktinfo[opcode].hdr_len)) { 1100 pr_warn_ratelimited("siw: short mpa_len %u for opcode %u (hdr_len %u)\n", 1101 be16_to_cpu(c_hdr->mpa_len), opcode, 1102 iwarp_pktinfo[opcode].hdr_len); 1103 siw_init_terminate(rx_qp(srx), TERM_ERROR_LAYER_LLP, 1104 LLP_ETYPE_MPA, LLP_ECODE_FPDU_START, 0); 1105 return -EINVAL; 1106 } 1107 1108 /* 1109 * DDP/RDMAP header receive completed. Check if the current 1110 * DDP segment starts a new RDMAP message or continues a previously 1111 * started RDMAP message. 
1112 * 1113 * Alternating reception of DDP segments (or FPDUs) from incomplete 1114 * tagged and untagged RDMAP messages is supported, as long as 1115 * the current tagged or untagged message gets eventually completed 1116 * w/o intersection from another message of the same type 1117 * (tagged/untagged). E.g., a WRITE can get intersected by a SEND, 1118 * but not by a READ RESPONSE etc. 1119 */ 1120 if (srx->mpa_crc_enabled) { 1121 /* 1122 * Restart CRC computation 1123 */ 1124 siw_crc_init(&srx->mpa_crc); 1125 siw_crc_update(&srx->mpa_crc, c_hdr, srx->fpdu_part_rcvd); 1126 } 1127 if (frx->more_ddp_segs) { 1128 frx->first_ddp_seg = 0; 1129 if (frx->prev_rdmap_op != opcode) { 1130 pr_warn("siw: packet intersection: %u : %u\n", 1131 frx->prev_rdmap_op, opcode); 1132 /* 1133 * The last inbound RDMA operation of same type 1134 * (tagged or untagged) is left unfinished. 1135 * To complete it in error, make it the current 1136 * operation again, even with the header already 1137 * overwritten. For error handling, only the opcode 1138 * and current rx context are relevant. 1139 */ 1140 set_rx_fpdu_context(qp, frx->prev_rdmap_op); 1141 __rdmap_set_opcode(c_hdr, frx->prev_rdmap_op); 1142 return -EPROTO; 1143 } 1144 } else { 1145 frx->prev_rdmap_op = opcode; 1146 frx->first_ddp_seg = 1; 1147 } 1148 frx->more_ddp_segs = c_hdr->ddp_rdmap_ctrl & DDP_FLAG_LAST ? 
0 : 1; 1149 1150 return 0; 1151 } 1152 1153 static int siw_check_tx_fence(struct siw_qp *qp) 1154 { 1155 struct siw_wqe *tx_waiting = tx_wqe(qp); 1156 struct siw_sqe *rreq; 1157 int resume_tx = 0, rv = 0; 1158 unsigned long flags; 1159 1160 spin_lock_irqsave(&qp->orq_lock, flags); 1161 1162 /* free current orq entry */ 1163 rreq = orq_get_current(qp); 1164 WRITE_ONCE(rreq->flags, 0); 1165 1166 qp->orq_get++; 1167 1168 if (qp->tx_ctx.orq_fence) { 1169 if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { 1170 pr_warn("siw: [QP %u]: fence resume: bad status %d\n", 1171 qp_id(qp), tx_waiting->wr_status); 1172 rv = -EPROTO; 1173 goto out; 1174 } 1175 /* resume SQ processing, if possible */ 1176 if (tx_waiting->sqe.opcode == SIW_OP_READ || 1177 tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { 1178 1179 /* SQ processing was stopped because of a full ORQ */ 1180 rreq = orq_get_free(qp); 1181 if (unlikely(!rreq)) { 1182 pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); 1183 rv = -EPROTO; 1184 goto out; 1185 } 1186 siw_read_to_orq(rreq, &tx_waiting->sqe); 1187 1188 qp->orq_put++; 1189 qp->tx_ctx.orq_fence = 0; 1190 resume_tx = 1; 1191 1192 } else if (siw_orq_empty(qp)) { 1193 /* 1194 * SQ processing was stopped by fenced work request. 1195 * Resume since all previous Read's are now completed. 1196 */ 1197 qp->tx_ctx.orq_fence = 0; 1198 resume_tx = 1; 1199 } 1200 } 1201 out: 1202 spin_unlock_irqrestore(&qp->orq_lock, flags); 1203 1204 if (resume_tx) 1205 rv = siw_sq_start(qp); 1206 1207 return rv; 1208 } 1209 1210 /* 1211 * siw_rdmap_complete() 1212 * 1213 * Complete processing of an RDMA message after receiving all 1214 * DDP segmens or ABort processing after encountering error case. 1215 * 1216 * o SENDs + RRESPs will need for completion, 1217 * o RREQs need for READ RESPONSE initialization 1218 * o WRITEs need memory dereferencing 1219 * 1220 * TODO: Failed WRITEs need local error to be surfaced. 
1221 */ 1222 static int siw_rdmap_complete(struct siw_qp *qp, int error) 1223 { 1224 struct siw_rx_stream *srx = &qp->rx_stream; 1225 struct siw_wqe *wqe = rx_wqe(qp->rx_fpdu); 1226 enum siw_wc_status wc_status = wqe->wc_status; 1227 u8 opcode = __rdmap_get_opcode(&srx->hdr.ctrl); 1228 int rv = 0; 1229 1230 switch (opcode) { 1231 case RDMAP_SEND_SE: 1232 case RDMAP_SEND_SE_INVAL: 1233 wqe->rqe.flags |= SIW_WQE_SOLICITED; 1234 fallthrough; 1235 1236 case RDMAP_SEND: 1237 case RDMAP_SEND_INVAL: 1238 if (wqe->wr_status == SIW_WR_IDLE) 1239 break; 1240 1241 srx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]++; 1242 1243 if (error != 0 && wc_status == SIW_WC_SUCCESS) 1244 wc_status = SIW_WC_GENERAL_ERR; 1245 /* 1246 * Handle STag invalidation request 1247 */ 1248 if (wc_status == SIW_WC_SUCCESS && 1249 (opcode == RDMAP_SEND_INVAL || 1250 opcode == RDMAP_SEND_SE_INVAL)) { 1251 rv = siw_invalidate_stag(qp->pd, srx->inval_stag); 1252 if (rv) { 1253 siw_init_terminate( 1254 qp, TERM_ERROR_LAYER_RDMAP, 1255 rv == -EACCES ? 1256 RDMAP_ETYPE_REMOTE_PROTECTION : 1257 RDMAP_ETYPE_REMOTE_OPERATION, 1258 RDMAP_ECODE_CANNOT_INVALIDATE, 0); 1259 1260 wc_status = SIW_WC_REM_INV_REQ_ERR; 1261 } 1262 rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed, 1263 rv ? 
0 : srx->inval_stag, 1264 wc_status); 1265 } else { 1266 rv = siw_rqe_complete(qp, &wqe->rqe, wqe->processed, 1267 0, wc_status); 1268 } 1269 siw_wqe_put_mem(wqe, SIW_OP_RECEIVE); 1270 break; 1271 1272 case RDMAP_RDMA_READ_RESP: 1273 if (wqe->wr_status == SIW_WR_IDLE) 1274 break; 1275 1276 if (error != 0) { 1277 if ((srx->state == SIW_GET_HDR && 1278 qp->rx_fpdu->first_ddp_seg) || error == -ENODATA) 1279 /* possible RREQ in ORQ left untouched */ 1280 break; 1281 1282 if (wc_status == SIW_WC_SUCCESS) 1283 wc_status = SIW_WC_GENERAL_ERR; 1284 } else if (rdma_is_kernel_res(&qp->base_qp.res) && 1285 rx_type(wqe) == SIW_OP_READ_LOCAL_INV) { 1286 /* 1287 * Handle any STag invalidation request 1288 */ 1289 rv = siw_invalidate_stag(qp->pd, wqe->sqe.sge[0].lkey); 1290 if (rv) { 1291 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, 1292 RDMAP_ETYPE_CATASTROPHIC, 1293 RDMAP_ECODE_UNSPECIFIED, 0); 1294 1295 if (wc_status == SIW_WC_SUCCESS) { 1296 wc_status = SIW_WC_GENERAL_ERR; 1297 error = rv; 1298 } 1299 } 1300 } 1301 /* 1302 * All errors turn the wqe into signalled. 1303 */ 1304 if ((wqe->sqe.flags & SIW_WQE_SIGNALLED) || error != 0) 1305 rv = siw_sqe_complete(qp, &wqe->sqe, wqe->processed, 1306 wc_status); 1307 siw_wqe_put_mem(wqe, SIW_OP_READ); 1308 1309 if (!error) { 1310 rv = siw_check_tx_fence(qp); 1311 } else { 1312 /* Disable current ORQ element */ 1313 if (qp->attrs.orq_size) 1314 WRITE_ONCE(orq_get_current(qp)->flags, 0); 1315 } 1316 break; 1317 1318 case RDMAP_RDMA_READ_REQ: 1319 if (!error) { 1320 rv = siw_init_rresp(qp, srx); 1321 srx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]++; 1322 } 1323 break; 1324 1325 case RDMAP_RDMA_WRITE: 1326 if (wqe->wr_status == SIW_WR_IDLE) 1327 break; 1328 1329 /* 1330 * Free References from memory object if 1331 * attached to receive context (inbound WRITE). 1332 * While a zero-length WRITE is allowed, 1333 * no memory reference got created. 
1334 */ 1335 if (rx_mem(&qp->rx_tagged)) { 1336 siw_mem_put(rx_mem(&qp->rx_tagged)); 1337 rx_mem(&qp->rx_tagged) = NULL; 1338 } 1339 break; 1340 1341 default: 1342 break; 1343 } 1344 wqe->wr_status = SIW_WR_IDLE; 1345 1346 return rv; 1347 } 1348 1349 /* 1350 * siw_tcp_rx_data() 1351 * 1352 * Main routine to consume inbound TCP payload 1353 * 1354 * @rd_desc: read descriptor 1355 * @skb: socket buffer 1356 * @off: offset in skb 1357 * @len: skb->len - offset : payload in skb 1358 */ 1359 int siw_tcp_rx_data(read_descriptor_t *rd_desc, struct sk_buff *skb, 1360 unsigned int off, size_t len) 1361 { 1362 struct siw_qp *qp = rd_desc->arg.data; 1363 struct siw_rx_stream *srx = &qp->rx_stream; 1364 int rv; 1365 1366 srx->skb = skb; 1367 srx->skb_new = skb->len - off; 1368 srx->skb_offset = off; 1369 srx->skb_copied = 0; 1370 1371 siw_dbg_qp(qp, "new data, len %d\n", srx->skb_new); 1372 1373 while (srx->skb_new) { 1374 int run_completion = 1; 1375 1376 if (unlikely(srx->rx_suspend)) { 1377 /* Do not process any more data */ 1378 srx->skb_copied += srx->skb_new; 1379 break; 1380 } 1381 switch (srx->state) { 1382 case SIW_GET_HDR: 1383 rv = siw_get_hdr(srx); 1384 if (!rv) { 1385 srx->fpdu_part_rem = 1386 be16_to_cpu(srx->hdr.ctrl.mpa_len) - 1387 srx->fpdu_part_rcvd + MPA_HDR_SIZE; 1388 1389 if (srx->fpdu_part_rem) 1390 srx->pad = -srx->fpdu_part_rem & 0x3; 1391 else 1392 srx->pad = 0; 1393 1394 srx->state = SIW_GET_DATA_START; 1395 srx->fpdu_part_rcvd = 0; 1396 } 1397 break; 1398 1399 case SIW_GET_DATA_MORE: 1400 /* 1401 * Another data fragment of the same DDP segment. 1402 * Setting first_ddp_seg = 0 avoids repeating 1403 * initializations that shall occur only once per 1404 * DDP segment. 1405 */ 1406 qp->rx_fpdu->first_ddp_seg = 0; 1407 fallthrough; 1408 1409 case SIW_GET_DATA_START: 1410 /* 1411 * Headers will be checked by the opcode-specific 1412 * data receive function below. 
1413 */ 1414 rv = iwarp_pktinfo[qp->rx_stream.rdmap_op].rx_data(qp); 1415 if (!rv) { 1416 int mpa_len = 1417 be16_to_cpu(srx->hdr.ctrl.mpa_len) 1418 + MPA_HDR_SIZE; 1419 1420 srx->fpdu_part_rem = (-mpa_len & 0x3) 1421 + MPA_CRC_SIZE; 1422 srx->fpdu_part_rcvd = 0; 1423 srx->state = SIW_GET_TRAILER; 1424 } else { 1425 if (unlikely(rv == -ECONNRESET)) 1426 run_completion = 0; 1427 else 1428 srx->state = SIW_GET_DATA_MORE; 1429 } 1430 break; 1431 1432 case SIW_GET_TRAILER: 1433 /* 1434 * read CRC + any padding 1435 */ 1436 rv = siw_get_trailer(qp, srx); 1437 if (likely(!rv)) { 1438 /* 1439 * FPDU completed. 1440 * complete RDMAP message if last fragment 1441 */ 1442 srx->state = SIW_GET_HDR; 1443 srx->fpdu_part_rcvd = 0; 1444 1445 if (!(srx->hdr.ctrl.ddp_rdmap_ctrl & 1446 DDP_FLAG_LAST)) 1447 /* more frags */ 1448 break; 1449 1450 rv = siw_rdmap_complete(qp, 0); 1451 run_completion = 0; 1452 } 1453 break; 1454 1455 default: 1456 pr_warn("QP[%u]: RX out of state\n", qp_id(qp)); 1457 rv = -EPROTO; 1458 run_completion = 0; 1459 } 1460 if (unlikely(rv != 0 && rv != -EAGAIN)) { 1461 if ((srx->state > SIW_GET_HDR || 1462 (qp->rx_fpdu && qp->rx_fpdu->more_ddp_segs)) && 1463 run_completion) 1464 siw_rdmap_complete(qp, rv); 1465 1466 siw_dbg_qp(qp, "rx error %d, rx state %d\n", rv, 1467 srx->state); 1468 1469 siw_qp_cm_drop(qp, 1); 1470 1471 break; 1472 } 1473 if (rv) { 1474 siw_dbg_qp(qp, "fpdu fragment, state %d, missing %d\n", 1475 srx->state, srx->fpdu_part_rem); 1476 break; 1477 } 1478 } 1479 return srx->skb_copied; 1480 } 1481