// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

enum resp_states {
	RESPST_NONE,
	RESPST_GET_REQ,
	RESPST_CHK_PSN,
	RESPST_CHK_OP_SEQ,
	RESPST_CHK_OP_VALID,
	RESPST_CHK_RESOURCE,
	RESPST_CHK_LENGTH,
	RESPST_CHK_RKEY,
	RESPST_EXECUTE,
	RESPST_READ_REPLY,
	RESPST_ATOMIC_REPLY,
	RESPST_ATOMIC_WRITE_REPLY,
	RESPST_PROCESS_FLUSH,
	RESPST_COMPLETE,
	RESPST_ACKNOWLEDGE,
	RESPST_CLEANUP,
	RESPST_DUPLICATE_REQUEST,
	RESPST_ERR_MALFORMED_WQE,
	RESPST_ERR_UNSUPPORTED_OPCODE,
	RESPST_ERR_MISALIGNED_ATOMIC,
	RESPST_ERR_PSN_OUT_OF_SEQ,
	RESPST_ERR_MISSING_OPCODE_FIRST,
	RESPST_ERR_MISSING_OPCODE_LAST_C,
	RESPST_ERR_MISSING_OPCODE_LAST_D1E,
	RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
	RESPST_ERR_RNR,
	RESPST_ERR_RKEY_VIOLATION,
	RESPST_ERR_INVALIDATE_RKEY,
	RESPST_ERR_LENGTH,
	RESPST_ERR_CQ_OVERFLOW,
	RESPST_ERROR,
	RESPST_RESET,
	RESPST_DONE,
	RESPST_EXIT,
};

static char *resp_state_name[] = {
	[RESPST_NONE] = "NONE",
	[RESPST_GET_REQ] = "GET_REQ",
	[RESPST_CHK_PSN] = "CHK_PSN",
	[RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
	[RESPST_CHK_LENGTH] = "CHK_LENGTH",
	[RESPST_CHK_RKEY] = "CHK_RKEY",
	[RESPST_EXECUTE] = "EXECUTE",
	[RESPST_READ_REPLY] = "READ_REPLY",
	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
	[RESPST_COMPLETE] = "COMPLETE",
	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
	[RESPST_CLEANUP] = "CLEANUP",
	[RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR] = "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH] = "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
	[RESPST_ERROR] = "ERROR",
	[RESPST_RESET] = "RESET",
	[RESPST_DONE] = "DONE",
	[RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
		     (skb_queue_len(&qp->req_pkts) > 1);

	if (must_sched)
		rxe_sched_task(&qp->resp.task);
	else
		rxe_run_task(&qp->resp.task);
}
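/* get_req - fetch the next inbound request packet for the responder,
 * or drain the packet queue if the QP has moved to the error state.
 * Returns the next responder state to run.
 */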
static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	if (qp->resp.state == QP_STATE_ERROR) {
		while ((skb = skb_dequeue(&qp->req_pkts))) {
			rxe_put(qp);
			kfree_skb(skb);
			ib_device_put(qp->ibqp.device);
		}

		/* go drain recv wr queue */
		return RESPST_CHK_RESOURCE;
	}

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}

static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}
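/* check_qp_attr_access - verify that the QP's access flags permit the
 * remote operation (read, write/atomic write, atomic, or flush)
 * carried by this packet.
 */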
static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge * sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}
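/* check_resource - make sure a receive WQE or responder resource is
 * available for this request; if the QP is in the error state, flush
 * any remaining receive WQEs instead.
 */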
static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (qp->resp.state == QP_STATE_ERROR) {
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_WR_FLUSH_ERR;
			return RESPST_COMPLETE;
		} else if (!srq) {
			qp->resp.wqe = queue_head(qp->rq.queue,
						  QUEUE_TYPE_FROM_CLIENT);
			if (qp->resp.wqe) {
				qp->resp.status = IB_WC_WR_FLUSH_ERR;
				return RESPST_COMPLETE;
			} else {
				return RESPST_EXIT;
			}
		} else {
			return RESPST_EXIT;
		}
	}

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job not to send
		 * too many read/atomic ops; we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
					  QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}

static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For RDMA operations additional
	 * length checks are performed in check_rkey.
	 */
	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long");
			return RESPST_ERR_LENGTH;
		}
	}

	return RESPST_CHK_RKEY;
}

static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = reth_rkey(pkt);
	qp->resp.resid = reth_len(pkt);
	qp->resp.length = reth_len(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}
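/* check_rkey - look up and validate the MR or MW referenced by the
 * rkey in the request, check access rights, address range and packet
 * length, and take a reference on the MR for the rest of the
 * operation.
 */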
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		return RESPST_EXECUTE;
	}

	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) &&
	    reth_len(pkt) == 0) {
		return RESPST_EXECUTE;
	}

	va = qp->resp.va;
	rkey = qp->resp.rkey;
	resid = qp->resp.resid;
	pktlen = payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_put(mw);
		rxe_get(mr);
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}
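/* send_data_in - copy the payload of an inbound send packet into the
 * scatter/gather list of the current receive WQE.
 */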
static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}

static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1) / qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

/* Guarantee atomicity of atomic operations at the machine level. */
static DEFINE_SPINLOCK(atomic_ops_lock);
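/* atomic_reply - execute a compare-and-swap or fetch-and-add request
 * against responder memory under atomic_ops_lock, saving the original
 * value so the atomic ack (and any replay) can return it.
 */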
static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	u64 *vaddr;
	enum resp_states ret;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	u64 value;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		if (mr->state != RXE_MR_STATE_VALID) {
			ret = RESPST_ERR_RKEY_VIOLATION;
			goto out;
		}

		vaddr = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset,
				      sizeof(u64));

		/* check vaddr is 8 bytes aligned. */
		if (!vaddr || (uintptr_t)vaddr & 7) {
			ret = RESPST_ERR_MISALIGNED_ATOMIC;
			goto out;
		}

		spin_lock_bh(&atomic_ops_lock);
		res->atomic.orig_val = value = *vaddr;

		if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
			if (value == atmeth_comp(pkt))
				value = atmeth_swap_add(pkt);
		} else {
			value += atmeth_swap_add(pkt);
		}

		*vaddr = value;
		spin_unlock_bh(&atomic_ops_lock);

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	ret = RESPST_ACKNOWLEDGE;
out:
	return ret;
}
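/* do_atomic_write - perform the 8-byte store for an ATOMIC WRITE
 * request. Only supported on 64-bit kernels, where an aligned 8-byte
 * store is a single machine operation; otherwise the opcode is
 * rejected as unsupported.
 */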
#ifdef CONFIG_64BIT
static enum resp_states do_atomic_write(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	int payload = payload_size(pkt);
	u64 src, *dst;

	if (mr->state != RXE_MR_STATE_VALID)
		return RESPST_ERR_RKEY_VIOLATION;

	memcpy(&src, payload_addr(pkt), payload);

	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
	/* check vaddr is 8 bytes aligned. */
	if (!dst || (uintptr_t)dst & 7)
		return RESPST_ERR_MISALIGNED_ATOMIC;

	/* Do atomic write after all prior operations have completed */
	smp_store_release(dst, src);

	/* decrease resp.resid to zero */
	qp->resp.resid -= sizeof(payload);

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;
	return RESPST_ACKNOWLEDGE;
}
#else
static enum resp_states do_atomic_write(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt)
{
	return RESPST_ERR_UNSUPPORTED_OPCODE;
}
#endif /* CONFIG_64BIT */

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;
	return do_atomic_write(qp, pkt);
}
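/* prepare_ack_packet - allocate and initialize a response packet
 * (ACK, atomic ack or read response) headed back to the requester,
 * filling in the BTH and, when present, the AETH and ATMACK headers.
 */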
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}

/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This allows the MR to be invalidated or deregistered, or the MW,
 * if one was used, to be invalidated or deallocated. It is assumed
 * that the access permissions, if originally good, are still OK and
 * that the mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay) {
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		if (mr)
			rxe_put(mr);
		return RESPST_ERR_RNR;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (mr)
		rxe_put(mr);
	if (err) {
		kfree_skb(skb);
		return RESPST_ERR_RKEY_VIOLATION;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		return RESPST_ERR_RNR;

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

	return state;
}
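/* invalidate_rkey - invalidate the MW or MR named by an rkey carried
 * in a SEND with Invalidate request.
 */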
static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
				       sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
				       sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						   sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}
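/* do_complete - retire the current receive WQE and post a work
 * completion to the receive CQ, filling in either the kernel or the
 * user space variant of the completion as appropriate.
 */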
static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status = qp->resp.status;
		uwc->qp_num = qp->ibqp.qp_num;
		uwc->wr_id = wqe->wr_id;
	} else {
		wc->status = qp->resp.status;
		wc->qp = &qp->ibqp;
		wc->wr_id = wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
			      pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length :
					wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	if (unlikely(qp->resp.state == QP_STATE_ERROR))
		return RESPST_CHK_RESOURCE;
	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
			   int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			       IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
				  "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
				  "RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}
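/* acknowledge - send the response packet required for an RC request:
 * a NAK if an error syndrome is pending, an atomic ack for atomics,
 * a zero-length read response for flush/atomic write, or a plain ACK
 * when the requester asked for one.
 */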
static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}
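/* duplicate_request - handle a request whose PSN is behind the
 * expected PSN: re-ack duplicate sends/writes and replay reads,
 * atomics and flushes from the saved responder resources.
 */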
static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C. Both are treated the same in this implementation. */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. reset the
		 * recv wr to its original state
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}

static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
{
	struct sk_buff *skb;
	struct rxe_queue *q = qp->rq.queue;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (notify)
		return;

	while (!qp->srq && q && queue_head(q, q->type))
		queue_advance_consumer(q, q->type);
}
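/* rxe_responder - top level of the responder state machine. Runs from
 * the responder task and steps through the resp_states above for each
 * inbound request packet until the packet queue is empty or the QP
 * must stop. Returns 0 so the task keeps iterating, or -EAGAIN to
 * stop.
 */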
int rxe_responder(void *arg)
{
	struct rxe_qp *qp = (struct rxe_qp *)arg;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;

	if (!rxe_get(qp))
		return -EAGAIN;

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	if (!qp->valid)
		goto exit;

	switch (qp->resp.state) {
	case QP_STATE_RESET:
		state = RESPST_RESET;
		break;

	default:
		state = RESPST_GET_REQ;
		break;
	}

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_RESET:
			rxe_drain_req_pkts(qp, false);
			qp->resp.wqe = NULL;
			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the tasklet. A zero return
	 * will continue looping and return to rxe_responder.
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	rxe_put(qp);
	return ret;
}