// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static char *resp_state_name[] = {
	[RESPST_NONE] = "NONE",
	[RESPST_GET_REQ] = "GET_REQ",
	[RESPST_CHK_PSN] = "CHK_PSN",
	[RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
	[RESPST_CHK_LENGTH] = "CHK_LENGTH",
	[RESPST_CHK_RKEY] = "CHK_RKEY",
	[RESPST_EXECUTE] = "EXECUTE",
	[RESPST_READ_REPLY] = "READ_REPLY",
	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
	[RESPST_COMPLETE] = "COMPLETE",
	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
	[RESPST_CLEANUP] = "CLEANUP",
	[RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR] = "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH] = "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
	[RESPST_ERROR] = "ERROR",
	[RESPST_DONE] = "DONE",
	[RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	skb_queue_tail(&qp->req_pkts, skb);
	rxe_sched_task(&qp->recv_task);
}

static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}

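/* check the PSN of an incoming request packet against the PSN expected
 * by the responder. For RC, a packet ahead of the expected PSN triggers
 * a single out-of-sequence NAK and a packet behind it is treated as a
 * duplicate request; for UC, an out-of-sequence packet causes the
 * current message to be dropped.
 */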
static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}

static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

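/* check that the operation carried by the request packet is permitted
 * by the QP's access flags for this QP type
 */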
static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
					  QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}

static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For RDMA operations additional
	 * length checks are performed in check_rkey.
	 */
	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long\n");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long\n");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_RDMA_OP_MASK)
		return RESPST_CHK_RKEY;
	else
		return RESPST_EXECUTE;
}

/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}

/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	/* parse RETH or ATMETH header for first/only packets
	 * for va, length, rkey, etc. or use current value for
	 * middle/last packets.
	 */
	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		/* shouldn't happen */
		WARN_ON(1);
	}

	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	va = qp->resp.va;
	rkey = qp->resp.rkey;
	resid = qp->resp.resid;
	pktlen = payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_get(mr);
		rxe_put(mw);
		mw = NULL;
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}

static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}

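/* copy the payload of an RDMA write packet into the MR resolved by
 * check_rkey and advance the current write offset and residual count
 */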
static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

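/* execute the atomic operation described by the request packet on the
 * target MR and save the original value for the atomic ack. A replayed
 * request skips the operation and is simply acknowledged again.
 */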
static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
					  atmeth_comp(pkt),
					  atmeth_swap_add(pkt),
					  &res->atomic.orig_val);
		if (err)
			return err;

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;
	u64 value;
	u64 iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	mr = qp->resp.mr;
	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	err = rxe_mr_do_atomic_write(mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}

/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered, or the MW,
 * if one was used, to be invalidated or deallocated. It is assumed that
 * the access permissions, if originally good, are still OK and that the
 * mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (is ok)
			 * otherwise qp->resp.mr holds a ref on mr
			 * which we transfer to mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * modifies or destroys the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (err) {
		kfree_skb(skb);
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err_out;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	/* rxe_xmit_packet always consumes the skb */
	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

err_out:
	if (mr)
		rxe_put(mr);
	return state;
}

static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
				       sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
				       sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						   sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

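/* build a receive work completion for the current recv wqe, post it to
 * the receive completion queue and pick the next responder state
 */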
static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	unsigned long flags;

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status = qp->resp.status;
		uwc->qp_num = qp->ibqp.qp_num;
		uwc->wr_id = wqe->wr_id;
	} else {
		wc->status = qp->resp.status;
		wc->qp = &qp->ibqp;
		wc->wr_id = wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
			      pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length :
					wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	} else {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			rxe_err_qp(qp, "non-flush error status = %d\n",
				   wc->status);
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		return RESPST_CHK_RESOURCE;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

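/* build and transmit a single ack packet with the given opcode, psn and
 * AETH syndrome
 */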
static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
			   int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			       IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
				  "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
				  "RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}

static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C error. Both are treated the same in this
 * implementation.
 */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}

/* drain incoming request packet queue */
static void drain_req_pkts(struct rxe_qp *qp)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}
}

/* complete receive wqe with flush error */
static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
{
	struct rxe_cqe cqe = {};
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	int err;

	if (qp->rcq->is_user) {
		uwc->wr_id = wqe->wr_id;
		uwc->status = IB_WC_WR_FLUSH_ERR;
		uwc->qp_num = qp_num(qp);
	} else {
		wc->wr_id = wqe->wr_id;
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;
	}

	err = rxe_cq_post(qp->rcq, &cqe, 0);
	if (err)
		rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err);

	return err;
}

/* drain and optionally complete the receive queue.
 * if unable to complete a wqe, stop completing and
 * just flush the remaining wqes.
 */
static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
	struct rxe_queue *q = qp->rq.queue;
	struct rxe_recv_wqe *wqe;
	int err;

	if (qp->srq) {
		if (notify && qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		return;
	}

	/* recv queue not created. nothing to do. */
	if (!qp->rq.queue)
		return;

	while ((wqe = queue_head(q, q->type))) {
		if (notify) {
			err = flush_recv_wqe(qp, wqe);
			if (err)
				notify = 0;
		}
		queue_advance_consumer(q, q->type);
	}

	qp->resp.wqe = NULL;
}

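/* the responder state machine. Runs from the QP's recv task to process
 * request packets queued by rxe_resp_queue_pkt. Returns 0 to keep the
 * task looping or -EAGAIN to stop until new work arrives.
 */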
int rxe_receiver(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
	    qp_state(qp) == IB_QPS_RESET) {
		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

		drain_req_pkts(qp);
		flush_recv_queue(qp, notify);
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	state = RESPST_GET_REQ;

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_receiver.
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	return ret;
}