// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include <linux/skbuff.h>

#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"

static char *resp_state_name[] = {
	[RESPST_NONE] = "NONE",
	[RESPST_GET_REQ] = "GET_REQ",
	[RESPST_CHK_PSN] = "CHK_PSN",
	[RESPST_CHK_OP_SEQ] = "CHK_OP_SEQ",
	[RESPST_CHK_OP_VALID] = "CHK_OP_VALID",
	[RESPST_CHK_RESOURCE] = "CHK_RESOURCE",
	[RESPST_CHK_LENGTH] = "CHK_LENGTH",
	[RESPST_CHK_RKEY] = "CHK_RKEY",
	[RESPST_EXECUTE] = "EXECUTE",
	[RESPST_READ_REPLY] = "READ_REPLY",
	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
	[RESPST_COMPLETE] = "COMPLETE",
	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
	[RESPST_CLEANUP] = "CLEANUP",
	[RESPST_DUPLICATE_REQUEST] = "DUPLICATE_REQUEST",
	[RESPST_ERR_MALFORMED_WQE] = "ERR_MALFORMED_WQE",
	[RESPST_ERR_UNSUPPORTED_OPCODE] = "ERR_UNSUPPORTED_OPCODE",
	[RESPST_ERR_MISALIGNED_ATOMIC] = "ERR_MISALIGNED_ATOMIC",
	[RESPST_ERR_PSN_OUT_OF_SEQ] = "ERR_PSN_OUT_OF_SEQ",
	[RESPST_ERR_MISSING_OPCODE_FIRST] = "ERR_MISSING_OPCODE_FIRST",
	[RESPST_ERR_MISSING_OPCODE_LAST_C] = "ERR_MISSING_OPCODE_LAST_C",
	[RESPST_ERR_MISSING_OPCODE_LAST_D1E] = "ERR_MISSING_OPCODE_LAST_D1E",
	[RESPST_ERR_TOO_MANY_RDMA_ATM_REQ] = "ERR_TOO_MANY_RDMA_ATM_REQ",
	[RESPST_ERR_RNR] = "ERR_RNR",
	[RESPST_ERR_RKEY_VIOLATION] = "ERR_RKEY_VIOLATION",
	[RESPST_ERR_INVALIDATE_RKEY] = "ERR_INVALIDATE_RKEY_VIOLATION",
	[RESPST_ERR_LENGTH] = "ERR_LENGTH",
	[RESPST_ERR_CQ_OVERFLOW] = "ERR_CQ_OVERFLOW",
	[RESPST_ERROR] = "ERROR",
	[RESPST_DONE] = "DONE",
	[RESPST_EXIT] = "EXIT",
};

/* rxe_recv calls here to add a request packet to the input queue */
void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
{
	int must_sched;
	struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);

	skb_queue_tail(&qp->req_pkts, skb);

	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
		     (skb_queue_len(&qp->req_pkts) > 1);

	if (must_sched)
		rxe_sched_task(&qp->resp.task);
	else
		rxe_run_task(&qp->resp.task);
}

static inline enum resp_states get_req(struct rxe_qp *qp,
				       struct rxe_pkt_info **pkt_p)
{
	struct sk_buff *skb;

	skb = skb_peek(&qp->req_pkts);
	if (!skb)
		return RESPST_EXIT;

	*pkt_p = SKB_TO_PKT(skb);

	return (qp->resp.res) ? RESPST_READ_REPLY : RESPST_CHK_PSN;
}

static enum resp_states check_psn(struct rxe_qp *qp,
				  struct rxe_pkt_info *pkt)
{
	int diff = psn_compare(pkt->psn, qp->resp.psn);
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);

	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (diff > 0) {
			if (qp->resp.sent_psn_nak)
				return RESPST_CLEANUP;

			qp->resp.sent_psn_nak = 1;
			rxe_counter_inc(rxe, RXE_CNT_OUT_OF_SEQ_REQ);
			return RESPST_ERR_PSN_OUT_OF_SEQ;

		} else if (diff < 0) {
			rxe_counter_inc(rxe, RXE_CNT_DUP_REQ);
			return RESPST_DUPLICATE_REQUEST;
		}

		if (qp->resp.sent_psn_nak)
			qp->resp.sent_psn_nak = 0;

		break;

	case IB_QPT_UC:
		if (qp->resp.drop_msg || diff != 0) {
			if (pkt->mask & RXE_START_MASK) {
				qp->resp.drop_msg = 0;
				return RESPST_CHK_OP_SEQ;
			}

			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}
		break;
	default:
		break;
	}

	return RESPST_CHK_OP_SEQ;
}

static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}

static bool check_qp_attr_access(struct rxe_qp *qp,
				 struct rxe_pkt_info *pkt)
{
	if (((pkt->mask & RXE_READ_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
	    ((pkt->mask & RXE_ATOMIC_MASK) &&
	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
		return false;

	if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if ((flush_type & IB_FLUSH_GLOBAL &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
		    (flush_type & IB_FLUSH_PERSISTENT &&
		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
			return false;
	}

	return true;
}

static enum resp_states check_op_valid(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		if (!check_qp_attr_access(qp, pkt))
			return RESPST_ERR_UNSUPPORTED_OPCODE;

		break;

	case IB_QPT_UC:
		if ((pkt->mask & RXE_WRITE_MASK) &&
		    !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) {
			qp->resp.drop_msg = 1;
			return RESPST_CLEANUP;
		}

		break;

	case IB_QPT_UD:
	case IB_QPT_GSI:
		break;

	default:
		WARN_ON_ONCE(1);
		break;
	}

	return RESPST_CHK_RESOURCE;
}

static enum resp_states get_srq_wqe(struct rxe_qp *qp)
{
	struct rxe_srq *srq = qp->srq;
	struct rxe_queue *q = srq->rq.queue;
	struct rxe_recv_wqe *wqe;
	struct ib_event ev;
	unsigned int count;
	size_t size;
	unsigned long flags;

	if (srq->error)
		return RESPST_ERR_RNR;

	spin_lock_irqsave(&srq->rq.consumer_lock, flags);

	wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT);
	if (!wqe) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		return RESPST_ERR_RNR;
	}

	/* don't trust user space data */
	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
		return RESPST_ERR_MALFORMED_WQE;
	}
	size = sizeof(*wqe) + wqe->dma.num_sge * sizeof(struct rxe_sge);
	memcpy(&qp->resp.srq_wqe, wqe, size);

	qp->resp.wqe = &qp->resp.srq_wqe.wqe;
	queue_advance_consumer(q, QUEUE_TYPE_FROM_CLIENT);
	count = queue_count(q, QUEUE_TYPE_FROM_CLIENT);

	if (srq->limit && srq->ibsrq.event_handler && (count < srq->limit)) {
		srq->limit = 0;
		goto event;
	}

	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	return RESPST_CHK_LENGTH;

event:
	spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
	ev.device = qp->ibqp.device;
	ev.element.srq = qp->ibqp.srq;
	ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
	srq->ibsrq.event_handler(&ev, srq->ibsrq.srq_context);
	return RESPST_CHK_LENGTH;
}

static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
					  QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}

static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For rdma operations additional
	 * length checks are performed in check_rkey.
	 */
	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long\n");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long\n");
				return RESPST_ERR_LENGTH;
			}
		}
	}

	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_RDMA_OP_MASK)
		return RESPST_CHK_RKEY;
	else
		return RESPST_EXECUTE;
}

/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}

static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	qp->resp.va = atmeth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.rkey = atmeth_rkey(pkt);
	qp->resp.resid = sizeof(u64);
}

/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	/* parse RETH or ATMETH header for first/only packets
	 * for va, length, rkey, etc. or use current value for
	 * middle/last packets.
	 */
	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
						     : IB_ACCESS_REMOTE_WRITE;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		u32 flush_type = feth_plt(pkt);

		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);

		if (flush_type & IB_FLUSH_GLOBAL)
			access |= IB_ACCESS_FLUSH_GLOBAL;
		if (flush_type & IB_FLUSH_PERSISTENT)
			access |= IB_ACCESS_FLUSH_PERSISTENT;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		qp_resp_from_atmeth(qp, pkt);
		access = IB_ACCESS_REMOTE_ATOMIC;
	} else {
		/* shouldn't happen */
		WARN_ON(1);
	}

	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	va = qp->resp.va;
	rkey = qp->resp.rkey;
	resid = qp->resp.resid;
	pktlen = payload_size(pkt);

	if (rkey_is_mw(rkey)) {
		mw = rxe_lookup_mw(qp, access, rkey);
		if (!mw) {
			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		mr = mw->mr;
		if (!mr) {
			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}

		if (mw->access & IB_ZERO_BASED)
			qp->resp.offset = mw->addr;

		rxe_get(mr);
		rxe_put(mw);
		mw = NULL;
	} else {
		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
		if (!mr) {
			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
			state = RESPST_ERR_RKEY_VIOLATION;
			goto err;
		}
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}

	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}

	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}

static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;

	err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
			data_addr, data_len, RXE_TO_MR_OBJ);
	if (unlikely(err))
		return (err == -ENOSPC) ? RESPST_ERR_LENGTH
					: RESPST_ERR_MALFORMED_WQE;

	return RESPST_NONE;
}

static enum resp_states write_data_in(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	enum resp_states rc = RESPST_NONE;
	int err;
	int data_len = payload_size(pkt);

	err = rxe_mr_copy(qp->resp.mr, qp->resp.va + qp->resp.offset,
			  payload_addr(pkt), data_len, RXE_TO_MR_OBJ);
	if (err) {
		rc = RESPST_ERR_RKEY_VIOLATION;
		goto out;
	}

	qp->resp.va += data_len;
	qp->resp.resid -= data_len;

out:
	return rc;
}

static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
					struct rxe_pkt_info *pkt,
					int type)
{
	struct resp_res *res;
	u32 pkts;

	res = &qp->resp.resources[qp->resp.res_head];
	rxe_advance_resp_resource(qp);
	free_rd_atomic_resource(res);

	res->type = type;
	res->replay = 0;

	switch (type) {
	case RXE_READ_MASK:
		res->read.va = qp->resp.va + qp->resp.offset;
		res->read.va_org = qp->resp.va + qp->resp.offset;
		res->read.resid = qp->resp.resid;
		res->read.length = qp->resp.resid;
		res->read.rkey = qp->resp.rkey;

		pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1) / qp->mtu, 1);
		res->first_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK;

		res->state = rdatm_res_state_new;
		break;
	case RXE_ATOMIC_MASK:
	case RXE_ATOMIC_WRITE_MASK:
		res->first_psn = pkt->psn;
		res->last_psn = pkt->psn;
		res->cur_psn = pkt->psn;
		break;
	case RXE_FLUSH_MASK:
		res->flush.va = qp->resp.va + qp->resp.offset;
		res->flush.length = qp->resp.length;
		res->flush.type = feth_plt(pkt);
		res->flush.level = feth_sel(pkt);
	}

	return res;
}

static enum resp_states process_flush(struct rxe_qp *qp,
				      struct rxe_pkt_info *pkt)
{
	u64 length, start;
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;

	/* oA19-14, oA19-15 */
	if (res && res->replay)
		return RESPST_ACKNOWLEDGE;
	else if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
		qp->resp.res = res;
	}

	if (res->flush.level == IB_FLUSH_RANGE) {
		start = res->flush.va;
		length = res->flush.length;
	} else { /* level == IB_FLUSH_MR */
		start = mr->ibmr.iova;
		length = mr->ibmr.length;
	}

	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}

	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_reply(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = qp->resp.mr;
	struct resp_res *res = qp->resp.res;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_MASK);
		qp->resp.res = res;
	}

	if (!res->replay) {
		u64 iova = qp->resp.va + qp->resp.offset;

		err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
					  atmeth_comp(pkt),
					  atmeth_swap_add(pkt),
					  &res->atomic.orig_val);
		if (err)
			return err;

		qp->resp.msn++;

		/* next expected psn, read handles this separately */
		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
		qp->resp.ack_psn = qp->resp.psn;

		qp->resp.opcode = pkt->opcode;
		qp->resp.status = IB_WC_SUCCESS;
	}

	return RESPST_ACKNOWLEDGE;
}

static enum resp_states atomic_write_reply(struct rxe_qp *qp,
					   struct rxe_pkt_info *pkt)
{
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;
	u64 value;
	u64 iova;
	int err;

	if (!res) {
		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
		qp->resp.res = res;
	}

	if (res->replay)
		return RESPST_ACKNOWLEDGE;

	mr = qp->resp.mr;
	value = *(u64 *)payload_addr(pkt);
	iova = qp->resp.va + qp->resp.offset;

	err = rxe_mr_do_atomic_write(mr, iova, value);
	if (err)
		return err;

	qp->resp.resid = 0;
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	return RESPST_ACKNOWLEDGE;
}

static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
					  struct rxe_pkt_info *ack,
					  int opcode,
					  int payload,
					  u32 psn,
					  u8 syndrome)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct sk_buff *skb;
	int paylen;
	int pad;
	int err;

	/*
	 * allocate packet
	 */
	pad = (-payload) & 0x3;
	paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;

	skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
	if (!skb)
		return NULL;

	ack->qp = qp;
	ack->opcode = opcode;
	ack->mask = rxe_opcode[opcode].mask;
	ack->paylen = paylen;
	ack->psn = psn;

	bth_init(ack, opcode, 0, 0, pad, IB_DEFAULT_PKEY_FULL,
		 qp->attr.dest_qp_num, 0, psn);

	if (ack->mask & RXE_AETH_MASK) {
		aeth_set_syn(ack, syndrome);
		aeth_set_msn(ack, qp->resp.msn);
	}

	if (ack->mask & RXE_ATMACK_MASK)
		atmack_set_orig(ack, qp->resp.res->atomic.orig_val);

	err = rxe_prepare(&qp->pri_av, ack, skb);
	if (err) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}

/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered or
 * the MW, if one was used, to be invalidated or deallocated.
 * It is assumed that the access permissions, if originally good,
 * are still OK and that the mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;

		mr = mw->mr;
		if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
		    !mr || mr->state != RXE_MR_STATE_VALID) {
			rxe_put(mw);
			return NULL;
		}

		rxe_get(mr);
		rxe_put(mw);

		return mr;
	}

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr)
		return NULL;

	if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
		rxe_put(mr);
		return NULL;
	}

	return mr;
}

/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}

	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (is ok)
			 * otherwise qp->resp.mr holds a ref on mr
			 * which we transfer to mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * modifies or destroys the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;

		if (res->read.resid > mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST;
	}

	res->state = rdatm_res_state_next;

	payload = min_t(int, res->read.resid, mtu);

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
				 res->cur_psn, AETH_ACK_UNLIMITED);
	if (!skb) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
			  payload, RXE_FROM_MR_OBJ);
	if (err) {
		kfree_skb(skb);
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err_out;
	}

	if (bth_pad(&ack_pkt)) {
		u8 *pad = payload_addr(&ack_pkt) + payload;

		memset(pad, 0, bth_pad(&ack_pkt));
	}

	/* rxe_xmit_packet always consumes the skb */
	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err) {
		state = RESPST_ERR_RNR;
		goto err_out;
	}

	res->read.va += payload;
	res->read.resid -= payload;
	res->cur_psn = (res->cur_psn + 1) & BTH_PSN_MASK;

	if (res->read.resid > 0) {
		state = RESPST_DONE;
	} else {
		qp->resp.res = NULL;
		if (!res->replay)
			qp->resp.opcode = -1;
		if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
			qp->resp.psn = res->cur_psn;
		state = RESPST_CLEANUP;
	}

err_out:
	if (mr)
		rxe_put(mr);
	return state;
}

static int invalidate_rkey(struct rxe_qp *qp, u32 rkey)
{
	if (rkey_is_mw(rkey))
		return rxe_invalidate_mw(qp, rkey);
	else
		return rxe_invalidate_mr(qp, rkey);
}

/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;

	if (pkt->mask & RXE_SEND_MASK) {
		if (qp_type(qp) == IB_QPT_UD ||
		    qp_type(qp) == IB_QPT_GSI) {
			if (skb->protocol == htons(ETH_P_IP)) {
				memset(&hdr.reserved, 0,
				       sizeof(hdr.reserved));
				memcpy(&hdr.roce4grh, ip_hdr(skb),
				       sizeof(hdr.roce4grh));
				err = send_data_in(qp, &hdr, sizeof(hdr));
			} else {
				err = send_data_in(qp, ipv6_hdr(skb),
						   sizeof(hdr));
			}
			if (err)
				return err;
		}
		err = send_data_in(qp, payload_addr(pkt), payload_size(pkt));
		if (err)
			return err;
	} else if (pkt->mask & RXE_WRITE_MASK) {
		err = write_data_in(qp, pkt);
		if (err)
			return err;
	} else if (pkt->mask & RXE_READ_MASK) {
		/* For RDMA Read we can increment the msn now. See C9-148. */
		qp->resp.msn++;
		return RESPST_READ_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_MASK) {
		return RESPST_ATOMIC_REPLY;
	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
		return RESPST_ATOMIC_WRITE_REPLY;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		return RESPST_PROCESS_FLUSH;
	} else {
		/* Unreachable */
		WARN_ON_ONCE(1);
	}

	if (pkt->mask & RXE_IETH_MASK) {
		u32 rkey = ieth_rkey(pkt);

		err = invalidate_rkey(qp, rkey);
		if (err)
			return RESPST_ERR_INVALIDATE_RKEY;
	}

	if (pkt->mask & RXE_END_MASK)
		/* We successfully processed this new request. */
		qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;

	qp->resp.opcode = pkt->opcode;
	qp->resp.status = IB_WC_SUCCESS;

	if (pkt->mask & RXE_COMP_MASK)
		return RESPST_COMPLETE;
	else if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static enum resp_states do_complete(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	struct rxe_cqe cqe;
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	struct rxe_recv_wqe *wqe = qp->resp.wqe;
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	unsigned long flags;

	if (!wqe)
		goto finish;

	memset(&cqe, 0, sizeof(cqe));

	if (qp->rcq->is_user) {
		uwc->status = qp->resp.status;
		uwc->qp_num = qp->ibqp.qp_num;
		uwc->wr_id = wqe->wr_id;
	} else {
		wc->status = qp->resp.status;
		wc->qp = &qp->ibqp;
		wc->wr_id = wqe->wr_id;
	}

	if (wc->status == IB_WC_SUCCESS) {
		rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
		wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
			      pkt->mask & RXE_WRITE_MASK) ?
					IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
		wc->byte_len = (pkt->mask & RXE_IMMDT_MASK &&
				pkt->mask & RXE_WRITE_MASK) ?
					qp->resp.length :
					wqe->dma.length - wqe->dma.resid;

		/* fields after byte_len are different between kernel and user
		 * space
		 */
		if (qp->rcq->is_user) {
			uwc->wc_flags = IB_WC_GRH;

			if (pkt->mask & RXE_IMMDT_MASK) {
				uwc->wc_flags |= IB_WC_WITH_IMM;
				uwc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				uwc->wc_flags |= IB_WC_WITH_INVALIDATE;
				uwc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				uwc->src_qp = deth_sqp(pkt);

			uwc->port_num = qp->attr.port_num;
		} else {
			struct sk_buff *skb = PKT_TO_SKB(pkt);

			wc->wc_flags = IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE;
			if (skb->protocol == htons(ETH_P_IP))
				wc->network_hdr_type = RDMA_NETWORK_IPV4;
			else
				wc->network_hdr_type = RDMA_NETWORK_IPV6;

			if (is_vlan_dev(skb->dev)) {
				wc->wc_flags |= IB_WC_WITH_VLAN;
				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
			}

			if (pkt->mask & RXE_IMMDT_MASK) {
				wc->wc_flags |= IB_WC_WITH_IMM;
				wc->ex.imm_data = immdt_imm(pkt);
			}

			if (pkt->mask & RXE_IETH_MASK) {
				wc->wc_flags |= IB_WC_WITH_INVALIDATE;
				wc->ex.invalidate_rkey = ieth_rkey(pkt);
			}

			if (pkt->mask & RXE_DETH_MASK)
				wc->src_qp = deth_sqp(pkt);

			wc->port_num = qp->attr.port_num;
		}
	} else {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			rxe_err_qp(qp, "non-flush error status = %d\n",
				   wc->status);
	}

	/* have copy for srq and reference for !srq */
	if (!qp->srq)
		queue_advance_consumer(qp->rq.queue, QUEUE_TYPE_FROM_CLIENT);

	qp->resp.wqe = NULL;

	if (rxe_cq_post(qp->rcq, &cqe, pkt ? bth_se(pkt) : 1))
		return RESPST_ERR_CQ_OVERFLOW;

finish:
	spin_lock_irqsave(&qp->state_lock, flags);
	if (unlikely(qp_state(qp) == IB_QPS_ERR)) {
		spin_unlock_irqrestore(&qp->state_lock, flags);
		return RESPST_CHK_RESOURCE;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	if (unlikely(!pkt))
		return RESPST_DONE;
	if (qp_type(qp) == IB_QPT_RC)
		return RESPST_ACKNOWLEDGE;
	else
		return RESPST_CLEANUP;
}

static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
			   int opcode, const char *msg)
{
	int err;
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;

	skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome);
	if (!skb)
		return -ENOMEM;

	err = rxe_xmit_packet(qp, &ack_pkt, skb);
	if (err)
		rxe_dbg_qp(qp, "Failed sending %s\n", msg);

	return err;
}

static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	return send_common_ack(qp, syndrome, psn,
			       IB_OPCODE_RC_ACKNOWLEDGE, "ACK");
}

static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE,
				  "ATOMIC ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
{
	int ret = send_common_ack(qp, syndrome, psn,
				  IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
				  "RDMA READ response of length zero ACK");

	/* have to clear this since it is used to trigger
	 * long read replies
	 */
	qp->resp.res = NULL;
	return ret;
}

static enum resp_states acknowledge(struct rxe_qp *qp,
				    struct rxe_pkt_info *pkt)
{
	if (qp_type(qp) != IB_QPT_RC)
		return RESPST_CLEANUP;

	if (qp->resp.aeth_syndrome != AETH_ACK_UNLIMITED)
		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
	else if (pkt->mask & RXE_ATOMIC_MASK)
		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
	else if (bth_ack(pkt))
		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);

	return RESPST_CLEANUP;
}

static enum resp_states cleanup(struct rxe_qp *qp,
				struct rxe_pkt_info *pkt)
{
	struct sk_buff *skb;

	if (pkt) {
		skb = skb_dequeue(&qp->req_pkts);
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}

	if (qp->resp.mr) {
		rxe_put(qp->resp.mr);
		qp->resp.mr = NULL;
	}

	return RESPST_DONE;
}

static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
	int i;

	for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
		struct resp_res *res = &qp->resp.resources[i];

		if (res->type == 0)
			continue;

		if (psn_compare(psn, res->first_psn) >= 0 &&
		    psn_compare(psn, res->last_psn) <= 0) {
			return res;
		}
	}

	return NULL;
}

static enum resp_states duplicate_request(struct rxe_qp *qp,
					  struct rxe_pkt_info *pkt)
{
	enum resp_states rc;
	u32 prev_psn = (qp->resp.ack_psn - 1) & BTH_PSN_MASK;

	if (pkt->mask & RXE_SEND_MASK ||
	    pkt->mask & RXE_WRITE_MASK) {
		/* SEND. Ack again and cleanup. C9-105. */
		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
		return RESPST_CLEANUP;
	} else if (pkt->mask & RXE_FLUSH_MASK) {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);

			if (iova < res->read.va_org ||
			    resid > res->read.length ||
			    (iova + resid) > (res->read.va_org +
					      res->read.length)) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			if (reth_rkey(pkt) != res->read.rkey) {
				rc = RESPST_CLEANUP;
				goto out;
			}

			res->cur_psn = pkt->psn;
			res->state = (pkt->psn == res->first_psn) ?
					rdatm_res_state_new :
					rdatm_res_state_replay;
			res->replay = 1;

			/* Reset the resource, except length. */
			res->read.va_org = iova;
			res->read.va = iova;
			res->read.resid = resid;

			/* Replay the RDMA read reply. */
			qp->resp.res = res;
			rc = RESPST_READ_REPLY;
			goto out;
		}
	} else {
		struct resp_res *res;

		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}

/* Process a class A or C. Both are treated the same in this implementation. */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}

static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. reset the
		 * recv wr to its original state
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}

/* drain incoming request packet queue */
static void drain_req_pkts(struct rxe_qp *qp)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(&qp->req_pkts))) {
		rxe_put(qp);
		kfree_skb(skb);
		ib_device_put(qp->ibqp.device);
	}
}

/* complete receive wqe with flush error */
static int flush_recv_wqe(struct rxe_qp *qp, struct rxe_recv_wqe *wqe)
{
	struct rxe_cqe cqe = {};
	struct ib_wc *wc = &cqe.ibwc;
	struct ib_uverbs_wc *uwc = &cqe.uibwc;
	int err;

	if (qp->rcq->is_user) {
		uwc->wr_id = wqe->wr_id;
		uwc->status = IB_WC_WR_FLUSH_ERR;
		uwc->qp_num = qp_num(qp);
	} else {
		wc->wr_id = wqe->wr_id;
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->qp = &qp->ibqp;
	}

	err = rxe_cq_post(qp->rcq, &cqe, 0);
	if (err)
		rxe_dbg_cq(qp->rcq, "post cq failed err = %d\n", err);

	return err;
}

/* drain and optionally complete the receive queue
 * if unable to complete a wqe, stop completing and
 * just flush the remaining wqes
 */
static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
	struct rxe_queue *q = qp->rq.queue;
	struct rxe_recv_wqe *wqe;
	int err;

	if (qp->srq) {
		if (notify && qp->ibqp.event_handler) {
			struct ib_event ev;

			ev.device = qp->ibqp.device;
			ev.element.qp = &qp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
		}
		return;
	}

	/* recv queue not created. nothing to do. */
	if (!qp->rq.queue)
		return;

	while ((wqe = queue_head(q, q->type))) {
		if (notify) {
			err = flush_recv_wqe(qp, wqe);
			if (err)
				notify = 0;
		}
		queue_advance_consumer(q, q->type);
	}

	qp->resp.wqe = NULL;
}

int rxe_responder(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&qp->state_lock, flags);
	if (!qp->valid || qp_state(qp) == IB_QPS_ERR ||
	    qp_state(qp) == IB_QPS_RESET) {
		bool notify = qp->valid && (qp_state(qp) == IB_QPS_ERR);

		drain_req_pkts(qp);
		flush_recv_queue(qp, notify);
		spin_unlock_irqrestore(&qp->state_lock, flags);
		goto exit;
	}
	spin_unlock_irqrestore(&qp->state_lock, flags);

	qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

	state = RESPST_GET_REQ;

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;
		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;

		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;

		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;

		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_responder
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	return ret;
}