/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/spinlock.h>

#include "hfi.h"
#include "mad.h"
#include "qp.h"
#include "verbs_txreq.h"
#include "trace.h"

/*
 * Convert the AETH RNR timeout code into the number of microseconds.
 */
const u32 ib_hfi1_rnr_table[32] = {
	655360,	/* 00: 655.36 */
	10,	/* 01:    .01 */
	20,	/* 02:    .02 */
	30,	/* 03:    .03 */
	40,	/* 04:    .04 */
	60,	/* 05:    .06 */
	80,	/* 06:    .08 */
	120,	/* 07:    .12 */
	160,	/* 08:    .16 */
	240,	/* 09:    .24 */
	320,	/* 0A:    .32 */
	480,	/* 0B:    .48 */
	640,	/* 0C:    .64 */
	960,	/* 0D:    .96 */
	1280,	/* 0E:   1.28 */
	1920,	/* 0F:   1.92 */
	2560,	/* 10:   2.56 */
	3840,	/* 11:   3.84 */
	5120,	/* 12:   5.12 */
	7680,	/* 13:   7.68 */
	10240,	/* 14:  10.24 */
	15360,	/* 15:  15.36 */
	20480,	/* 16:  20.48 */
	30720,	/* 17:  30.72 */
	40960,	/* 18:  40.96 */
	61440,	/* 19:  61.44 */
	81920,	/* 1A:  81.92 */
	122880,	/* 1B: 122.88 */
	163840,	/* 1C: 163.84 */
	245760,	/* 1D: 245.76 */
	327680,	/* 1E: 327.68 */
	491520	/* 1F: 491.52 */
};
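
/*
 * Illustrative note (added, not in the original source): the 5-bit AETH RNR
 * NAK timer code indexes this table, so for example code 0x07 selects
 * 120 usec (0.12 ms) and code 0x00 selects the largest value, 655.36 ms.
 * ruc_loopback() below uses the responder's r_min_rnr_timer as the index
 * when scheduling an RNR retry.
 */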

/*
 * Validate a RWQE and fill in the SGE state.
 * Return 1 if OK.
 */
static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
{
	int i, j, ret;
	struct ib_wc wc;
	struct rvt_lkey_table *rkt;
	struct rvt_pd *pd;
	struct rvt_sge_state *ss;

	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
	ss = &qp->r_sge;
	ss->sg_list = qp->r_sg_list;
	qp->r_len = 0;
	for (i = j = 0; i < wqe->num_sge; i++) {
		if (wqe->sg_list[i].length == 0)
			continue;
		/* Check LKEY */
		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
			goto bad_lkey;
		qp->r_len += wqe->sg_list[i].length;
		j++;
	}
	ss->num_sge = j;
	ss->total_len = qp->r_len;
	ret = 1;
	goto bail;

bad_lkey:
	while (j) {
		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

		rvt_put_mr(sge->mr);
	}
	ss->num_sge = 0;
	memset(&wc, 0, sizeof(wc));
	wc.wr_id = wqe->wr_id;
	wc.status = IB_WC_LOC_PROT_ERR;
	wc.opcode = IB_WC_RECV;
	wc.qp = &qp->ibqp;
	/* Signal solicited completion event. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
	ret = 0;
bail:
	return ret;
}

/**
 * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
 * @qp: the QP
 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
 *
 * Return -1 if there is a local error, 0 if no RWQE is available,
 * otherwise return 1.
 *
 * Can be called from interrupt level.
 */
int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
{
	unsigned long flags;
	struct rvt_rq *rq;
	struct rvt_rwq *wq;
	struct rvt_srq *srq;
	struct rvt_rwqe *wqe;
	void (*handler)(struct ib_event *, void *);
	u32 tail;
	int ret;

	if (qp->ibqp.srq) {
		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
		handler = srq->ibsrq.event_handler;
		rq = &srq->rq;
	} else {
		srq = NULL;
		handler = NULL;
		rq = &qp->r_rq;
	}

	spin_lock_irqsave(&rq->lock, flags);
	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ret = 0;
		goto unlock;
	}

	wq = rq->wq;
	tail = wq->tail;
	/* Validate tail before using it since it is user writable. */
	if (tail >= rq->size)
		tail = 0;
	if (unlikely(tail == wq->head)) {
		ret = 0;
		goto unlock;
	}
	/* Make sure entry is read after head index is read. */
	smp_rmb();
	wqe = rvt_get_rwqe_ptr(rq, tail);
	/*
	 * Even though we update the tail index in memory, the verbs
	 * consumer is not supposed to post more entries until a
	 * completion is generated.
	 */
	if (++tail >= rq->size)
		tail = 0;
	wq->tail = tail;
	if (!wr_id_only && !init_sge(qp, wqe)) {
		ret = -1;
		goto unlock;
	}
	qp->r_wr_id = wqe->wr_id;

	ret = 1;
	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
	if (handler) {
		u32 n;

		/*
		 * Validate head pointer value and compute
		 * the number of remaining WQEs.
		 */
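		/*
		 * Illustrative example (added note): the receive queue is a
		 * ring, so with rq->size == 8, head == 2 and tail == 6 the
		 * count below is 2 + 8 - 6 == 4 posted-but-unused WQEs; if
		 * that drops below srq->limit an SRQ limit event is raised.
		 */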
		n = wq->head;
		if (n >= rq->size)
			n = 0;
		if (n < tail)
			n += rq->size - tail;
		else
			n -= tail;
		if (n < srq->limit) {
			struct ib_event ev;

			srq->limit = 0;
			spin_unlock_irqrestore(&rq->lock, flags);
			ev.device = qp->ibqp.device;
			ev.element.srq = qp->ibqp.srq;
			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
			handler(&ev, srq->ibsrq.srq_context);
			goto bail;
		}
	}
unlock:
	spin_unlock_irqrestore(&rq->lock, flags);
bail:
	return ret;
}

static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
	return (gid->global.interface_id == id &&
		(gid->global.subnet_prefix == gid_prefix ||
		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
}

/*
 *
 * This should be called with the QP r_lock held.
 *
 * The s_lock will be acquired around the hfi1_migrate_qp() call.
 */
int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
		       int has_grh, struct rvt_qp *qp, u32 bth0)
{
	__be64 guid;
	unsigned long flags;
	u8 sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];

	if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
		if (!has_grh) {
			if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
				goto err;
		} else {
			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
				goto err;
			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
				    guid))
				goto err;
			if (!gid_ok(
				&hdr->u.l.grh.sgid,
				qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
				qp->alt_ah_attr.grh.dgid.global.interface_id))
				goto err;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
					    sc5, be16_to_cpu(hdr->lrh[3])))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
				       (u16)bth0,
				       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				       0, qp->ibqp.qp_num,
				       be16_to_cpu(hdr->lrh[3]),
				       be16_to_cpu(hdr->lrh[1]));
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
		if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
		    ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
			goto err;
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_migrate_qp(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else {
		if (!has_grh) {
			if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
				goto err;
		} else {
			if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
				goto err;
			guid = get_sguid(ibp,
					 qp->remote_ah_attr.grh.sgid_index);
			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
				    guid))
				goto err;
			if (!gid_ok(
				&hdr->u.l.grh.sgid,
				qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
				qp->remote_ah_attr.grh.dgid.global.interface_id))
				goto err;
		}
		if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
					    sc5, be16_to_cpu(hdr->lrh[3])))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
				       (u16)bth0,
				       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				       0, qp->ibqp.qp_num,
				       be16_to_cpu(hdr->lrh[3]),
				       be16_to_cpu(hdr->lrh[1]));
			goto err;
		}
		/* Validate the SLID. See Ch. 9.6.1.5 */
		if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
		    ppd_from_ibp(ibp)->port != qp->port_num)
			goto err;
		if (qp->s_mig_state == IB_MIG_REARM &&
		    !(bth0 & IB_BTH_MIG_REQ))
			qp->s_mig_state = IB_MIG_ARMED;
	}

	return 0;

err:
	return 1;
}

/**
 * ruc_loopback - handle UC and RC loopback requests
 * @sqp: the sending QP
 *
 * This is called from hfi1_do_send() to
 * forward a WQE addressed to the same HFI.
 * Note that although we are single threaded due to the send engine, we still
 * have to protect against post_send(). We don't have to worry about
 * receive interrupts since this is a connected protocol and all packets
 * will pass through here.
 */
static void ruc_loopback(struct rvt_qp *sqp)
{
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct rvt_qp *qp;
	struct rvt_swqe *wqe;
	struct rvt_sge *sge;
	unsigned long flags;
	struct ib_wc wc;
	u64 sdata;
	atomic64_t *maddr;
	enum ib_wc_status send_status;
	int release;
	int ret;
	int copy_last = 0;
	u32 to;
	int local_ops = 0;

	rcu_read_lock();

	/*
	 * Note that we check the responder QP state after
	 * checking the requester's state.
	 */
	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    sqp->remote_qpn);

	spin_lock_irqsave(&sqp->s_lock, flags);

	/* Return if we are already busy processing a work request. */
	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		goto unlock;

	sqp->s_flags |= RVT_S_BUSY;

again:
	smp_read_barrier_depends(); /* see post_one_send() */
	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
		goto clr_busy;
	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

	/* Return if it is not OK to start a new work request. */
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
			goto clr_busy;
		/* We are in the error state, flush the work request. */
		send_status = IB_WC_WR_FLUSH_ERR;
		goto flush_send;
	}

	/*
	 * We can rely on the entry not changing without the s_lock
	 * being held until we update s_last.
	 * We increment s_cur to indicate s_last is in progress.
	 */
	if (sqp->s_last == sqp->s_cur) {
		if (++sqp->s_cur >= sqp->s_size)
			sqp->s_cur = 0;
	}
	spin_unlock_irqrestore(&sqp->s_lock, flags);

	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
		ibp->rvp.n_pkt_drops++;
		/*
		 * For RC, the requester would timeout and retry so
		 * shortcut the timeouts and just signal too many retries.
		 */
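		/*
		 * Added note: UC has no acknowledgements or retries, so a
		 * dropped loopback packet is simply completed with
		 * IB_WC_SUCCESS from the sender's point of view.
		 */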
		if (sqp->ibqp.qp_type == IB_QPT_RC)
			send_status = IB_WC_RETRY_EXC_ERR;
		else
			send_status = IB_WC_SUCCESS;
		goto serr;
	}

	memset(&wc, 0, sizeof(wc));
	send_status = IB_WC_SUCCESS;

	release = 1;
	sqp->s_sge.sge = wqe->sg_list[0];
	sqp->s_sge.sg_list = wqe->sg_list + 1;
	sqp->s_sge.num_sge = wqe->wr.num_sge;
	sqp->s_len = wqe->length;
	switch (wqe->wr.opcode) {
	case IB_WR_REG_MR:
		goto send_comp;

	case IB_WR_LOCAL_INV:
		if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
			if (rvt_invalidate_rkey(sqp,
						wqe->wr.ex.invalidate_rkey))
				send_status = IB_WC_LOC_PROT_ERR;
			local_ops = 1;
		}
		goto send_comp;

	case IB_WR_SEND_WITH_INV:
		if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
			wc.wc_flags = IB_WC_WITH_INVALIDATE;
			wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
		}
		goto send;

	case IB_WR_SEND_WITH_IMM:
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		/* FALLTHROUGH */
	case IB_WR_SEND:
send:
		ret = hfi1_rvt_get_rwqe(qp, 0);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = wqe->wr.ex.imm_data;
		ret = hfi1_rvt_get_rwqe(qp, 1);
		if (ret < 0)
			goto op_err;
		if (!ret)
			goto rnr_nak;
		/* skip copy_last set and qp_access_flags recheck */
		goto do_write;
	case IB_WR_RDMA_WRITE:
		copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
			goto inv_err;
do_write:
		if (wqe->length == 0)
			break;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_WRITE)))
			goto acc_err;
		qp->r_sge.sg_list = NULL;
		qp->r_sge.num_sge = 1;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_RDMA_READ:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
					  wqe->rdma_wr.remote_addr,
					  wqe->rdma_wr.rkey,
					  IB_ACCESS_REMOTE_READ)))
			goto acc_err;
		release = 0;
		sqp->s_sge.sg_list = NULL;
		sqp->s_sge.num_sge = 1;
		qp->r_sge.sge = wqe->sg_list[0];
		qp->r_sge.sg_list = wqe->sg_list + 1;
		qp->r_sge.num_sge = wqe->wr.num_sge;
		qp->r_sge.total_len = wqe->length;
		break;

	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
			goto inv_err;
		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
					  wqe->atomic_wr.remote_addr,
					  wqe->atomic_wr.rkey,
					  IB_ACCESS_REMOTE_ATOMIC)))
			goto acc_err;
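		/*
		 * Added note: the loopback path emulates the hardware atomic
		 * directly in memory. The original 64-bit value at the target
		 * address (pre-add value for fetch-and-add, prior value for
		 * compare-and-swap) is written back into the requester's
		 * first SGE; the rkey check above validates an 8-byte target.
		 */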
		/* Perform atomic OP and save result. */
		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
		sdata = wqe->atomic_wr.compare_add;
		*(u64 *)sqp->s_sge.sge.vaddr =
			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
			(u64)atomic64_add_return(sdata, maddr) - sdata :
			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
				     sdata, wqe->atomic_wr.swap);
		rvt_put_mr(qp->r_sge.sge.mr);
		qp->r_sge.num_sge = 0;
		goto send_comp;

	default:
		send_status = IB_WC_LOC_QP_OP_ERR;
		goto serr;
	}

	sge = &sqp->s_sge.sge;
	while (sqp->s_len) {
		u32 len = sqp->s_len;

		if (len > sge->length)
			len = sge->length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (!release)
				rvt_put_mr(sge->mr);
			if (--sqp->s_sge.num_sge)
				*sge = *sqp->s_sge.sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		sqp->s_len -= len;
	}
	if (release)
		rvt_put_ss(&qp->r_sge);

	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
		goto send_comp;

	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
	else
		wc.opcode = IB_WC_RECV;
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.byte_len = wqe->length;
	wc.qp = &qp->ibqp;
	wc.src_qp = qp->remote_qpn;
	wc.slid = qp->remote_ah_attr.dlid;
	wc.sl = qp->remote_ah_attr.sl;
	wc.port_num = 1;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
	spin_lock_irqsave(&sqp->s_lock, flags);
	ibp->rvp.n_loop_pkts++;
flush_send:
	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
	hfi1_send_complete(sqp, wqe, send_status);
	if (local_ops) {
		atomic_dec(&sqp->local_ops_pending);
		local_ops = 0;
	}
	goto again;

rnr_nak:
	/* Handle RNR NAK */
	if (qp->ibqp.qp_type == IB_QPT_UC)
		goto send_comp;
	ibp->rvp.n_rnr_naks++;
	/*
	 * Note: we don't need the s_lock held since the BUSY flag
	 * makes this single threaded.
	 */
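	/*
	 * Added note: an RNR retry count of 7 means "retry indefinitely" in
	 * IBTA terms, which is why s_rnr_retry is only decremented when
	 * s_rnr_retry_cnt is less than 7.
	 */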
	if (sqp->s_rnr_retry == 0) {
		send_status = IB_WC_RNR_RETRY_EXC_ERR;
		goto serr;
	}
	if (sqp->s_rnr_retry_cnt < 7)
		sqp->s_rnr_retry--;
	spin_lock_irqsave(&sqp->s_lock, flags);
	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
		goto clr_busy;
	to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
	hfi1_add_rnr_timer(sqp, to);
	goto clr_busy;

op_err:
	send_status = IB_WC_REM_OP_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

inv_err:
	send_status = IB_WC_REM_INV_REQ_ERR;
	wc.status = IB_WC_LOC_QP_OP_ERR;
	goto err;

acc_err:
	send_status = IB_WC_REM_ACCESS_ERR;
	wc.status = IB_WC_LOC_PROT_ERR;
err:
	/* responder goes to error state */
	hfi1_rc_error(qp, wc.status);

serr:
	spin_lock_irqsave(&sqp->s_lock, flags);
	hfi1_send_complete(sqp, wqe, send_status);
	if (sqp->ibqp.qp_type == IB_QPT_RC) {
		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

		sqp->s_flags &= ~RVT_S_BUSY;
		spin_unlock_irqrestore(&sqp->s_lock, flags);
		if (lastwqe) {
			struct ib_event ev;

			ev.device = sqp->ibqp.device;
			ev.element.qp = &sqp->ibqp;
			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
		}
		goto done;
	}
clr_busy:
	sqp->s_flags &= ~RVT_S_BUSY;
unlock:
	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
	rcu_read_unlock();
}

/**
 * hfi1_make_grh - construct a GRH header
 * @ibp: a pointer to the IB port
 * @hdr: a pointer to the GRH header being constructed
 * @grh: the global route address to send to
 * @hwords: the number of 32 bit words of header being sent
 * @nwords: the number of 32 bit words of data being sent
 *
 * Return the size of the header in 32 bit words.
 */
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
		  struct ib_global_route *grh, u32 hwords, u32 nwords)
{
	hdr->version_tclass_flow =
		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
	hdr->next_hdr = IB_GRH_NEXT_HDR;
	hdr->hop_limit = grh->hop_limit;
	/* The SGID is 32-bit aligned. */
	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
	hdr->sgid.global.interface_id =
		grh->sgid_index < HFI1_GUIDS_PER_PORT ?
		get_sguid(ibp, grh->sgid_index) :
		get_sguid(ibp, HFI1_PORT_GUID_INDEX);
	hdr->dgid = grh->dgid;

	/* GRH header size in 32-bit words. */
	return sizeof(struct ib_grh) / sizeof(u32);
}

#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, hdr.u.oth.bth[2]) / 4)
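
/*
 * Added note: with adaptive header generation (AHG), the SDMA engine keeps a
 * copy of the packet header and only the fields named by AHG descriptors are
 * rewritten on later packets. build_ahg() below patches the PSN in BTH2:
 * descriptor 0 always updates the low 16 bits of the PSN, and a second
 * descriptor is added only when the upper 16 bits change.
 */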

/**
 * build_ahg - create ahg in s_ahg
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * This routine handles AHG by allocating an AHG entry and causing the
 * header of the first middle packet to be copied.
 *
 * Subsequent middles use the copied entry, editing the
 * PSN with 1 or 2 edits.
 */
static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
		clear_ahg(qp);
	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
		/* first middle that needs copy */
		if (qp->s_ahgidx < 0)
			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
		if (qp->s_ahgidx >= 0) {
			qp->s_ahgpsn = npsn;
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
			/* save to protect a change in another thread */
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			qp->s_flags |= RVT_S_AHG_VALID;
		}
	} else {
		/* subsequent middle after valid */
		if (qp->s_ahgidx >= 0) {
			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG;
			priv->s_ahg->ahgidx = qp->s_ahgidx;
			priv->s_ahg->ahgcount++;
			priv->s_ahg->ahgdesc[0] =
				sdma_build_ahg_descriptor(
					(__force u16)cpu_to_be16((u16)npsn),
					BTH2_OFFSET,
					16,
					16);
			if ((npsn & 0xffff0000) !=
			    (qp->s_ahgpsn & 0xffff0000)) {
				priv->s_ahg->ahgcount++;
				priv->s_ahg->ahgdesc[1] =
					sdma_build_ahg_descriptor(
						(__force u16)cpu_to_be16(
						(u16)(npsn >> 16)),
						BTH2_OFFSET,
						0,
						16);
			}
		}
	}
}
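
/*
 * Added note: hfi1_make_ruc_header() below pads the payload to a 4-byte
 * boundary before computing the LRH length. For example, assuming a
 * 7-byte s_cur_size, extra_bytes = -7 & 3 = 1 pad byte and
 * nwords = (7 + 1) >> 2 = 2 payload dwords.
 */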

void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
			  u32 bth0, u32 bth2, int middle,
			  struct hfi1_pkt_state *ps)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp = ps->ibp;
	u16 lrh0;
	u32 nwords;
	u32 extra_bytes;
	u32 bth1;

	/* Construct the header. */
	extra_bytes = -ps->s_txreq->s_cur_size & 3;
	nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
	lrh0 = HFI1_LRH_BTH;
	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
		qp->s_hdrwords += hfi1_make_grh(ibp,
						&ps->s_txreq->phdr.hdr.u.l.grh,
						&qp->remote_ah_attr.grh,
						qp->s_hdrwords, nwords);
		lrh0 = HFI1_LRH_GRH;
		middle = 0;
	}
	lrh0 |= (priv->s_sc & 0xf) << 12 | (qp->remote_ah_attr.sl & 0xf) << 4;
	/*
	 * reset s_ahg/AHG fields
	 *
	 * This ensures that the ahgentry/ahgcount
	 * are at a non-AHG default to protect
	 * build_verbs_tx_desc() from using
	 * a stale ahgidx.
	 *
	 * build_ahg() will modify as appropriate
	 * to use the AHG feature.
	 */
	priv->s_ahg->tx_flags = 0;
	priv->s_ahg->ahgcount = 0;
	priv->s_ahg->ahgidx = 0;
	if (qp->s_mig_state == IB_MIG_MIGRATED)
		bth0 |= IB_BTH_MIG_REQ;
	else
		middle = 0;
	if (middle)
		build_ahg(qp, bth2);
	else
		qp->s_flags &= ~RVT_S_AHG_VALID;
	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
	ps->s_txreq->phdr.hdr.lrh[2] =
		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
	ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
				       qp->remote_ah_attr.src_path_bits);
	bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
	bth0 |= extra_bytes << 20;
	ohdr->bth[0] = cpu_to_be32(bth0);
	bth1 = qp->remote_qpn;
	if (qp->s_flags & RVT_S_ECN) {
		qp->s_flags &= ~RVT_S_ECN;
		/* we recently received a FECN, so return a BECN */
		bth1 |= (HFI1_BECN_MASK << HFI1_BECN_SHIFT);
	}
	ohdr->bth[1] = cpu_to_be32(bth1);
	ohdr->bth[2] = cpu_to_be32(bth2);
}

/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
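
/*
 * Added note: hfi1_do_send() below checks roughly every timeout_int / 8
 * jiffies whether it has held the CPU too long; if the workqueue for this
 * CPU is congested it drops RVT_S_BUSY and reschedules itself, otherwise it
 * calls cond_resched() so other tasks can run.
 */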

void _hfi1_do_send(struct work_struct *work)
{
	struct iowait *wait = container_of(work, struct iowait, iowork);
	struct rvt_qp *qp = iowait_to_qp(wait);

	hfi1_do_send(qp);
}

/**
 * hfi1_do_send - perform a send on a QP
 * @qp: a pointer to the QP
 *
 * Process entries in the send work queue until credit or queue is
 * exhausted. Only allow one CPU to send a packet per QP.
 * Otherwise, two threads could send packets out of order.
 */
void hfi1_do_send(struct rvt_qp *qp)
{
	struct hfi1_pkt_state ps;
	struct hfi1_qp_priv *priv = qp->priv;
	int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
	unsigned long timeout;
	unsigned long timeout_int;
	int cpu;

	ps.dev = to_idev(qp->ibqp.device);
	ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
	ps.ppd = ppd_from_ibp(ps.ibp);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		if (!loopback && ((qp->remote_ah_attr.dlid &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_rc_req;
		timeout_int = (qp->timeout_jiffies);
		break;
	case IB_QPT_UC:
		if (!loopback && ((qp->remote_ah_attr.dlid &
				   ~((1 << ps.ppd->lmc) - 1)) ==
				  ps.ppd->lid)) {
			ruc_loopback(qp);
			return;
		}
		make_req = hfi1_make_uc_req;
		timeout_int = SEND_RESCHED_TIMEOUT;
		break;
	default:
		make_req = hfi1_make_ud_req;
		timeout_int = SEND_RESCHED_TIMEOUT;
	}

	spin_lock_irqsave(&qp->s_lock, ps.flags);

	/* Return if we are already busy processing a work request. */
	if (!hfi1_send_ok(qp)) {
		spin_unlock_irqrestore(&qp->s_lock, ps.flags);
		return;
	}

	qp->s_flags |= RVT_S_BUSY;

	timeout = jiffies + (timeout_int) / 8;
	cpu = priv->s_sde ? priv->s_sde->cpu :
			    cpumask_first(cpumask_of_node(ps.ppd->dd->node));
	/* insure a pre-built packet is handled */
	ps.s_txreq = get_waiting_verbs_txreq(qp);
	do {
		/* Check for a constructed packet to be sent. */
		if (qp->s_hdrwords != 0) {
			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
			/*
			 * If the packet cannot be sent now, return and
			 * the send engine will be woken up later.
			 */
			if (hfi1_verbs_send(qp, &ps))
				return;
			/* Record that s_ahg is empty. */
			qp->s_hdrwords = 0;
			/* allow other tasks to run */
			if (unlikely(time_after(jiffies, timeout))) {
				if (workqueue_congested(cpu,
							ps.ppd->hfi1_wq)) {
					spin_lock_irqsave(
						&qp->s_lock,
						ps.flags);
					qp->s_flags &= ~RVT_S_BUSY;
					hfi1_schedule_send(qp);
					spin_unlock_irqrestore(
						&qp->s_lock,
						ps.flags);
					this_cpu_inc(
						*ps.ppd->dd->send_schedule);
					return;
				}
				if (!irqs_disabled()) {
					cond_resched();
					this_cpu_inc(
						*ps.ppd->dd->send_schedule);
				}
				timeout = jiffies + (timeout_int) / 8;
			}
			spin_lock_irqsave(&qp->s_lock, ps.flags);
		}
	} while (make_req(qp, &ps));

	spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}

/*
 * This should be called with s_lock held.
 */
void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
			enum ib_wc_status status)
{
	u32 old_last, last;

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
		return;

	last = qp->s_last;
	old_last = last;
	if (++last >= qp->s_size)
		last = 0;
	qp->s_last = last;
	/* See post_send() */
	barrier();
	rvt_put_swqe(wqe);
	if (qp->ibqp.qp_type == IB_QPT_UD ||
	    qp->ibqp.qp_type == IB_QPT_SMI ||
	    qp->ibqp.qp_type == IB_QPT_GSI)
		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

	rvt_qp_swqe_complete(qp, wqe, status);

	if (qp->s_acked == old_last)
		qp->s_acked = last;
	if (qp->s_cur == old_last)
		qp->s_cur = last;
	if (qp->s_tail == old_last)
		qp->s_tail = last;
	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
		qp->s_draining = 0;
}