1 /* 2 * Copyright(c) 2015 - 2018 Intel Corporation. 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * This program is free software; you can redistribute it and/or modify 10 * it under the terms of version 2 of the GNU General Public License as 11 * published by the Free Software Foundation. 12 * 13 * This program is distributed in the hope that it will be useful, but 14 * WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * General Public License for more details. 17 * 18 * BSD LICENSE 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 24 * - Redistributions of source code must retain the above copyright 25 * notice, this list of conditions and the following disclaimer. 26 * - Redistributions in binary form must reproduce the above copyright 27 * notice, this list of conditions and the following disclaimer in 28 * the documentation and/or other materials provided with the 29 * distribution. 30 * - Neither the name of Intel Corporation nor the names of its 31 * contributors may be used to endorse or promote products derived 32 * from this software without specific prior written permission. 33 * 34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 * 46 */ 47 48 #include <linux/spinlock.h> 49 50 #include "hfi.h" 51 #include "mad.h" 52 #include "qp.h" 53 #include "verbs_txreq.h" 54 #include "trace.h" 55 56 static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) 57 { 58 return (gid->global.interface_id == id && 59 (gid->global.subnet_prefix == gid_prefix || 60 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX)); 61 } 62 63 /* 64 * 65 * This should be called with the QP r_lock held. 66 * 67 * The s_lock will be acquired around the hfi1_migrate_qp() call. 68 */ 69 int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet) 70 { 71 __be64 guid; 72 unsigned long flags; 73 struct rvt_qp *qp = packet->qp; 74 u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)]; 75 u32 dlid = packet->dlid; 76 u32 slid = packet->slid; 77 u32 sl = packet->sl; 78 bool migrated = packet->migrated; 79 u16 pkey = packet->pkey; 80 81 if (qp->s_mig_state == IB_MIG_ARMED && migrated) { 82 if (!packet->grh) { 83 if ((rdma_ah_get_ah_flags(&qp->alt_ah_attr) & 84 IB_AH_GRH) && 85 (packet->etype != RHF_RCV_TYPE_BYPASS)) 86 return 1; 87 } else { 88 const struct ib_global_route *grh; 89 90 if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) & 91 IB_AH_GRH)) 92 return 1; 93 grh = rdma_ah_read_grh(&qp->alt_ah_attr); 94 guid = get_sguid(ibp, grh->sgid_index); 95 if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, 96 guid)) 97 return 1; 98 if (!gid_ok( 99 &packet->grh->sgid, 100 grh->dgid.global.subnet_prefix, 101 grh->dgid.global.interface_id)) 102 return 1; 103 } 104 if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey, 105 sc5, slid))) { 106 hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num, 107 slid, dlid); 108 return 1; 109 } 110 /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */ 111 if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) || 112 ppd_from_ibp(ibp)->port != 113 rdma_ah_get_port_num(&qp->alt_ah_attr)) 114 return 1; 115 spin_lock_irqsave(&qp->s_lock, flags); 116 hfi1_migrate_qp(qp); 117 spin_unlock_irqrestore(&qp->s_lock, flags); 118 } else { 119 if (!packet->grh) { 120 if ((rdma_ah_get_ah_flags(&qp->remote_ah_attr) & 121 IB_AH_GRH) && 122 (packet->etype != RHF_RCV_TYPE_BYPASS)) 123 return 1; 124 } else { 125 const struct ib_global_route *grh; 126 127 if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & 128 IB_AH_GRH)) 129 return 1; 130 grh = rdma_ah_read_grh(&qp->remote_ah_attr); 131 guid = get_sguid(ibp, grh->sgid_index); 132 if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix, 133 guid)) 134 return 1; 135 if (!gid_ok( 136 &packet->grh->sgid, 137 grh->dgid.global.subnet_prefix, 138 grh->dgid.global.interface_id)) 139 return 1; 140 } 141 if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), pkey, 142 sc5, slid))) { 143 hfi1_bad_pkey(ibp, pkey, sl, 0, qp->ibqp.qp_num, 144 slid, dlid); 145 return 1; 146 } 147 /* Validate the SLID. See Ch. 9.6.1.5 */ 148 if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) || 149 ppd_from_ibp(ibp)->port != qp->port_num) 150 return 1; 151 if (qp->s_mig_state == IB_MIG_REARM && !migrated) 152 qp->s_mig_state = IB_MIG_ARMED; 153 } 154 155 return 0; 156 } 157 158 /** 159 * ruc_loopback - handle UC and RC loopback requests 160 * @sqp: the sending QP 161 * 162 * This is called from hfi1_do_send() to 163 * forward a WQE addressed to the same HFI. 164 * Note that although we are single threaded due to the send engine, we still 165 * have to protect against post_send(). We don't have to worry about 166 * receive interrupts since this is a connected protocol and all packets 167 * will pass through here. 168 */ 169 static void ruc_loopback(struct rvt_qp *sqp) 170 { 171 struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num); 172 struct rvt_qp *qp; 173 struct rvt_swqe *wqe; 174 struct rvt_sge *sge; 175 unsigned long flags; 176 struct ib_wc wc; 177 u64 sdata; 178 atomic64_t *maddr; 179 enum ib_wc_status send_status; 180 bool release; 181 int ret; 182 bool copy_last = false; 183 int local_ops = 0; 184 185 rcu_read_lock(); 186 187 /* 188 * Note that we check the responder QP state after 189 * checking the requester's state. 190 */ 191 qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, 192 sqp->remote_qpn); 193 194 spin_lock_irqsave(&sqp->s_lock, flags); 195 196 /* Return if we are already busy processing a work request. */ 197 if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) || 198 !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 199 goto unlock; 200 201 sqp->s_flags |= RVT_S_BUSY; 202 203 again: 204 if (sqp->s_last == READ_ONCE(sqp->s_head)) 205 goto clr_busy; 206 wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); 207 208 /* Return if it is not OK to start a new work request. */ 209 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) { 210 if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND)) 211 goto clr_busy; 212 /* We are in the error state, flush the work request. */ 213 send_status = IB_WC_WR_FLUSH_ERR; 214 goto flush_send; 215 } 216 217 /* 218 * We can rely on the entry not changing without the s_lock 219 * being held until we update s_last. 220 * We increment s_cur to indicate s_last is in progress. 221 */ 222 if (sqp->s_last == sqp->s_cur) { 223 if (++sqp->s_cur >= sqp->s_size) 224 sqp->s_cur = 0; 225 } 226 spin_unlock_irqrestore(&sqp->s_lock, flags); 227 228 if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) || 229 qp->ibqp.qp_type != sqp->ibqp.qp_type) { 230 ibp->rvp.n_pkt_drops++; 231 /* 232 * For RC, the requester would timeout and retry so 233 * shortcut the timeouts and just signal too many retries. 234 */ 235 if (sqp->ibqp.qp_type == IB_QPT_RC) 236 send_status = IB_WC_RETRY_EXC_ERR; 237 else 238 send_status = IB_WC_SUCCESS; 239 goto serr; 240 } 241 242 memset(&wc, 0, sizeof(wc)); 243 send_status = IB_WC_SUCCESS; 244 245 release = true; 246 sqp->s_sge.sge = wqe->sg_list[0]; 247 sqp->s_sge.sg_list = wqe->sg_list + 1; 248 sqp->s_sge.num_sge = wqe->wr.num_sge; 249 sqp->s_len = wqe->length; 250 switch (wqe->wr.opcode) { 251 case IB_WR_REG_MR: 252 goto send_comp; 253 254 case IB_WR_LOCAL_INV: 255 if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) { 256 if (rvt_invalidate_rkey(sqp, 257 wqe->wr.ex.invalidate_rkey)) 258 send_status = IB_WC_LOC_PROT_ERR; 259 local_ops = 1; 260 } 261 goto send_comp; 262 263 case IB_WR_SEND_WITH_INV: 264 if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) { 265 wc.wc_flags = IB_WC_WITH_INVALIDATE; 266 wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey; 267 } 268 goto send; 269 270 case IB_WR_SEND_WITH_IMM: 271 wc.wc_flags = IB_WC_WITH_IMM; 272 wc.ex.imm_data = wqe->wr.ex.imm_data; 273 /* FALLTHROUGH */ 274 case IB_WR_SEND: 275 send: 276 ret = rvt_get_rwqe(qp, false); 277 if (ret < 0) 278 goto op_err; 279 if (!ret) 280 goto rnr_nak; 281 break; 282 283 case IB_WR_RDMA_WRITE_WITH_IMM: 284 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 285 goto inv_err; 286 wc.wc_flags = IB_WC_WITH_IMM; 287 wc.ex.imm_data = wqe->wr.ex.imm_data; 288 ret = rvt_get_rwqe(qp, true); 289 if (ret < 0) 290 goto op_err; 291 if (!ret) 292 goto rnr_nak; 293 /* skip copy_last set and qp_access_flags recheck */ 294 goto do_write; 295 case IB_WR_RDMA_WRITE: 296 copy_last = rvt_is_user_qp(qp); 297 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE))) 298 goto inv_err; 299 do_write: 300 if (wqe->length == 0) 301 break; 302 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length, 303 wqe->rdma_wr.remote_addr, 304 wqe->rdma_wr.rkey, 305 IB_ACCESS_REMOTE_WRITE))) 306 goto acc_err; 307 qp->r_sge.sg_list = NULL; 308 qp->r_sge.num_sge = 1; 309 qp->r_sge.total_len = wqe->length; 310 break; 311 312 case IB_WR_RDMA_READ: 313 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ))) 314 goto inv_err; 315 if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length, 316 wqe->rdma_wr.remote_addr, 317 wqe->rdma_wr.rkey, 318 IB_ACCESS_REMOTE_READ))) 319 goto acc_err; 320 release = false; 321 sqp->s_sge.sg_list = NULL; 322 sqp->s_sge.num_sge = 1; 323 qp->r_sge.sge = wqe->sg_list[0]; 324 qp->r_sge.sg_list = wqe->sg_list + 1; 325 qp->r_sge.num_sge = wqe->wr.num_sge; 326 qp->r_sge.total_len = wqe->length; 327 break; 328 329 case IB_WR_ATOMIC_CMP_AND_SWP: 330 case IB_WR_ATOMIC_FETCH_AND_ADD: 331 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) 332 goto inv_err; 333 if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), 334 wqe->atomic_wr.remote_addr, 335 wqe->atomic_wr.rkey, 336 IB_ACCESS_REMOTE_ATOMIC))) 337 goto acc_err; 338 /* Perform atomic OP and save result. */ 339 maddr = (atomic64_t *)qp->r_sge.sge.vaddr; 340 sdata = wqe->atomic_wr.compare_add; 341 *(u64 *)sqp->s_sge.sge.vaddr = 342 (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ? 343 (u64)atomic64_add_return(sdata, maddr) - sdata : 344 (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, 345 sdata, wqe->atomic_wr.swap); 346 rvt_put_mr(qp->r_sge.sge.mr); 347 qp->r_sge.num_sge = 0; 348 goto send_comp; 349 350 default: 351 send_status = IB_WC_LOC_QP_OP_ERR; 352 goto serr; 353 } 354 355 sge = &sqp->s_sge.sge; 356 while (sqp->s_len) { 357 u32 len = sqp->s_len; 358 359 if (len > sge->length) 360 len = sge->length; 361 if (len > sge->sge_length) 362 len = sge->sge_length; 363 WARN_ON_ONCE(len == 0); 364 hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last); 365 sge->vaddr += len; 366 sge->length -= len; 367 sge->sge_length -= len; 368 if (sge->sge_length == 0) { 369 if (!release) 370 rvt_put_mr(sge->mr); 371 if (--sqp->s_sge.num_sge) 372 *sge = *sqp->s_sge.sg_list++; 373 } else if (sge->length == 0 && sge->mr->lkey) { 374 if (++sge->n >= RVT_SEGSZ) { 375 if (++sge->m >= sge->mr->mapsz) 376 break; 377 sge->n = 0; 378 } 379 sge->vaddr = 380 sge->mr->map[sge->m]->segs[sge->n].vaddr; 381 sge->length = 382 sge->mr->map[sge->m]->segs[sge->n].length; 383 } 384 sqp->s_len -= len; 385 } 386 if (release) 387 rvt_put_ss(&qp->r_sge); 388 389 if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) 390 goto send_comp; 391 392 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) 393 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 394 else 395 wc.opcode = IB_WC_RECV; 396 wc.wr_id = qp->r_wr_id; 397 wc.status = IB_WC_SUCCESS; 398 wc.byte_len = wqe->length; 399 wc.qp = &qp->ibqp; 400 wc.src_qp = qp->remote_qpn; 401 wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; 402 wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); 403 wc.port_num = 1; 404 /* Signal completion event if the solicited bit is set. */ 405 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 406 wqe->wr.send_flags & IB_SEND_SOLICITED); 407 408 send_comp: 409 spin_lock_irqsave(&sqp->s_lock, flags); 410 ibp->rvp.n_loop_pkts++; 411 flush_send: 412 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 413 hfi1_send_complete(sqp, wqe, send_status); 414 if (local_ops) { 415 atomic_dec(&sqp->local_ops_pending); 416 local_ops = 0; 417 } 418 goto again; 419 420 rnr_nak: 421 /* Handle RNR NAK */ 422 if (qp->ibqp.qp_type == IB_QPT_UC) 423 goto send_comp; 424 ibp->rvp.n_rnr_naks++; 425 /* 426 * Note: we don't need the s_lock held since the BUSY flag 427 * makes this single threaded. 428 */ 429 if (sqp->s_rnr_retry == 0) { 430 send_status = IB_WC_RNR_RETRY_EXC_ERR; 431 goto serr; 432 } 433 if (sqp->s_rnr_retry_cnt < 7) 434 sqp->s_rnr_retry--; 435 spin_lock_irqsave(&sqp->s_lock, flags); 436 if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK)) 437 goto clr_busy; 438 rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer << 439 IB_AETH_CREDIT_SHIFT); 440 goto clr_busy; 441 442 op_err: 443 send_status = IB_WC_REM_OP_ERR; 444 wc.status = IB_WC_LOC_QP_OP_ERR; 445 goto err; 446 447 inv_err: 448 send_status = IB_WC_REM_INV_REQ_ERR; 449 wc.status = IB_WC_LOC_QP_OP_ERR; 450 goto err; 451 452 acc_err: 453 send_status = IB_WC_REM_ACCESS_ERR; 454 wc.status = IB_WC_LOC_PROT_ERR; 455 err: 456 /* responder goes to error state */ 457 rvt_rc_error(qp, wc.status); 458 459 serr: 460 spin_lock_irqsave(&sqp->s_lock, flags); 461 hfi1_send_complete(sqp, wqe, send_status); 462 if (sqp->ibqp.qp_type == IB_QPT_RC) { 463 int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); 464 465 sqp->s_flags &= ~RVT_S_BUSY; 466 spin_unlock_irqrestore(&sqp->s_lock, flags); 467 if (lastwqe) { 468 struct ib_event ev; 469 470 ev.device = sqp->ibqp.device; 471 ev.element.qp = &sqp->ibqp; 472 ev.event = IB_EVENT_QP_LAST_WQE_REACHED; 473 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context); 474 } 475 goto done; 476 } 477 clr_busy: 478 sqp->s_flags &= ~RVT_S_BUSY; 479 unlock: 480 spin_unlock_irqrestore(&sqp->s_lock, flags); 481 done: 482 rcu_read_unlock(); 483 } 484 485 /** 486 * hfi1_make_grh - construct a GRH header 487 * @ibp: a pointer to the IB port 488 * @hdr: a pointer to the GRH header being constructed 489 * @grh: the global route address to send to 490 * @hwords: size of header after grh being sent in dwords 491 * @nwords: the number of 32 bit words of data being sent 492 * 493 * Return the size of the header in 32 bit words. 494 */ 495 u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, 496 const struct ib_global_route *grh, u32 hwords, u32 nwords) 497 { 498 hdr->version_tclass_flow = 499 cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) | 500 (grh->traffic_class << IB_GRH_TCLASS_SHIFT) | 501 (grh->flow_label << IB_GRH_FLOW_SHIFT)); 502 hdr->paylen = cpu_to_be16((hwords + nwords) << 2); 503 /* next_hdr is defined by C8-7 in ch. 8.4.1 */ 504 hdr->next_hdr = IB_GRH_NEXT_HDR; 505 hdr->hop_limit = grh->hop_limit; 506 /* The SGID is 32-bit aligned. */ 507 hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix; 508 hdr->sgid.global.interface_id = 509 grh->sgid_index < HFI1_GUIDS_PER_PORT ? 510 get_sguid(ibp, grh->sgid_index) : 511 get_sguid(ibp, HFI1_PORT_GUID_INDEX); 512 hdr->dgid = grh->dgid; 513 514 /* GRH header size in 32-bit words. */ 515 return sizeof(struct ib_grh) / sizeof(u32); 516 } 517 518 #define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \ 519 hdr.ibh.u.oth.bth[2]) / 4) 520 521 /** 522 * build_ahg - create ahg in s_ahg 523 * @qp: a pointer to QP 524 * @npsn: the next PSN for the request/response 525 * 526 * This routine handles the AHG by allocating an ahg entry and causing the 527 * copy of the first middle. 528 * 529 * Subsequent middles use the copied entry, editing the 530 * PSN with 1 or 2 edits. 531 */ 532 static inline void build_ahg(struct rvt_qp *qp, u32 npsn) 533 { 534 struct hfi1_qp_priv *priv = qp->priv; 535 536 if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR)) 537 clear_ahg(qp); 538 if (!(qp->s_flags & HFI1_S_AHG_VALID)) { 539 /* first middle that needs copy */ 540 if (qp->s_ahgidx < 0) 541 qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde); 542 if (qp->s_ahgidx >= 0) { 543 qp->s_ahgpsn = npsn; 544 priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY; 545 /* save to protect a change in another thread */ 546 priv->s_ahg->ahgidx = qp->s_ahgidx; 547 qp->s_flags |= HFI1_S_AHG_VALID; 548 } 549 } else { 550 /* subsequent middle after valid */ 551 if (qp->s_ahgidx >= 0) { 552 priv->s_ahg->tx_flags |= SDMA_TXREQ_F_USE_AHG; 553 priv->s_ahg->ahgidx = qp->s_ahgidx; 554 priv->s_ahg->ahgcount++; 555 priv->s_ahg->ahgdesc[0] = 556 sdma_build_ahg_descriptor( 557 (__force u16)cpu_to_be16((u16)npsn), 558 BTH2_OFFSET, 559 16, 560 16); 561 if ((npsn & 0xffff0000) != 562 (qp->s_ahgpsn & 0xffff0000)) { 563 priv->s_ahg->ahgcount++; 564 priv->s_ahg->ahgdesc[1] = 565 sdma_build_ahg_descriptor( 566 (__force u16)cpu_to_be16( 567 (u16)(npsn >> 16)), 568 BTH2_OFFSET, 569 0, 570 16); 571 } 572 } 573 } 574 } 575 576 static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, 577 struct ib_other_headers *ohdr, 578 u32 bth0, u32 bth1, u32 bth2) 579 { 580 bth1 |= qp->remote_qpn; 581 ohdr->bth[0] = cpu_to_be32(bth0); 582 ohdr->bth[1] = cpu_to_be32(bth1); 583 ohdr->bth[2] = cpu_to_be32(bth2); 584 } 585 586 /** 587 * hfi1_make_ruc_header_16B - build a 16B header 588 * @qp: the queue pair 589 * @ohdr: a pointer to the destination header memory 590 * @bth0: bth0 passed in from the RC/UC builder 591 * @bth2: bth2 passed in from the RC/UC builder 592 * @middle: non zero implies indicates ahg "could" be used 593 * @ps: the current packet state 594 * 595 * This routine may disarm ahg under these situations: 596 * - packet needs a GRH 597 * - BECN needed 598 * - migration state not IB_MIG_MIGRATED 599 */ 600 static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, 601 struct ib_other_headers *ohdr, 602 u32 bth0, u32 bth2, int middle, 603 struct hfi1_pkt_state *ps) 604 { 605 struct hfi1_qp_priv *priv = qp->priv; 606 struct hfi1_ibport *ibp = ps->ibp; 607 struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); 608 u32 bth1 = 0; 609 u32 slid; 610 u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); 611 u8 l4 = OPA_16B_L4_IB_LOCAL; 612 u8 extra_bytes = hfi1_get_16b_padding( 613 (ps->s_txreq->hdr_dwords << 2), 614 ps->s_txreq->s_cur_size); 615 u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + 616 extra_bytes + SIZE_OF_LT) >> 2); 617 bool becn = false; 618 619 if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH) && 620 hfi1_check_mcast(rdma_ah_get_dlid(&qp->remote_ah_attr))) { 621 struct ib_grh *grh; 622 struct ib_global_route *grd = 623 rdma_ah_retrieve_grh(&qp->remote_ah_attr); 624 /* 625 * Ensure OPA GIDs are transformed to IB gids 626 * before creating the GRH. 627 */ 628 if (grd->sgid_index == OPA_GID_INDEX) 629 grd->sgid_index = 0; 630 grh = &ps->s_txreq->phdr.hdr.opah.u.l.grh; 631 l4 = OPA_16B_L4_IB_GLOBAL; 632 ps->s_txreq->hdr_dwords += 633 hfi1_make_grh(ibp, grh, grd, 634 ps->s_txreq->hdr_dwords - LRH_16B_DWORDS, 635 nwords); 636 middle = 0; 637 } 638 639 if (qp->s_mig_state == IB_MIG_MIGRATED) 640 bth1 |= OPA_BTH_MIG_REQ; 641 else 642 middle = 0; 643 644 if (qp->s_flags & RVT_S_ECN) { 645 qp->s_flags &= ~RVT_S_ECN; 646 /* we recently received a FECN, so return a BECN */ 647 becn = true; 648 middle = 0; 649 } 650 if (middle) 651 build_ahg(qp, bth2); 652 else 653 qp->s_flags &= ~HFI1_S_AHG_VALID; 654 655 bth0 |= pkey; 656 bth0 |= extra_bytes << 20; 657 hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); 658 659 if (!ppd->lid) 660 slid = be32_to_cpu(OPA_LID_PERMISSIVE); 661 else 662 slid = ppd->lid | 663 (rdma_ah_get_path_bits(&qp->remote_ah_attr) & 664 ((1 << ppd->lmc) - 1)); 665 666 hfi1_make_16b_hdr(&ps->s_txreq->phdr.hdr.opah, 667 slid, 668 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 669 16B), 670 (ps->s_txreq->hdr_dwords + nwords) >> 1, 671 pkey, becn, 0, l4, priv->s_sc); 672 } 673 674 /** 675 * hfi1_make_ruc_header_9B - build a 9B header 676 * @qp: the queue pair 677 * @ohdr: a pointer to the destination header memory 678 * @bth0: bth0 passed in from the RC/UC builder 679 * @bth2: bth2 passed in from the RC/UC builder 680 * @middle: non zero implies indicates ahg "could" be used 681 * @ps: the current packet state 682 * 683 * This routine may disarm ahg under these situations: 684 * - packet needs a GRH 685 * - BECN needed 686 * - migration state not IB_MIG_MIGRATED 687 */ 688 static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, 689 struct ib_other_headers *ohdr, 690 u32 bth0, u32 bth2, int middle, 691 struct hfi1_pkt_state *ps) 692 { 693 struct hfi1_qp_priv *priv = qp->priv; 694 struct hfi1_ibport *ibp = ps->ibp; 695 u32 bth1 = 0; 696 u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); 697 u16 lrh0 = HFI1_LRH_BTH; 698 u8 extra_bytes = -ps->s_txreq->s_cur_size & 3; 699 u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + 700 extra_bytes) >> 2); 701 702 if (unlikely(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH)) { 703 struct ib_grh *grh = &ps->s_txreq->phdr.hdr.ibh.u.l.grh; 704 705 lrh0 = HFI1_LRH_GRH; 706 ps->s_txreq->hdr_dwords += 707 hfi1_make_grh(ibp, grh, 708 rdma_ah_read_grh(&qp->remote_ah_attr), 709 ps->s_txreq->hdr_dwords - LRH_9B_DWORDS, 710 nwords); 711 middle = 0; 712 } 713 lrh0 |= (priv->s_sc & 0xf) << 12 | 714 (rdma_ah_get_sl(&qp->remote_ah_attr) & 0xf) << 4; 715 716 if (qp->s_mig_state == IB_MIG_MIGRATED) 717 bth0 |= IB_BTH_MIG_REQ; 718 else 719 middle = 0; 720 721 if (qp->s_flags & RVT_S_ECN) { 722 qp->s_flags &= ~RVT_S_ECN; 723 /* we recently received a FECN, so return a BECN */ 724 bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); 725 middle = 0; 726 } 727 if (middle) 728 build_ahg(qp, bth2); 729 else 730 qp->s_flags &= ~HFI1_S_AHG_VALID; 731 732 bth0 |= pkey; 733 bth0 |= extra_bytes << 20; 734 hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); 735 hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, 736 lrh0, 737 ps->s_txreq->hdr_dwords + nwords, 738 opa_get_lid(rdma_ah_get_dlid(&qp->remote_ah_attr), 9B), 739 ppd_from_ibp(ibp)->lid | 740 rdma_ah_get_path_bits(&qp->remote_ah_attr)); 741 } 742 743 typedef void (*hfi1_make_ruc_hdr)(struct rvt_qp *qp, 744 struct ib_other_headers *ohdr, 745 u32 bth0, u32 bth2, int middle, 746 struct hfi1_pkt_state *ps); 747 748 /* We support only two types - 9B and 16B for now */ 749 static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl[2] = { 750 [HFI1_PKT_TYPE_9B] = &hfi1_make_ruc_header_9B, 751 [HFI1_PKT_TYPE_16B] = &hfi1_make_ruc_header_16B 752 }; 753 754 void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, 755 u32 bth0, u32 bth2, int middle, 756 struct hfi1_pkt_state *ps) 757 { 758 struct hfi1_qp_priv *priv = qp->priv; 759 760 /* 761 * reset s_ahg/AHG fields 762 * 763 * This insures that the ahgentry/ahgcount 764 * are at a non-AHG default to protect 765 * build_verbs_tx_desc() from using 766 * an include ahgidx. 767 * 768 * build_ahg() will modify as appropriate 769 * to use the AHG feature. 770 */ 771 priv->s_ahg->tx_flags = 0; 772 priv->s_ahg->ahgcount = 0; 773 priv->s_ahg->ahgidx = 0; 774 775 /* Make the appropriate header */ 776 hfi1_ruc_header_tbl[priv->hdr_type](qp, ohdr, bth0, bth2, middle, ps); 777 } 778 779 /* when sending, force a reschedule every one of these periods */ 780 #define SEND_RESCHED_TIMEOUT (5 * HZ) /* 5s in jiffies */ 781 782 /** 783 * schedule_send_yield - test for a yield required for QP send engine 784 * @timeout: Final time for timeout slice for jiffies 785 * @qp: a pointer to QP 786 * @ps: a pointer to a structure with commonly lookup values for 787 * the the send engine progress 788 * 789 * This routine checks if the time slice for the QP has expired 790 * for RC QPs, if so an additional work entry is queued. At this 791 * point, other QPs have an opportunity to be scheduled. It 792 * returns true if a yield is required, otherwise, false 793 * is returned. 794 */ 795 static bool schedule_send_yield(struct rvt_qp *qp, 796 struct hfi1_pkt_state *ps) 797 { 798 ps->pkts_sent = true; 799 800 if (unlikely(time_after(jiffies, ps->timeout))) { 801 if (!ps->in_thread || 802 workqueue_congested(ps->cpu, ps->ppd->hfi1_wq)) { 803 spin_lock_irqsave(&qp->s_lock, ps->flags); 804 qp->s_flags &= ~RVT_S_BUSY; 805 hfi1_schedule_send(qp); 806 spin_unlock_irqrestore(&qp->s_lock, ps->flags); 807 this_cpu_inc(*ps->ppd->dd->send_schedule); 808 trace_hfi1_rc_expired_time_slice(qp, true); 809 return true; 810 } 811 812 cond_resched(); 813 this_cpu_inc(*ps->ppd->dd->send_schedule); 814 ps->timeout = jiffies + ps->timeout_int; 815 } 816 817 trace_hfi1_rc_expired_time_slice(qp, false); 818 return false; 819 } 820 821 void hfi1_do_send_from_rvt(struct rvt_qp *qp) 822 { 823 hfi1_do_send(qp, false); 824 } 825 826 void _hfi1_do_send(struct work_struct *work) 827 { 828 struct iowait *wait = container_of(work, struct iowait, iowork); 829 struct rvt_qp *qp = iowait_to_qp(wait); 830 831 hfi1_do_send(qp, true); 832 } 833 834 /** 835 * hfi1_do_send - perform a send on a QP 836 * @work: contains a pointer to the QP 837 * @in_thread: true if in a workqueue thread 838 * 839 * Process entries in the send work queue until credit or queue is 840 * exhausted. Only allow one CPU to send a packet per QP. 841 * Otherwise, two threads could send packets out of order. 842 */ 843 void hfi1_do_send(struct rvt_qp *qp, bool in_thread) 844 { 845 struct hfi1_pkt_state ps; 846 struct hfi1_qp_priv *priv = qp->priv; 847 int (*make_req)(struct rvt_qp *qp, struct hfi1_pkt_state *ps); 848 849 ps.dev = to_idev(qp->ibqp.device); 850 ps.ibp = to_iport(qp->ibqp.device, qp->port_num); 851 ps.ppd = ppd_from_ibp(ps.ibp); 852 ps.in_thread = in_thread; 853 854 trace_hfi1_rc_do_send(qp, in_thread); 855 856 switch (qp->ibqp.qp_type) { 857 case IB_QPT_RC: 858 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & 859 ~((1 << ps.ppd->lmc) - 1)) == 860 ps.ppd->lid)) { 861 ruc_loopback(qp); 862 return; 863 } 864 make_req = hfi1_make_rc_req; 865 ps.timeout_int = qp->timeout_jiffies; 866 break; 867 case IB_QPT_UC: 868 if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) & 869 ~((1 << ps.ppd->lmc) - 1)) == 870 ps.ppd->lid)) { 871 ruc_loopback(qp); 872 return; 873 } 874 make_req = hfi1_make_uc_req; 875 ps.timeout_int = SEND_RESCHED_TIMEOUT; 876 break; 877 default: 878 make_req = hfi1_make_ud_req; 879 ps.timeout_int = SEND_RESCHED_TIMEOUT; 880 } 881 882 spin_lock_irqsave(&qp->s_lock, ps.flags); 883 884 /* Return if we are already busy processing a work request. */ 885 if (!hfi1_send_ok(qp)) { 886 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 887 return; 888 } 889 890 qp->s_flags |= RVT_S_BUSY; 891 892 ps.timeout_int = ps.timeout_int / 8; 893 ps.timeout = jiffies + ps.timeout_int; 894 ps.cpu = priv->s_sde ? priv->s_sde->cpu : 895 cpumask_first(cpumask_of_node(ps.ppd->dd->node)); 896 ps.pkts_sent = false; 897 898 /* insure a pre-built packet is handled */ 899 ps.s_txreq = get_waiting_verbs_txreq(qp); 900 do { 901 /* Check for a constructed packet to be sent. */ 902 if (ps.s_txreq) { 903 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 904 /* 905 * If the packet cannot be sent now, return and 906 * the send engine will be woken up later. 907 */ 908 if (hfi1_verbs_send(qp, &ps)) 909 return; 910 /* allow other tasks to run */ 911 if (schedule_send_yield(qp, &ps)) 912 return; 913 914 spin_lock_irqsave(&qp->s_lock, ps.flags); 915 } 916 } while (make_req(qp, &ps)); 917 iowait_starve_clear(ps.pkts_sent, &priv->s_iowait); 918 spin_unlock_irqrestore(&qp->s_lock, ps.flags); 919 } 920 921 /* 922 * This should be called with s_lock held. 923 */ 924 void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, 925 enum ib_wc_status status) 926 { 927 u32 old_last, last; 928 929 if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) 930 return; 931 932 last = qp->s_last; 933 old_last = last; 934 trace_hfi1_qp_send_completion(qp, wqe, last); 935 if (++last >= qp->s_size) 936 last = 0; 937 trace_hfi1_qp_send_completion(qp, wqe, last); 938 qp->s_last = last; 939 /* See post_send() */ 940 barrier(); 941 rvt_put_swqe(wqe); 942 if (qp->ibqp.qp_type == IB_QPT_UD || 943 qp->ibqp.qp_type == IB_QPT_SMI || 944 qp->ibqp.qp_type == IB_QPT_GSI) 945 atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); 946 947 rvt_qp_swqe_complete(qp, 948 wqe, 949 ib_hfi1_wc_opcode[wqe->wr.opcode], 950 status); 951 952 if (qp->s_acked == old_last) 953 qp->s_acked = last; 954 if (qp->s_cur == old_last) 955 qp->s_cur = last; 956 if (qp->s_tail == old_last) 957 qp->s_tail = last; 958 if (qp->state == IB_QPS_SQD && last == qp->s_cur) 959 qp->s_draining = 0; 960 } 961