// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/hash.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/ib_verbs.h>

#include "hfi.h"
#include "qp.h"
#include "trace.h"
#include "verbs_txreq.h"

unsigned int hfi1_qp_table_size = 256;
module_param_named(qp_table_size, hfi1_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	unsigned int seq,
	bool pkts_sent);
static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);

const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_RDMA_READ] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC,
},

[IB_WR_ATOMIC_CMP_AND_SWP] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_ATOMIC_FETCH_AND_ADD] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
},

[IB_WR_RDMA_WRITE_WITH_IMM] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_SEND_WITH_IMM] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_SMI) | BIT(IB_QPT_GSI) |
		       BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
},

[IB_WR_REG_MR] = {
	.length = sizeof(struct ib_reg_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_LOCAL_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_LOCAL,
},

[IB_WR_SEND_WITH_INV] = {
	.length = sizeof(struct ib_send_wr),
	.qpt_support = BIT(IB_QPT_RC),
},

[IB_WR_OPFN] = {
	.length = sizeof(struct ib_atomic_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_USE_RESERVE,
},

[IB_WR_TID_RDMA_WRITE] = {
	.length = sizeof(struct ib_rdma_wr),
	.qpt_support = BIT(IB_QPT_RC),
	.flags = RVT_OPERATION_IGN_RNR_CNT,
},

};

static void flush_list_head(struct list_head *l)
{
	while (!list_empty(l)) {
		struct sdma_txreq *tx;

		tx = list_first_entry(
			l,
			struct sdma_txreq,
			list);
		list_del_init(&tx->list);
		hfi1_put_txreq(
			container_of(tx, struct verbs_txreq, txreq));
	}
}

static void flush_tx_list(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
	flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
}

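/*
 * flush_iowait - unlink the QP from any iowait list it is queued on
 * @qp: the QP
 *
 * If the QP is still linked on a wait list (e.g. an SDMA engine's
 * dmawait list), remove it under that list's seqlock and drop the
 * reference that was taken when it was queued.
 */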
static void flush_iowait(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	unsigned long flags;
	seqlock_t *lock = priv->s_iowait.lock;

	if (!lock)
		return;
	write_seqlock_irqsave(lock, flags);
	if (!list_empty(&priv->s_iowait.list)) {
		list_del_init(&priv->s_iowait.list);
		priv->s_iowait.lock = NULL;
		rvt_put_qp(qp);
	}
	write_sequnlock_irqrestore(lock, flags);
}

/*
 * This function is what we would push to the core layer if we wanted to be a
 * "first class citizen". Instead we hide this here and rely on Verbs ULPs
 * to blindly pass the MTU enum value from the PathRecord to us.
 */
static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu)
{
	/* Constraining 10KB packets to 8KB packets */
	if (mtu == (enum ib_mtu)OPA_MTU_10240)
		mtu = (enum ib_mtu)OPA_MTU_8192;
	return opa_mtu_enum_to_int((enum opa_mtu)mtu);
}

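/**
 * hfi1_check_modify_qp - validate a requested QP attribute change
 * @qp: the QP
 * @attr: the requested attributes
 * @attr_mask: mask of the attributes being modified
 * @udata: user data (unused here)
 *
 * rdmavt driver hook used to veto a modify-QP request. A new primary or
 * alternate path is rejected if its SL maps to the invalid SC (0xf) or to
 * an SC without a usable SDMA engine or send context.
 *
 * Return: 0 on success, -EINVAL otherwise.
 */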
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
			 int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_ibdev *dev = to_idev(ibqp->device);
	struct hfi1_devdata *dd = dd_from_dev(dev);
	u8 sc;

	if (attr_mask & IB_QP_AV) {
		sc = ah_to_sc(ibqp->device, &attr->ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	if (attr_mask & IB_QP_ALT_PATH) {
		sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr);
		if (sc == 0xf)
			return -EINVAL;

		if (!qp_to_sdma_engine(qp, sc) &&
		    dd->flags & HFI1_HAS_SEND_DMA)
			return -EINVAL;

		if (!qp_to_send_context(qp, sc))
			return -EINVAL;
	}

	return 0;
}

/*
 * qp_set_16b - Set the hdr_type based on whether the slid or the
 * dlid in the connection is extended. Only applicable for RC and UC
 * QPs. UD QPs determine this on the fly from the ah in the wqe
 */
static inline void qp_set_16b(struct rvt_qp *qp)
{
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct hfi1_qp_priv *priv = qp->priv;

	/* Update ah_attr to account for extended LIDs */
	hfi1_update_ah_attr(qp->ibqp.device, &qp->remote_ah_attr);

	/* Create 32 bit LIDs */
	hfi1_make_opa_lid(&qp->remote_ah_attr);

	if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) & IB_AH_GRH))
		return;

	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	priv->hdr_type = hfi1_get_hdr_type(ppd->lid, &qp->remote_ah_attr);
}

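/**
 * hfi1_modify_qp - apply driver-specific state for a QP attribute change
 * @qp: the QP
 * @attr: the new attributes
 * @attr_mask: mask of the attributes that changed
 * @udata: user data (unused here)
 *
 * rdmavt driver hook called once the change has been accepted. When the
 * address vector changes, or the QP migrates to its armed alternate path,
 * re-derive the cached SC, SDMA engine, and send context, and recompute
 * the header type for extended LIDs. The change is also passed on to
 * opfn_qp_init().
 */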
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
		    int attr_mask, struct ib_udata *udata)
{
	struct ib_qp *ibqp = &qp->ibqp;
	struct hfi1_qp_priv *priv = qp->priv;

	if (attr_mask & IB_QP_AV) {
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
		qp_set_16b(qp);
	}

	if (attr_mask & IB_QP_PATH_MIG_STATE &&
	    attr->path_mig_state == IB_MIG_MIGRATED &&
	    qp->s_mig_state == IB_MIG_ARMED) {
		qp->s_flags |= HFI1_S_AHG_CLEAR;
		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
		qp_set_16b(qp);
	}

	opfn_qp_init(qp, attr, attr_mask);
}

/**
 * hfi1_setup_wqe - set up the wqe
 * @qp: The qp
 * @wqe: The built wqe
 * @call_send: Determine if the send should be posted or scheduled.
 *
 * Perform setup of the wqe. This is called
 * prior to inserting the wqe into the ring but after
 * the wqe has been set up by RDMAVT. This function
 * allows the driver the opportunity to perform
 * validation and additional setup of the wqe.
 *
 * Returns 0 on success, -EINVAL on failure
 *
 */
int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct rvt_ah *ah;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;

	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
		hfi1_setup_tid_rdma_wqe(qp, wqe);
		fallthrough;
	case IB_QPT_UC:
		if (wqe->length > 0x80000000U)
			return -EINVAL;
		if (wqe->length > qp->pmtu)
			*call_send = false;
		break;
	case IB_QPT_SMI:
		/*
		 * SM packets should exclusively use VL15 and their SL is
		 * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
		 * is created, SL is 0 in most cases and as a result some
		 * fields (vl and pmtu) in ah may not be set correctly,
		 * depending on the SL2SC and SC2VL tables at the time.
		 */
		ppd = ppd_from_ibp(ibp);
		dd = dd_from_ppd(ppd);
		if (wqe->length > dd->vld[15].mtu)
			return -EINVAL;
		break;
	case IB_QPT_GSI:
	case IB_QPT_UD:
		ah = rvt_get_swqe_ah(wqe);
		if (wqe->length > (1 << ah->log_pmtu))
			return -EINVAL;
		if (ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)] == 0xf)
			return -EINVAL;
		break;
	default:
		break;
	}

	/*
	 * System latency between send and schedule is large enough that
	 * forcing call_send to true for piothreshold packets is necessary.
	 */
	if (wqe->length <= piothreshold)
		*call_send = true;
	return 0;
}

/**
 * _hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress w/o regard to the s_flags.
 *
 * It is only used in the post send, which doesn't hold
 * the s_lock.
 */
bool _hfi1_schedule_send(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_devdata *dd = ppd->dd;

	if (dd->flags & HFI1_SHUTDOWN)
		return true;

	return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
			       priv->s_sde ?
			       priv->s_sde->cpu :
			       cpumask_first(cpumask_of_node(dd->node)));
}

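/**
 * qp_pio_drain - wait for outstanding PIO sends on a QP to complete
 * @qp: the QP
 *
 * Repeatedly arm the send context's PIO-buffer-available interrupt and
 * sleep until the QP's iowait no longer counts any pending PIO packets.
 */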
static void qp_pio_drain(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (!priv->s_sendcontext)
		return;
	while (iowait_pio_pending(&priv->s_iowait)) {
		write_seqlock_irq(&priv->s_sendcontext->waitlock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
		iowait_pio_drain(&priv->s_iowait);
		write_seqlock_irq(&priv->s_sendcontext->waitlock);
		hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
		write_sequnlock_irq(&priv->s_sendcontext->waitlock);
	}
}

/**
 * hfi1_schedule_send - schedule progress
 * @qp: the QP
 *
 * This schedules qp progress; the caller should hold
 * the s_lock.
 * @return true if the first leg is scheduled;
 * false if the first leg is not scheduled.
 */
bool hfi1_schedule_send(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	if (hfi1_send_ok(qp)) {
		_hfi1_schedule_send(qp);
		return true;
	}
	if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
		iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
				IOWAIT_PENDING_IB);
	return false;
}

static void hfi1_qp_schedule(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	bool ret;

	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
		ret = hfi1_schedule_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
	}
	if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_TID)) {
		ret = hfi1_schedule_tid_send(qp);
		if (ret)
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
	}
}

void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
	unsigned long flags;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & flag) {
		qp->s_flags &= ~flag;
		trace_hfi1_qpwakeup(qp, flag);
		hfi1_qp_schedule(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	/* Notify hfi1_destroy_qp() if it is waiting. */
	rvt_put_qp(qp);
}

void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
{
	struct hfi1_qp_priv *priv = qp->priv;

	if (iowait_set_work_flag(wait) == IOWAIT_IB_SE) {
		qp->s_flags &= ~RVT_S_BUSY;
		/*
		 * If we are sending a first-leg packet from the second leg,
		 * we need to clear the busy flag from priv->s_flags to
		 * avoid a race condition when the qp wakes up before
		 * the call to hfi1_verbs_send() returns to the second
		 * leg. In that case, the second leg will terminate without
		 * being re-scheduled, resulting in failure to send TID RDMA
		 * WRITE DATA and TID RDMA ACK packets.
		 */
		if (priv->s_flags & HFI1_S_TID_BUSY_SET) {
			priv->s_flags &= ~(HFI1_S_TID_BUSY_SET |
					   RVT_S_BUSY);
			iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
		}
	} else {
		priv->s_flags &= ~RVT_S_BUSY;
	}
}

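/*
 * iowait_sleep - callback from the SDMA engine when a descriptor is not
 * available for the txreq
 *
 * Save the txreq on the iowait work list and, unless the engine has made
 * progress since the sequence number was sampled (-EAGAIN in that case),
 * queue the QP on the engine's dmawait list and return -EBUSY so the
 * caller stops submitting. If the QP can no longer process work, the
 * txreq is simply freed.
 */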
static int iowait_sleep(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *stx,
	uint seq,
	bool pkts_sent)
{
	struct verbs_txreq *tx = container_of(stx, struct verbs_txreq, txreq);
	struct rvt_qp *qp;
	struct hfi1_qp_priv *priv;
	unsigned long flags;
	int ret = 0;

	qp = tx->qp;
	priv = qp->priv;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
		/*
		 * If we couldn't queue the DMA request, save the info
		 * and try again later rather than destroying the
		 * buffer and undoing the side effects of the copy.
		 */
		/* Make a common routine? */
		list_add_tail(&stx->list, &wait->tx_head);
		write_seqlock(&sde->waitlock);
		if (sdma_progress(sde, seq, stx))
			goto eagain;
		if (list_empty(&priv->s_iowait.list)) {
			struct hfi1_ibport *ibp =
				to_iport(qp->ibqp.device, qp->port_num);

			ibp->rvp.n_dmawait++;
			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
			iowait_get_priority(&priv->s_iowait);
			iowait_queue(pkts_sent, &priv->s_iowait,
				     &sde->dmawait);
			priv->s_iowait.lock = &sde->waitlock;
			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
			rvt_get_qp(qp);
		}
		write_sequnlock(&sde->waitlock);
		hfi1_qp_unbusy(qp, wait);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		ret = -EBUSY;
	} else {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		hfi1_put_txreq(tx);
	}
	return ret;
eagain:
	write_sequnlock(&sde->waitlock);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	list_del_init(&stx->list);
	return -EAGAIN;
}

static void iowait_wakeup(struct iowait *wait, int reason)
{
	struct rvt_qp *qp = iowait_to_qp(wait);

	WARN_ON(reason != SDMA_AVAIL_REASON);
	hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
}

static void iowait_sdma_drained(struct iowait *wait)
{
	struct rvt_qp *qp = iowait_to_qp(wait);
	unsigned long flags;

	/*
	 * This happens when the send engine notes
	 * a QP in the error state and cannot
	 * do the flush work until that QP's
	 * sdma work has finished.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (qp->s_flags & RVT_S_WAIT_DMA) {
		qp->s_flags &= ~RVT_S_WAIT_DMA;
		hfi1_schedule_send(qp);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
}

static void hfi1_init_priority(struct iowait *w)
{
	struct rvt_qp *qp = iowait_to_qp(w);
	struct hfi1_qp_priv *priv = qp->priv;

	if (qp->s_flags & RVT_S_ACK_PENDING)
		w->priority++;
	if (priv->s_flags & RVT_S_ACK_PENDING)
		w->priority++;
}

/**
 * qp_to_sdma_engine - map a qp to a send engine
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send engine for the qp or NULL for SMI type qp.
 */
struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct sdma_engine *sde;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return NULL;
	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		return NULL;
	default:
		break;
	}
	sde = sdma_select_engine_sc(dd, qp->ibqp.qp_num >> dd->qos_shift, sc5);
	return sde;
}

/**
 * qp_to_send_context - map a qp to a send context
 * @qp: the QP
 * @sc5: the 5 bit sc
 *
 * Return:
 * A send context for the qp
 */
struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
		/* SMA packets to VL15 */
		return dd->vld[15].sc;
	default:
		break;
	}

	return pio_select_send_context_sc(dd, qp->ibqp.qp_num >> dd->qos_shift,
					  sc5);
}

static const char * const qp_type_str[] = {
	"SMI", "GSI", "RC", "UC", "UD",
};

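/*
 * qp_idle - return non-zero when all of the send queue indices coincide,
 * i.e. the QP has no queued or in-flight send work.
 */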
static int qp_idle(struct rvt_qp *qp)
{
	return
		qp->s_last == qp->s_acked &&
		qp->s_acked == qp->s_cur &&
		qp->s_cur == qp->s_tail &&
		qp->s_tail == qp->s_head;
}

/**
 * qp_iter_print - print the qp information to seq_file
 * @s: the seq_file to emit the qp information on
 * @iter: the iterator for the qp hash list
 */
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter)
{
	struct rvt_swqe *wqe;
	struct rvt_qp *qp = iter->qp;
	struct hfi1_qp_priv *priv = qp->priv;
	struct sdma_engine *sde;
	struct send_context *send_context;
	struct rvt_ack_entry *e = NULL;
	struct rvt_srq *srq = qp->ibqp.srq ?
		ibsrq_to_rvtsrq(qp->ibqp.srq) : NULL;

	sde = qp_to_sdma_engine(qp, priv->s_sc);
	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
	send_context = qp_to_send_context(qp, priv->s_sc);
	if (qp->s_ack_queue)
		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
	seq_printf(s,
		   "N %d %s QP %x R %u %s %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x S(%u %u %u %u %u %u %u) R(%u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d OS %x %x E %x %x %x RNR %d %s %d\n",
		   iter->n,
		   qp_idle(qp) ? "I" : "B",
		   qp->ibqp.qp_num,
		   atomic_read(&qp->refcount),
		   qp_type_str[qp->ibqp.qp_type],
		   qp->state,
		   wqe ? wqe->wr.opcode : 0,
		   qp->s_flags,
		   iowait_sdma_pending(&priv->s_iowait),
		   iowait_pio_pending(&priv->s_iowait),
		   !list_empty(&priv->s_iowait.list),
		   qp->timeout,
		   wqe ? wqe->ssn : 0,
		   qp->s_lsn,
		   qp->s_last_psn,
		   qp->s_psn, qp->s_next_psn,
		   qp->s_sending_psn, qp->s_sending_hpsn,
		   qp->r_psn,
		   qp->s_last, qp->s_acked, qp->s_cur,
		   qp->s_tail, qp->s_head, qp->s_size,
		   qp->s_avail,
		   /* ack_queue ring pointers, size */
		   qp->s_tail_ack_queue, qp->r_head_ack_queue,
		   rvt_max_atomic(&to_idev(qp->ibqp.device)->rdi),
		   /* remote QP info */
		   qp->remote_qpn,
		   rdma_ah_get_dlid(&qp->remote_ah_attr),
		   rdma_ah_get_sl(&qp->remote_ah_attr),
		   qp->pmtu,
		   qp->s_retry,
		   qp->s_retry_cnt,
		   qp->s_rnr_retry_cnt,
		   qp->s_rnr_retry,
		   sde,
		   sde ? sde->this_idx : 0,
		   send_context,
		   send_context ? send_context->sw_index : 0,
		   ib_cq_head(qp->ibqp.send_cq),
		   ib_cq_tail(qp->ibqp.send_cq),
		   qp->pid,
		   qp->s_state,
		   qp->s_ack_state,
		   /* ack queue information */
		   e ? e->opcode : 0,
		   e ? e->psn : 0,
		   e ? e->lpsn : 0,
		   qp->r_min_rnr_timer,
		   srq ? "SRQ" : "RQ",
		   srq ? srq->rq.size : qp->r_rq.size
		);
}

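/**
 * qp_priv_alloc - allocate the hfi1-private portion of a QP
 * @rdi: the rdmavt device info
 * @qp: the QP being created
 *
 * Allocate the per-QP private data and its AHG state on the device's
 * NUMA node and initialize the iowait machinery with the send engine
 * callbacks.
 *
 * Return: the private data pointer, or an ERR_PTR on allocation failure.
 */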
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv;

	priv = kzalloc_node(sizeof(*priv), GFP_KERNEL, rdi->dparms.node);
	if (!priv)
		return ERR_PTR(-ENOMEM);

	priv->owner = qp;

	priv->s_ahg = kzalloc_node(sizeof(*priv->s_ahg), GFP_KERNEL,
				   rdi->dparms.node);
	if (!priv->s_ahg) {
		kfree(priv);
		return ERR_PTR(-ENOMEM);
	}
	iowait_init(
		&priv->s_iowait,
		1,
		_hfi1_do_send,
		_hfi1_do_tid_send,
		iowait_sleep,
		iowait_wakeup,
		iowait_sdma_drained,
		hfi1_init_priority);
	/* Init to a value to start the running average correctly */
	priv->s_running_pkt_size = piothreshold / 2;
	return priv;
}

void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	hfi1_qp_priv_tid_free(rdi, qp);
	kfree(priv->s_ahg);
	kfree(priv);
}

unsigned free_all_qps(struct rvt_dev_info *rdi)
{
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	int n;
	unsigned qp_inuse = 0;

	for (n = 0; n < dd->num_pports; n++) {
		struct hfi1_ibport *ibp = &dd->pport[n].ibport_data;

		rcu_read_lock();
		if (rcu_dereference(ibp->rvp.qp[0]))
			qp_inuse++;
		if (rcu_dereference(ibp->rvp.qp[1]))
			qp_inuse++;
		rcu_read_unlock();
	}

	return qp_inuse;
}

void flush_qp_waiters(struct rvt_qp *qp)
{
	lockdep_assert_held(&qp->s_lock);
	flush_iowait(qp);
	hfi1_tid_rdma_flush_wait(qp);
}

void stop_send_queue(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	iowait_cancel_work(&priv->s_iowait);
	if (cancel_work_sync(&priv->tid_rdma.trigger_work))
		rvt_put_qp(qp);
}

void quiesce_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;

	hfi1_del_tid_reap_timer(qp);
	hfi1_del_tid_retry_timer(qp);
	iowait_sdma_drain(&priv->s_iowait);
	qp_pio_drain(qp);
	flush_tx_list(qp);
}

void notify_qp_reset(struct rvt_qp *qp)
{
	hfi1_qp_kern_exp_rcv_clear_all(qp);
	qp->r_adefered = 0;
	clear_ahg(qp);

	/* Clear any OPFN state */
	if (qp->ibqp.qp_type == IB_QPT_RC)
		opfn_conn_error(qp);
}

/*
 * Switch to alternate path.
 * The QP s_lock should be held and interrupts disabled.
 */
void hfi1_migrate_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	struct ib_event ev;

	qp->s_mig_state = IB_MIG_MIGRATED;
	qp->remote_ah_attr = qp->alt_ah_attr;
	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
	qp->s_pkey_index = qp->s_alt_pkey_index;
	qp->s_flags |= HFI1_S_AHG_CLEAR;
	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	qp_set_16b(qp);

	ev.device = qp->ibqp.device;
	ev.element.qp = &qp->ibqp;
	ev.event = IB_EVENT_PATH_MIG;
	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
}

int mtu_to_path_mtu(u32 mtu)
{
	return mtu_to_enum(mtu, OPA_MTU_8192);
}

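/**
 * mtu_from_qp - compute the usable MTU for a QP
 * @rdi: the rdmavt device info
 * @qp: the QP
 * @pmtu: the path MTU enum from the QP attributes
 *
 * Convert the path MTU enum to bytes and clamp it to the MTU of the VL
 * that the QP's SL currently maps to.
 */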
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
{
	u32 mtu;
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	struct hfi1_ibport *ibp;
	u8 sc, vl;

	ibp = &dd->pport[qp->port_num - 1].ibport_data;
	sc = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
	vl = sc_to_vlt(dd, sc);

	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu);
	if (vl < PER_VL_SEND_CONTEXTS)
		mtu = min_t(u32, mtu, dd->vld[vl].mtu);
	return mtu;
}

int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
		       struct ib_qp_attr *attr)
{
	int mtu, pidx = qp->port_num - 1;
	struct hfi1_ibdev *verbs_dev = container_of(rdi,
						    struct hfi1_ibdev,
						    rdi);
	struct hfi1_devdata *dd = container_of(verbs_dev,
					       struct hfi1_devdata,
					       verbs_dev);
	mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu);
	if (mtu == -1)
		return -1; /* values less than 0 are error */

	if (mtu > dd->pport[pidx].ibmtu)
		return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048);
	else
		return attr->path_mtu;
}

void notify_error_qp(struct rvt_qp *qp)
{
	struct hfi1_qp_priv *priv = qp->priv;
	seqlock_t *lock = priv->s_iowait.lock;

	if (lock) {
		write_seqlock(lock);
		if (!list_empty(&priv->s_iowait.list) &&
		    !(qp->s_flags & RVT_S_BUSY) &&
		    !(priv->s_flags & RVT_S_BUSY)) {
			qp->s_flags &= ~HFI1_S_ANY_WAIT_IO;
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
			iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_TID);
			list_del_init(&priv->s_iowait.list);
			priv->s_iowait.lock = NULL;
			rvt_put_qp(qp);
		}
		write_sequnlock(lock);
	}

	if (!(qp->s_flags & RVT_S_BUSY) && !(priv->s_flags & RVT_S_BUSY)) {
		qp->s_hdrwords = 0;
		if (qp->s_rdma_mr) {
			rvt_put_mr(qp->s_rdma_mr);
			qp->s_rdma_mr = NULL;
		}
		flush_tx_list(qp);
	}
}

/**
 * hfi1_qp_iter_cb - callback for iterator
 * @qp: the qp
 * @v: the sl in low bits of v
 *
 * This is called from the iterator callback to work
 * on an individual qp.
 */
static void hfi1_qp_iter_cb(struct rvt_qp *qp, u64 v)
{
	int lastwqe;
	struct ib_event ev;
	struct hfi1_ibport *ibp =
		to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u8 sl = (u8)v;

	if (qp->port_num != ppd->port ||
	    (qp->ibqp.qp_type != IB_QPT_UC &&
	     qp->ibqp.qp_type != IB_QPT_RC) ||
	    rdma_ah_get_sl(&qp->remote_ah_attr) != sl ||
	    !(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))
		return;

	spin_lock_irq(&qp->r_lock);
	spin_lock(&qp->s_hlock);
	spin_lock(&qp->s_lock);
	lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
	spin_unlock(&qp->s_lock);
	spin_unlock(&qp->s_hlock);
	spin_unlock_irq(&qp->r_lock);
	if (lastwqe) {
		ev.device = qp->ibqp.device;
		ev.element.qp = &qp->ibqp;
		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}
}

/**
 * hfi1_error_port_qps - put a port's RC/UC qps into error state
 * @ibp: the ibport.
 * @sl: the service level.
 *
 * This function places all RC/UC qps with a given service level into error
 * state. It is generally called to force upper layer apps to abandon stale
 * qps after an sl->sc mapping change.
 */
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl)
{
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct hfi1_ibdev *dev = &ppd->dd->verbs_dev;

	rvt_qp_iter(&dev->rdi, sl, hfi1_qp_iter_cb);
}