1 /*- 2 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 #include "icl_iser.h" 27 28 static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend"); 29 static int iser_cq_poll_limit = 512; 30 31 static void 32 iser_cq_event_callback(struct ib_event *cause, void *context) 33 { 34 ISER_ERR("got cq event %d", cause->event); 35 } 36 37 static void 38 iser_qp_event_callback(struct ib_event *cause, void *context) 39 { 40 ISER_ERR("got qp event %d", cause->event); 41 } 42 43 static void 44 iser_event_handler(struct ib_event_handler *handler, 45 struct ib_event *event) 46 { 47 ISER_ERR("async event %d on device %s port %d", 48 event->event, event->device->name, 49 event->element.port_num); 50 } 51 52 /** 53 * is_iser_tx_desc - Indicate if the completion wr_id 54 * is a TX descriptor or not. 55 * @iser_conn: iser connection 56 * @wr_id: completion WR identifier 57 * 58 * Since we cannot rely on wc opcode in FLUSH errors 59 * we must work around it by checking if the wr_id address 60 * falls in the iser connection rx_descs buffer. If so 61 * it is an RX descriptor, otherwize it is a TX. 62 */ 63 static inline bool 64 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id) 65 { 66 void *start = iser_conn->rx_descs; 67 u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs); 68 void *end = (void *)((uintptr_t)start + (uintptr_t)len); 69 70 if (start) { 71 if (wr_id >= start && wr_id < end) 72 return false; 73 } else { 74 return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf); 75 } 76 77 return true; 78 } 79 80 /** 81 * iser_handle_comp_error() - Handle error completion 82 * @ib_conn: connection RDMA resources 83 * @wc: work completion 84 * 85 * Notes: Update post_recv_buf_count in case of recv error completion. 86 * For non-FLUSH error completion we should also notify iscsi layer that 87 * connection is failed (in case we passed bind stage). 88 */ 89 static void 90 iser_handle_comp_error(struct ib_conn *ib_conn, 91 struct ib_wc *wc) 92 { 93 void *wr_id = (void *)(uintptr_t)wc->wr_id; 94 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 95 ib_conn); 96 97 if (is_iser_tx_desc(iser_conn, wr_id)) { 98 ISER_DBG("conn %p got send comp error", iser_conn); 99 } else { 100 ISER_DBG("conn %p got recv comp error", iser_conn); 101 ib_conn->post_recv_buf_count--; 102 } 103 if (wc->status != IB_WC_WR_FLUSH_ERR) 104 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn); 105 } 106 107 /** 108 * iser_handle_wc - handle a single work completion 109 * @wc: work completion 110 * 111 * Soft-IRQ context, work completion can be either 112 * SEND or RECV, and can turn out successful or 113 * with error (or flush error). 114 */ 115 static void iser_handle_wc(struct ib_wc *wc) 116 { 117 struct ib_conn *ib_conn; 118 struct iser_tx_desc *tx_desc; 119 struct iser_rx_desc *rx_desc; 120 121 ib_conn = wc->qp->qp_context; 122 if (likely(wc->status == IB_WC_SUCCESS)) { 123 if (wc->opcode == IB_WC_RECV) { 124 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; 125 iser_rcv_completion(rx_desc, wc->byte_len, 126 ib_conn); 127 } else 128 if (wc->opcode == IB_WC_SEND) { 129 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; 130 iser_snd_completion(tx_desc, ib_conn); 131 } else { 132 ISER_ERR("Unknown wc opcode %d", wc->opcode); 133 } 134 } else { 135 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 136 ib_conn); 137 if (wc->status != IB_WC_WR_FLUSH_ERR) { 138 ISER_ERR("conn %p wr id %llx status %d vend_err %x", 139 iser_conn, (unsigned long long)wc->wr_id, 140 wc->status, wc->vendor_err); 141 } else { 142 ISER_DBG("flush error: conn %p wr id %llx", 143 iser_conn, (unsigned long long)wc->wr_id); 144 } 145 146 if (wc->wr_id == ISER_BEACON_WRID) { 147 /* all flush errors were consumed */ 148 mtx_lock(&ib_conn->beacon.flush_lock); 149 ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn); 150 cv_signal(&ib_conn->beacon.flush_cv); 151 mtx_unlock(&ib_conn->beacon.flush_lock); 152 } else { 153 iser_handle_comp_error(ib_conn, wc); 154 } 155 } 156 } 157 158 static void 159 iser_cq_tasklet_fn(void *data, int pending) 160 { 161 struct iser_comp *comp = (struct iser_comp *)data; 162 struct ib_cq *cq = comp->cq; 163 struct ib_wc *const wcs = comp->wcs; 164 int completed = 0; 165 int i; 166 int n; 167 168 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { 169 for (i = 0; i < n; i++) 170 iser_handle_wc(&wcs[i]); 171 172 completed += n; 173 if (completed >= iser_cq_poll_limit) 174 break; 175 } 176 177 /* 178 * It is assumed here that arming CQ only once its empty 179 * would not cause interrupts to be missed. 180 */ 181 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 182 } 183 184 static void 185 iser_cq_callback(struct ib_cq *cq, void *cq_context) 186 { 187 struct iser_comp *comp = cq_context; 188 189 taskqueue_enqueue(comp->tq, &comp->task); 190 } 191 192 /** 193 * iser_create_device_ib_res - creates Protection Domain (PD), Completion 194 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with 195 * the adapator. 196 * 197 * returns 0 on success, -1 on failure 198 */ 199 static int 200 iser_create_device_ib_res(struct iser_device *device) 201 { 202 struct ib_device *ib_dev = device->ib_device; 203 int i, max_cqe; 204 205 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { 206 ISER_ERR("device %s doesn't support Fastreg, " 207 "can't register memory", device->ib_device->name); 208 return (1); 209 } 210 211 device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors); 212 213 device->comps = malloc(device->comps_used * sizeof(*device->comps), 214 M_ISER_VERBS, M_WAITOK | M_ZERO); 215 if (!device->comps) 216 goto comps_err; 217 218 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); 219 220 ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d", 221 device->comps_used, device->ib_device->name, 222 device->ib_device->num_comp_vectors, max_cqe); 223 224 device->pd = ib_alloc_pd(device->ib_device, IB_PD_UNSAFE_GLOBAL_RKEY); 225 if (IS_ERR(device->pd)) 226 goto pd_err; 227 228 for (i = 0; i < device->comps_used; i++) { 229 struct iser_comp *comp = &device->comps[i]; 230 struct ib_cq_init_attr cq_attr = { 231 .cqe = max_cqe, 232 .comp_vector = i, 233 }; 234 235 comp->device = device; 236 comp->cq = ib_create_cq(device->ib_device, 237 iser_cq_callback, 238 iser_cq_event_callback, 239 (void *)comp, 240 &cq_attr); 241 if (IS_ERR(comp->cq)) { 242 comp->cq = NULL; 243 goto cq_err; 244 } 245 246 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) 247 goto cq_err; 248 249 TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp); 250 comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT, 251 taskqueue_thread_enqueue, &comp->tq); 252 if (!comp->tq) 253 goto tq_err; 254 taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq"); 255 } 256 257 device->mr = device->pd->__internal_mr; 258 if (IS_ERR(device->mr)) 259 goto tq_err; 260 261 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 262 iser_event_handler); 263 if (ib_register_event_handler(&device->event_handler)) 264 goto tq_err; 265 266 return (0); 267 268 tq_err: 269 for (i = 0; i < device->comps_used; i++) { 270 struct iser_comp *comp = &device->comps[i]; 271 if (comp->tq) 272 taskqueue_free(comp->tq); 273 } 274 cq_err: 275 for (i = 0; i < device->comps_used; i++) { 276 struct iser_comp *comp = &device->comps[i]; 277 if (comp->cq) 278 ib_destroy_cq(comp->cq); 279 } 280 ib_dealloc_pd(device->pd); 281 pd_err: 282 free(device->comps, M_ISER_VERBS); 283 comps_err: 284 ISER_ERR("failed to allocate an IB resource"); 285 return (1); 286 } 287 288 /** 289 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 290 * CQ and PD created with the device associated with the adapator. 291 */ 292 static void 293 iser_free_device_ib_res(struct iser_device *device) 294 { 295 int i; 296 297 for (i = 0; i < device->comps_used; i++) { 298 struct iser_comp *comp = &device->comps[i]; 299 300 taskqueue_free(comp->tq); 301 ib_destroy_cq(comp->cq); 302 comp->cq = NULL; 303 } 304 305 (void)ib_unregister_event_handler(&device->event_handler); 306 (void)ib_dealloc_pd(device->pd); 307 308 free(device->comps, M_ISER_VERBS); 309 device->comps = NULL; 310 311 device->mr = NULL; 312 device->pd = NULL; 313 } 314 315 static int 316 iser_alloc_reg_res(struct ib_device *ib_device, 317 struct ib_pd *pd, 318 struct iser_reg_resources *res) 319 { 320 int ret; 321 322 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE + 1); 323 if (IS_ERR(res->mr)) { 324 ret = -PTR_ERR(res->mr); 325 ISER_ERR("Failed to allocate fast reg mr err=%d", ret); 326 return (ret); 327 } 328 res->mr_valid = 1; 329 330 return (0); 331 } 332 333 static void 334 iser_free_reg_res(struct iser_reg_resources *rsc) 335 { 336 ib_dereg_mr(rsc->mr); 337 } 338 339 static struct fast_reg_descriptor * 340 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd) 341 { 342 struct fast_reg_descriptor *desc; 343 int ret; 344 345 desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO); 346 if (!desc) { 347 ISER_ERR("Failed to allocate a new fastreg descriptor"); 348 return (NULL); 349 } 350 351 ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc); 352 if (ret) { 353 ISER_ERR("failed to allocate reg_resources"); 354 goto err; 355 } 356 357 return (desc); 358 err: 359 free(desc, M_ISER_VERBS); 360 return (NULL); 361 } 362 363 /** 364 * iser_create_fmr_pool - Creates FMR pool and page_vector 365 * 366 * returns 0 on success, or errno code on failure 367 */ 368 int 369 iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max) 370 { 371 struct iser_device *device = ib_conn->device; 372 struct fast_reg_descriptor *desc; 373 int i; 374 375 INIT_LIST_HEAD(&ib_conn->fastreg.pool); 376 ib_conn->fastreg.pool_size = 0; 377 for (i = 0; i < cmds_max; i++) { 378 desc = iser_create_fastreg_desc(device->ib_device, device->pd); 379 if (!desc) { 380 ISER_ERR("Failed to create fastreg descriptor"); 381 goto err; 382 } 383 384 list_add_tail(&desc->list, &ib_conn->fastreg.pool); 385 ib_conn->fastreg.pool_size++; 386 } 387 388 return (0); 389 390 err: 391 iser_free_fastreg_pool(ib_conn); 392 return (ENOMEM); 393 } 394 395 /** 396 * iser_free_fmr_pool - releases the FMR pool and page vec 397 */ 398 void 399 iser_free_fastreg_pool(struct ib_conn *ib_conn) 400 { 401 struct fast_reg_descriptor *desc, *tmp; 402 int i = 0; 403 404 if (list_empty(&ib_conn->fastreg.pool)) 405 return; 406 407 ISER_DBG("freeing conn %p fr pool", ib_conn); 408 409 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { 410 list_del(&desc->list); 411 iser_free_reg_res(&desc->rsc); 412 free(desc, M_ISER_VERBS); 413 ++i; 414 } 415 416 if (i < ib_conn->fastreg.pool_size) 417 ISER_WARN("pool still has %d regions registered", 418 ib_conn->fastreg.pool_size - i); 419 } 420 421 /** 422 * iser_create_ib_conn_res - Queue-Pair (QP) 423 * 424 * returns 0 on success, 1 on failure 425 */ 426 static int 427 iser_create_ib_conn_res(struct ib_conn *ib_conn) 428 { 429 struct iser_conn *iser_conn; 430 struct iser_device *device; 431 struct ib_device_attr *dev_attr; 432 struct ib_qp_init_attr init_attr; 433 int index, min_index = 0; 434 int ret = -ENOMEM; 435 436 iser_conn = container_of(ib_conn, struct iser_conn, ib_conn); 437 device = ib_conn->device; 438 dev_attr = &device->dev_attr; 439 440 mtx_lock(&ig.connlist_mutex); 441 /* select the CQ with the minimal number of usages */ 442 for (index = 0; index < device->comps_used; index++) { 443 if (device->comps[index].active_qps < 444 device->comps[min_index].active_qps) 445 min_index = index; 446 } 447 ib_conn->comp = &device->comps[min_index]; 448 ib_conn->comp->active_qps++; 449 mtx_unlock(&ig.connlist_mutex); 450 ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn); 451 452 memset(&init_attr, 0, sizeof init_attr); 453 init_attr.event_handler = iser_qp_event_callback; 454 init_attr.qp_context = (void *)ib_conn; 455 init_attr.send_cq = ib_conn->comp->cq; 456 init_attr.recv_cq = ib_conn->comp->cq; 457 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 458 init_attr.cap.max_send_sge = 2; 459 init_attr.cap.max_recv_sge = 1; 460 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 461 init_attr.qp_type = IB_QPT_RC; 462 463 if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) { 464 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 465 iser_conn->max_cmds = 466 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); 467 } else { 468 init_attr.cap.max_send_wr = dev_attr->max_qp_wr; 469 iser_conn->max_cmds = 470 ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr); 471 } 472 ISER_DBG("device %s supports max_send_wr %d", 473 device->ib_device->name, dev_attr->max_qp_wr); 474 475 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 476 if (ret) 477 goto out_err; 478 479 ib_conn->qp = ib_conn->cma_id->qp; 480 ISER_DBG("setting conn %p cma_id %p qp %p", 481 ib_conn, ib_conn->cma_id, 482 ib_conn->cma_id->qp); 483 484 return (ret); 485 486 out_err: 487 mtx_lock(&ig.connlist_mutex); 488 ib_conn->comp->active_qps--; 489 mtx_unlock(&ig.connlist_mutex); 490 ISER_ERR("unable to alloc mem or create resource, err %d", ret); 491 492 return (ret); 493 } 494 495 /** 496 * based on the resolved device node GUID see if there already allocated 497 * device for this device. If there's no such, create one. 498 */ 499 static struct iser_device * 500 iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 501 { 502 struct iser_device *device; 503 504 sx_xlock(&ig.device_list_mutex); 505 506 list_for_each_entry(device, &ig.device_list, ig_list) 507 /* find if there's a match using the node GUID */ 508 if (device->ib_device->node_guid == cma_id->device->node_guid) 509 goto inc_refcnt; 510 511 device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO); 512 if (device == NULL) 513 goto out; 514 515 /* assign this device to the device */ 516 device->ib_device = cma_id->device; 517 /* init the device and link it into ig device list */ 518 if (iser_create_device_ib_res(device)) { 519 free(device, M_ISER_VERBS); 520 device = NULL; 521 goto out; 522 } 523 list_add(&device->ig_list, &ig.device_list); 524 525 inc_refcnt: 526 device->refcount++; 527 ISER_INFO("device %p refcount %d", device, device->refcount); 528 out: 529 sx_xunlock(&ig.device_list_mutex); 530 return (device); 531 } 532 533 /* if there's no demand for this device, release it */ 534 static void 535 iser_device_try_release(struct iser_device *device) 536 { 537 sx_xlock(&ig.device_list_mutex); 538 device->refcount--; 539 ISER_INFO("device %p refcount %d", device, device->refcount); 540 if (!device->refcount) { 541 iser_free_device_ib_res(device); 542 list_del(&device->ig_list); 543 free(device, M_ISER_VERBS); 544 device = NULL; 545 } 546 sx_xunlock(&ig.device_list_mutex); 547 } 548 549 /** 550 * Called with state mutex held 551 **/ 552 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, 553 enum iser_conn_state comp, 554 enum iser_conn_state exch) 555 { 556 int ret; 557 558 ret = (iser_conn->state == comp); 559 if (ret) 560 iser_conn->state = exch; 561 562 return ret; 563 } 564 565 /** 566 * iser_free_ib_conn_res - release IB related resources 567 * @iser_conn: iser connection struct 568 * @destroy: indicator if we need to try to release the 569 * iser device and memory regoins pool (only iscsi 570 * shutdown and DEVICE_REMOVAL will use this). 571 * 572 * This routine is called with the iser state mutex held 573 * so the cm_id removal is out of here. It is Safe to 574 * be invoked multiple times. 575 */ 576 void 577 iser_free_ib_conn_res(struct iser_conn *iser_conn, 578 bool destroy) 579 { 580 struct ib_conn *ib_conn = &iser_conn->ib_conn; 581 struct iser_device *device = ib_conn->device; 582 583 ISER_INFO("freeing conn %p cma_id %p qp %p", 584 iser_conn, ib_conn->cma_id, ib_conn->qp); 585 586 if (ib_conn->qp != NULL) { 587 mtx_lock(&ig.connlist_mutex); 588 ib_conn->comp->active_qps--; 589 mtx_unlock(&ig.connlist_mutex); 590 rdma_destroy_qp(ib_conn->cma_id); 591 ib_conn->qp = NULL; 592 } 593 594 if (destroy) { 595 if (iser_conn->login_buf) 596 iser_free_login_buf(iser_conn); 597 598 if (iser_conn->rx_descs) 599 iser_free_rx_descriptors(iser_conn); 600 601 if (device != NULL) { 602 iser_device_try_release(device); 603 ib_conn->device = NULL; 604 } 605 } 606 } 607 608 /** 609 * triggers start of the disconnect procedures and wait for them to be done 610 * Called with state mutex held 611 */ 612 int 613 iser_conn_terminate(struct iser_conn *iser_conn) 614 { 615 struct ib_conn *ib_conn = &iser_conn->ib_conn; 616 const struct ib_send_wr *bad_send_wr; 617 const struct ib_recv_wr *bad_recv_wr; 618 int err = 0; 619 620 /* terminate the iser conn only if the conn state is UP */ 621 if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, 622 ISER_CONN_TERMINATING)) 623 return (0); 624 625 ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state); 626 627 if (ib_conn->qp == NULL) { 628 /* HOW can this be??? */ 629 ISER_WARN("qp wasn't created"); 630 return (1); 631 } 632 633 /* 634 * Todo: This is a temporary workaround. 635 * We serialize the connection closure using global lock in order to 636 * receive all posted beacons completions. 637 * Without Serialization, in case we open many connections (QPs) on 638 * the same CQ, we might miss beacons because of missing interrupts. 639 */ 640 sx_xlock(&ig.close_conns_mutex); 641 642 /* 643 * In case we didn't already clean up the cma_id (peer initiated 644 * a disconnection), we need to Cause the CMA to change the QP 645 * state to ERROR. 646 */ 647 if (ib_conn->cma_id) { 648 err = rdma_disconnect(ib_conn->cma_id); 649 if (err) 650 ISER_ERR("Failed to disconnect, conn: 0x%p err %d", 651 iser_conn, err); 652 653 mtx_lock(&ib_conn->beacon.flush_lock); 654 memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr)); 655 ib_conn->beacon.send.wr_id = ISER_BEACON_WRID; 656 ib_conn->beacon.send.opcode = IB_WR_SEND; 657 /* post an indication that all send flush errors were consumed */ 658 err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr); 659 if (err) { 660 ISER_ERR("conn %p failed to post send_beacon", ib_conn); 661 mtx_unlock(&ib_conn->beacon.flush_lock); 662 goto out; 663 } 664 665 ISER_DBG("before send cv_wait: %p", iser_conn); 666 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock); 667 ISER_DBG("after send cv_wait: %p", iser_conn); 668 669 memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr)); 670 ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID; 671 /* post an indication that all recv flush errors were consumed */ 672 err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr); 673 if (err) { 674 ISER_ERR("conn %p failed to post recv_beacon", ib_conn); 675 mtx_unlock(&ib_conn->beacon.flush_lock); 676 goto out; 677 } 678 679 ISER_DBG("before recv cv_wait: %p", iser_conn); 680 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock); 681 mtx_unlock(&ib_conn->beacon.flush_lock); 682 ISER_DBG("after recv cv_wait: %p", iser_conn); 683 } 684 out: 685 sx_xunlock(&ig.close_conns_mutex); 686 return (1); 687 } 688 689 /** 690 * Called with state mutex held 691 **/ 692 static void 693 iser_connect_error(struct rdma_cm_id *cma_id) 694 { 695 struct iser_conn *iser_conn; 696 697 iser_conn = cma_id->context; 698 699 ISER_ERR("conn %p", iser_conn); 700 701 iser_conn->state = ISER_CONN_TERMINATING; 702 703 cv_signal(&iser_conn->up_cv); 704 } 705 706 /** 707 * Called with state mutex held 708 **/ 709 static void 710 iser_addr_handler(struct rdma_cm_id *cma_id) 711 { 712 struct iser_device *device; 713 struct iser_conn *iser_conn; 714 struct ib_conn *ib_conn; 715 int ret; 716 717 iser_conn = cma_id->context; 718 719 ib_conn = &iser_conn->ib_conn; 720 device = iser_device_find_by_ib_device(cma_id); 721 if (!device) { 722 ISER_ERR("conn %p device lookup/creation failed", 723 iser_conn); 724 iser_connect_error(cma_id); 725 return; 726 } 727 728 ib_conn->device = device; 729 730 ret = rdma_resolve_route(cma_id, 1000); 731 if (ret) { 732 ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret); 733 iser_connect_error(cma_id); 734 return; 735 } 736 } 737 738 /** 739 * Called with state mutex held 740 **/ 741 static void 742 iser_route_handler(struct rdma_cm_id *cma_id) 743 { 744 struct rdma_conn_param conn_param; 745 int ret; 746 struct iser_cm_hdr req_hdr; 747 struct iser_conn *iser_conn = cma_id->context; 748 struct ib_conn *ib_conn = &iser_conn->ib_conn; 749 struct iser_device *device = ib_conn->device; 750 751 ret = iser_create_ib_conn_res(ib_conn); 752 if (ret) 753 goto failure; 754 755 memset(&conn_param, 0, sizeof conn_param); 756 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; 757 conn_param.retry_count = 7; 758 conn_param.rnr_retry_count = 6; 759 /* 760 * Initiaotr depth should not be set, but in order to compat 761 * with old targets, we keep this value set. 762 */ 763 conn_param.initiator_depth = 1; 764 765 memset(&req_hdr, 0, sizeof(req_hdr)); 766 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 767 ISER_SEND_W_INV_NOT_SUPPORTED); 768 conn_param.private_data = (void *)&req_hdr; 769 conn_param.private_data_len = sizeof(struct iser_cm_hdr); 770 771 ret = rdma_connect(cma_id, &conn_param); 772 if (ret) { 773 ISER_ERR("conn %p failure connecting: %d", iser_conn, ret); 774 goto failure; 775 } 776 777 return; 778 failure: 779 iser_connect_error(cma_id); 780 } 781 782 /** 783 * Called with state mutex held 784 **/ 785 static void 786 iser_connected_handler(struct rdma_cm_id *cma_id) 787 { 788 struct iser_conn *iser_conn; 789 struct ib_qp_attr attr; 790 struct ib_qp_init_attr init_attr; 791 792 iser_conn = cma_id->context; 793 794 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); 795 796 ISER_INFO("remote qpn:%x my qpn:%x", 797 attr.dest_qp_num, cma_id->qp->qp_num); 798 799 iser_conn->state = ISER_CONN_UP; 800 801 cv_signal(&iser_conn->up_cv); 802 } 803 804 /** 805 * Called with state mutex held 806 **/ 807 static void 808 iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy) 809 { 810 struct iser_conn *iser_conn = cma_id->context; 811 812 if (iser_conn_terminate(iser_conn)) 813 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn); 814 815 } 816 817 int 818 iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 819 { 820 struct iser_conn *iser_conn; 821 int ret = 0; 822 823 iser_conn = cma_id->context; 824 ISER_INFO("event %d status %d conn %p id %p", 825 event->event, event->status, cma_id->context, cma_id); 826 827 sx_xlock(&iser_conn->state_mutex); 828 switch (event->event) { 829 case RDMA_CM_EVENT_ADDR_RESOLVED: 830 iser_addr_handler(cma_id); 831 break; 832 case RDMA_CM_EVENT_ROUTE_RESOLVED: 833 iser_route_handler(cma_id); 834 break; 835 case RDMA_CM_EVENT_ESTABLISHED: 836 iser_connected_handler(cma_id); 837 break; 838 case RDMA_CM_EVENT_ADDR_ERROR: 839 case RDMA_CM_EVENT_ROUTE_ERROR: 840 case RDMA_CM_EVENT_CONNECT_ERROR: 841 case RDMA_CM_EVENT_UNREACHABLE: 842 case RDMA_CM_EVENT_REJECTED: 843 iser_connect_error(cma_id); 844 break; 845 case RDMA_CM_EVENT_DISCONNECTED: 846 case RDMA_CM_EVENT_ADDR_CHANGE: 847 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 848 iser_cleanup_handler(cma_id, false); 849 break; 850 default: 851 ISER_ERR("Unexpected RDMA CM event (%d)", event->event); 852 break; 853 } 854 sx_xunlock(&iser_conn->state_mutex); 855 856 return (ret); 857 } 858 859 int 860 iser_post_recvl(struct iser_conn *iser_conn) 861 { 862 const struct ib_recv_wr *rx_wr_failed; 863 struct ib_recv_wr rx_wr; 864 struct ib_conn *ib_conn = &iser_conn->ib_conn; 865 struct ib_sge sge; 866 int ib_ret; 867 868 sge.addr = iser_conn->login_resp_dma; 869 sge.length = ISER_RX_LOGIN_SIZE; 870 sge.lkey = ib_conn->device->mr->lkey; 871 872 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; 873 rx_wr.sg_list = &sge; 874 rx_wr.num_sge = 1; 875 rx_wr.next = NULL; 876 877 ib_conn->post_recv_buf_count++; 878 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 879 if (ib_ret) { 880 ISER_ERR("ib_post_recv failed ret=%d", ib_ret); 881 ib_conn->post_recv_buf_count--; 882 } 883 884 return (ib_ret); 885 } 886 887 int 888 iser_post_recvm(struct iser_conn *iser_conn, int count) 889 { 890 const struct ib_recv_wr *rx_wr_failed; 891 struct ib_recv_wr *rx_wr; 892 int i, ib_ret; 893 struct ib_conn *ib_conn = &iser_conn->ib_conn; 894 unsigned int my_rx_head = iser_conn->rx_desc_head; 895 struct iser_rx_desc *rx_desc; 896 897 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 898 rx_desc = &iser_conn->rx_descs[my_rx_head]; 899 rx_wr->wr_id = (uintptr_t)rx_desc; 900 rx_wr->sg_list = &rx_desc->rx_sg; 901 rx_wr->num_sge = 1; 902 rx_wr->next = rx_wr + 1; 903 my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos; 904 } 905 906 rx_wr--; 907 rx_wr->next = NULL; /* mark end of work requests list */ 908 909 ib_conn->post_recv_buf_count += count; 910 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 911 if (ib_ret) { 912 ISER_ERR("ib_post_recv failed ret=%d", ib_ret); 913 ib_conn->post_recv_buf_count -= count; 914 } else 915 iser_conn->rx_desc_head = my_rx_head; 916 917 return (ib_ret); 918 } 919 920 /** 921 * iser_start_send - Initiate a Send DTO operation 922 * 923 * returns 0 on success, -1 on failure 924 */ 925 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, 926 bool signal) 927 { 928 int ib_ret; 929 const struct ib_send_wr *send_wr_failed; 930 struct ib_send_wr send_wr; 931 932 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 933 tx_desc->dma_addr, ISER_HEADERS_LEN, 934 DMA_TO_DEVICE); 935 936 send_wr.next = NULL; 937 send_wr.wr_id = (uintptr_t)tx_desc; 938 send_wr.sg_list = tx_desc->tx_sg; 939 send_wr.num_sge = tx_desc->num_sge; 940 send_wr.opcode = IB_WR_SEND; 941 send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0; 942 943 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 944 if (ib_ret) 945 ISER_ERR("ib_post_send failed, ret:%d", ib_ret); 946 947 return (ib_ret); 948 } 949