1 /* $FreeBSD$ */ 2 /*- 3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include "icl_iser.h" 28 29 static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend"); 30 static int iser_cq_poll_limit = 512; 31 32 static void 33 iser_cq_event_callback(struct ib_event *cause, void *context) 34 { 35 ISER_ERR("got cq event %d", cause->event); 36 } 37 38 static void 39 iser_qp_event_callback(struct ib_event *cause, void *context) 40 { 41 ISER_ERR("got qp event %d", cause->event); 42 } 43 44 static void 45 iser_event_handler(struct ib_event_handler *handler, 46 struct ib_event *event) 47 { 48 ISER_ERR("async event %d on device %s port %d", 49 event->event, event->device->name, 50 event->element.port_num); 51 } 52 53 /** 54 * is_iser_tx_desc - Indicate if the completion wr_id 55 * is a TX descriptor or not. 56 * @iser_conn: iser connection 57 * @wr_id: completion WR identifier 58 * 59 * Since we cannot rely on wc opcode in FLUSH errors 60 * we must work around it by checking if the wr_id address 61 * falls in the iser connection rx_descs buffer. If so 62 * it is an RX descriptor, otherwize it is a TX. 63 */ 64 static inline bool 65 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id) 66 { 67 void *start = iser_conn->rx_descs; 68 u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs); 69 void *end = (void *)((uintptr_t)start + (uintptr_t)len); 70 71 if (start) { 72 if (wr_id >= start && wr_id < end) 73 return false; 74 } else { 75 return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf); 76 } 77 78 return true; 79 } 80 81 /** 82 * iser_handle_comp_error() - Handle error completion 83 * @ib_conn: connection RDMA resources 84 * @wc: work completion 85 * 86 * Notes: Update post_recv_buf_count in case of recv error completion. 87 * For non-FLUSH error completion we should also notify iscsi layer that 88 * connection is failed (in case we passed bind stage). 89 */ 90 static void 91 iser_handle_comp_error(struct ib_conn *ib_conn, 92 struct ib_wc *wc) 93 { 94 void *wr_id = (void *)(uintptr_t)wc->wr_id; 95 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 96 ib_conn); 97 98 if (is_iser_tx_desc(iser_conn, wr_id)) { 99 ISER_DBG("conn %p got send comp error", iser_conn); 100 } else { 101 ISER_DBG("conn %p got recv comp error", iser_conn); 102 ib_conn->post_recv_buf_count--; 103 } 104 if (wc->status != IB_WC_WR_FLUSH_ERR) 105 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn); 106 } 107 108 /** 109 * iser_handle_wc - handle a single work completion 110 * @wc: work completion 111 * 112 * Soft-IRQ context, work completion can be either 113 * SEND or RECV, and can turn out successful or 114 * with error (or flush error). 115 */ 116 static void iser_handle_wc(struct ib_wc *wc) 117 { 118 struct ib_conn *ib_conn; 119 struct iser_tx_desc *tx_desc; 120 struct iser_rx_desc *rx_desc; 121 122 ib_conn = wc->qp->qp_context; 123 if (likely(wc->status == IB_WC_SUCCESS)) { 124 if (wc->opcode == IB_WC_RECV) { 125 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; 126 iser_rcv_completion(rx_desc, wc->byte_len, 127 ib_conn); 128 } else 129 if (wc->opcode == IB_WC_SEND) { 130 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; 131 iser_snd_completion(tx_desc, ib_conn); 132 } else { 133 ISER_ERR("Unknown wc opcode %d", wc->opcode); 134 } 135 } else { 136 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 137 ib_conn); 138 if (wc->status != IB_WC_WR_FLUSH_ERR) { 139 ISER_ERR("conn %p wr id %llx status %d vend_err %x", 140 iser_conn, (unsigned long long)wc->wr_id, 141 wc->status, wc->vendor_err); 142 } else { 143 ISER_DBG("flush error: conn %p wr id %llx", 144 iser_conn, (unsigned long long)wc->wr_id); 145 } 146 147 if (wc->wr_id == ISER_BEACON_WRID) { 148 /* all flush errors were consumed */ 149 mtx_lock(&ib_conn->beacon.flush_lock); 150 ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn); 151 cv_signal(&ib_conn->beacon.flush_cv); 152 mtx_unlock(&ib_conn->beacon.flush_lock); 153 } else { 154 iser_handle_comp_error(ib_conn, wc); 155 } 156 } 157 } 158 159 static void 160 iser_cq_tasklet_fn(void *data, int pending) 161 { 162 struct iser_comp *comp = (struct iser_comp *)data; 163 struct ib_cq *cq = comp->cq; 164 struct ib_wc *const wcs = comp->wcs; 165 int completed = 0; 166 int i; 167 int n; 168 169 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { 170 for (i = 0; i < n; i++) 171 iser_handle_wc(&wcs[i]); 172 173 completed += n; 174 if (completed >= iser_cq_poll_limit) 175 break; 176 } 177 178 /* 179 * It is assumed here that arming CQ only once its empty 180 * would not cause interrupts to be missed. 181 */ 182 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 183 } 184 185 static void 186 iser_cq_callback(struct ib_cq *cq, void *cq_context) 187 { 188 struct iser_comp *comp = cq_context; 189 190 taskqueue_enqueue(comp->tq, &comp->task); 191 } 192 193 /** 194 * iser_create_device_ib_res - creates Protection Domain (PD), Completion 195 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with 196 * the adapator. 197 * 198 * returns 0 on success, -1 on failure 199 */ 200 static int 201 iser_create_device_ib_res(struct iser_device *device) 202 { 203 struct ib_device *ib_dev = device->ib_device; 204 int i, max_cqe; 205 206 if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { 207 ISER_ERR("device %s doesn't support Fastreg, " 208 "can't register memory", device->ib_device->name); 209 return (1); 210 } 211 212 device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors); 213 214 device->comps = malloc(device->comps_used * sizeof(*device->comps), 215 M_ISER_VERBS, M_WAITOK | M_ZERO); 216 if (!device->comps) 217 goto comps_err; 218 219 max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe); 220 221 ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d", 222 device->comps_used, device->ib_device->name, 223 device->ib_device->num_comp_vectors, max_cqe); 224 225 device->pd = ib_alloc_pd(device->ib_device, IB_PD_UNSAFE_GLOBAL_RKEY); 226 if (IS_ERR(device->pd)) 227 goto pd_err; 228 229 for (i = 0; i < device->comps_used; i++) { 230 struct iser_comp *comp = &device->comps[i]; 231 struct ib_cq_init_attr cq_attr = { 232 .cqe = max_cqe, 233 .comp_vector = i, 234 }; 235 236 comp->device = device; 237 comp->cq = ib_create_cq(device->ib_device, 238 iser_cq_callback, 239 iser_cq_event_callback, 240 (void *)comp, 241 &cq_attr); 242 if (IS_ERR(comp->cq)) { 243 comp->cq = NULL; 244 goto cq_err; 245 } 246 247 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) 248 goto cq_err; 249 250 TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp); 251 comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT, 252 taskqueue_thread_enqueue, &comp->tq); 253 if (!comp->tq) 254 goto tq_err; 255 taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq"); 256 } 257 258 device->mr = device->pd->__internal_mr; 259 if (IS_ERR(device->mr)) 260 goto tq_err; 261 262 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 263 iser_event_handler); 264 if (ib_register_event_handler(&device->event_handler)) 265 goto handler_err; 266 267 return (0); 268 269 handler_err: 270 ib_dereg_mr(device->mr); 271 tq_err: 272 for (i = 0; i < device->comps_used; i++) { 273 struct iser_comp *comp = &device->comps[i]; 274 if (comp->tq) 275 taskqueue_free(comp->tq); 276 } 277 cq_err: 278 for (i = 0; i < device->comps_used; i++) { 279 struct iser_comp *comp = &device->comps[i]; 280 if (comp->cq) 281 ib_destroy_cq(comp->cq); 282 } 283 ib_dealloc_pd(device->pd); 284 pd_err: 285 free(device->comps, M_ISER_VERBS); 286 comps_err: 287 ISER_ERR("failed to allocate an IB resource"); 288 return (1); 289 } 290 291 /** 292 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 293 * CQ and PD created with the device associated with the adapator. 294 */ 295 static void 296 iser_free_device_ib_res(struct iser_device *device) 297 { 298 int i; 299 300 for (i = 0; i < device->comps_used; i++) { 301 struct iser_comp *comp = &device->comps[i]; 302 303 taskqueue_free(comp->tq); 304 ib_destroy_cq(comp->cq); 305 comp->cq = NULL; 306 } 307 308 (void)ib_unregister_event_handler(&device->event_handler); 309 (void)ib_dereg_mr(device->mr); 310 (void)ib_dealloc_pd(device->pd); 311 312 free(device->comps, M_ISER_VERBS); 313 device->comps = NULL; 314 315 device->mr = NULL; 316 device->pd = NULL; 317 } 318 319 static int 320 iser_alloc_reg_res(struct ib_device *ib_device, 321 struct ib_pd *pd, 322 struct iser_reg_resources *res) 323 { 324 int ret; 325 326 res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE + 1); 327 if (IS_ERR(res->mr)) { 328 ret = -PTR_ERR(res->mr); 329 ISER_ERR("Failed to allocate fast reg mr err=%d", ret); 330 return (ret); 331 } 332 res->mr_valid = 1; 333 334 return (0); 335 } 336 337 static void 338 iser_free_reg_res(struct iser_reg_resources *rsc) 339 { 340 ib_dereg_mr(rsc->mr); 341 } 342 343 static struct fast_reg_descriptor * 344 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd) 345 { 346 struct fast_reg_descriptor *desc; 347 int ret; 348 349 desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO); 350 if (!desc) { 351 ISER_ERR("Failed to allocate a new fastreg descriptor"); 352 return (NULL); 353 } 354 355 ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc); 356 if (ret) { 357 ISER_ERR("failed to allocate reg_resources"); 358 goto err; 359 } 360 361 return (desc); 362 err: 363 free(desc, M_ISER_VERBS); 364 return (NULL); 365 } 366 367 /** 368 * iser_create_fmr_pool - Creates FMR pool and page_vector 369 * 370 * returns 0 on success, or errno code on failure 371 */ 372 int 373 iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max) 374 { 375 struct iser_device *device = ib_conn->device; 376 struct fast_reg_descriptor *desc; 377 int i; 378 379 INIT_LIST_HEAD(&ib_conn->fastreg.pool); 380 ib_conn->fastreg.pool_size = 0; 381 for (i = 0; i < cmds_max; i++) { 382 desc = iser_create_fastreg_desc(device->ib_device, device->pd); 383 if (!desc) { 384 ISER_ERR("Failed to create fastreg descriptor"); 385 goto err; 386 } 387 388 list_add_tail(&desc->list, &ib_conn->fastreg.pool); 389 ib_conn->fastreg.pool_size++; 390 } 391 392 return (0); 393 394 err: 395 iser_free_fastreg_pool(ib_conn); 396 return (ENOMEM); 397 } 398 399 /** 400 * iser_free_fmr_pool - releases the FMR pool and page vec 401 */ 402 void 403 iser_free_fastreg_pool(struct ib_conn *ib_conn) 404 { 405 struct fast_reg_descriptor *desc, *tmp; 406 int i = 0; 407 408 if (list_empty(&ib_conn->fastreg.pool)) 409 return; 410 411 ISER_DBG("freeing conn %p fr pool", ib_conn); 412 413 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { 414 list_del(&desc->list); 415 iser_free_reg_res(&desc->rsc); 416 free(desc, M_ISER_VERBS); 417 ++i; 418 } 419 420 if (i < ib_conn->fastreg.pool_size) 421 ISER_WARN("pool still has %d regions registered", 422 ib_conn->fastreg.pool_size - i); 423 } 424 425 /** 426 * iser_create_ib_conn_res - Queue-Pair (QP) 427 * 428 * returns 0 on success, 1 on failure 429 */ 430 static int 431 iser_create_ib_conn_res(struct ib_conn *ib_conn) 432 { 433 struct iser_conn *iser_conn; 434 struct iser_device *device; 435 struct ib_device_attr *dev_attr; 436 struct ib_qp_init_attr init_attr; 437 int index, min_index = 0; 438 int ret = -ENOMEM; 439 440 iser_conn = container_of(ib_conn, struct iser_conn, ib_conn); 441 device = ib_conn->device; 442 dev_attr = &device->dev_attr; 443 444 mtx_lock(&ig.connlist_mutex); 445 /* select the CQ with the minimal number of usages */ 446 for (index = 0; index < device->comps_used; index++) { 447 if (device->comps[index].active_qps < 448 device->comps[min_index].active_qps) 449 min_index = index; 450 } 451 ib_conn->comp = &device->comps[min_index]; 452 ib_conn->comp->active_qps++; 453 mtx_unlock(&ig.connlist_mutex); 454 ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn); 455 456 memset(&init_attr, 0, sizeof init_attr); 457 init_attr.event_handler = iser_qp_event_callback; 458 init_attr.qp_context = (void *)ib_conn; 459 init_attr.send_cq = ib_conn->comp->cq; 460 init_attr.recv_cq = ib_conn->comp->cq; 461 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 462 init_attr.cap.max_send_sge = 2; 463 init_attr.cap.max_recv_sge = 1; 464 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 465 init_attr.qp_type = IB_QPT_RC; 466 467 if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) { 468 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 469 iser_conn->max_cmds = 470 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); 471 } else { 472 init_attr.cap.max_send_wr = dev_attr->max_qp_wr; 473 iser_conn->max_cmds = 474 ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr); 475 } 476 ISER_DBG("device %s supports max_send_wr %d", 477 device->ib_device->name, dev_attr->max_qp_wr); 478 479 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 480 if (ret) 481 goto out_err; 482 483 ib_conn->qp = ib_conn->cma_id->qp; 484 ISER_DBG("setting conn %p cma_id %p qp %p", 485 ib_conn, ib_conn->cma_id, 486 ib_conn->cma_id->qp); 487 488 return (ret); 489 490 out_err: 491 mtx_lock(&ig.connlist_mutex); 492 ib_conn->comp->active_qps--; 493 mtx_unlock(&ig.connlist_mutex); 494 ISER_ERR("unable to alloc mem or create resource, err %d", ret); 495 496 return (ret); 497 } 498 499 /** 500 * based on the resolved device node GUID see if there already allocated 501 * device for this device. If there's no such, create one. 502 */ 503 static struct iser_device * 504 iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 505 { 506 struct iser_device *device; 507 508 sx_xlock(&ig.device_list_mutex); 509 510 list_for_each_entry(device, &ig.device_list, ig_list) 511 /* find if there's a match using the node GUID */ 512 if (device->ib_device->node_guid == cma_id->device->node_guid) 513 goto inc_refcnt; 514 515 device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO); 516 if (device == NULL) 517 goto out; 518 519 /* assign this device to the device */ 520 device->ib_device = cma_id->device; 521 /* init the device and link it into ig device list */ 522 if (iser_create_device_ib_res(device)) { 523 free(device, M_ISER_VERBS); 524 device = NULL; 525 goto out; 526 } 527 list_add(&device->ig_list, &ig.device_list); 528 529 inc_refcnt: 530 device->refcount++; 531 ISER_INFO("device %p refcount %d", device, device->refcount); 532 out: 533 sx_xunlock(&ig.device_list_mutex); 534 return (device); 535 } 536 537 /* if there's no demand for this device, release it */ 538 static void 539 iser_device_try_release(struct iser_device *device) 540 { 541 sx_xlock(&ig.device_list_mutex); 542 device->refcount--; 543 ISER_INFO("device %p refcount %d", device, device->refcount); 544 if (!device->refcount) { 545 iser_free_device_ib_res(device); 546 list_del(&device->ig_list); 547 free(device, M_ISER_VERBS); 548 device = NULL; 549 } 550 sx_xunlock(&ig.device_list_mutex); 551 } 552 553 /** 554 * Called with state mutex held 555 **/ 556 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, 557 enum iser_conn_state comp, 558 enum iser_conn_state exch) 559 { 560 int ret; 561 562 ret = (iser_conn->state == comp); 563 if (ret) 564 iser_conn->state = exch; 565 566 return ret; 567 } 568 569 /** 570 * iser_free_ib_conn_res - release IB related resources 571 * @iser_conn: iser connection struct 572 * @destroy: indicator if we need to try to release the 573 * iser device and memory regoins pool (only iscsi 574 * shutdown and DEVICE_REMOVAL will use this). 575 * 576 * This routine is called with the iser state mutex held 577 * so the cm_id removal is out of here. It is Safe to 578 * be invoked multiple times. 579 */ 580 void 581 iser_free_ib_conn_res(struct iser_conn *iser_conn, 582 bool destroy) 583 { 584 struct ib_conn *ib_conn = &iser_conn->ib_conn; 585 struct iser_device *device = ib_conn->device; 586 587 ISER_INFO("freeing conn %p cma_id %p qp %p", 588 iser_conn, ib_conn->cma_id, ib_conn->qp); 589 590 if (ib_conn->qp != NULL) { 591 mtx_lock(&ig.connlist_mutex); 592 ib_conn->comp->active_qps--; 593 mtx_unlock(&ig.connlist_mutex); 594 rdma_destroy_qp(ib_conn->cma_id); 595 ib_conn->qp = NULL; 596 } 597 598 if (destroy) { 599 if (iser_conn->login_buf) 600 iser_free_login_buf(iser_conn); 601 602 if (iser_conn->rx_descs) 603 iser_free_rx_descriptors(iser_conn); 604 605 if (device != NULL) { 606 iser_device_try_release(device); 607 ib_conn->device = NULL; 608 } 609 } 610 } 611 612 /** 613 * triggers start of the disconnect procedures and wait for them to be done 614 * Called with state mutex held 615 */ 616 int 617 iser_conn_terminate(struct iser_conn *iser_conn) 618 { 619 struct ib_conn *ib_conn = &iser_conn->ib_conn; 620 struct ib_send_wr *bad_send_wr; 621 struct ib_recv_wr *bad_recv_wr; 622 int err = 0; 623 624 /* terminate the iser conn only if the conn state is UP */ 625 if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, 626 ISER_CONN_TERMINATING)) 627 return (0); 628 629 ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state); 630 631 if (ib_conn->qp == NULL) { 632 /* HOW can this be??? */ 633 ISER_WARN("qp wasn't created"); 634 return (1); 635 } 636 637 /* 638 * Todo: This is a temporary workaround. 639 * We serialize the connection closure using global lock in order to 640 * receive all posted beacons completions. 641 * Without Serialization, in case we open many connections (QPs) on 642 * the same CQ, we might miss beacons because of missing interrupts. 643 */ 644 sx_xlock(&ig.close_conns_mutex); 645 646 /* 647 * In case we didn't already clean up the cma_id (peer initiated 648 * a disconnection), we need to Cause the CMA to change the QP 649 * state to ERROR. 650 */ 651 if (ib_conn->cma_id) { 652 err = rdma_disconnect(ib_conn->cma_id); 653 if (err) 654 ISER_ERR("Failed to disconnect, conn: 0x%p err %d", 655 iser_conn, err); 656 657 mtx_lock(&ib_conn->beacon.flush_lock); 658 memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr)); 659 ib_conn->beacon.send.wr_id = ISER_BEACON_WRID; 660 ib_conn->beacon.send.opcode = IB_WR_SEND; 661 /* post an indication that all send flush errors were consumed */ 662 err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr); 663 if (err) { 664 ISER_ERR("conn %p failed to post send_beacon", ib_conn); 665 mtx_unlock(&ib_conn->beacon.flush_lock); 666 goto out; 667 } 668 669 ISER_DBG("before send cv_wait: %p", iser_conn); 670 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock); 671 ISER_DBG("after send cv_wait: %p", iser_conn); 672 673 memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr)); 674 ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID; 675 /* post an indication that all recv flush errors were consumed */ 676 err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr); 677 if (err) { 678 ISER_ERR("conn %p failed to post recv_beacon", ib_conn); 679 mtx_unlock(&ib_conn->beacon.flush_lock); 680 goto out; 681 } 682 683 ISER_DBG("before recv cv_wait: %p", iser_conn); 684 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock); 685 mtx_unlock(&ib_conn->beacon.flush_lock); 686 ISER_DBG("after recv cv_wait: %p", iser_conn); 687 } 688 out: 689 sx_xunlock(&ig.close_conns_mutex); 690 return (1); 691 } 692 693 /** 694 * Called with state mutex held 695 **/ 696 static void 697 iser_connect_error(struct rdma_cm_id *cma_id) 698 { 699 struct iser_conn *iser_conn; 700 701 iser_conn = cma_id->context; 702 703 ISER_ERR("conn %p", iser_conn); 704 705 iser_conn->state = ISER_CONN_TERMINATING; 706 707 cv_signal(&iser_conn->up_cv); 708 } 709 710 /** 711 * Called with state mutex held 712 **/ 713 static void 714 iser_addr_handler(struct rdma_cm_id *cma_id) 715 { 716 struct iser_device *device; 717 struct iser_conn *iser_conn; 718 struct ib_conn *ib_conn; 719 int ret; 720 721 iser_conn = cma_id->context; 722 723 ib_conn = &iser_conn->ib_conn; 724 device = iser_device_find_by_ib_device(cma_id); 725 if (!device) { 726 ISER_ERR("conn %p device lookup/creation failed", 727 iser_conn); 728 iser_connect_error(cma_id); 729 return; 730 } 731 732 ib_conn->device = device; 733 734 ret = rdma_resolve_route(cma_id, 1000); 735 if (ret) { 736 ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret); 737 iser_connect_error(cma_id); 738 return; 739 } 740 } 741 742 /** 743 * Called with state mutex held 744 **/ 745 static void 746 iser_route_handler(struct rdma_cm_id *cma_id) 747 { 748 struct rdma_conn_param conn_param; 749 int ret; 750 struct iser_cm_hdr req_hdr; 751 struct iser_conn *iser_conn = cma_id->context; 752 struct ib_conn *ib_conn = &iser_conn->ib_conn; 753 struct iser_device *device = ib_conn->device; 754 755 ret = iser_create_ib_conn_res(ib_conn); 756 if (ret) 757 goto failure; 758 759 memset(&conn_param, 0, sizeof conn_param); 760 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; 761 conn_param.retry_count = 7; 762 conn_param.rnr_retry_count = 6; 763 /* 764 * Initiaotr depth should not be set, but in order to compat 765 * with old targets, we keep this value set. 766 */ 767 conn_param.initiator_depth = 1; 768 769 memset(&req_hdr, 0, sizeof(req_hdr)); 770 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 771 ISER_SEND_W_INV_NOT_SUPPORTED); 772 conn_param.private_data = (void *)&req_hdr; 773 conn_param.private_data_len = sizeof(struct iser_cm_hdr); 774 775 ret = rdma_connect(cma_id, &conn_param); 776 if (ret) { 777 ISER_ERR("conn %p failure connecting: %d", iser_conn, ret); 778 goto failure; 779 } 780 781 return; 782 failure: 783 iser_connect_error(cma_id); 784 } 785 786 /** 787 * Called with state mutex held 788 **/ 789 static void 790 iser_connected_handler(struct rdma_cm_id *cma_id) 791 { 792 struct iser_conn *iser_conn; 793 struct ib_qp_attr attr; 794 struct ib_qp_init_attr init_attr; 795 796 iser_conn = cma_id->context; 797 798 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); 799 800 ISER_INFO("remote qpn:%x my qpn:%x", 801 attr.dest_qp_num, cma_id->qp->qp_num); 802 803 iser_conn->state = ISER_CONN_UP; 804 805 cv_signal(&iser_conn->up_cv); 806 } 807 808 /** 809 * Called with state mutex held 810 **/ 811 static void 812 iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy) 813 { 814 struct iser_conn *iser_conn = cma_id->context; 815 816 if (iser_conn_terminate(iser_conn)) 817 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn); 818 819 } 820 821 int 822 iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 823 { 824 struct iser_conn *iser_conn; 825 int ret = 0; 826 827 iser_conn = cma_id->context; 828 ISER_INFO("event %d status %d conn %p id %p", 829 event->event, event->status, cma_id->context, cma_id); 830 831 sx_xlock(&iser_conn->state_mutex); 832 switch (event->event) { 833 case RDMA_CM_EVENT_ADDR_RESOLVED: 834 iser_addr_handler(cma_id); 835 break; 836 case RDMA_CM_EVENT_ROUTE_RESOLVED: 837 iser_route_handler(cma_id); 838 break; 839 case RDMA_CM_EVENT_ESTABLISHED: 840 iser_connected_handler(cma_id); 841 break; 842 case RDMA_CM_EVENT_ADDR_ERROR: 843 case RDMA_CM_EVENT_ROUTE_ERROR: 844 case RDMA_CM_EVENT_CONNECT_ERROR: 845 case RDMA_CM_EVENT_UNREACHABLE: 846 case RDMA_CM_EVENT_REJECTED: 847 iser_connect_error(cma_id); 848 break; 849 case RDMA_CM_EVENT_DISCONNECTED: 850 case RDMA_CM_EVENT_ADDR_CHANGE: 851 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 852 iser_cleanup_handler(cma_id, false); 853 break; 854 default: 855 ISER_ERR("Unexpected RDMA CM event (%d)", event->event); 856 break; 857 } 858 sx_xunlock(&iser_conn->state_mutex); 859 860 return (ret); 861 } 862 863 int 864 iser_post_recvl(struct iser_conn *iser_conn) 865 { 866 struct ib_recv_wr rx_wr, *rx_wr_failed; 867 struct ib_conn *ib_conn = &iser_conn->ib_conn; 868 struct ib_sge sge; 869 int ib_ret; 870 871 sge.addr = iser_conn->login_resp_dma; 872 sge.length = ISER_RX_LOGIN_SIZE; 873 sge.lkey = ib_conn->device->mr->lkey; 874 875 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; 876 rx_wr.sg_list = &sge; 877 rx_wr.num_sge = 1; 878 rx_wr.next = NULL; 879 880 ib_conn->post_recv_buf_count++; 881 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 882 if (ib_ret) { 883 ISER_ERR("ib_post_recv failed ret=%d", ib_ret); 884 ib_conn->post_recv_buf_count--; 885 } 886 887 return (ib_ret); 888 } 889 890 int 891 iser_post_recvm(struct iser_conn *iser_conn, int count) 892 { 893 struct ib_recv_wr *rx_wr, *rx_wr_failed; 894 int i, ib_ret; 895 struct ib_conn *ib_conn = &iser_conn->ib_conn; 896 unsigned int my_rx_head = iser_conn->rx_desc_head; 897 struct iser_rx_desc *rx_desc; 898 899 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 900 rx_desc = &iser_conn->rx_descs[my_rx_head]; 901 rx_wr->wr_id = (uintptr_t)rx_desc; 902 rx_wr->sg_list = &rx_desc->rx_sg; 903 rx_wr->num_sge = 1; 904 rx_wr->next = rx_wr + 1; 905 my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos; 906 } 907 908 rx_wr--; 909 rx_wr->next = NULL; /* mark end of work requests list */ 910 911 ib_conn->post_recv_buf_count += count; 912 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 913 if (ib_ret) { 914 ISER_ERR("ib_post_recv failed ret=%d", ib_ret); 915 ib_conn->post_recv_buf_count -= count; 916 } else 917 iser_conn->rx_desc_head = my_rx_head; 918 919 return (ib_ret); 920 } 921 922 /** 923 * iser_start_send - Initiate a Send DTO operation 924 * 925 * returns 0 on success, -1 on failure 926 */ 927 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, 928 bool signal) 929 { 930 int ib_ret; 931 struct ib_send_wr send_wr, *send_wr_failed; 932 933 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 934 tx_desc->dma_addr, ISER_HEADERS_LEN, 935 DMA_TO_DEVICE); 936 937 send_wr.next = NULL; 938 send_wr.wr_id = (uintptr_t)tx_desc; 939 send_wr.sg_list = tx_desc->tx_sg; 940 send_wr.num_sge = tx_desc->num_sge; 941 send_wr.opcode = IB_WR_SEND; 942 send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0; 943 944 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 945 if (ib_ret) 946 ISER_ERR("ib_post_send failed, ret:%d", ib_ret); 947 948 return (ib_ret); 949 } 950