1 /* $FreeBSD$ */ 2 /*- 3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include "icl_iser.h" 28 29 static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend"); 30 static int iser_cq_poll_limit = 512; 31 32 static void 33 iser_cq_event_callback(struct ib_event *cause, void *context) 34 { 35 ISER_ERR("got cq event %d", cause->event); 36 } 37 38 static void 39 iser_qp_event_callback(struct ib_event *cause, void *context) 40 { 41 ISER_ERR("got qp event %d", cause->event); 42 } 43 44 static void 45 iser_event_handler(struct ib_event_handler *handler, 46 struct ib_event *event) 47 { 48 ISER_ERR("async event %d on device %s port %d", 49 event->event, event->device->name, 50 event->element.port_num); 51 } 52 53 /** 54 * is_iser_tx_desc - Indicate if the completion wr_id 55 * is a TX descriptor or not. 56 * @iser_conn: iser connection 57 * @wr_id: completion WR identifier 58 * 59 * Since we cannot rely on wc opcode in FLUSH errors 60 * we must work around it by checking if the wr_id address 61 * falls in the iser connection rx_descs buffer. If so 62 * it is an RX descriptor, otherwize it is a TX. 63 */ 64 static inline bool 65 is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id) 66 { 67 void *start = iser_conn->rx_descs; 68 u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs); 69 void *end = (void *)((uintptr_t)start + (uintptr_t)len); 70 71 if (start) { 72 if (wr_id >= start && wr_id < end) 73 return false; 74 } else { 75 return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf); 76 } 77 78 return true; 79 } 80 81 /** 82 * iser_handle_comp_error() - Handle error completion 83 * @ib_conn: connection RDMA resources 84 * @wc: work completion 85 * 86 * Notes: Update post_recv_buf_count in case of recv error completion. 87 * For non-FLUSH error completion we should also notify iscsi layer that 88 * connection is failed (in case we passed bind stage). 89 */ 90 static void 91 iser_handle_comp_error(struct ib_conn *ib_conn, 92 struct ib_wc *wc) 93 { 94 void *wr_id = (void *)(uintptr_t)wc->wr_id; 95 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 96 ib_conn); 97 98 if (is_iser_tx_desc(iser_conn, wr_id)) { 99 ISER_DBG("conn %p got send comp error", iser_conn); 100 } else { 101 ISER_DBG("conn %p got recv comp error", iser_conn); 102 ib_conn->post_recv_buf_count--; 103 } 104 if (wc->status != IB_WC_WR_FLUSH_ERR) 105 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn); 106 } 107 108 /** 109 * iser_handle_wc - handle a single work completion 110 * @wc: work completion 111 * 112 * Soft-IRQ context, work completion can be either 113 * SEND or RECV, and can turn out successful or 114 * with error (or flush error). 115 */ 116 static void iser_handle_wc(struct ib_wc *wc) 117 { 118 struct ib_conn *ib_conn; 119 struct iser_tx_desc *tx_desc; 120 struct iser_rx_desc *rx_desc; 121 122 ib_conn = wc->qp->qp_context; 123 if (likely(wc->status == IB_WC_SUCCESS)) { 124 if (wc->opcode == IB_WC_RECV) { 125 rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; 126 iser_rcv_completion(rx_desc, wc->byte_len, 127 ib_conn); 128 } else 129 if (wc->opcode == IB_WC_SEND) { 130 tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; 131 iser_snd_completion(tx_desc, ib_conn); 132 } else { 133 ISER_ERR("Unknown wc opcode %d", wc->opcode); 134 } 135 } else { 136 struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, 137 ib_conn); 138 if (wc->status != IB_WC_WR_FLUSH_ERR) { 139 ISER_ERR("conn %p wr id %lx status %d vend_err %x", 140 iser_conn, wc->wr_id, wc->status, wc->vendor_err); 141 } else { 142 ISER_DBG("flush error: conn %p wr id %lx", iser_conn, wc->wr_id); 143 } 144 145 if (wc->wr_id == ISER_BEACON_WRID) { 146 /* all flush errors were consumed */ 147 mtx_lock(&ib_conn->beacon.flush_lock); 148 ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn); 149 cv_signal(&ib_conn->beacon.flush_cv); 150 mtx_unlock(&ib_conn->beacon.flush_lock); 151 } else { 152 iser_handle_comp_error(ib_conn, wc); 153 } 154 } 155 } 156 157 static void 158 iser_cq_tasklet_fn(void *data, int pending) 159 { 160 struct iser_comp *comp = (struct iser_comp *)data; 161 struct ib_cq *cq = comp->cq; 162 struct ib_wc *const wcs = comp->wcs; 163 int completed = 0; 164 int i; 165 int n; 166 167 while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { 168 for (i = 0; i < n; i++) 169 iser_handle_wc(&wcs[i]); 170 171 completed += n; 172 if (completed >= iser_cq_poll_limit) 173 break; 174 } 175 176 /* 177 * It is assumed here that arming CQ only once its empty 178 * would not cause interrupts to be missed. 179 */ 180 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 181 } 182 183 static void 184 iser_cq_callback(struct ib_cq *cq, void *cq_context) 185 { 186 struct iser_comp *comp = cq_context; 187 188 taskqueue_enqueue(comp->tq, &comp->task); 189 } 190 191 /** 192 * iser_create_device_ib_res - creates Protection Domain (PD), Completion 193 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with 194 * the adapator. 195 * 196 * returns 0 on success, -1 on failure 197 */ 198 static int 199 iser_create_device_ib_res(struct iser_device *device) 200 { 201 struct ib_device_attr *dev_attr = &device->dev_attr; 202 int ret, i, max_cqe; 203 204 ret = ib_query_device(device->ib_device, dev_attr); 205 if (ret) { 206 ISER_ERR("Query device failed for %s", device->ib_device->name); 207 return (ret); 208 } 209 210 if (!(dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) { 211 ISER_ERR("device %s doesn't support Fastreg, " 212 "can't register memory", device->ib_device->name); 213 return (1); 214 } 215 216 device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors); 217 218 device->comps = malloc(device->comps_used * sizeof(*device->comps), 219 M_ISER_VERBS, M_WAITOK | M_ZERO); 220 if (!device->comps) 221 goto comps_err; 222 223 max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe); 224 225 ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d", 226 device->comps_used, device->ib_device->name, 227 device->ib_device->num_comp_vectors, max_cqe); 228 229 device->pd = ib_alloc_pd(device->ib_device); 230 if (IS_ERR(device->pd)) 231 goto pd_err; 232 233 for (i = 0; i < device->comps_used; i++) { 234 struct iser_comp *comp = &device->comps[i]; 235 236 comp->device = device; 237 comp->cq = ib_create_cq(device->ib_device, 238 iser_cq_callback, 239 iser_cq_event_callback, 240 (void *)comp, 241 max_cqe, i); 242 if (IS_ERR(comp->cq)) { 243 comp->cq = NULL; 244 goto cq_err; 245 } 246 247 if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) 248 goto cq_err; 249 250 TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp); 251 comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT, 252 taskqueue_thread_enqueue, &comp->tq); 253 if (!comp->tq) 254 goto tq_err; 255 taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq"); 256 } 257 258 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 259 IB_ACCESS_REMOTE_WRITE | 260 IB_ACCESS_REMOTE_READ); 261 if (IS_ERR(device->mr)) 262 goto tq_err; 263 264 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 265 iser_event_handler); 266 if (ib_register_event_handler(&device->event_handler)) 267 goto handler_err; 268 269 return (0); 270 271 handler_err: 272 ib_dereg_mr(device->mr); 273 tq_err: 274 for (i = 0; i < device->comps_used; i++) { 275 struct iser_comp *comp = &device->comps[i]; 276 if (comp->tq) 277 taskqueue_free(comp->tq); 278 } 279 cq_err: 280 for (i = 0; i < device->comps_used; i++) { 281 struct iser_comp *comp = &device->comps[i]; 282 if (comp->cq) 283 ib_destroy_cq(comp->cq); 284 } 285 ib_dealloc_pd(device->pd); 286 pd_err: 287 free(device->comps, M_ISER_VERBS); 288 comps_err: 289 ISER_ERR("failed to allocate an IB resource"); 290 return (1); 291 } 292 293 /** 294 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 295 * CQ and PD created with the device associated with the adapator. 296 */ 297 static void 298 iser_free_device_ib_res(struct iser_device *device) 299 { 300 int i; 301 302 for (i = 0; i < device->comps_used; i++) { 303 struct iser_comp *comp = &device->comps[i]; 304 305 taskqueue_free(comp->tq); 306 ib_destroy_cq(comp->cq); 307 comp->cq = NULL; 308 } 309 310 (void)ib_unregister_event_handler(&device->event_handler); 311 (void)ib_dereg_mr(device->mr); 312 (void)ib_dealloc_pd(device->pd); 313 314 free(device->comps, M_ISER_VERBS); 315 device->comps = NULL; 316 317 device->mr = NULL; 318 device->pd = NULL; 319 } 320 321 static int 322 iser_alloc_reg_res(struct ib_device *ib_device, 323 struct ib_pd *pd, 324 struct iser_reg_resources *res) 325 { 326 int ret; 327 328 res->frpl = ib_alloc_fast_reg_page_list(ib_device, 329 ISCSI_ISER_SG_TABLESIZE + 1); 330 if (IS_ERR(res->frpl)) { 331 ret = -PTR_ERR(res->frpl); 332 ISER_ERR("Failed to allocate fast reg page list err=%d", ret); 333 return (ret); 334 } 335 336 res->mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); 337 if (IS_ERR(res->mr)) { 338 ret = -PTR_ERR(res->mr); 339 ISER_ERR("Failed to allocate fast reg mr err=%d", ret); 340 goto fast_reg_mr_failure; 341 } 342 res->mr_valid = 1; 343 344 return (0); 345 346 fast_reg_mr_failure: 347 ib_free_fast_reg_page_list(res->frpl); 348 349 return (ret); 350 } 351 352 static void 353 iser_free_reg_res(struct iser_reg_resources *rsc) 354 { 355 ib_dereg_mr(rsc->mr); 356 ib_free_fast_reg_page_list(rsc->frpl); 357 } 358 359 static struct fast_reg_descriptor * 360 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd) 361 { 362 struct fast_reg_descriptor *desc; 363 int ret; 364 365 desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO); 366 if (!desc) { 367 ISER_ERR("Failed to allocate a new fastreg descriptor"); 368 return (NULL); 369 } 370 371 ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc); 372 if (ret) { 373 ISER_ERR("failed to allocate reg_resources"); 374 goto err; 375 } 376 377 return (desc); 378 err: 379 free(desc, M_ISER_VERBS); 380 return (NULL); 381 } 382 383 /** 384 * iser_create_fmr_pool - Creates FMR pool and page_vector 385 * 386 * returns 0 on success, or errno code on failure 387 */ 388 int 389 iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max) 390 { 391 struct iser_device *device = ib_conn->device; 392 struct fast_reg_descriptor *desc; 393 int i; 394 395 INIT_LIST_HEAD(&ib_conn->fastreg.pool); 396 ib_conn->fastreg.pool_size = 0; 397 for (i = 0; i < cmds_max; i++) { 398 desc = iser_create_fastreg_desc(device->ib_device, device->pd); 399 if (!desc) { 400 ISER_ERR("Failed to create fastreg descriptor"); 401 goto err; 402 } 403 404 list_add_tail(&desc->list, &ib_conn->fastreg.pool); 405 ib_conn->fastreg.pool_size++; 406 } 407 408 return (0); 409 410 err: 411 iser_free_fastreg_pool(ib_conn); 412 return (ENOMEM); 413 } 414 415 /** 416 * iser_free_fmr_pool - releases the FMR pool and page vec 417 */ 418 void 419 iser_free_fastreg_pool(struct ib_conn *ib_conn) 420 { 421 struct fast_reg_descriptor *desc, *tmp; 422 int i = 0; 423 424 if (list_empty(&ib_conn->fastreg.pool)) 425 return; 426 427 ISER_DBG("freeing conn %p fr pool", ib_conn); 428 429 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { 430 list_del(&desc->list); 431 iser_free_reg_res(&desc->rsc); 432 free(desc, M_ISER_VERBS); 433 ++i; 434 } 435 436 if (i < ib_conn->fastreg.pool_size) 437 ISER_WARN("pool still has %d regions registered", 438 ib_conn->fastreg.pool_size - i); 439 } 440 441 /** 442 * iser_create_ib_conn_res - Queue-Pair (QP) 443 * 444 * returns 0 on success, 1 on failure 445 */ 446 static int 447 iser_create_ib_conn_res(struct ib_conn *ib_conn) 448 { 449 struct iser_conn *iser_conn; 450 struct iser_device *device; 451 struct ib_device_attr *dev_attr; 452 struct ib_qp_init_attr init_attr; 453 int index, min_index = 0; 454 int ret = -ENOMEM; 455 456 iser_conn = container_of(ib_conn, struct iser_conn, ib_conn); 457 device = ib_conn->device; 458 dev_attr = &device->dev_attr; 459 460 mtx_lock(&ig.connlist_mutex); 461 /* select the CQ with the minimal number of usages */ 462 for (index = 0; index < device->comps_used; index++) { 463 if (device->comps[index].active_qps < 464 device->comps[min_index].active_qps) 465 min_index = index; 466 } 467 ib_conn->comp = &device->comps[min_index]; 468 ib_conn->comp->active_qps++; 469 mtx_unlock(&ig.connlist_mutex); 470 ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn); 471 472 memset(&init_attr, 0, sizeof init_attr); 473 init_attr.event_handler = iser_qp_event_callback; 474 init_attr.qp_context = (void *)ib_conn; 475 init_attr.send_cq = ib_conn->comp->cq; 476 init_attr.recv_cq = ib_conn->comp->cq; 477 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 478 init_attr.cap.max_send_sge = 2; 479 init_attr.cap.max_recv_sge = 1; 480 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 481 init_attr.qp_type = IB_QPT_RC; 482 483 if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) { 484 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 485 iser_conn->max_cmds = 486 ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS); 487 } else { 488 init_attr.cap.max_send_wr = dev_attr->max_qp_wr; 489 iser_conn->max_cmds = 490 ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr); 491 } 492 ISER_DBG("device %s supports max_send_wr %d", 493 device->ib_device->name, dev_attr->max_qp_wr); 494 495 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 496 if (ret) 497 goto out_err; 498 499 ib_conn->qp = ib_conn->cma_id->qp; 500 ISER_DBG("setting conn %p cma_id %p qp %p", 501 ib_conn, ib_conn->cma_id, 502 ib_conn->cma_id->qp); 503 504 return (ret); 505 506 out_err: 507 mtx_lock(&ig.connlist_mutex); 508 ib_conn->comp->active_qps--; 509 mtx_unlock(&ig.connlist_mutex); 510 ISER_ERR("unable to alloc mem or create resource, err %d", ret); 511 512 return (ret); 513 } 514 515 /** 516 * based on the resolved device node GUID see if there already allocated 517 * device for this device. If there's no such, create one. 518 */ 519 static struct iser_device * 520 iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 521 { 522 struct iser_device *device; 523 524 sx_xlock(&ig.device_list_mutex); 525 526 list_for_each_entry(device, &ig.device_list, ig_list) 527 /* find if there's a match using the node GUID */ 528 if (device->ib_device->node_guid == cma_id->device->node_guid) 529 goto inc_refcnt; 530 531 device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO); 532 if (device == NULL) 533 goto out; 534 535 /* assign this device to the device */ 536 device->ib_device = cma_id->device; 537 /* init the device and link it into ig device list */ 538 if (iser_create_device_ib_res(device)) { 539 free(device, M_ISER_VERBS); 540 device = NULL; 541 goto out; 542 } 543 list_add(&device->ig_list, &ig.device_list); 544 545 inc_refcnt: 546 device->refcount++; 547 ISER_INFO("device %p refcount %d", device, device->refcount); 548 out: 549 sx_xunlock(&ig.device_list_mutex); 550 return (device); 551 } 552 553 /* if there's no demand for this device, release it */ 554 static void 555 iser_device_try_release(struct iser_device *device) 556 { 557 sx_xlock(&ig.device_list_mutex); 558 device->refcount--; 559 ISER_INFO("device %p refcount %d", device, device->refcount); 560 if (!device->refcount) { 561 iser_free_device_ib_res(device); 562 list_del(&device->ig_list); 563 free(device, M_ISER_VERBS); 564 device = NULL; 565 } 566 sx_xunlock(&ig.device_list_mutex); 567 } 568 569 /** 570 * Called with state mutex held 571 **/ 572 static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, 573 enum iser_conn_state comp, 574 enum iser_conn_state exch) 575 { 576 int ret; 577 578 ret = (iser_conn->state == comp); 579 if (ret) 580 iser_conn->state = exch; 581 582 return ret; 583 } 584 585 /** 586 * iser_free_ib_conn_res - release IB related resources 587 * @iser_conn: iser connection struct 588 * @destroy: indicator if we need to try to release the 589 * iser device and memory regoins pool (only iscsi 590 * shutdown and DEVICE_REMOVAL will use this). 591 * 592 * This routine is called with the iser state mutex held 593 * so the cm_id removal is out of here. It is Safe to 594 * be invoked multiple times. 595 */ 596 void 597 iser_free_ib_conn_res(struct iser_conn *iser_conn, 598 bool destroy) 599 { 600 struct ib_conn *ib_conn = &iser_conn->ib_conn; 601 struct iser_device *device = ib_conn->device; 602 603 ISER_INFO("freeing conn %p cma_id %p qp %p", 604 iser_conn, ib_conn->cma_id, ib_conn->qp); 605 606 if (ib_conn->qp != NULL) { 607 mtx_lock(&ig.connlist_mutex); 608 ib_conn->comp->active_qps--; 609 mtx_unlock(&ig.connlist_mutex); 610 rdma_destroy_qp(ib_conn->cma_id); 611 ib_conn->qp = NULL; 612 } 613 614 if (destroy) { 615 if (iser_conn->login_buf) 616 iser_free_login_buf(iser_conn); 617 618 if (iser_conn->rx_descs) 619 iser_free_rx_descriptors(iser_conn); 620 621 if (device != NULL) { 622 iser_device_try_release(device); 623 ib_conn->device = NULL; 624 } 625 } 626 } 627 628 /** 629 * triggers start of the disconnect procedures and wait for them to be done 630 * Called with state mutex held 631 */ 632 int 633 iser_conn_terminate(struct iser_conn *iser_conn) 634 { 635 struct ib_conn *ib_conn = &iser_conn->ib_conn; 636 struct ib_send_wr *bad_send_wr; 637 struct ib_recv_wr *bad_recv_wr; 638 int err = 0; 639 640 /* terminate the iser conn only if the conn state is UP */ 641 if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, 642 ISER_CONN_TERMINATING)) 643 return (0); 644 645 ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state); 646 647 if (ib_conn->qp == NULL) { 648 /* HOW can this be??? */ 649 ISER_WARN("qp wasn't created"); 650 return (1); 651 } 652 653 /* 654 * Todo: This is a temporary workaround. 655 * We serialize the connection closure using global lock in order to 656 * receive all posted beacons completions. 657 * Without Serialization, in case we open many connections (QPs) on 658 * the same CQ, we might miss beacons because of missing interrupts. 659 */ 660 sx_xlock(&ig.close_conns_mutex); 661 662 /* 663 * In case we didn't already clean up the cma_id (peer initiated 664 * a disconnection), we need to Cause the CMA to change the QP 665 * state to ERROR. 666 */ 667 if (ib_conn->cma_id) { 668 err = rdma_disconnect(ib_conn->cma_id); 669 if (err) 670 ISER_ERR("Failed to disconnect, conn: 0x%p err %d", 671 iser_conn, err); 672 673 mtx_lock(&ib_conn->beacon.flush_lock); 674 memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr)); 675 ib_conn->beacon.send.wr_id = ISER_BEACON_WRID; 676 ib_conn->beacon.send.opcode = IB_WR_SEND; 677 /* post an indication that all send flush errors were consumed */ 678 err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr); 679 if (err) { 680 ISER_ERR("conn %p failed to post send_beacon", ib_conn); 681 mtx_unlock(&ib_conn->beacon.flush_lock); 682 goto out; 683 } 684 685 ISER_DBG("before send cv_wait: %p", iser_conn); 686 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock); 687 ISER_DBG("after send cv_wait: %p", iser_conn); 688 689 memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr)); 690 ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID; 691 /* post an indication that all recv flush errors were consumed */ 692 err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr); 693 if (err) { 694 ISER_ERR("conn %p failed to post recv_beacon", ib_conn); 695 mtx_unlock(&ib_conn->beacon.flush_lock); 696 goto out; 697 } 698 699 ISER_DBG("before recv cv_wait: %p", iser_conn); 700 cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock); 701 mtx_unlock(&ib_conn->beacon.flush_lock); 702 ISER_DBG("after recv cv_wait: %p", iser_conn); 703 } 704 out: 705 sx_xunlock(&ig.close_conns_mutex); 706 return (1); 707 } 708 709 /** 710 * Called with state mutex held 711 **/ 712 static void 713 iser_connect_error(struct rdma_cm_id *cma_id) 714 { 715 struct iser_conn *iser_conn; 716 717 iser_conn = cma_id->context; 718 719 ISER_ERR("conn %p", iser_conn); 720 721 iser_conn->state = ISER_CONN_TERMINATING; 722 723 cv_signal(&iser_conn->up_cv); 724 } 725 726 /** 727 * Called with state mutex held 728 **/ 729 static void 730 iser_addr_handler(struct rdma_cm_id *cma_id) 731 { 732 struct iser_device *device; 733 struct iser_conn *iser_conn; 734 struct ib_conn *ib_conn; 735 int ret; 736 737 iser_conn = cma_id->context; 738 739 ib_conn = &iser_conn->ib_conn; 740 device = iser_device_find_by_ib_device(cma_id); 741 if (!device) { 742 ISER_ERR("conn %p device lookup/creation failed", 743 iser_conn); 744 iser_connect_error(cma_id); 745 return; 746 } 747 748 ib_conn->device = device; 749 750 ret = rdma_resolve_route(cma_id, 1000); 751 if (ret) { 752 ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret); 753 iser_connect_error(cma_id); 754 return; 755 } 756 } 757 758 /** 759 * Called with state mutex held 760 **/ 761 static void 762 iser_route_handler(struct rdma_cm_id *cma_id) 763 { 764 struct rdma_conn_param conn_param; 765 int ret; 766 struct iser_cm_hdr req_hdr; 767 struct iser_conn *iser_conn = cma_id->context; 768 struct ib_conn *ib_conn = &iser_conn->ib_conn; 769 struct iser_device *device = ib_conn->device; 770 771 ret = iser_create_ib_conn_res(ib_conn); 772 if (ret) 773 goto failure; 774 775 memset(&conn_param, 0, sizeof conn_param); 776 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; 777 conn_param.retry_count = 7; 778 conn_param.rnr_retry_count = 6; 779 /* 780 * Initiaotr depth should not be set, but in order to compat 781 * with old targets, we keep this value set. 782 */ 783 conn_param.initiator_depth = 1; 784 785 memset(&req_hdr, 0, sizeof(req_hdr)); 786 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 787 ISER_SEND_W_INV_NOT_SUPPORTED); 788 conn_param.private_data = (void *)&req_hdr; 789 conn_param.private_data_len = sizeof(struct iser_cm_hdr); 790 791 ret = rdma_connect(cma_id, &conn_param); 792 if (ret) { 793 ISER_ERR("conn %p failure connecting: %d", iser_conn, ret); 794 goto failure; 795 } 796 797 return; 798 failure: 799 iser_connect_error(cma_id); 800 } 801 802 /** 803 * Called with state mutex held 804 **/ 805 static void 806 iser_connected_handler(struct rdma_cm_id *cma_id) 807 { 808 struct iser_conn *iser_conn; 809 struct ib_qp_attr attr; 810 struct ib_qp_init_attr init_attr; 811 812 iser_conn = cma_id->context; 813 814 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); 815 816 ISER_INFO("remote qpn:%x my qpn:%x", 817 attr.dest_qp_num, cma_id->qp->qp_num); 818 819 iser_conn->state = ISER_CONN_UP; 820 821 cv_signal(&iser_conn->up_cv); 822 } 823 824 /** 825 * Called with state mutex held 826 **/ 827 static void 828 iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy) 829 { 830 struct iser_conn *iser_conn = cma_id->context; 831 832 if (iser_conn_terminate(iser_conn)) 833 iser_conn->icl_conn.ic_error(&iser_conn->icl_conn); 834 835 } 836 837 int 838 iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 839 { 840 struct iser_conn *iser_conn; 841 int ret = 0; 842 843 iser_conn = cma_id->context; 844 ISER_INFO("event %d status %d conn %p id %p", 845 event->event, event->status, cma_id->context, cma_id); 846 847 sx_xlock(&iser_conn->state_mutex); 848 switch (event->event) { 849 case RDMA_CM_EVENT_ADDR_RESOLVED: 850 iser_addr_handler(cma_id); 851 break; 852 case RDMA_CM_EVENT_ROUTE_RESOLVED: 853 iser_route_handler(cma_id); 854 break; 855 case RDMA_CM_EVENT_ESTABLISHED: 856 iser_connected_handler(cma_id); 857 break; 858 case RDMA_CM_EVENT_ADDR_ERROR: 859 case RDMA_CM_EVENT_ROUTE_ERROR: 860 case RDMA_CM_EVENT_CONNECT_ERROR: 861 case RDMA_CM_EVENT_UNREACHABLE: 862 case RDMA_CM_EVENT_REJECTED: 863 iser_connect_error(cma_id); 864 break; 865 case RDMA_CM_EVENT_DISCONNECTED: 866 case RDMA_CM_EVENT_ADDR_CHANGE: 867 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 868 iser_cleanup_handler(cma_id, false); 869 break; 870 default: 871 ISER_ERR("Unexpected RDMA CM event (%d)", event->event); 872 break; 873 } 874 sx_xunlock(&iser_conn->state_mutex); 875 876 return (ret); 877 } 878 879 int 880 iser_post_recvl(struct iser_conn *iser_conn) 881 { 882 struct ib_recv_wr rx_wr, *rx_wr_failed; 883 struct ib_conn *ib_conn = &iser_conn->ib_conn; 884 struct ib_sge sge; 885 int ib_ret; 886 887 sge.addr = iser_conn->login_resp_dma; 888 sge.length = ISER_RX_LOGIN_SIZE; 889 sge.lkey = ib_conn->device->mr->lkey; 890 891 rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; 892 rx_wr.sg_list = &sge; 893 rx_wr.num_sge = 1; 894 rx_wr.next = NULL; 895 896 ib_conn->post_recv_buf_count++; 897 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 898 if (ib_ret) { 899 ISER_ERR("ib_post_recv failed ret=%d", ib_ret); 900 ib_conn->post_recv_buf_count--; 901 } 902 903 return (ib_ret); 904 } 905 906 int 907 iser_post_recvm(struct iser_conn *iser_conn, int count) 908 { 909 struct ib_recv_wr *rx_wr, *rx_wr_failed; 910 int i, ib_ret; 911 struct ib_conn *ib_conn = &iser_conn->ib_conn; 912 unsigned int my_rx_head = iser_conn->rx_desc_head; 913 struct iser_rx_desc *rx_desc; 914 915 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 916 rx_desc = &iser_conn->rx_descs[my_rx_head]; 917 rx_wr->wr_id = (uintptr_t)rx_desc; 918 rx_wr->sg_list = &rx_desc->rx_sg; 919 rx_wr->num_sge = 1; 920 rx_wr->next = rx_wr + 1; 921 my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos; 922 } 923 924 rx_wr--; 925 rx_wr->next = NULL; /* mark end of work requests list */ 926 927 ib_conn->post_recv_buf_count += count; 928 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 929 if (ib_ret) { 930 ISER_ERR("ib_post_recv failed ret=%d", ib_ret); 931 ib_conn->post_recv_buf_count -= count; 932 } else 933 iser_conn->rx_desc_head = my_rx_head; 934 935 return (ib_ret); 936 } 937 938 /** 939 * iser_start_send - Initiate a Send DTO operation 940 * 941 * returns 0 on success, -1 on failure 942 */ 943 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, 944 bool signal) 945 { 946 int ib_ret; 947 struct ib_send_wr send_wr, *send_wr_failed; 948 949 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 950 tx_desc->dma_addr, ISER_HEADERS_LEN, 951 DMA_TO_DEVICE); 952 953 send_wr.next = NULL; 954 send_wr.wr_id = (uintptr_t)tx_desc; 955 send_wr.sg_list = tx_desc->tx_sg; 956 send_wr.num_sge = tx_desc->num_sge; 957 send_wr.opcode = IB_WR_SEND; 958 send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0; 959 960 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 961 if (ib_ret) 962 ISER_ERR("ib_post_send failed, ret:%d", ib_ret); 963 964 return (ib_ret); 965 } 966