/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_VERBS, "iser_verbs", "iser verbs backend");
static int iser_cq_poll_limit = 512;

static void
iser_cq_event_callback(struct ib_event *cause, void *context)
{
    ISER_ERR("got cq event %d", cause->event);
}

static void
iser_qp_event_callback(struct ib_event *cause, void *context)
{
    ISER_ERR("got qp event %d", cause->event);
}

static void
iser_event_handler(struct ib_event_handler *handler,
    struct ib_event *event)
{
    ISER_ERR("async event %d on device %s port %d",
             event->event, event->device->name,
             event->element.port_num);
}

/**
 * is_iser_tx_desc - Indicate if the completion wr_id
 *     is a TX descriptor or not.
 * @iser_conn: iser connection
 * @wr_id: completion WR identifier
 *
 * Since we cannot rely on the wc opcode in FLUSH errors
 * we must work around it by checking if the wr_id address
 * falls in the iser connection rx_descs buffer. If so
 * it is an RX descriptor, otherwise it is a TX.
 */
static inline bool
is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
{
    void *start = iser_conn->rx_descs;
    u64 len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
    void *end = (void *)((uintptr_t)start + (uintptr_t)len);

    if (start) {
        if (wr_id >= start && wr_id < end)
            return false;
    } else {
        return ((uintptr_t)wr_id != (uintptr_t)iser_conn->login_resp_buf);
    }

    return true;
}

/**
 * iser_handle_comp_error() - Handle error completion
 * @ib_conn: connection RDMA resources
 * @wc: work completion
 *
 * Notes: Update post_recv_buf_count in case of recv error completion.
 * For non-FLUSH error completion we should also notify the iscsi layer that
 * the connection has failed (in case we passed bind stage).
 */
static void
iser_handle_comp_error(struct ib_conn *ib_conn,
    struct ib_wc *wc)
{
    void *wr_id = (void *)(uintptr_t)wc->wr_id;
    struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
                                               ib_conn);

    if (is_iser_tx_desc(iser_conn, wr_id)) {
        ISER_DBG("conn %p got send comp error", iser_conn);
    } else {
        ISER_DBG("conn %p got recv comp error", iser_conn);
        ib_conn->post_recv_buf_count--;
    }
    if (wc->status != IB_WC_WR_FLUSH_ERR)
        iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
}

/**
 * iser_handle_wc - handle a single work completion
 * @wc: work completion
 *
 * Soft-IRQ context, work completion can be either
 * SEND or RECV, and can turn out successful or
 * with error (or flush error).
 */
static void
iser_handle_wc(struct ib_wc *wc)
{
    struct ib_conn *ib_conn;
    struct iser_tx_desc *tx_desc;
    struct iser_rx_desc *rx_desc;

    ib_conn = wc->qp->qp_context;
    if (likely(wc->status == IB_WC_SUCCESS)) {
        if (wc->opcode == IB_WC_RECV) {
            rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
            iser_rcv_completion(rx_desc, wc->byte_len,
                                ib_conn);
        } else if (wc->opcode == IB_WC_SEND) {
            tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
            iser_snd_completion(tx_desc, ib_conn);
        } else {
            ISER_ERR("Unknown wc opcode %d", wc->opcode);
        }
    } else {
        struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
                                                   ib_conn);
        if (wc->status != IB_WC_WR_FLUSH_ERR) {
            ISER_ERR("conn %p wr id %llx status %d vend_err %x",
                     iser_conn, (unsigned long long)wc->wr_id,
                     wc->status, wc->vendor_err);
        } else {
            ISER_DBG("flush error: conn %p wr id %llx",
                     iser_conn, (unsigned long long)wc->wr_id);
        }

        if (wc->wr_id == ISER_BEACON_WRID) {
            /* all flush errors were consumed */
            mtx_lock(&ib_conn->beacon.flush_lock);
            ISER_DBG("conn %p got ISER_BEACON_WRID", iser_conn);
            cv_signal(&ib_conn->beacon.flush_cv);
            mtx_unlock(&ib_conn->beacon.flush_lock);
        } else {
            iser_handle_comp_error(ib_conn, wc);
        }
    }
}

static void
iser_cq_tasklet_fn(void *data, int pending)
{
    struct iser_comp *comp = (struct iser_comp *)data;
    struct ib_cq *cq = comp->cq;
    struct ib_wc *const wcs = comp->wcs;
    int completed = 0;
    int i;
    int n;

    while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
        for (i = 0; i < n; i++)
            iser_handle_wc(&wcs[i]);

        completed += n;
        if (completed >= iser_cq_poll_limit)
            break;
    }

    /*
     * It is assumed here that arming the CQ only once it is empty
     * would not cause interrupts to be missed.
     */
    ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static void
iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
    struct iser_comp *comp = cq_context;

    taskqueue_enqueue(comp->tq, &comp->task);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, 1 on failure
 */
static int
iser_create_device_ib_res(struct iser_device *device)
{
    struct ib_device *ib_dev = device->ib_device;
    int i, max_cqe;

    if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
        ISER_ERR("device %s doesn't support Fastreg, "
                 "can't register memory", device->ib_device->name);
        return (1);
    }

    device->comps_used = min(mp_ncpus, device->ib_device->num_comp_vectors);

    device->comps = malloc(device->comps_used * sizeof(*device->comps),
        M_ISER_VERBS, M_WAITOK | M_ZERO);

    max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);

    ISER_DBG("using %d CQs, device %s supports %d vectors max_cqe %d",
             device->comps_used, device->ib_device->name,
             device->ib_device->num_comp_vectors, max_cqe);

    device->pd = ib_alloc_pd(device->ib_device, IB_PD_UNSAFE_GLOBAL_RKEY);
    if (IS_ERR(device->pd))
        goto pd_err;

    for (i = 0; i < device->comps_used; i++) {
        struct iser_comp *comp = &device->comps[i];
        struct ib_cq_init_attr cq_attr = {
            .cqe = max_cqe,
            .comp_vector = i,
        };

        comp->device = device;
        comp->cq = ib_create_cq(device->ib_device,
                                iser_cq_callback,
                                iser_cq_event_callback,
                                (void *)comp,
                                &cq_attr);
        if (IS_ERR(comp->cq)) {
            comp->cq = NULL;
            goto cq_err;
        }

        if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
            goto cq_err;

        TASK_INIT(&comp->task, 0, iser_cq_tasklet_fn, comp);
        comp->tq = taskqueue_create_fast("iser_taskq", M_NOWAIT,
            taskqueue_thread_enqueue, &comp->tq);
        if (!comp->tq)
            goto tq_err;
        taskqueue_start_threads(&comp->tq, 1, PI_NET, "iser taskq");
    }

    device->mr = device->pd->__internal_mr;
    if (IS_ERR(device->mr))
        goto tq_err;

    INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
                          iser_event_handler);
    if (ib_register_event_handler(&device->event_handler))
        goto tq_err;

    return (0);

tq_err:
    for (i = 0; i < device->comps_used; i++) {
        struct iser_comp *comp = &device->comps[i];
        if (comp->tq)
            taskqueue_free(comp->tq);
    }
cq_err:
    for (i = 0; i < device->comps_used; i++) {
        struct iser_comp *comp = &device->comps[i];
        if (comp->cq)
            ib_destroy_cq(comp->cq);
    }
    ib_dealloc_pd(device->pd);
pd_err:
    free(device->comps, M_ISER_VERBS);
    ISER_ERR("failed to allocate an IB resource");
    return (1);
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQ and PD created with the device associated with the adapter.
 */
static void
iser_free_device_ib_res(struct iser_device *device)
{
    int i;

    for (i = 0; i < device->comps_used; i++) {
        struct iser_comp *comp = &device->comps[i];

        taskqueue_free(comp->tq);
        ib_destroy_cq(comp->cq);
        comp->cq = NULL;
    }

    (void)ib_unregister_event_handler(&device->event_handler);
    (void)ib_dealloc_pd(device->pd);

    free(device->comps, M_ISER_VERBS);
    device->comps = NULL;

    device->mr = NULL;
    device->pd = NULL;
}

static int
iser_alloc_reg_res(struct ib_device *ib_device,
    struct ib_pd *pd,
    struct iser_reg_resources *res)
{
    int ret;

    res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, ISCSI_ISER_SG_TABLESIZE + 1);
    if (IS_ERR(res->mr)) {
        ret = -PTR_ERR(res->mr);
        ISER_ERR("Failed to allocate fast reg mr err=%d", ret);
        return (ret);
    }
    res->mr_valid = 1;

    return (0);
}

static void
iser_free_reg_res(struct iser_reg_resources *rsc)
{
    ib_dereg_mr(rsc->mr);
}

static struct fast_reg_descriptor *
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd)
{
    struct fast_reg_descriptor *desc;
    int ret;

    desc = malloc(sizeof(*desc), M_ISER_VERBS, M_WAITOK | M_ZERO);
    ret = iser_alloc_reg_res(ib_device, pd, &desc->rsc);
    if (ret) {
        ISER_ERR("failed to allocate reg_resources");
        goto err;
    }

    return (desc);
err:
    free(desc, M_ISER_VERBS);
    return (NULL);
}

/**
 * iser_create_fastreg_pool - Creates a pool of fast registration descriptors
 *
 * returns 0 on success, or errno code on failure
 */
int
iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
    struct iser_device *device = ib_conn->device;
    struct fast_reg_descriptor *desc;
    int i;

    INIT_LIST_HEAD(&ib_conn->fastreg.pool);
    ib_conn->fastreg.pool_size = 0;
    for (i = 0; i < cmds_max; i++) {
        desc = iser_create_fastreg_desc(device->ib_device, device->pd);
        if (!desc) {
            ISER_ERR("Failed to create fastreg descriptor");
            goto err;
        }

        list_add_tail(&desc->list, &ib_conn->fastreg.pool);
        ib_conn->fastreg.pool_size++;
    }

    return (0);

err:
    iser_free_fastreg_pool(ib_conn);
    return (ENOMEM);
}

/**
 * iser_free_fastreg_pool - releases the pool of fast registration descriptors
 */
void
iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
    struct fast_reg_descriptor *desc, *tmp;
    int i = 0;

    if (list_empty(&ib_conn->fastreg.pool))
        return;

    ISER_DBG("freeing conn %p fr pool", ib_conn);

    list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
        list_del(&desc->list);
        iser_free_reg_res(&desc->rsc);
        free(desc, M_ISER_VERBS);
        ++i;
    }

    if (i < ib_conn->fastreg.pool_size)
        ISER_WARN("pool still has %d regions registered",
                  ib_conn->fastreg.pool_size - i);
}

/**
 * iser_create_ib_conn_res - creates the Queue-Pair (QP)
 *
 * returns 0 on success, non-zero on failure
 */
static int
iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
    struct iser_conn *iser_conn;
    struct iser_device *device;
    struct ib_device_attr *dev_attr;
    struct ib_qp_init_attr init_attr;
    int index, min_index = 0;
    int ret = -ENOMEM;

    iser_conn = container_of(ib_conn, struct iser_conn, ib_conn);
    device = ib_conn->device;
    dev_attr = &device->dev_attr;

    mtx_lock(&ig.connlist_mutex);
    /* select the CQ with the fewest active QPs */
    for (index = 0; index < device->comps_used; index++) {
        if (device->comps[index].active_qps <
            device->comps[min_index].active_qps)
            min_index = index;
    }
    ib_conn->comp = &device->comps[min_index];
    ib_conn->comp->active_qps++;
    mtx_unlock(&ig.connlist_mutex);
    ISER_INFO("cq index %d used for ib_conn %p", min_index, ib_conn);

    memset(&init_attr, 0, sizeof init_attr);
    init_attr.event_handler = iser_qp_event_callback;
    init_attr.qp_context = (void *)ib_conn;
    init_attr.send_cq = ib_conn->comp->cq;
    init_attr.recv_cq = ib_conn->comp->cq;
    init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
    init_attr.cap.max_send_sge = 2;
    init_attr.cap.max_recv_sge = 1;
    init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
    init_attr.qp_type = IB_QPT_RC;

    if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
        init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
        iser_conn->max_cmds =
            ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
    } else {
        init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
        iser_conn->max_cmds =
            ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
    }
    ISER_DBG("device %s supports max_send_wr %d",
             device->ib_device->name, dev_attr->max_qp_wr);

    ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
    if (ret)
        goto out_err;

    ib_conn->qp = ib_conn->cma_id->qp;
    ISER_DBG("setting conn %p cma_id %p qp %p",
             ib_conn, ib_conn->cma_id,
             ib_conn->cma_id->qp);

    return (ret);

out_err:
    mtx_lock(&ig.connlist_mutex);
    ib_conn->comp->active_qps--;
    mtx_unlock(&ig.connlist_mutex);
    ISER_ERR("unable to alloc mem or create resource, err %d", ret);

    return (ret);
}

/**
 * Based on the resolved device node GUID, see if there is already an
 * allocated device for this node. If there is no such device, create one.
 */
static struct iser_device *
iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
    struct iser_device *device;

    sx_xlock(&ig.device_list_mutex);

    list_for_each_entry(device, &ig.device_list, ig_list)
        /* find if there's a match using the node GUID */
        if (device->ib_device->node_guid == cma_id->device->node_guid)
            goto inc_refcnt;

    device = malloc(sizeof *device, M_ISER_VERBS, M_WAITOK | M_ZERO);
    /* assign the IB device to the iser device */
    device->ib_device = cma_id->device;
    /* init the device and link it into ig device list */
    if (iser_create_device_ib_res(device)) {
        free(device, M_ISER_VERBS);
        device = NULL;
        goto out;
    }
    list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
    device->refcount++;
    ISER_INFO("device %p refcount %d", device, device->refcount);
out:
    sx_xunlock(&ig.device_list_mutex);
    return (device);
}

/* if there's no demand for this device, release it */
static void
iser_device_try_release(struct iser_device *device)
{
    sx_xlock(&ig.device_list_mutex);
    device->refcount--;
    ISER_INFO("device %p refcount %d", device, device->refcount);
    if (!device->refcount) {
        iser_free_device_ib_res(device);
        list_del(&device->ig_list);
        free(device, M_ISER_VERBS);
        device = NULL;
    }
    sx_xunlock(&ig.device_list_mutex);
}

/**
 * Called with state mutex held
 **/
static int
iser_conn_state_comp_exch(struct iser_conn *iser_conn,
    enum iser_conn_state comp,
    enum iser_conn_state exch)
{
    int ret;

    ret = (iser_conn->state == comp);
    if (ret)
        iser_conn->state = exch;

    return ret;
}

/**
 * iser_free_ib_conn_res - release IB related resources
 * @iser_conn: iser connection struct
 * @destroy: indicator if we need to try to release the
 *     iser device and memory regions pool (only iscsi
 *     shutdown and DEVICE_REMOVAL will use this).
 *
 * This routine is called with the iser state mutex held
 * so the cm_id removal is out of here. It is safe to
 * be invoked multiple times.
 */
void
iser_free_ib_conn_res(struct iser_conn *iser_conn,
    bool destroy)
{
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    struct iser_device *device = ib_conn->device;

    ISER_INFO("freeing conn %p cma_id %p qp %p",
              iser_conn, ib_conn->cma_id, ib_conn->qp);

    if (ib_conn->qp != NULL) {
        mtx_lock(&ig.connlist_mutex);
        ib_conn->comp->active_qps--;
        mtx_unlock(&ig.connlist_mutex);
        rdma_destroy_qp(ib_conn->cma_id);
        ib_conn->qp = NULL;
    }

    if (destroy) {
        if (iser_conn->login_buf)
            iser_free_login_buf(iser_conn);

        if (iser_conn->rx_descs)
            iser_free_rx_descriptors(iser_conn);

        if (device != NULL) {
            iser_device_try_release(device);
            ib_conn->device = NULL;
        }
    }
}

/**
 * triggers start of the disconnect procedures and waits for them to be done
 * Called with state mutex held
 */
int
iser_conn_terminate(struct iser_conn *iser_conn)
{
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    const struct ib_send_wr *bad_send_wr;
    const struct ib_recv_wr *bad_recv_wr;
    int err = 0;

    /* terminate the iser conn only if the conn state is UP */
    if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
                                   ISER_CONN_TERMINATING))
        return (0);

    ISER_INFO("iser_conn %p state %d\n", iser_conn, iser_conn->state);

    if (ib_conn->qp == NULL) {
        /* HOW can this be??? */
        ISER_WARN("qp wasn't created");
        return (1);
    }

    /*
     * Todo: This is a temporary workaround.
     * We serialize the connection closure using a global lock in order to
     * receive all posted beacon completions.
     * Without serialization, in case we open many connections (QPs) on
     * the same CQ, we might miss beacons because of missing interrupts.
     */
    sx_xlock(&ig.close_conns_mutex);

    /*
     * In case we didn't already clean up the cma_id (peer initiated
     * a disconnection), we need to cause the CMA to change the QP
     * state to ERROR.
 */
    if (ib_conn->cma_id) {
        err = rdma_disconnect(ib_conn->cma_id);
        if (err)
            ISER_ERR("Failed to disconnect, conn: 0x%p err %d",
                     iser_conn, err);

        mtx_lock(&ib_conn->beacon.flush_lock);
        memset(&ib_conn->beacon.send, 0, sizeof(struct ib_send_wr));
        ib_conn->beacon.send.wr_id = ISER_BEACON_WRID;
        ib_conn->beacon.send.opcode = IB_WR_SEND;
        /* post an indication that all send flush errors were consumed */
        err = ib_post_send(ib_conn->qp, &ib_conn->beacon.send, &bad_send_wr);
        if (err) {
            ISER_ERR("conn %p failed to post send_beacon", ib_conn);
            mtx_unlock(&ib_conn->beacon.flush_lock);
            goto out;
        }

        ISER_DBG("before send cv_wait: %p", iser_conn);
        cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
        ISER_DBG("after send cv_wait: %p", iser_conn);

        memset(&ib_conn->beacon.recv, 0, sizeof(struct ib_recv_wr));
        ib_conn->beacon.recv.wr_id = ISER_BEACON_WRID;
        /* post an indication that all recv flush errors were consumed */
        err = ib_post_recv(ib_conn->qp, &ib_conn->beacon.recv, &bad_recv_wr);
        if (err) {
            ISER_ERR("conn %p failed to post recv_beacon", ib_conn);
            mtx_unlock(&ib_conn->beacon.flush_lock);
            goto out;
        }

        ISER_DBG("before recv cv_wait: %p", iser_conn);
        cv_wait(&ib_conn->beacon.flush_cv, &ib_conn->beacon.flush_lock);
        mtx_unlock(&ib_conn->beacon.flush_lock);
        ISER_DBG("after recv cv_wait: %p", iser_conn);
    }
out:
    sx_xunlock(&ig.close_conns_mutex);
    return (1);
}

/**
 * Called with state mutex held
 **/
static void
iser_connect_error(struct rdma_cm_id *cma_id)
{
    struct iser_conn *iser_conn;

    iser_conn = cma_id->context;

    ISER_ERR("conn %p", iser_conn);

    iser_conn->state = ISER_CONN_TERMINATING;

    cv_signal(&iser_conn->up_cv);
}

/**
 * Called with state mutex held
 **/
static void
iser_addr_handler(struct rdma_cm_id *cma_id)
{
    struct iser_device *device;
    struct iser_conn *iser_conn;
    struct ib_conn *ib_conn;
    int ret;

    iser_conn = cma_id->context;

    ib_conn = &iser_conn->ib_conn;
    device = iser_device_find_by_ib_device(cma_id);
    if (!device) {
        ISER_ERR("conn %p device lookup/creation failed",
                 iser_conn);
        iser_connect_error(cma_id);
        return;
    }

    ib_conn->device = device;

    ret = rdma_resolve_route(cma_id, 1000);
    if (ret) {
        ISER_ERR("conn %p resolve route failed: %d", iser_conn, ret);
        iser_connect_error(cma_id);
        return;
    }
}

/**
 * Called with state mutex held
 **/
static void
iser_route_handler(struct rdma_cm_id *cma_id)
{
    struct rdma_conn_param conn_param;
    int ret;
    struct iser_cm_hdr req_hdr;
    struct iser_conn *iser_conn = cma_id->context;
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    struct iser_device *device = ib_conn->device;

    ret = iser_create_ib_conn_res(ib_conn);
    if (ret)
        goto failure;

    memset(&conn_param, 0, sizeof conn_param);
    conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
    conn_param.retry_count = 7;
    conn_param.rnr_retry_count = 6;
    /*
     * Initiator depth should not be set, but in order to stay compatible
     * with old targets, we keep this value set.
     */
    conn_param.initiator_depth = 1;

    memset(&req_hdr, 0, sizeof(req_hdr));
    req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
                     ISER_SEND_W_INV_NOT_SUPPORTED);
    conn_param.private_data = (void *)&req_hdr;
    conn_param.private_data_len = sizeof(struct iser_cm_hdr);

    ret = rdma_connect(cma_id, &conn_param);
    if (ret) {
        ISER_ERR("conn %p failure connecting: %d", iser_conn, ret);
        goto failure;
    }

    return;
failure:
    iser_connect_error(cma_id);
}

/**
 * Called with state mutex held
 **/
static void
iser_connected_handler(struct rdma_cm_id *cma_id)
{
    struct iser_conn *iser_conn;
    struct ib_qp_attr attr;
    struct ib_qp_init_attr init_attr;

    iser_conn = cma_id->context;

    (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);

    ISER_INFO("remote qpn:%x my qpn:%x",
              attr.dest_qp_num, cma_id->qp->qp_num);

    iser_conn->state = ISER_CONN_UP;

    cv_signal(&iser_conn->up_cv);
}

/**
 * Called with state mutex held
 **/
static void
iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy)
{
    struct iser_conn *iser_conn = cma_id->context;

    if (iser_conn_terminate(iser_conn))
        iser_conn->icl_conn.ic_error(&iser_conn->icl_conn);
}

int
iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
    struct iser_conn *iser_conn;
    int ret = 0;

    iser_conn = cma_id->context;
    ISER_INFO("event %d status %d conn %p id %p",
              event->event, event->status, cma_id->context, cma_id);

    sx_xlock(&iser_conn->state_mutex);
    switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
        iser_addr_handler(cma_id);
        break;
    case RDMA_CM_EVENT_ROUTE_RESOLVED:
        iser_route_handler(cma_id);
        break;
    case RDMA_CM_EVENT_ESTABLISHED:
        iser_connected_handler(cma_id);
        break;
    case RDMA_CM_EVENT_ADDR_ERROR:
    case RDMA_CM_EVENT_ROUTE_ERROR:
    case RDMA_CM_EVENT_CONNECT_ERROR:
    case RDMA_CM_EVENT_UNREACHABLE:
    case RDMA_CM_EVENT_REJECTED:
        iser_connect_error(cma_id);
        break;
    case RDMA_CM_EVENT_DISCONNECTED:
    case RDMA_CM_EVENT_ADDR_CHANGE:
    case RDMA_CM_EVENT_TIMEWAIT_EXIT:
        iser_cleanup_handler(cma_id, false);
        break;
    default:
        ISER_ERR("Unexpected RDMA CM event (%d)", event->event);
        break;
    }
    sx_xunlock(&iser_conn->state_mutex);

    return (ret);
}

int
iser_post_recvl(struct iser_conn *iser_conn)
{
    const struct ib_recv_wr *rx_wr_failed;
    struct ib_recv_wr rx_wr;
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    struct ib_sge sge;
    int ib_ret;

    sge.addr = iser_conn->login_resp_dma;
    sge.length = ISER_RX_LOGIN_SIZE;
    sge.lkey = ib_conn->device->mr->lkey;

    rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
    rx_wr.sg_list = &sge;
    rx_wr.num_sge = 1;
    rx_wr.next = NULL;

    ib_conn->post_recv_buf_count++;
    ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
    if (ib_ret) {
        ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
        ib_conn->post_recv_buf_count--;
    }

    return (ib_ret);
}

int
iser_post_recvm(struct iser_conn *iser_conn, int count)
{
    const struct ib_recv_wr *rx_wr_failed;
    struct ib_recv_wr *rx_wr;
    int i, ib_ret;
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    unsigned int my_rx_head = iser_conn->rx_desc_head;
    struct iser_rx_desc *rx_desc;

    for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
        rx_desc = &iser_conn->rx_descs[my_rx_head];
        rx_wr->wr_id = (uintptr_t)rx_desc;
        rx_wr->sg_list = &rx_desc->rx_sg;
        rx_wr->num_sge = 1;
        rx_wr->next = rx_wr + 1;
        my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
    }

    rx_wr--;
    rx_wr->next = NULL; /* mark end of work requests list */

    ib_conn->post_recv_buf_count += count;
    ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
    if (ib_ret) {
        ISER_ERR("ib_post_recv failed ret=%d", ib_ret);
        ib_conn->post_recv_buf_count -= count;
    } else
        iser_conn->rx_desc_head = my_rx_head;

    return (ib_ret);
}

/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, errno code on failure
 */
int
iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
    bool signal)
{
    int ib_ret;
    const struct ib_send_wr *send_wr_failed;
    struct ib_send_wr send_wr;

    ib_dma_sync_single_for_device(ib_conn->device->ib_device,
        tx_desc->dma_addr, ISER_HEADERS_LEN,
        DMA_TO_DEVICE);

    send_wr.next = NULL;
    send_wr.wr_id = (uintptr_t)tx_desc;
    send_wr.sg_list = tx_desc->tx_sg;
    send_wr.num_sge = tx_desc->num_sge;
    send_wr.opcode = IB_WR_SEND;
    send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;

    ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
    if (ib_ret)
        ISER_ERR("ib_post_send failed, ret:%d", ib_ret);

    return (ib_ret);
}