/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_CQ_LEN	(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_CQ_LEN	(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d\n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n", cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		 event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
68 * 69 * returns 0 on success, -1 on failure 70 */ 71 static int iser_create_device_ib_res(struct iser_device *device) 72 { 73 device->pd = ib_alloc_pd(device->ib_device); 74 if (IS_ERR(device->pd)) 75 goto pd_err; 76 77 device->rx_cq = ib_create_cq(device->ib_device, 78 iser_cq_callback, 79 iser_cq_event_callback, 80 (void *)device, 81 ISER_MAX_RX_CQ_LEN, 0); 82 if (IS_ERR(device->rx_cq)) 83 goto rx_cq_err; 84 85 device->tx_cq = ib_create_cq(device->ib_device, 86 NULL, iser_cq_event_callback, 87 (void *)device, 88 ISER_MAX_TX_CQ_LEN, 0); 89 90 if (IS_ERR(device->tx_cq)) 91 goto tx_cq_err; 92 93 if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP)) 94 goto cq_arm_err; 95 96 tasklet_init(&device->cq_tasklet, 97 iser_cq_tasklet_fn, 98 (unsigned long)device); 99 100 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 101 IB_ACCESS_REMOTE_WRITE | 102 IB_ACCESS_REMOTE_READ); 103 if (IS_ERR(device->mr)) 104 goto dma_mr_err; 105 106 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 107 iser_event_handler); 108 if (ib_register_event_handler(&device->event_handler)) 109 goto handler_err; 110 111 return 0; 112 113 handler_err: 114 ib_dereg_mr(device->mr); 115 dma_mr_err: 116 tasklet_kill(&device->cq_tasklet); 117 cq_arm_err: 118 ib_destroy_cq(device->tx_cq); 119 tx_cq_err: 120 ib_destroy_cq(device->rx_cq); 121 rx_cq_err: 122 ib_dealloc_pd(device->pd); 123 pd_err: 124 iser_err("failed to allocate an IB resource\n"); 125 return -1; 126 } 127 128 /** 129 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 130 * CQ and PD created with the device associated with the adapator. 131 */ 132 static void iser_free_device_ib_res(struct iser_device *device) 133 { 134 BUG_ON(device->mr == NULL); 135 136 tasklet_kill(&device->cq_tasklet); 137 (void)ib_unregister_event_handler(&device->event_handler); 138 (void)ib_dereg_mr(device->mr); 139 (void)ib_destroy_cq(device->tx_cq); 140 (void)ib_destroy_cq(device->rx_cq); 141 (void)ib_dealloc_pd(device->pd); 142 143 device->mr = NULL; 144 device->tx_cq = NULL; 145 device->rx_cq = NULL; 146 device->pd = NULL; 147 } 148 149 /** 150 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP) 151 * 152 * returns 0 on success, -1 on failure 153 */ 154 static int iser_create_ib_conn_res(struct iser_conn *ib_conn) 155 { 156 struct iser_device *device; 157 struct ib_qp_init_attr init_attr; 158 int req_err, resp_err, ret = -ENOMEM; 159 struct ib_fmr_pool_param params; 160 161 BUG_ON(ib_conn->device == NULL); 162 163 device = ib_conn->device; 164 165 ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + 166 ISER_RX_LOGIN_SIZE, GFP_KERNEL); 167 if (!ib_conn->login_buf) 168 goto out_err; 169 170 ib_conn->login_req_buf = ib_conn->login_buf; 171 ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; 172 173 ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, 174 (void *)ib_conn->login_req_buf, 175 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); 176 177 ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, 178 (void *)ib_conn->login_resp_buf, 179 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); 180 181 req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma); 182 resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma); 183 184 if (req_err || resp_err) { 185 if (req_err) 186 ib_conn->login_req_dma = 0; 187 if (resp_err) 188 ib_conn->login_resp_dma = 0; 189 goto out_err; 190 } 191 192 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 193 (sizeof(u64) * 
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec)
		goto out_err;

	ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);

	params.page_shift = SHIFT_4K;
	/* when the first/last SG element are not start/end *
	 * page aligned, the map would be of N+1 pages */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands *
	 * the ML is expected to queue, watermark for unmap at 50% */
	params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache = 0;
	params.flush_function = NULL;
	params.access = (IB_ACCESS_LOCAL_WRITE |
			 IB_ACCESS_REMOTE_WRITE |
			 IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	if (IS_ERR(ib_conn->fmr_pool)) {
		ret = PTR_ERR(ib_conn->fmr_pool);
		ib_conn->fmr_pool = NULL;
		goto out_err;
	}

	memset(&init_attr, 0, sizeof init_attr);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context = (void *)ib_conn;
	init_attr.send_cq = device->tx_cq;
	init_attr.recv_cq = device->rx_cq;
	init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}

/**
 * releases the FMR pool, QP and CMA ID objects, returns 0 on success,
 * -1 on failure
 */
static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
{
	BUG_ON(ib_conn == NULL);

	iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
		 ib_conn, ib_conn->cma_id,
		 ib_conn->fmr_pool, ib_conn->qp);

	/* qp is created only once both addr & route are resolved */
	if (ib_conn->fmr_pool != NULL)
		ib_destroy_fmr_pool(ib_conn->fmr_pool);

	if (ib_conn->qp != NULL)
		rdma_destroy_qp(ib_conn->cma_id);

	/* if in cma handler context, the caller acts so that the cma destroys the id */
	if (ib_conn->cma_id != NULL && can_destroy_id)
		rdma_destroy_id(ib_conn->cma_id);

	ib_conn->fmr_pool = NULL;
	ib_conn->qp = NULL;
	ib_conn->cma_id = NULL;
	kfree(ib_conn->page_vec);

	if (ib_conn->login_buf) {
		if (ib_conn->login_req_dma)
			ib_dma_unmap_single(ib_conn->device->ib_device,
					    ib_conn->login_req_dma,
					    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
		if (ib_conn->login_resp_dma)
			ib_dma_unmap_single(ib_conn->device->ib_device,
					    ib_conn->login_resp_dma,
					    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
		kfree(ib_conn->login_buf);
	}

	return 0;
}

/**
 * based on the resolved device node GUID, see if there is an already
 * allocated device for this device. If there isn't, create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	mutex_lock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = kzalloc(sizeof *device, GFP_KERNEL);
	if (device == NULL)
		goto out;

	/* assign this IB device to the iser device */
	device->ib_device = cma_id->device;
	/* init the device and link it into ig device list */
	if (iser_create_device_ib_res(device)) {
		kfree(device);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
out:
	mutex_unlock(&ig.device_list_mutex);
	return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
	mutex_lock(&ig.device_list_mutex);
	device->refcount--;
	iser_err("device %p refcount %d\n", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		kfree(device);
	}
	mutex_unlock(&ig.device_list_mutex);
}

static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
				     enum iser_ib_conn_state comp,
				     enum iser_ib_conn_state exch)
{
	int ret;

	spin_lock_bh(&ib_conn->lock);
	if ((ret = (ib_conn->state == comp)))
		ib_conn->state = exch;
	spin_unlock_bh(&ib_conn->lock);
	return ret;
}

/**
 * Frees all conn objects and deallocs conn descriptor
 */
static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
{
	struct iser_device *device = ib_conn->device;

	BUG_ON(ib_conn->state != ISER_CONN_DOWN);

	mutex_lock(&ig.connlist_mutex);
	list_del(&ib_conn->conn_list);
	mutex_unlock(&ig.connlist_mutex);
	iser_free_rx_descriptors(ib_conn);
	iser_free_ib_conn_res(ib_conn, can_destroy_id);
	ib_conn->device = NULL;
	/* on EVENT_ADDR_ERROR there's no device yet for this conn */
	if (device != NULL)
		iser_device_try_release(device);
	iscsi_destroy_endpoint(ib_conn->ep);
}

void iser_conn_get(struct iser_conn *ib_conn)
{
	atomic_inc(&ib_conn->refcount);
}

int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
{
	if (atomic_dec_and_test(&ib_conn->refcount)) {
		iser_conn_release(ib_conn, can_destroy_id);
		return 1;
	}
	return 0;
}

/**
 * triggers the start of the disconnect procedures and waits for them to be done
 */
void iser_conn_terminate(struct iser_conn *ib_conn)
{
	int err = 0;

	/* change the ib conn state only if the conn is UP, however always call
	 * rdma_disconnect since this is the only way to cause the CMA to change
	 * the QP state to ERROR
	 */

	iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
	err = rdma_disconnect(ib_conn->cma_id);
	if (err)
		iser_err("Failed to disconnect, conn: 0x%p err %d\n",
			 ib_conn, err);

	wait_event_interruptible(ib_conn->wait,
				 ib_conn->state == ISER_CONN_DOWN);

	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
}

static int iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;
	ib_conn = (struct iser_conn *)cma_id->context;

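	/* the CM exchange failed before the connection came up: mark the
	 * connection DOWN, wake anyone waiting in iser_connect() and drop
	 * the reference held for the cma_id */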
	ib_conn->state = ISER_CONN_DOWN;
	wake_up_interruptible(&ib_conn->wait);
	return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
}

static int iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *ib_conn;
	int ret;

	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		iser_err("device lookup/creation failed\n");
		return iser_connect_error(cma_id);
	}

	ib_conn = (struct iser_conn *)cma_id->context;
	ib_conn->device = device;

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		iser_err("resolve route failed: %d\n", ret);
		return iser_connect_error(cma_id);
	}

	return 0;
}

static int iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;

	ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 4;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 7;
	conn_param.rnr_retry_count = 6;

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		iser_err("failure connecting: %d\n", ret);
		goto failure;
	}

	return 0;
failure:
	return iser_connect_error(cma_id);
}

static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;
	ib_conn->state = ISER_CONN_UP;
	wake_up_interruptible(&ib_conn->wait);
}

static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;
	int ret;

	ib_conn = (struct iser_conn *)cma_id->context;

	/* getting here when the state is UP means that the conn is being *
	 * terminated asynchronously from the iSCSI layer's perspective.  */
	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
				      ISER_CONN_TERMINATING))
		iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
				   ISCSI_ERR_CONN_FAILED);

	/* Complete the termination process if no posts are pending */
	if (ib_conn->post_recv_buf_count == 0 &&
	    (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
		ib_conn->state = ISER_CONN_DOWN;
		wake_up_interruptible(&ib_conn->wait);
	}

	ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
	return ret;
}

static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	int ret = 0;

	iser_err("event %d status %d conn %p id %p\n",
		 event->event, event->status, cma_id->context, cma_id);

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		ret = iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ret = iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		ret = iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_ADDR_CHANGE:
		ret = iser_disconnected_handler(cma_id);
		break;
	default:
		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
		break;
	}
	return ret;
}

void iser_conn_init(struct iser_conn *ib_conn)
{
	ib_conn->state = ISER_CONN_INIT;
	init_waitqueue_head(&ib_conn->wait);
	ib_conn->post_recv_buf_count = 0;
	atomic_set(&ib_conn->post_send_buf_count, 0);
	atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
	INIT_LIST_HEAD(&ib_conn->conn_list);
	spin_lock_init(&ib_conn->lock);
}

/**
 * starts the process of connecting to the target
 * sleeps until the connection is established or rejected
 */
int iser_connect(struct iser_conn *ib_conn,
		 struct sockaddr_in *src_addr,
		 struct sockaddr_in *dst_addr,
		 int non_blocking)
{
	struct sockaddr *src, *dst;
	int err = 0;

	sprintf(ib_conn->name, "%pI4:%d",
		&dst_addr->sin_addr.s_addr, dst_addr->sin_port);

	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	iser_err("connecting to: %pI4, port 0x%x\n",
		 &dst_addr->sin_addr, dst_addr->sin_port);

	ib_conn->state = ISER_CONN_PENDING;

	iser_conn_get(ib_conn); /* ref ib conn's cma id */
	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
					 (void *)ib_conn,
					 RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = PTR_ERR(ib_conn->cma_id);
		iser_err("rdma_create_id failed: %d\n", err);
		goto id_failure;
	}

	src = (struct sockaddr *)src_addr;
	dst = (struct sockaddr *)dst_addr;
	err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
	if (err) {
		iser_err("rdma_resolve_addr failed: %d\n", err);
		goto addr_failure;
	}

	if (!non_blocking) {
		wait_event_interruptible(ib_conn->wait,
					 (ib_conn->state != ISER_CONN_PENDING));

		if (ib_conn->state != ISER_CONN_UP) {
			err = -EIO;
			goto connect_failure;
		}
	}

	mutex_lock(&ig.connlist_mutex);
	list_add(&ib_conn->conn_list, &ig.connlist);
	mutex_unlock(&ig.connlist_mutex);
	return 0;

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	ib_conn->state = ISER_CONN_DOWN;
	iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
connect_failure:
	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
	return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct iser_conn *ib_conn,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_pool_fmr *mem;
	u64 io_addr;
	u64 *page_list;
	int status;

	page_list = page_vec->pages;
	io_addr = page_list[0];

	mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
				   page_list,
				   page_vec->length,
				   io_addr);

	if (IS_ERR(mem)) {
		status = (int)PTR_ERR(mem);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
		return status;
	}

	mem_reg->lkey = mem->fmr->lkey;
	mem_reg->rkey = mem->fmr->rkey;
	mem_reg->len = page_vec->length * SIZE_4K;
	mem_reg->va = io_addr;
	mem_reg->is_fmr = 1;
	mem_reg->mem_h = (void *)mem;

	mem_reg->va += page_vec->offset;
	mem_reg->len = page_vec->data_size;

	iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
		 "entry[0]: (0x%08lx,%ld)] -> "
		 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
		 page_vec, page_vec->length,
		 (unsigned long)page_vec->pages[0],
		 (unsigned long)page_vec->data_size,
		 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
		 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
	return 0;
}

/**
 * Unregister (previously registered) memory.
 */
void iser_unreg_mem(struct iser_mem_reg *reg)
{
	int ret;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

int iser_post_recvl(struct iser_conn *ib_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_sge sge;
	int ib_ret;

	sge.addr = ib_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey = ib_conn->device->mr->lkey;

	rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count--;
	}
	return ib_ret;
}

int iser_post_recvm(struct iser_conn *ib_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	unsigned int my_rx_head = ib_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc = &ib_conn->rx_descs[my_rx_head];
		rx_wr->wr_id = (unsigned long)rx_desc;
		rx_wr->sg_list = &rx_desc->rx_sg;
		rx_wr->num_sge = 1;
		rx_wr->next = rx_wr + 1;
		my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		ib_conn->rx_desc_head = my_rx_head;
	return ib_ret;
}


/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, -1 on failure
 */
int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
	int ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	send_wr.next = NULL;
	send_wr.wr_id = (unsigned long)tx_desc;
	send_wr.sg_list = tx_desc->tx_sg;
	send_wr.num_sge = tx_desc->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	atomic_inc(&ib_conn->post_send_buf_count);

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_send failed, ret:%d\n", ib_ret);
		atomic_dec(&ib_conn->post_send_buf_count);
	}
	return ib_ret;
}

static void iser_handle_comp_error(struct iser_tx_desc *desc,
				   struct iser_conn *ib_conn)
{
	if (desc && desc->type == ISCSI_TX_DATAOUT)
		kmem_cache_free(ig.desc_cache, desc);

	if (ib_conn->post_recv_buf_count == 0 &&
	    atomic_read(&ib_conn->post_send_buf_count) == 0) {
		/* getting here when the state is UP means that the conn is *
		 * being terminated asynchronously from the iSCSI layer's  *
		 * perspective.                                             */
		if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
					      ISER_CONN_TERMINATING))
			iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);

		/* no more non-completed posts to the QP, complete the
		 * termination process without worrying about the disconnect event */
		ib_conn->state = ISER_CONN_DOWN;
		wake_up_interruptible(&ib_conn->wait);
	}
}

static int iser_drain_tx_cq(struct iser_device *device)
{
	struct ib_cq *cq = device->tx_cq;
	struct ib_wc wc;
	struct iser_tx_desc *tx_desc;
	struct iser_conn *ib_conn;
	int completed_tx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id;
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_SEND)
				iser_snd_completion(tx_desc, ib_conn);
			else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_SEND, wc.opcode);
		} else {
			iser_err("tx id %llx status %d vend_err %x\n",
				 wc.wr_id, wc.status, wc.vendor_err);
			atomic_dec(&ib_conn->post_send_buf_count);
			iser_handle_comp_error(tx_desc, ib_conn);
		}
		completed_tx++;
	}
	return completed_tx;
}


static void iser_cq_tasklet_fn(unsigned long data)
{
	struct iser_device *device = (struct iser_device *)data;
	struct ib_cq *cq = device->rx_cq;
	struct ib_wc wc;
	struct iser_rx_desc *desc;
	unsigned long xfer_len;
	struct iser_conn *ib_conn;
	int completed_tx, completed_rx;
	completed_tx = completed_rx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
		BUG_ON(desc == NULL);
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_RECV) {
				xfer_len = (unsigned long)wc.byte_len;
				iser_rcv_completion(desc, xfer_len, ib_conn);
			} else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_RECV, wc.opcode);
		} else {
			if (wc.status != IB_WC_WR_FLUSH_ERR)
				iser_err("rx id %llx status %d vend_err %x\n",
					 wc.wr_id, wc.status, wc.vendor_err);
			ib_conn->post_recv_buf_count--;
			iser_handle_comp_error(NULL, ib_conn);
		}
		completed_rx++;
		if (!(completed_rx & 63))
			completed_tx += iser_drain_tx_cq(device);
	}
	/* #warning "it is assumed here that arming CQ only once it's empty" *
	 * " would not cause interrupts to be missed" */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	completed_tx += iser_drain_tx_cq(device);
	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}

static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_device *device = (struct iser_device *)cq_context;

	tasklet_schedule(&device->cq_tasklet);
}