/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t iser_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_bio_t iser_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
        KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
        KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
        KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
        KOBJMETHOD(icl_conn_pdu_append_bio, iser_conn_pdu_append_bio),
        KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
        KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
        KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
        KOBJMETHOD(icl_conn_free, iser_conn_free),
        KOBJMETHOD(icl_conn_close, iser_conn_close),
        KOBJMETHOD(icl_conn_connect, iser_conn_connect),
        KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
        KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
        KOBJMETHOD(icl_conn_pdu_get_bio, iser_conn_pdu_get_bio),
        KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
        { 0, 0 }
};
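
/*
 * The icl(4) framework dispatches per-connection operations through the
 * kobj(9) interface generated from icl_conn_if.m: the table above binds
 * each icl_conn method to its iser_ handler, and DEFINE_CLASS() below
 * publishes the class with sizeof(struct iser_conn) so that kobj_create()
 * allocates the full iser connection, not just the generic icl_conn.
 * For example, when the initiator queues a PDU on an offloaded
 * connection, the kobj-generated ICL_CONN_PDU_QUEUE() wrapper resolves,
 * for this class, to iser_conn_pdu_queue() further down in this file.
 */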
DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:       iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error-handling TMFs, so for TMFs the caller should hold the state
 * mutex to avoid dereferencing an IB device that the racing teardown
 * sequence may already have terminated.
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
        struct iser_tx_desc *tx_desc = &pdu->desc;
        struct iser_device *device = iser_conn->ib_conn.device;
        u64 dma_addr;
        int ret = 0;

        dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
            ISER_HEADERS_LEN, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
                ret = -ENOMEM;
                goto out;
        }

        tx_desc->mapped = true;
        tx_desc->dma_addr = dma_addr;
        tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
        return (ret);
}

int
iser_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request,
    struct bio *bp, size_t offset, size_t len, int flags)
{
        MPASS(!((request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
            ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
            (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
            ISCSI_BHS_OPCODE_TEXT_REQUEST));

        return (0);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        switch (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) {
        case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
        case ISCSI_BHS_OPCODE_TEXT_REQUEST:
                ISER_DBG("copy to login buff");
                memcpy(iser_conn->login_req_buf, addr, len);
                request->ip_data_len = len;
                break;
        }

        return (0);
}

void
iser_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
    size_t pdu_off, struct bio *bp, size_t bio_off, size_t len)
{
        MPASS(ip->ip_data_mbuf == NULL);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
        /* If we have received data, copy it to the upper layer's buffer. */
        if (ip->ip_data_mbuf)
                memcpy(addr, ip->ip_data_mbuf + off, len);
}
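
/*
 * Note on allocation: struct icl_iser_pdu (see icl_iser.h) embeds both
 * the generic struct icl_pdu and the iser TX descriptor, so a single
 * uma(9) zone allocation covers the PDU, its BHS and its send
 * descriptor; ip_bhs below simply points at the iSCSI header that lives
 * inside the descriptor.
 */
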
/*
 * Allocate an icl_pdu with an empty BHS, to be filled in by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
        struct icl_iser_pdu *iser_pdu;
        struct icl_pdu *ip;
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
        if (iser_pdu == NULL) {
                ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
                return (NULL);
        }

        iser_pdu->iser_conn = iser_conn;
        ip = &iser_pdu->icl_pdu;
        ip->ip_conn = ic;
        ip->ip_bhs = &iser_pdu->desc.iscsi_header;

        return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
        return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

        uma_zfree(icl_pdu_zone, iser_pdu);
}

/*
 * Decode the 24-bit big-endian DataSegmentLength field of the BHS.
 */
size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
        uint32_t len = 0;

        len += request->ip_bhs->bhs_data_segment_len[0];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[1];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[2];

        return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
        bool is_control = false;

        switch (opcode & ISCSI_OPCODE_MASK) {
        case ISCSI_BHS_OPCODE_NOP_OUT:
        case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
        case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
        case ISCSI_BHS_OPCODE_TEXT_REQUEST:
                is_control = true;
                break;
        case ISCSI_BHS_OPCODE_SCSI_COMMAND:
                is_control = false;
                break;
        default:
                ISER_ERR("unknown opcode %d", opcode);
        }

        return (is_control);
}

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
        int ret;

        if (iser_conn->state != ISER_CONN_UP)
                return;

        ret = iser_initialize_headers(iser_pdu, iser_conn);
        if (ret) {
                ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
                return;
        }

        if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
                ret = iser_send_control(iser_conn, iser_pdu);
                if (unlikely(ret))
                        ISER_ERR("Failed to send control pdu %p", iser_pdu);
        } else {
                ret = iser_send_command(iser_conn, iser_pdu);
                if (unlikely(ret))
                        ISER_ERR("Failed to send command pdu %p", iser_pdu);
        }
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
        struct iser_conn *iser_conn;
        struct icl_conn *ic;

        refcount_acquire(&icl_iser_ncons);

        iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
            M_ICL_ISER, M_WAITOK | M_ZERO);
        if (!iser_conn) {
                ISER_ERR("failed to allocate iser conn");
                refcount_release(&icl_iser_ncons);
                return (NULL);
        }

        cv_init(&iser_conn->up_cv, "iser_cv");
        sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
        mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock",
            NULL, MTX_DEF);
        cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
        mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

        ic = &iser_conn->icl_conn;
        ic->ic_lock = lock;
        ic->ic_name = name;
        ic->ic_offload = strdup("iser", M_TEMP);
        ic->ic_iser = true;
        ic->ic_unmapped = true;

        return (ic);
}
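
/*
 * Connection teardown mirrors iser_new_conn(): release the RDMA
 * resources and drop the connection from the global list first (via
 * iser_conn_release()), then destroy the synchronization objects in
 * reverse order of creation, and finally free the kobj and let the
 * module's connection count drop.
 */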
void
iser_conn_free(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        iser_conn_release(ic);
        mtx_destroy(&iser_conn->ib_conn.lock);
        cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
        mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
        sx_destroy(&iser_conn->state_mutex);
        cv_destroy(&iser_conn->up_cv);
        kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
        refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        int error = 0;

        sx_xlock(&iser_conn->state_mutex);
        if (iser_conn->state != ISER_CONN_UP) {
                error = EINVAL;
                ISER_ERR("iser_conn %p state is %d, teardown started\n",
                    iser_conn, iser_conn->state);
                goto out;
        }

        error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
        if (error)
                goto out;

        error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
        if (error)
                goto post_error;

        iser_conn->handoff_done = true;

        sx_xunlock(&iser_conn->state_mutex);
        return (error);

post_error:
        iser_free_rx_descriptors(iser_conn);
out:
        sx_xunlock(&iser_conn->state_mutex);
        return (error);
}

/**
 * Frees all connection objects and releases the IB resources.
 */
static void
iser_conn_release(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_conn *curr, *tmp;

        mtx_lock(&ig.connlist_mutex);
        /*
         * Search for the iser connection in the global list.
         * It may be absent if connection establishment failed.
         */
        list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
                if (iser_conn == curr) {
                        ISER_WARN("found iser_conn %p", iser_conn);
                        list_del(&iser_conn->conn_list);
                }
        }
        mtx_unlock(&ig.connlist_mutex);

        /*
         * Whether we are reconnecting or removing the session, we need
         * to release the IB resources (and it is safe to do so more
         * than once).
         */
        sx_xlock(&iser_conn->state_mutex);
        iser_free_ib_conn_res(iser_conn, true);
        sx_xunlock(&iser_conn->state_mutex);

        if (ib_conn->cma_id != NULL) {
                rdma_destroy_id(ib_conn->cma_id);
                ib_conn->cma_id = NULL;
        }
}
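
/*
 * A rough sketch of the connection state machine driven by the two
 * functions below and by iser_cma_handler() (state values per
 * icl_iser.h):
 *
 *      iser_conn_connect()  ->  PENDING  --(RDMA CM events)-->  UP
 *      iser_conn_close()    ->  terminate, or wake the up_cv waiter
 *                               if establishment is still PENDING
 */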
void
iser_conn_close(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        ISER_INFO("closing conn %p", iser_conn);

        sx_xlock(&iser_conn->state_mutex);
        /*
         * If the iser connection is waiting on the condition variable
         * (state PENDING) and we try to close it before connection
         * establishment completes, signal it so that the connection
         * release can proceed properly.
         */
        if (!iser_conn_terminate(iser_conn) &&
            iser_conn->state == ISER_CONN_PENDING)
                cv_signal(&iser_conn->up_cv);
        sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        int err = 0;

        iser_conn_release(ic);

        sx_xlock(&iser_conn->state_mutex);
        /* The device is known only --after-- address resolution. */
        ib_conn->device = NULL;
        iser_conn->handoff_done = false;

        iser_conn->state = ISER_CONN_PENDING;

        ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
            (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
                err = -PTR_ERR(ib_conn->cma_id);
                ISER_ERR("rdma_create_id failed: %d", err);
                goto id_failure;
        }

        err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
        if (err) {
                ISER_ERR("rdma_resolve_addr failed: %d", err);
                if (err < 0)
                        err = -err;
                goto addr_failure;
        }

        ISER_DBG("before cv_wait: %p", iser_conn);
        cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
        ISER_DBG("after cv_wait: %p", iser_conn);

        if (iser_conn->state != ISER_CONN_UP) {
                err = EIO;
                goto addr_failure;
        }

        err = iser_alloc_login_buf(iser_conn);
        if (err)
                goto addr_failure;
        sx_xunlock(&iser_conn->state_mutex);

        mtx_lock(&ig.connlist_mutex);
        list_add(&iser_conn->conn_list, &ig.connlist);
        mtx_unlock(&ig.connlist_mutex);

        return (0);

id_failure:
        ib_conn->cma_id = NULL;
addr_failure:
        sx_xunlock(&iser_conn->state_mutex);
        return (err);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

        *prvp = ip;
        iser_pdu->csio = csio;

        return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
        struct icl_pdu *ip = prv;
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
        struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
        struct iser_tx_desc *tx_desc = &iser_pdu->desc;

        if (iser_pdu->dir[ISER_DIR_IN]) {
                iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
                iser_dma_unmap_task_data(iser_pdu,
                    &iser_pdu->data[ISER_DIR_IN],
                    DMA_FROM_DEVICE);
        }

        if (iser_pdu->dir[ISER_DIR_OUT]) {
                iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
                iser_dma_unmap_task_data(iser_pdu,
                    &iser_pdu->data[ISER_DIR_OUT],
                    DMA_TO_DEVICE);
        }

        if (likely(tx_desc->mapped)) {
                ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
                    ISER_HEADERS_LEN, DMA_TO_DEVICE);
                tx_desc->mapped = false;
        }

        iser_pdu_free(ic, ip);
}

static int
iser_limits(struct icl_drv_limits *idl, int socket)
{

        idl->idl_max_recv_data_segment_length = 128 * 1024;
        idl->idl_max_send_data_segment_length = 128 * 1024;
        idl->idl_max_burst_length = 262144;
        idl->idl_first_burst_length = 65536;

        return (0);
}
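
/*
 * Module load/unload.  icl_iser_load() creates the PDU zone, registers
 * the "iser" offload with icl(4) (which then calls iser_limits() and
 * iser_new_conn() on our behalf) and initializes the global device and
 * connection lists; icl_iser_unload() tears this down in reverse order
 * and refuses to unload while any connection still exists.
 */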
static int
icl_iser_load(void)
{
        int error;

        ISER_DBG("Starting iSER datamover...");

        icl_pdu_zone = uma_zcreate("icl_iser_pdu",
            sizeof(struct icl_iser_pdu),
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        if (icl_pdu_zone == NULL)
                return (ENOMEM);

        refcount_init(&icl_iser_ncons, 0);

        error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
        KASSERT(error == 0, ("failed to register iser"));

        memset(&ig, 0, sizeof(struct iser_global));

        /* Device init is called only after the first address resolution. */
        sx_init(&ig.device_list_mutex, "global_device_lock");
        INIT_LIST_HEAD(&ig.device_list);
        mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
        INIT_LIST_HEAD(&ig.connlist);
        sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

        return (error);
}

static int
icl_iser_unload(void)
{
        ISER_DBG("Removing iSER datamover...");

        if (icl_iser_ncons != 0)
                return (EBUSY);

        sx_destroy(&ig.close_conns_mutex);
        mtx_destroy(&ig.connlist_mutex);
        sx_destroy(&ig.device_list_mutex);

        icl_unregister("iser", true);

        uma_zdestroy(icl_pdu_zone);

        return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
        switch (what) {
        case MOD_LOAD:
                return (icl_iser_load());
        case MOD_UNLOAD:
                return (icl_iser_unload());
        default:
                return (EINVAL);
        }
}

moduledata_t icl_iser_data = {
        .name = "icl_iser",
        .evhand = icl_iser_modevent,
        .priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);