/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t iser_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_bio_t iser_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_bio, iser_conn_pdu_append_bio),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_bio, iser_conn_pdu_get_bio),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};
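
/*
 * Register the method table above as the "icl_iser" kobj class.  Each
 * connection object is allocated at the size of struct iser_conn, with
 * the generic struct icl_conn embedded in it.
 */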
DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:       iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error-handling TMFs, so for TMFs we should acquire the state
 * mutex to avoid dereferencing an IB device that may already have
 * been terminated (racing teardown sequence).
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
	    ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
	return (ret);
}

int
iser_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request,
    struct bio *bp, size_t offset, size_t len, int flags)
{
	MPASS(!((request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
	    ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
	    ISCSI_BHS_OPCODE_TEXT_REQUEST));

	return (0);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	switch (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
		break;
	}

	return (0);
}

void
iser_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
    size_t pdu_off, struct bio *bp, size_t bio_off,
    size_t len)
{
	MPASS(ip->ip_data_mbuf == NULL);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	/* If we have received data, copy it to the upper layer buffer. */
	if (ip->ip_data_mbuf)
		memcpy(addr, ip->ip_data_mbuf + off, len);
}
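
/*
 * Note that the data-path methods above are mostly no-ops: with iSER,
 * SCSI payloads move over RDMA rather than inside the PDU, so only
 * login and text PDU payloads are ever copied by this glue.
 */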

/*
 * Allocate an icl_pdu with an empty BHS, to be filled up by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
}

size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
}

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	if (iser_conn->state != ISER_CONN_UP)
		return;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
	    M_ICL_ISER, M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock",
	    NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_offload = strdup("iser", M_TEMP);
	ic->ic_iser = true;
	ic->ic_unmapped = true;

	return (ic);
}
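
/*
 * Final teardown path: iser_conn_free() below undoes iser_new_conn(),
 * releasing the IB resources via iser_conn_release() and destroying the
 * locks and condition variables in reverse order of their creation.
 */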

void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_conn_release(ic);
	mtx_destroy(&iser_conn->ib_conn.lock);
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started\n",
		    iser_conn, iser_conn->state);
		goto out;
	}

	error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	iser_conn->handoff_done = true;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
}

/**
 * Frees all conn objects
 */
static void
iser_conn_release(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for the iser connection in the global list.
	 * It may not be there if the connection establishment
	 * stage failed.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * In case we are reconnecting or removing a session, we need to
	 * release the IB resources (which is safe to call more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
}
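
/*
 * Request an orderly shutdown of the connection.  The heavy lifting is
 * done by iser_conn_terminate(); see the note below about waking up a
 * connect attempt still stuck in ISER_CONN_PENDING.
 */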

void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * In case the iser connection is waiting on the condition variable
	 * (state PENDING) and we try to close it before connection
	 * establishment, we need to signal it so it can finish releasing
	 * the connection properly.
	 */
	if (!iser_conn_terminate(iser_conn) &&
	    iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	iser_conn_release(ic);

	sx_xlock(&iser_conn->state_mutex);
	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;
	iser_conn->handoff_done = false;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
	    (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio,
    uint32_t *task_tagp, void **prvp)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_IN],
		    DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_OUT],
		    DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
		    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}

static int
iser_limits(struct icl_drv_limits *idl, int socket)
{

	idl->idl_max_recv_data_segment_length = 128 * 1024;
	idl->idl_max_send_data_segment_length = 128 * 1024;
	idl->idl_max_burst_length = 262144;
	idl->idl_first_burst_length = 65536;

	return (0);
}
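
/*
 * Module load: create the PDU UMA zone, register the "iser" offload with
 * the ICL layer and initialize the global device and connection lists.
 */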

static int
icl_iser_load(void)
{
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	/* FIXME: Check rc */

	refcount_init(&icl_iser_ncons, 0);

	error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
	KASSERT(error == 0, ("failed to register iser"));

	memset(&ig, 0, sizeof(struct iser_global));

	/* device init is called only after the first addr resolution */
	sx_init(&ig.device_list_mutex, "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

	return (error);
}

static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	icl_unregister("iser", true);

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);