/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_release_t iser_conn_release;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_connected_t iser_conn_connected;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
        KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
        KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
        KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
        KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
        KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
        KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
        KOBJMETHOD(icl_conn_free, iser_conn_free),
        KOBJMETHOD(icl_conn_close, iser_conn_close),
        KOBJMETHOD(icl_conn_release, iser_conn_release),
        KOBJMETHOD(icl_conn_connect, iser_conn_connect),
        KOBJMETHOD(icl_conn_connected, iser_conn_connected),
        KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
        KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
        KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
        { 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
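/*
 * The ICL layer reaches this backend through the kobj method table
 * above.  A rough sketch of the dispatch, for orientation (the actual
 * wrapper macros are generated from icl_conn_if.m, so the exact
 * spelling below is illustrative, not authoritative):
 *
 *	struct icl_pdu *ip;
 *
 *	ip = ICL_CONN_NEW_PDU(ic, M_NOWAIT);	-> iser_conn_new_pdu()
 */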
/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:       iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error-handling TMFs, so for TMFs the state mutex should be held to
 * avoid dereferencing an IB device that the racing teardown sequence
 * may already have terminated.
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
        struct iser_tx_desc *tx_desc = &pdu->desc;
        struct iser_device *device = iser_conn->ib_conn.device;
        u64 dma_addr;
        int ret = 0;

        dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
            ISER_HEADERS_LEN, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
                ret = -ENOMEM;
                goto out;
        }

        tx_desc->mapped = true;
        tx_desc->dma_addr = dma_addr;
        tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
        return (ret);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        uint8_t opcode = request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK;

        /*
         * Mask off the non-opcode bits and compare exactly; a bitwise
         * AND against the opcode values would also match unrelated
         * opcodes that merely share bits with them.
         */
        if (opcode == ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
            opcode == ISCSI_BHS_OPCODE_TEXT_REQUEST) {
                ISER_DBG("copy to login buff");
                memcpy(iser_conn->login_req_buf, addr, len);
                request->ip_data_len = len;
        }

        return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
        /*
         * If we have received data, copy it to the upper layer buffer.
         * In this backend ip_data_mbuf carries a plain data buffer
         * rather than an mbuf chain, so use byte-wise arithmetic.
         */
        if (ip->ip_data_mbuf)
                memcpy(addr, (uint8_t *)ip->ip_data_mbuf + off, len);
}

/*
 * Allocate an icl_pdu with an empty BHS, to be filled in by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
        struct icl_iser_pdu *iser_pdu;
        struct icl_pdu *ip;
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
        if (iser_pdu == NULL) {
                ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
                return (NULL);
        }

        iser_pdu->iser_conn = iser_conn;
        ip = &iser_pdu->icl_pdu;
        ip->ip_conn = ic;
        ip->ip_bhs = &iser_pdu->desc.iscsi_header;

        return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
        return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

        uma_zfree(icl_pdu_zone, iser_pdu);
}
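/*
 * The BHS DataSegmentLength field is a 24-bit big-endian quantity
 * stored in three bytes.  For example, a bhs_data_segment_len[] of
 * { 0x00, 0x10, 0x00 } decodes to (0x00 << 16) | (0x10 << 8) | 0x00,
 * i.e. 4096 bytes.  iser_conn_pdu_data_segment_length() below
 * reconstructs the value byte by byte.
 */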
size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
        uint32_t len = 0;

        len += request->ip_bhs->bhs_data_segment_len[0];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[1];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[2];

        return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
        bool is_control = false;

        switch (opcode & ISCSI_OPCODE_MASK) {
        case ISCSI_BHS_OPCODE_NOP_OUT:
        case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
        case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
        case ISCSI_BHS_OPCODE_TEXT_REQUEST:
                is_control = true;
                break;
        case ISCSI_BHS_OPCODE_SCSI_COMMAND:
                is_control = false;
                break;
        default:
                ISER_ERR("unknown opcode %d", opcode);
        }

        return (is_control);
}
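/*
 * Queueing a PDU first maps its headers for DMA and then hands it to
 * one of two send paths: control-type PDUs (NOP-Out, login, logout,
 * text) go through iser_send_control(), while SCSI commands go
 * through iser_send_command().
 */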
void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
        int ret;

        ret = iser_initialize_headers(iser_pdu, iser_conn);
        if (ret) {
                ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
                return;
        }

        if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
                ret = iser_send_control(iser_conn, iser_pdu);
                if (unlikely(ret))
                        ISER_ERR("Failed to send control pdu %p", iser_pdu);
        } else {
                ret = iser_send_command(iser_conn, iser_pdu);
                if (unlikely(ret))
                        ISER_ERR("Failed to send command pdu %p", iser_pdu);
        }
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
        struct iser_conn *iser_conn;
        struct icl_conn *ic;

        refcount_acquire(&icl_iser_ncons);

        iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
            M_ICL_ISER, M_WAITOK | M_ZERO);
        if (!iser_conn) {
                ISER_ERR("failed to allocate iser conn");
                refcount_release(&icl_iser_ncons);
                return (NULL);
        }

        cv_init(&iser_conn->up_cv, "iser_cv");
        sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
        mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL,
            MTX_DEF);
        cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
        mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF);

        ic = &iser_conn->icl_conn;
        ic->ic_lock = lock;
        ic->ic_name = name;
        ic->ic_driver = strdup("iser", M_TEMP);
        ic->ic_iser = true;

        return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
        mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
        /*
         * Also tear down ib_conn.lock, which iser_new_conn()
         * initialized (assuming no other teardown path destroys it);
         * otherwise the mutex is leaked.
         */
        mtx_destroy(&iser_conn->ib_conn.lock);
        sx_destroy(&iser_conn->state_mutex);
        cv_destroy(&iser_conn->up_cv);
        kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
        refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int cmds_max)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        int error = 0;

        sx_xlock(&iser_conn->state_mutex);
        if (iser_conn->state != ISER_CONN_UP) {
                error = EINVAL;
                ISER_ERR("iser_conn %p state is %d, teardown started\n",
                    iser_conn, iser_conn->state);
                goto out;
        }

        /*
         * A discovery session needs no RX descriptors and no posted
         * receive work requests.
         */
        if (ic->ic_session_type_discovery(ic))
                goto out;

        error = iser_alloc_rx_descriptors(iser_conn, cmds_max);
        if (error)
                goto out;

        error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
        if (error)
                goto post_error;

        sx_xunlock(&iser_conn->state_mutex);
        return (error);

post_error:
        iser_free_rx_descriptors(iser_conn);
out:
        sx_xunlock(&iser_conn->state_mutex);
        return (error);
}
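/*
 * Teardown ordering, as inferred from this file: iser_conn_close()
 * terminates the connection (waking anyone blocked in
 * iser_conn_connect()), iser_conn_release() unlinks the connection
 * from the global list and frees its IB resources, and
 * iser_conn_free() finally destroys the locks and the kobj itself.
 */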
/**
 * Frees all conn objects
 */
void
iser_conn_release(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_conn *curr, *tmp;

        mtx_lock(&ig.connlist_mutex);
        /*
         * Search for the iser connection in the global list.
         * It may not be there if connection establishment failed.
         */
        list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
                if (iser_conn == curr) {
                        ISER_WARN("found iser_conn %p", iser_conn);
                        list_del(&iser_conn->conn_list);
                }
        }
        mtx_unlock(&ig.connlist_mutex);

        /*
         * In case we are reconnecting or removing the session, release
         * the IB resources (which is safe to do more than once).
         */
        sx_xlock(&iser_conn->state_mutex);
        iser_free_ib_conn_res(iser_conn, true);
        sx_xunlock(&iser_conn->state_mutex);

        if (ib_conn->cma_id != NULL) {
                rdma_destroy_id(ib_conn->cma_id);
                ib_conn->cma_id = NULL;
        }
}

void
iser_conn_close(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        ISER_INFO("closing conn %p", iser_conn);

        sx_xlock(&iser_conn->state_mutex);
        /*
         * If the iser connection is still waiting on its condition
         * variable (state PENDING) and we close it before connection
         * establishment completed, signal it so that the connection is
         * released properly.
         */
        if (!iser_conn_terminate(iser_conn) &&
            iser_conn->state == ISER_CONN_PENDING)
                cv_signal(&iser_conn->up_cv);
        sx_xunlock(&iser_conn->state_mutex);
}
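/*
 * Connection establishment runs asynchronously: rdma_create_id()
 * registers iser_cma_handler() as the RDMA CM event callback, and
 * after rdma_resolve_addr() kicks off address resolution, the calling
 * thread sleeps on up_cv until the handler (defined elsewhere in this
 * driver) moves the connection to ISER_CONN_UP or fails it.
 */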
int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        int err = 0;

        sx_xlock(&iser_conn->state_mutex);
        /* the device is known only --after-- address resolution */
        ib_conn->device = NULL;

        iser_conn->state = ISER_CONN_PENDING;

        ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn,
            RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
                err = -PTR_ERR(ib_conn->cma_id);
                ISER_ERR("rdma_create_id failed: %d", err);
                goto id_failure;
        }

        err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
        if (err) {
                ISER_ERR("rdma_resolve_addr failed: %d", err);
                if (err < 0)
                        err = -err;
                goto addr_failure;
        }

        ISER_DBG("before cv_wait: %p", iser_conn);
        cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
        ISER_DBG("after cv_wait: %p", iser_conn);

        if (iser_conn->state != ISER_CONN_UP) {
                err = EIO;
                goto addr_failure;
        }

        err = iser_alloc_login_buf(iser_conn);
        if (err)
                goto addr_failure;
        sx_xunlock(&iser_conn->state_mutex);

        mtx_lock(&ig.connlist_mutex);
        list_add(&iser_conn->conn_list, &ig.connlist);
        mtx_unlock(&ig.connlist_mutex);

        return (0);

id_failure:
        ib_conn->cma_id = NULL;
addr_failure:
        sx_xunlock(&iser_conn->state_mutex);
        return (err);
}

/**
 * Called with the session spinlock held.
 * No need to take the state mutex for an advisory check.
 */
bool
iser_conn_connected(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        return (iser_conn->state == ISER_CONN_UP);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct ccb_scsiio *csio,
    uint32_t *task_tagp, void **prvp, struct icl_pdu *ip)
{
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

        *prvp = ip;
        iser_pdu->csio = csio;

        return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
        struct icl_pdu *ip = prv;
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
        struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
        struct iser_tx_desc *tx_desc = &iser_pdu->desc;

        if (iser_pdu->dir[ISER_DIR_IN]) {
                iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
                iser_dma_unmap_task_data(iser_pdu,
                    &iser_pdu->data[ISER_DIR_IN],
                    DMA_FROM_DEVICE);
        }

        if (iser_pdu->dir[ISER_DIR_OUT]) {
                iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
                iser_dma_unmap_task_data(iser_pdu,
                    &iser_pdu->data[ISER_DIR_OUT],
                    DMA_TO_DEVICE);
        }

        if (likely(tx_desc->mapped)) {
                ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
                    ISER_HEADERS_LEN, DMA_TO_DEVICE);
                tx_desc->mapped = false;
        }

        iser_pdu_free(ic, ip);
}

static u_int32_t
iser_hba_misc(void)
{
        return (PIM_UNMAPPED);
}

static int
iser_limits(size_t *limitp)
{
        *limitp = 128 * 1024;

        return (0);
}

static int
icl_iser_load(void)
{
        int error;

        ISER_DBG("Starting iSER datamover...");

        icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        /* FIXME: Check rc */

        refcount_init(&icl_iser_ncons, 0);

        error = icl_register("iser", 0, iser_limits, iser_new_conn,
            iser_hba_misc);
        KASSERT(error == 0, ("failed to register iser"));

        memset(&ig, 0, sizeof(struct iser_global));

        /* device init is called only after the first addr resolution */
        sx_init(&ig.device_list_mutex, "global_device_lock");
        INIT_LIST_HEAD(&ig.device_list);
        mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF);
        INIT_LIST_HEAD(&ig.connlist);
        sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

        return (error);
}

static int
icl_iser_unload(void)
{
        ISER_DBG("Removing iSER datamover...");

        if (icl_iser_ncons != 0)
                return (EBUSY);

        sx_destroy(&ig.close_conns_mutex);
        mtx_destroy(&ig.connlist_mutex);
        sx_destroy(&ig.device_list_mutex);

        icl_unregister("iser");

        uma_zdestroy(icl_pdu_zone);

        return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
        switch (what) {
        case MOD_LOAD:
                return (icl_iser_load());
        case MOD_UNLOAD:
                return (icl_iser_unload());
        default:
                return (EINVAL);
        }
}

moduledata_t icl_iser_data = {
        .name = "icl_iser",
        .evhand = icl_iser_modevent,
        .priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, iscsi, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);