/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
        KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
        KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
        KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
        KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
        KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
        KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
        KOBJMETHOD(icl_conn_free, iser_conn_free),
        KOBJMETHOD(icl_conn_close, iser_conn_close),
        KOBJMETHOD(icl_conn_connect, iser_conn_connect),
        KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
        KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
        KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
        { 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
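
/*
 * Illustrative sketch (not part of the driver): consumers never call the
 * iser_conn_* functions in this file directly.  The generic ICL layer
 * dispatches through the kobj class defined above, via the wrappers
 * generated from icl_conn_if.m, roughly as follows.  The exact wrapper
 * macro names are an assumption here, not guaranteed by this file:
 *
 *	struct icl_pdu *ip;
 *
 *	ip = ICL_CONN_NEW_PDU(ic, M_NOWAIT);	 // lands in iser_conn_new_pdu()
 *	if (ip != NULL)
 *		ICL_CONN_PDU_QUEUE(ic, ip);	 // lands in iser_conn_pdu_queue()
 */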

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu: iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error handling TMFs, so for TMFs the state mutex should be held
 * to avoid dereferencing an IB device that the racing teardown
 * sequence may already have terminated.
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
        struct iser_tx_desc *tx_desc = &pdu->desc;
        struct iser_device *device = iser_conn->ib_conn.device;
        u64 dma_addr;
        int ret = 0;

        dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
                                     ISER_HEADERS_LEN, DMA_TO_DEVICE);
        if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
                ret = -ENOMEM;
                goto out;
        }

        tx_desc->mapped = true;
        tx_desc->dma_addr = dma_addr;
        tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
        tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
        return (ret);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
                          const void *addr, size_t len, int flags)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        switch (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) {
        case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
        case ISCSI_BHS_OPCODE_TEXT_REQUEST:
                ISER_DBG("copy to login buff");
                memcpy(iser_conn->login_req_buf, addr, len);
                request->ip_data_len = len;
                break;
        }

        return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
                       size_t off, void *addr, size_t len)
{
        /* If we have received data, copy it to the upper layer buffer. */
        if (ip->ip_data_mbuf)
                memcpy(addr, ip->ip_data_mbuf + off, len);
}

/*
 * Allocate icl_pdu with an empty BHS to be filled up by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
        struct icl_iser_pdu *iser_pdu;
        struct icl_pdu *ip;
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
        if (iser_pdu == NULL) {
                ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
                return (NULL);
        }

        iser_pdu->iser_conn = iser_conn;
        ip = &iser_pdu->icl_pdu;
        ip->ip_conn = ic;
        ip->ip_bhs = &iser_pdu->desc.iscsi_header;

        return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
        return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

        uma_zfree(icl_pdu_zone, iser_pdu);
}

size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
                                  const struct icl_pdu *request)
{
        uint32_t len = 0;

        len += request->ip_bhs->bhs_data_segment_len[0];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[1];
        len <<= 8;
        len += request->ip_bhs->bhs_data_segment_len[2];

        return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
        iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
        bool is_control = false;

        switch (opcode & ISCSI_OPCODE_MASK) {
        case ISCSI_BHS_OPCODE_NOP_OUT:
        case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
        case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
        case ISCSI_BHS_OPCODE_TEXT_REQUEST:
                is_control = true;
                break;
        case ISCSI_BHS_OPCODE_SCSI_COMMAND:
                is_control = false;
                break;
        default:
                ISER_ERR("unknown opcode %d", opcode);
        }

        return (is_control);
}
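
/*
 * Worked example (illustrative): the BHS carries DataSegmentLength as a
 * 24-bit big-endian field, so iser_conn_pdu_data_segment_length() above
 * computes (b0 << 16) | (b1 << 8) | b2; for the bytes { 0x01, 0x00, 0x00 }
 * that yields 65536.  Likewise, is_control_opcode() masks the opcode with
 * ISCSI_OPCODE_MASK so that an initiator opcode carrying the immediate
 * delivery bit (ISCSI_BHS_OPCODE_IMMEDIATE) still matches its case label.
 */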

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
        int ret;

        if (iser_conn->state != ISER_CONN_UP)
                return;

        ret = iser_initialize_headers(iser_pdu, iser_conn);
        if (ret) {
                ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
                return;
        }

        if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
                ret = iser_send_control(iser_conn, iser_pdu);
                if (unlikely(ret))
                        ISER_ERR("Failed to send control pdu %p", iser_pdu);
        } else {
                ret = iser_send_command(iser_conn, iser_pdu);
                if (unlikely(ret))
                        ISER_ERR("Failed to send command pdu %p", iser_pdu);
        }
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
        struct iser_conn *iser_conn;
        struct icl_conn *ic;

        refcount_acquire(&icl_iser_ncons);

        iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
            M_ICL_ISER, M_WAITOK | M_ZERO);
        if (!iser_conn) {
                ISER_ERR("failed to allocate iser conn");
                refcount_release(&icl_iser_ncons);
                return (NULL);
        }

        cv_init(&iser_conn->up_cv, "iser_cv");
        sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
        mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock",
            NULL, MTX_DEF);
        cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
        mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

        ic = &iser_conn->icl_conn;
        ic->ic_lock = lock;
        ic->ic_name = name;
        ic->ic_offload = strdup("iser", M_TEMP);
        ic->ic_iser = true;
        ic->ic_unmapped = true;

        return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        iser_conn_release(ic);
        mtx_destroy(&iser_conn->ib_conn.lock);
        cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
        mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
        sx_destroy(&iser_conn->state_mutex);
        cv_destroy(&iser_conn->up_cv);
        kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
        refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        int error = 0;

        sx_xlock(&iser_conn->state_mutex);
        if (iser_conn->state != ISER_CONN_UP) {
                error = EINVAL;
                ISER_ERR("iser_conn %p state is %d, teardown started\n",
                    iser_conn, iser_conn->state);
                goto out;
        }

        error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
        if (error)
                goto out;

        error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
        if (error)
                goto post_error;

        iser_conn->handoff_done = true;

        sx_xunlock(&iser_conn->state_mutex);
        return (error);

post_error:
        iser_free_rx_descriptors(iser_conn);
out:
        sx_xunlock(&iser_conn->state_mutex);
        return (error);
}
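
/*
 * Note (illustrative): unlike a socket-based backend, the iser backend has
 * no file descriptor to take over; by the time the iSCSI layer hands the
 * session off, the RDMA connection already exists.  Hence
 * iser_conn_handoff() ignores "fd" and only sizes the receive queue to the
 * tag space (ic_maxtags) and pre-posts the initial batch of receive
 * buffers.
 */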

/*
 * Frees all connection objects.
 */
static void
iser_conn_release(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        struct iser_conn *curr, *tmp;

        mtx_lock(&ig.connlist_mutex);
        /*
         * Search for the iser connection in the global list;
         * it may be absent if connection establishment failed.
         */
        list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
                if (iser_conn == curr) {
                        ISER_WARN("found iser_conn %p", iser_conn);
                        list_del(&iser_conn->conn_list);
                }
        }
        mtx_unlock(&ig.connlist_mutex);

        /*
         * Whether we are reconnecting or removing the session, we need to
         * release the IB resources (which is safe to call more than once).
         */
        sx_xlock(&iser_conn->state_mutex);
        iser_free_ib_conn_res(iser_conn, true);
        sx_xunlock(&iser_conn->state_mutex);

        if (ib_conn->cma_id != NULL) {
                rdma_destroy_id(ib_conn->cma_id);
                ib_conn->cma_id = NULL;
        }
}

void
iser_conn_close(struct icl_conn *ic)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);

        ISER_INFO("closing conn %p", iser_conn);

        sx_xlock(&iser_conn->state_mutex);
        /*
         * If the iser connection is waiting on the condition variable
         * (state PENDING) and we try to close it before connection
         * establishment completes, signal it so that the connection
         * release can proceed properly.
         */
        if (!iser_conn_terminate(iser_conn) &&
            iser_conn->state == ISER_CONN_PENDING)
                cv_signal(&iser_conn->up_cv);
        sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
                int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
        struct iser_conn *iser_conn = icl_to_iser_conn(ic);
        struct ib_conn *ib_conn = &iser_conn->ib_conn;
        int err = 0;

        iser_conn_release(ic);

        sx_xlock(&iser_conn->state_mutex);
        /* the device is known only --after-- address resolution */
        ib_conn->device = NULL;
        iser_conn->handoff_done = false;

        iser_conn->state = ISER_CONN_PENDING;

        ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
            (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
                err = -PTR_ERR(ib_conn->cma_id);
                ISER_ERR("rdma_create_id failed: %d", err);
                goto id_failure;
        }

        err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
        if (err) {
                ISER_ERR("rdma_resolve_addr failed: %d", err);
                if (err < 0)
                        err = -err;
                goto addr_failure;
        }

        ISER_DBG("before cv_wait: %p", iser_conn);
        cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
        ISER_DBG("after cv_wait: %p", iser_conn);

        if (iser_conn->state != ISER_CONN_UP) {
                err = EIO;
                goto addr_failure;
        }

        err = iser_alloc_login_buf(iser_conn);
        if (err)
                goto addr_failure;
        sx_xunlock(&iser_conn->state_mutex);

        mtx_lock(&ig.connlist_mutex);
        list_add(&iser_conn->conn_list, &ig.connlist);
        mtx_unlock(&ig.connlist_mutex);

        return (0);

id_failure:
        ib_conn->cma_id = NULL;
addr_failure:
        sx_xunlock(&iser_conn->state_mutex);
        return (err);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
                     struct ccb_scsiio *csio,
                     uint32_t *task_tagp, void **prvp)
{
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

        *prvp = ip;
        iser_pdu->csio = csio;

        return (0);
}
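
/*
 * Note (illustrative): iser_conn_task_setup() only stashes the CCB in the
 * iser PDU.  iser_conn_task_done() below then undoes, in reverse order,
 * everything set up on behalf of the task: the RDMA memory registration
 * and DMA mappings for each direction, the mapped header descriptor, and
 * finally the PDU itself.
 */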

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
        struct icl_pdu *ip = prv;
        struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
        struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
        struct iser_tx_desc *tx_desc = &iser_pdu->desc;

        if (iser_pdu->dir[ISER_DIR_IN]) {
                iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
                iser_dma_unmap_task_data(iser_pdu,
                    &iser_pdu->data[ISER_DIR_IN],
                    DMA_FROM_DEVICE);
        }

        if (iser_pdu->dir[ISER_DIR_OUT]) {
                iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
                iser_dma_unmap_task_data(iser_pdu,
                    &iser_pdu->data[ISER_DIR_OUT],
                    DMA_TO_DEVICE);
        }

        if (likely(tx_desc->mapped)) {
                ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
                    ISER_HEADERS_LEN, DMA_TO_DEVICE);
                tx_desc->mapped = false;
        }

        iser_pdu_free(ic, ip);
}

static int
iser_limits(struct icl_drv_limits *idl)
{

        idl->idl_max_recv_data_segment_length = 128 * 1024;
        idl->idl_max_send_data_segment_length = 128 * 1024;
        idl->idl_max_burst_length = 262144;
        idl->idl_first_burst_length = 65536;

        return (0);
}

static int
icl_iser_load(void)
{
        int error;

        ISER_DBG("Starting iSER datamover...");

        icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
            NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        /* FIXME: Check rc */

        refcount_init(&icl_iser_ncons, 0);

        error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
        KASSERT(error == 0, ("failed to register iser"));

        memset(&ig, 0, sizeof(struct iser_global));

        /* device init is called only after the first addr resolution */
        sx_init(&ig.device_list_mutex, "global_device_lock");
        INIT_LIST_HEAD(&ig.device_list);
        mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
        INIT_LIST_HEAD(&ig.connlist);
        sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

        return (error);
}

static int
icl_iser_unload(void)
{
        ISER_DBG("Removing iSER datamover...");

        if (icl_iser_ncons != 0)
                return (EBUSY);

        sx_destroy(&ig.close_conns_mutex);
        mtx_destroy(&ig.connlist_mutex);
        sx_destroy(&ig.device_list_mutex);

        icl_unregister("iser", true);

        uma_zdestroy(icl_pdu_zone);

        return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
        switch (what) {
        case MOD_LOAD:
                return (icl_iser_load());
        case MOD_UNLOAD:
                return (icl_iser_unload());
        default:
                return (EINVAL);
        }
}

moduledata_t icl_iser_data = {
        .name = "icl_iser",
        .evhand = icl_iser_modevent,
        .priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);
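
/*
 * Usage sketch (illustrative, not compiled): once this module is loaded,
 * e.g. with "kldload icl_iser", the iSCSI initiator can negotiate the
 * "iser" offload registered above, and debug output can be toggled at
 * runtime with "sysctl kern.iser.debug=1" (the knob is also settable as a
 * loader tunable, since it is declared CTLFLAG_RWTUN).
 */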