/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t	iser_conn_new_pdu;
static icl_conn_pdu_free_t	iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t	iser_conn_pdu_queue;
static icl_conn_handoff_t	iser_conn_handoff;
static icl_conn_free_t		iser_conn_free;
static icl_conn_close_t		iser_conn_close;
static icl_conn_connect_t	iser_conn_connect;
static icl_conn_task_setup_t	iser_conn_task_setup;
static icl_conn_task_done_t	iser_conn_task_done;
static icl_conn_pdu_get_data_t	iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));
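/*
 * The class above is dispatched through kobj by the icl core; consumers
 * never call the iser_conn_* methods directly.  Illustrative sketch only
 * (the real call sites live in icl(4) and its icl_wrappers.h inline
 * dispatchers; "lock" is a placeholder for the caller's mutex):
 *
 *	struct icl_conn *ic = icl_new_conn("iser", true, "conn", &lock);
 *	struct icl_pdu *ip = icl_conn_new_pdu(ic, M_WAITOK);
 *	icl_conn_pdu_queue(ic, ip);	// dispatches to iser_conn_pdu_queue()
 */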
/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu:       iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error handling TMFs.  For TMFs we should therefore acquire the
 * state mutex to avoid dereferencing an IB device which may have
 * already been terminated (racing teardown sequence).
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
	    ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
	return (ret);
}

int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
	}

	return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	/* If we have received data, copy it to the upper layer buffer. */
	if (ip->ip_data_mbuf)
		memcpy(addr, ip->ip_data_mbuf + off, len);
}

/*
 * Allocate icl_pdu with empty BHS to be filled up by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
}

size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
	uint32_t len = 0;

	/* The BHS DataSegmentLength is a 24-bit, big-endian field. */
	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
}
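/*
 * Transmit path sketch (descriptive only, no new behavior):
 * iser_conn_pdu_queue() below first DMA-maps the PDU headers via
 * iser_initialize_headers(), then routes the PDU by opcode using
 * is_control_opcode() above: control PDUs (NOP-Out, login, logout,
 * text) go through iser_send_control(), while SCSI commands go
 * through iser_send_command().
 */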
void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	if (iser_conn->state != ISER_CONN_UP)
		return;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
	    M_ICL_ISER, M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock",
	    NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_offload = strdup("iser", M_TEMP);
	ic->ic_iser = true;
	ic->ic_unmapped = true;

	return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_conn_release(ic);
	mtx_destroy(&iser_conn->ib_conn.lock);
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started",
		    iser_conn, iser_conn->state);
		goto out;
	}

	error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	iser_conn->handoff_done = true;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
}
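/*
 * Connection life cycle, as driven by the icl consumer (descriptive
 * sketch based on the methods in this file): iser_conn_connect()
 * resolves the target address and waits for the RDMA CM to bring the
 * connection up, iser_conn_handoff() allocates and posts the initial
 * receive buffers, iser_conn_close() initiates teardown, and
 * iser_conn_free() releases whatever resources remain.
 */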
/**
 * Releases the connection resources: removes the connection from the
 * global list and frees its IB-related resources.
 */
static void
iser_conn_release(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_conn *curr, *tmp;

	mtx_lock(&ig.connlist_mutex);
	/*
	 * Search for the iser connection in the global list.
	 * It may not be there in case of a failure during the
	 * connection establishment stage.
	 */
	list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
		if (iser_conn == curr) {
			ISER_WARN("found iser_conn %p", iser_conn);
			list_del(&iser_conn->conn_list);
		}
	}
	mtx_unlock(&ig.connlist_mutex);

	/*
	 * In case we are reconnecting or removing the session, we need to
	 * release the IB resources (which is safe to call more than once).
	 */
	sx_xlock(&iser_conn->state_mutex);
	iser_free_ib_conn_res(iser_conn, true);
	sx_xunlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
}

void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * In case the iser connection is waiting on the condition variable
	 * (state PENDING) and we try to close it before connection
	 * establishment, we need to signal it to continue releasing the
	 * connection properly.
	 */
	if (!iser_conn_terminate(iser_conn) &&
	    iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	iser_conn_release(ic);

	sx_xlock(&iser_conn->state_mutex);
	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;
	iser_conn->handoff_done = false;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
	    (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}

int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
}
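/*
 * Task completion path (descriptive sketch): iser_conn_task_done() below
 * undoes the task setup in reverse order: it unregisters and DMA-unmaps
 * the data buffers for whichever directions were used, unmaps the header
 * descriptor mapped by iser_initialize_headers(), and finally returns
 * the PDU to the UMA zone.
 */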
void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_IN],
		    DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_OUT],
		    DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
		    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}

static int
iser_limits(struct icl_drv_limits *idl)
{

	idl->idl_max_recv_data_segment_length = 128 * 1024;
	idl->idl_max_send_data_segment_length = 128 * 1024;
	idl->idl_max_burst_length = 262144;
	idl->idl_first_burst_length = 65536;

	return (0);
}

static int
icl_iser_load(void)
{
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	/* FIXME: Check rc */

	refcount_init(&icl_iser_ncons, 0);

	error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
	KASSERT(error == 0, ("failed to register iser"));

	memset(&ig, 0, sizeof(struct iser_global));

	/* device init is called only after the first addr resolution */
	sx_init(&ig.device_list_mutex, "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

	return (error);
}

static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	icl_unregister("iser", true);

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);