/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

static kobj_method_t icl_iser_methods[] = {
    KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
    KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
    KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
    KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
    KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
    KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
    KOBJMETHOD(icl_conn_free, iser_conn_free),
    KOBJMETHOD(icl_conn_close, iser_conn_close),
    KOBJMETHOD(icl_conn_connect, iser_conn_connect),
    KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
    KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
    KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
    { 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu: iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error handling TMFs, so for TMFs the caller should acquire the
 * state mutex to avoid dereferencing an IB device that may
 * already have been terminated (racing teardown sequence).
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
    struct iser_tx_desc *tx_desc = &pdu->desc;
    struct iser_device *device = iser_conn->ib_conn.device;
    u64 dma_addr;
    int ret = 0;

    dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
        ISER_HEADERS_LEN, DMA_TO_DEVICE);
    if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
        ret = -ENOMEM;
        goto out;
    }

    tx_desc->mapped = true;
    tx_desc->dma_addr = dma_addr;
    tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
    tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
    tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
    return (ret);
}
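/*
 * A minimal sketch of the locking the note above asks of TMF callers
 * (hypothetical caller code, not part of this file):
 *
 *	sx_xlock(&iser_conn->state_mutex);
 *	if (iser_conn->state == ISER_CONN_UP)
 *		error = iser_initialize_headers(iser_pdu, iser_conn);
 *	else
 *		error = EIO;
 *	sx_xunlock(&iser_conn->state_mutex);
 *
 * Holding state_mutex across both the state check and the call keeps
 * ib_conn.device valid while ib_dma_map_single() runs.
 */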
int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);

    /*
     * Only login and text requests carry their data in the login
     * buffer; data for other PDU types is transferred via RDMA and
     * is not appended here.
     */
    if ((request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
        ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
        (request->ip_bhs->bhs_opcode & ISCSI_OPCODE_MASK) ==
        ISCSI_BHS_OPCODE_TEXT_REQUEST) {
        ISER_DBG("copy to login buff");
        memcpy(iser_conn->login_req_buf, addr, len);
        request->ip_data_len = len;
    }

    return (0);
}

void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
    /*
     * If we have received data, copy it to the upper layer buffer.
     * For iser connections ip_data_mbuf holds a flat data buffer,
     * so index it in bytes rather than in mbuf-sized strides.
     */
    if (ip->ip_data_mbuf)
        memcpy(addr, (const char *)ip->ip_data_mbuf + off, len);
}

/*
 * Allocate an icl_pdu with an empty BHS, to be filled in by the caller.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
    struct icl_iser_pdu *iser_pdu;
    struct icl_pdu *ip;
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);

    iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
    if (iser_pdu == NULL) {
        ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
        return (NULL);
    }

    iser_pdu->iser_conn = iser_conn;
    ip = &iser_pdu->icl_pdu;
    ip->ip_conn = ic;
    ip->ip_bhs = &iser_pdu->desc.iscsi_header;

    return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
    return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
    struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

    uma_zfree(icl_pdu_zone, iser_pdu);
}

size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
    uint32_t len = 0;

    /* The BHS carries a 24-bit big-endian data segment length. */
    len += request->ip_bhs->bhs_data_segment_len[0];
    len <<= 8;
    len += request->ip_bhs->bhs_data_segment_len[1];
    len <<= 8;
    len += request->ip_bhs->bhs_data_segment_len[2];

    return (len);
}
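/*
 * Worked example (hypothetical values): data segment length bytes
 * { 0x00, 0x01, 0x00 } in the BHS decode most significant byte first,
 * i.e. (0x00 << 16) | (0x01 << 8) | 0x00 = 256 bytes.
 */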
void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
    iser_pdu_free(ic, ip);
}

static bool
is_control_opcode(uint8_t opcode)
{
    bool is_control = false;

    switch (opcode & ISCSI_OPCODE_MASK) {
    case ISCSI_BHS_OPCODE_NOP_OUT:
    case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
    case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
    case ISCSI_BHS_OPCODE_TEXT_REQUEST:
        is_control = true;
        break;
    case ISCSI_BHS_OPCODE_SCSI_COMMAND:
        is_control = false;
        break;
    default:
        ISER_ERR("unknown opcode %d", opcode);
    }

    return (is_control);
}

void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);
    struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
    int ret;

    if (iser_conn->state != ISER_CONN_UP)
        return;

    ret = iser_initialize_headers(iser_pdu, iser_conn);
    if (ret) {
        ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
        return;
    }

    if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
        ret = iser_send_control(iser_conn, iser_pdu);
        if (unlikely(ret))
            ISER_ERR("Failed to send control pdu %p", iser_pdu);
    } else {
        ret = iser_send_command(iser_conn, iser_pdu);
        if (unlikely(ret))
            ISER_ERR("Failed to send command pdu %p", iser_pdu);
    }
}

static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
    struct iser_conn *iser_conn;
    struct icl_conn *ic;

    refcount_acquire(&icl_iser_ncons);

    iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER,
        M_WAITOK | M_ZERO);
    if (!iser_conn) {
        ISER_ERR("failed to allocate iser conn");
        refcount_release(&icl_iser_ncons);
        return (NULL);
    }

    cv_init(&iser_conn->up_cv, "iser_cv");
    sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
    mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL, MTX_DEF);
    cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
    mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF);

    ic = &iser_conn->icl_conn;
    ic->ic_lock = lock;
    ic->ic_name = name;
    ic->ic_offload = strdup("iser", M_TEMP);
    ic->ic_iser = true;
    ic->ic_unmapped = true;

    return (ic);
}

void
iser_conn_free(struct icl_conn *ic)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);

    iser_conn_release(ic);
    cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
    mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
    sx_destroy(&iser_conn->state_mutex);
    cv_destroy(&iser_conn->up_cv);
    kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
    refcount_release(&icl_iser_ncons);
}

int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);
    int error = 0;

    sx_xlock(&iser_conn->state_mutex);
    if (iser_conn->state != ISER_CONN_UP) {
        error = EINVAL;
        ISER_ERR("iser_conn %p state is %d, teardown started",
            iser_conn, iser_conn->state);
        goto out;
    }

    error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
    if (error)
        goto out;

    error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
    if (error)
        goto post_error;

    iser_conn->handoff_done = true;

    sx_xunlock(&iser_conn->state_mutex);
    return (error);

post_error:
    iser_free_rx_descriptors(iser_conn);
out:
    sx_xunlock(&iser_conn->state_mutex);
    return (error);
}

/**
 * Frees all connection objects and releases the IB resources.
 */
static void
iser_conn_release(struct icl_conn *ic)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    struct iser_conn *curr, *tmp;

    mtx_lock(&ig.connlist_mutex);
    /*
     * Search for the iser connection in the global list.
     * It may not be there if connection establishment failed.
     */
    list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
        if (iser_conn == curr) {
            ISER_WARN("found iser_conn %p", iser_conn);
            list_del(&iser_conn->conn_list);
        }
    }
    mtx_unlock(&ig.connlist_mutex);

    /*
     * In case we are reconnecting or removing the session, we need to
     * release the IB resources (which is safe to call more than once).
     */
    sx_xlock(&iser_conn->state_mutex);
    iser_free_ib_conn_res(iser_conn, true);
    sx_xunlock(&iser_conn->state_mutex);

    if (ib_conn->cma_id != NULL) {
        rdma_destroy_id(ib_conn->cma_id);
        ib_conn->cma_id = NULL;
    }
}

void
iser_conn_close(struct icl_conn *ic)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);

    ISER_INFO("closing conn %p", iser_conn);

    sx_xlock(&iser_conn->state_mutex);
    /*
     * In case the iser connection is waiting on the condition variable
     * (state PENDING) and we try to close it before connection
     * establishment completes, we need to signal it so the connection
     * release can proceed properly.
     */
    if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING)
        cv_signal(&iser_conn->up_cv);
    sx_xunlock(&iser_conn->state_mutex);
}

int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
    struct iser_conn *iser_conn = icl_to_iser_conn(ic);
    struct ib_conn *ib_conn = &iser_conn->ib_conn;
    int err = 0;

    iser_conn_release(ic);

    sx_xlock(&iser_conn->state_mutex);
    /* the device is known only --after-- address resolution */
    ib_conn->device = NULL;
    iser_conn->handoff_done = false;

    iser_conn->state = ISER_CONN_PENDING;

    ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn,
        RDMA_PS_TCP, IB_QPT_RC);
    if (IS_ERR(ib_conn->cma_id)) {
        err = -PTR_ERR(ib_conn->cma_id);
        ISER_ERR("rdma_create_id failed: %d", err);
        goto id_failure;
    }

    err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
    if (err) {
        ISER_ERR("rdma_resolve_addr failed: %d", err);
        if (err < 0)
            err = -err;
        goto addr_failure;
    }

    ISER_DBG("before cv_wait: %p", iser_conn);
    cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
    ISER_DBG("after cv_wait: %p", iser_conn);

    if (iser_conn->state != ISER_CONN_UP) {
        err = EIO;
        goto addr_failure;
    }

    err = iser_alloc_login_buf(iser_conn);
    if (err)
        goto addr_failure;
    sx_xunlock(&iser_conn->state_mutex);

    mtx_lock(&ig.connlist_mutex);
    list_add(&iser_conn->conn_list, &ig.connlist);
    mtx_unlock(&ig.connlist_mutex);

    return (0);

id_failure:
    ib_conn->cma_id = NULL;
addr_failure:
    sx_xunlock(&iser_conn->state_mutex);
    return (err);
}
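/*
 * Note on the handshake in iser_conn_connect(): rdma_resolve_addr() only
 * starts the RDMA CM state machine; resolution and connection
 * establishment are reported asynchronously to iser_cma_handler(), which
 * in the expected flow moves the connection out of ISER_CONN_PENDING and
 * signals up_cv, waking the cv_wait() above with state_mutex re-held.
 */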
int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{
    struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

    *prvp = ip;
    iser_pdu->csio = csio;

    return (0);
}

void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
    struct icl_pdu *ip = prv;
    struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
    struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
    struct iser_tx_desc *tx_desc = &iser_pdu->desc;

    if (iser_pdu->dir[ISER_DIR_IN]) {
        iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
        iser_dma_unmap_task_data(iser_pdu,
            &iser_pdu->data[ISER_DIR_IN],
            DMA_FROM_DEVICE);
    }

    if (iser_pdu->dir[ISER_DIR_OUT]) {
        iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
        iser_dma_unmap_task_data(iser_pdu,
            &iser_pdu->data[ISER_DIR_OUT],
            DMA_TO_DEVICE);
    }

    if (likely(tx_desc->mapped)) {
        ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
            ISER_HEADERS_LEN, DMA_TO_DEVICE);
        tx_desc->mapped = false;
    }

    iser_pdu_free(ic, ip);
}
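/*
 * Teardown in iser_conn_task_done() mirrors setup: the RDMA memory
 * registrations are released and the data buffers unmapped per direction
 * first, and only then is the header descriptor unmapped. tx_desc->mapped
 * is only true if iser_initialize_headers() succeeded for this PDU, so
 * PDUs that were never queued skip the header unmap safely.
 */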
static int
iser_limits(struct icl_drv_limits *idl)
{
    idl->idl_max_recv_data_segment_length = 128 * 1024;

    return (0);
}

static int
icl_iser_load(void)
{
    int error;

    ISER_DBG("Starting iSER datamover...");

    icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
        NULL, NULL, NULL, NULL,
        UMA_ALIGN_PTR, 0);
    if (icl_pdu_zone == NULL) {
        ISER_ERR("failed to create icl_iser_pdu UMA zone");
        return (ENOMEM);
    }

    refcount_init(&icl_iser_ncons, 0);

    error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
    KASSERT(error == 0, ("failed to register iser"));

    memset(&ig, 0, sizeof(struct iser_global));

    /* device init is called only after the first addr resolution */
    sx_init(&ig.device_list_mutex, "global_device_lock");
    INIT_LIST_HEAD(&ig.device_list);
    mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF);
    INIT_LIST_HEAD(&ig.connlist);
    sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

    return (error);
}

static int
icl_iser_unload(void)
{
    ISER_DBG("Removing iSER datamover...");

    if (icl_iser_ncons != 0)
        return (EBUSY);

    sx_destroy(&ig.close_conns_mutex);
    mtx_destroy(&ig.connlist_mutex);
    sx_destroy(&ig.device_list_mutex);

    icl_unregister("iser", true);

    uma_zdestroy(icl_pdu_zone);

    return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
    switch (what) {
    case MOD_LOAD:
        return (icl_iser_load());
    case MOD_UNLOAD:
        return (icl_iser_unload());
    default:
        return (EINVAL);
    }
}

moduledata_t icl_iser_data = {
    .name = "icl_iser",
    .evhand = icl_iser_modevent,
    .priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);
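/*
 * Typical usage from userland, assuming the in-tree build produces
 * iser.ko:
 *
 *	# kldload iser
 *	# sysctl kern.iser.debug=1
 *
 * kern.iser.debug is the CTLFLAG_RWTUN sysctl declared at the top of
 * this file, so it may also be set as a loader tunable before load.
 */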