1 /* $FreeBSD$ */ 2 /*- 3 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include "icl_iser.h" 28 29 SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module"); 30 int iser_debug = 0; 31 SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN, 32 &iser_debug, 0, "Enable iser debug messages"); 33 34 static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend"); 35 static uma_zone_t icl_pdu_zone; 36 37 static volatile u_int icl_iser_ncons; 38 struct iser_global ig; 39 40 static void iser_conn_release(struct icl_conn *ic); 41 42 static icl_conn_new_pdu_t iser_conn_new_pdu; 43 static icl_conn_pdu_free_t iser_conn_pdu_free; 44 static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length; 45 static icl_conn_pdu_append_data_t iser_conn_pdu_append_data; 46 static icl_conn_pdu_queue_t iser_conn_pdu_queue; 47 static icl_conn_handoff_t iser_conn_handoff; 48 static icl_conn_free_t iser_conn_free; 49 static icl_conn_close_t iser_conn_close; 50 static icl_conn_connect_t iser_conn_connect; 51 static icl_conn_task_setup_t iser_conn_task_setup; 52 static icl_conn_task_done_t iser_conn_task_done; 53 static icl_conn_pdu_get_data_t iser_conn_pdu_get_data; 54 55 static kobj_method_t icl_iser_methods[] = { 56 KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu), 57 KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free), 58 KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length), 59 KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data), 60 KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue), 61 KOBJMETHOD(icl_conn_handoff, iser_conn_handoff), 62 KOBJMETHOD(icl_conn_free, iser_conn_free), 63 KOBJMETHOD(icl_conn_close, iser_conn_close), 64 KOBJMETHOD(icl_conn_connect, iser_conn_connect), 65 KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup), 66 KOBJMETHOD(icl_conn_task_done, iser_conn_task_done), 67 KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data), 68 { 0, 0 } 69 }; 70 71 DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn)); 72 73 /** 74 * iser_initialize_headers() - Initialize task headers 75 * @pdu: iser pdu 76 * @iser_conn: iser connection 77 * 78 * Notes: 79 * This routine may race with iser teardown flow for scsi 80 * error handling TMFs. So for TMF we should acquire the 81 * state mutex to avoid dereferencing the IB device which 82 * may have already been terminated (racing teardown sequence). 83 */ 84 int 85 iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn) 86 { 87 struct iser_tx_desc *tx_desc = &pdu->desc; 88 struct iser_device *device = iser_conn->ib_conn.device; 89 u64 dma_addr; 90 int ret = 0; 91 92 dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc, 93 ISER_HEADERS_LEN, DMA_TO_DEVICE); 94 if (ib_dma_mapping_error(device->ib_device, dma_addr)) { 95 ret = -ENOMEM; 96 goto out; 97 } 98 99 tx_desc->mapped = true; 100 tx_desc->dma_addr = dma_addr; 101 tx_desc->tx_sg[0].addr = tx_desc->dma_addr; 102 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; 103 tx_desc->tx_sg[0].lkey = device->mr->lkey; 104 105 out: 106 107 return (ret); 108 } 109 110 int 111 iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request, 112 const void *addr, size_t len, int flags) 113 { 114 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 115 116 if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST || 117 request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) { 118 ISER_DBG("copy to login buff"); 119 memcpy(iser_conn->login_req_buf, addr, len); 120 request->ip_data_len = len; 121 } 122 123 return (0); 124 } 125 126 void 127 iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip, 128 size_t off, void *addr, size_t len) 129 { 130 /* If we have a receive data, copy it to upper layer buffer */ 131 if (ip->ip_data_mbuf) 132 memcpy(addr, ip->ip_data_mbuf + off, len); 133 } 134 135 /* 136 * Allocate icl_pdu with empty BHS to fill up by the caller. 137 */ 138 struct icl_pdu * 139 iser_new_pdu(struct icl_conn *ic, int flags) 140 { 141 struct icl_iser_pdu *iser_pdu; 142 struct icl_pdu *ip; 143 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 144 145 iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO); 146 if (iser_pdu == NULL) { 147 ISER_WARN("failed to allocate %zd bytes", sizeof(*iser_pdu)); 148 return (NULL); 149 } 150 151 iser_pdu->iser_conn = iser_conn; 152 ip = &iser_pdu->icl_pdu; 153 ip->ip_conn = ic; 154 ip->ip_bhs = &iser_pdu->desc.iscsi_header; 155 156 return (ip); 157 } 158 159 struct icl_pdu * 160 iser_conn_new_pdu(struct icl_conn *ic, int flags) 161 { 162 return (iser_new_pdu(ic, flags)); 163 } 164 165 void 166 iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 167 { 168 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); 169 170 uma_zfree(icl_pdu_zone, iser_pdu); 171 } 172 173 size_t 174 iser_conn_pdu_data_segment_length(struct icl_conn *ic, 175 const struct icl_pdu *request) 176 { 177 uint32_t len = 0; 178 179 len += request->ip_bhs->bhs_data_segment_len[0]; 180 len <<= 8; 181 len += request->ip_bhs->bhs_data_segment_len[1]; 182 len <<= 8; 183 len += request->ip_bhs->bhs_data_segment_len[2]; 184 185 return (len); 186 } 187 188 void 189 iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) 190 { 191 iser_pdu_free(ic, ip); 192 } 193 194 static bool 195 is_control_opcode(uint8_t opcode) 196 { 197 bool is_control = false; 198 199 switch (opcode & ISCSI_OPCODE_MASK) { 200 case ISCSI_BHS_OPCODE_NOP_OUT: 201 case ISCSI_BHS_OPCODE_LOGIN_REQUEST: 202 case ISCSI_BHS_OPCODE_LOGOUT_REQUEST: 203 case ISCSI_BHS_OPCODE_TEXT_REQUEST: 204 is_control = true; 205 break; 206 case ISCSI_BHS_OPCODE_SCSI_COMMAND: 207 is_control = false; 208 break; 209 default: 210 ISER_ERR("unknown opcode %d", opcode); 211 } 212 213 return (is_control); 214 } 215 216 void 217 iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip) 218 { 219 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 220 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); 221 int ret; 222 223 ret = iser_initialize_headers(iser_pdu, iser_conn); 224 if (ret) { 225 ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu); 226 return; 227 } 228 229 if (is_control_opcode(ip->ip_bhs->bhs_opcode)) { 230 ret = iser_send_control(iser_conn, iser_pdu); 231 if (unlikely(ret)) 232 ISER_ERR("Failed to send control pdu %p", iser_pdu); 233 } else { 234 ret = iser_send_command(iser_conn, iser_pdu); 235 if (unlikely(ret)) 236 ISER_ERR("Failed to send command pdu %p", iser_pdu); 237 } 238 } 239 240 static struct icl_conn * 241 iser_new_conn(const char *name, struct mtx *lock) 242 { 243 struct iser_conn *iser_conn; 244 struct icl_conn *ic; 245 246 refcount_acquire(&icl_iser_ncons); 247 248 iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class, M_ICL_ISER, M_WAITOK | M_ZERO); 249 if (!iser_conn) { 250 ISER_ERR("failed to allocate iser conn"); 251 refcount_release(&icl_iser_ncons); 252 return (NULL); 253 } 254 255 cv_init(&iser_conn->up_cv, "iser_cv"); 256 sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex"); 257 mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "flush_lock", NULL, MTX_DEF); 258 cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv"); 259 mtx_init(&iser_conn->ib_conn.lock, "lock", NULL, MTX_DEF); 260 261 ic = &iser_conn->icl_conn; 262 ic->ic_lock = lock; 263 ic->ic_name = name; 264 ic->ic_offload = strdup("iser", M_TEMP); 265 ic->ic_iser = true; 266 ic->ic_unmapped = true; 267 268 return (ic); 269 } 270 271 void 272 iser_conn_free(struct icl_conn *ic) 273 { 274 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 275 276 iser_conn_release(ic); 277 cv_destroy(&iser_conn->ib_conn.beacon.flush_cv); 278 mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock); 279 sx_destroy(&iser_conn->state_mutex); 280 cv_destroy(&iser_conn->up_cv); 281 kobj_delete((struct kobj *)iser_conn, M_ICL_ISER); 282 refcount_release(&icl_iser_ncons); 283 } 284 285 int 286 iser_conn_handoff(struct icl_conn *ic, int fd) 287 { 288 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 289 int error = 0; 290 291 sx_xlock(&iser_conn->state_mutex); 292 if (iser_conn->state != ISER_CONN_UP) { 293 error = EINVAL; 294 ISER_ERR("iser_conn %p state is %d, teardown started\n", 295 iser_conn, iser_conn->state); 296 goto out; 297 } 298 299 error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags); 300 if (error) 301 goto out; 302 303 error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx); 304 if (error) 305 goto post_error; 306 307 iser_conn->handoff_done = true; 308 309 sx_xunlock(&iser_conn->state_mutex); 310 return (error); 311 312 post_error: 313 iser_free_rx_descriptors(iser_conn); 314 out: 315 sx_xunlock(&iser_conn->state_mutex); 316 return (error); 317 318 } 319 320 /** 321 * Frees all conn objects 322 */ 323 static void 324 iser_conn_release(struct icl_conn *ic) 325 { 326 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 327 struct ib_conn *ib_conn = &iser_conn->ib_conn; 328 struct iser_conn *curr, *tmp; 329 330 mtx_lock(&ig.connlist_mutex); 331 /* 332 * Search for iser connection in global list. 333 * It may not be there in case of failure in connection establishment 334 * stage. 335 */ 336 list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) { 337 if (iser_conn == curr) { 338 ISER_WARN("found iser_conn %p", iser_conn); 339 list_del(&iser_conn->conn_list); 340 } 341 } 342 mtx_unlock(&ig.connlist_mutex); 343 344 /* 345 * In case we reconnecting or removing session, we need to 346 * release IB resources (which is safe to call more than once). 347 */ 348 sx_xlock(&iser_conn->state_mutex); 349 iser_free_ib_conn_res(iser_conn, true); 350 sx_xunlock(&iser_conn->state_mutex); 351 352 if (ib_conn->cma_id != NULL) { 353 rdma_destroy_id(ib_conn->cma_id); 354 ib_conn->cma_id = NULL; 355 } 356 357 } 358 359 void 360 iser_conn_close(struct icl_conn *ic) 361 { 362 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 363 364 ISER_INFO("closing conn %p", iser_conn); 365 366 sx_xlock(&iser_conn->state_mutex); 367 /* 368 * In case iser connection is waiting on conditional variable 369 * (state PENDING) and we try to close it before connection establishment, 370 * we need to signal it to continue releasing connection properly. 371 */ 372 if (!iser_conn_terminate(iser_conn) && iser_conn->state == ISER_CONN_PENDING) 373 cv_signal(&iser_conn->up_cv); 374 sx_xunlock(&iser_conn->state_mutex); 375 376 } 377 378 int 379 iser_conn_connect(struct icl_conn *ic, int domain, int socktype, 380 int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa) 381 { 382 struct iser_conn *iser_conn = icl_to_iser_conn(ic); 383 struct ib_conn *ib_conn = &iser_conn->ib_conn; 384 int err = 0; 385 386 iser_conn_release(ic); 387 388 sx_xlock(&iser_conn->state_mutex); 389 /* the device is known only --after-- address resolution */ 390 ib_conn->device = NULL; 391 iser_conn->handoff_done = false; 392 393 iser_conn->state = ISER_CONN_PENDING; 394 395 ib_conn->cma_id = rdma_create_id(iser_cma_handler, (void *)iser_conn, 396 RDMA_PS_TCP, IB_QPT_RC); 397 if (IS_ERR(ib_conn->cma_id)) { 398 err = -PTR_ERR(ib_conn->cma_id); 399 ISER_ERR("rdma_create_id failed: %d", err); 400 goto id_failure; 401 } 402 403 err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000); 404 if (err) { 405 ISER_ERR("rdma_resolve_addr failed: %d", err); 406 if (err < 0) 407 err = -err; 408 goto addr_failure; 409 } 410 411 ISER_DBG("before cv_wait: %p", iser_conn); 412 cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex); 413 ISER_DBG("after cv_wait: %p", iser_conn); 414 415 if (iser_conn->state != ISER_CONN_UP) { 416 err = EIO; 417 goto addr_failure; 418 } 419 420 err = iser_alloc_login_buf(iser_conn); 421 if (err) 422 goto addr_failure; 423 sx_xunlock(&iser_conn->state_mutex); 424 425 mtx_lock(&ig.connlist_mutex); 426 list_add(&iser_conn->conn_list, &ig.connlist); 427 mtx_unlock(&ig.connlist_mutex); 428 429 return (0); 430 431 id_failure: 432 ib_conn->cma_id = NULL; 433 addr_failure: 434 sx_xunlock(&iser_conn->state_mutex); 435 return (err); 436 } 437 438 int 439 iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip, 440 struct ccb_scsiio *csio, 441 uint32_t *task_tagp, void **prvp) 442 { 443 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); 444 445 *prvp = ip; 446 iser_pdu->csio = csio; 447 448 return (0); 449 } 450 451 void 452 iser_conn_task_done(struct icl_conn *ic, void *prv) 453 { 454 struct icl_pdu *ip = prv; 455 struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip); 456 struct iser_device *device = iser_pdu->iser_conn->ib_conn.device; 457 struct iser_tx_desc *tx_desc = &iser_pdu->desc; 458 459 if (iser_pdu->dir[ISER_DIR_IN]) { 460 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN); 461 iser_dma_unmap_task_data(iser_pdu, 462 &iser_pdu->data[ISER_DIR_IN], 463 DMA_FROM_DEVICE); 464 } 465 466 if (iser_pdu->dir[ISER_DIR_OUT]) { 467 iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT); 468 iser_dma_unmap_task_data(iser_pdu, 469 &iser_pdu->data[ISER_DIR_OUT], 470 DMA_TO_DEVICE); 471 } 472 473 if (likely(tx_desc->mapped)) { 474 ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, 475 ISER_HEADERS_LEN, DMA_TO_DEVICE); 476 tx_desc->mapped = false; 477 } 478 479 iser_pdu_free(ic, ip); 480 } 481 482 static int 483 iser_limits(size_t *limitp) 484 { 485 *limitp = 128 * 1024; 486 487 return (0); 488 } 489 490 static int 491 icl_iser_load(void) 492 { 493 int error; 494 495 ISER_DBG("Starting iSER datamover..."); 496 497 icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu), 498 NULL, NULL, NULL, NULL, 499 UMA_ALIGN_PTR, 0); 500 /* FIXME: Check rc */ 501 502 refcount_init(&icl_iser_ncons, 0); 503 504 error = icl_register("iser", true, 0, iser_limits, iser_new_conn); 505 KASSERT(error == 0, ("failed to register iser")); 506 507 memset(&ig, 0, sizeof(struct iser_global)); 508 509 /* device init is called only after the first addr resolution */ 510 sx_init(&ig.device_list_mutex, "global_device_lock"); 511 INIT_LIST_HEAD(&ig.device_list); 512 mtx_init(&ig.connlist_mutex, "global_conn_lock", NULL, MTX_DEF); 513 INIT_LIST_HEAD(&ig.connlist); 514 sx_init(&ig.close_conns_mutex, "global_close_conns_lock"); 515 516 return (error); 517 } 518 519 static int 520 icl_iser_unload(void) 521 { 522 ISER_DBG("Removing iSER datamover..."); 523 524 if (icl_iser_ncons != 0) 525 return (EBUSY); 526 527 sx_destroy(&ig.close_conns_mutex); 528 mtx_destroy(&ig.connlist_mutex); 529 sx_destroy(&ig.device_list_mutex); 530 531 icl_unregister("iser", true); 532 533 uma_zdestroy(icl_pdu_zone); 534 535 return (0); 536 } 537 538 static int 539 icl_iser_modevent(module_t mod, int what, void *arg) 540 { 541 switch (what) { 542 case MOD_LOAD: 543 return (icl_iser_load()); 544 case MOD_UNLOAD: 545 return (icl_iser_unload()); 546 default: 547 return (EINVAL); 548 } 549 } 550 551 moduledata_t icl_iser_data = { 552 .name = "icl_iser", 553 .evhand = icl_iser_modevent, 554 .priv = 0 555 }; 556 557 DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); 558 MODULE_DEPEND(icl_iser, icl, 1, 1, 1); 559 MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1); 560 MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1); 561 MODULE_VERSION(icl_iser, 1); 562