1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/cpuvar.h> 26 #include <sys/conf.h> 27 #include <sys/file.h> 28 #include <sys/ddi.h> 29 #include <sys/sunddi.h> 30 #include <sys/modctl.h> 31 32 #include <sys/socket.h> 33 #include <sys/strsubr.h> 34 #include <sys/sysmacros.h> 35 36 #include <sys/socketvar.h> 37 #include <netinet/in.h> 38 39 #include <sys/idm/idm.h> 40 #include <sys/idm/idm_so.h> 41 42 #define IDM_NAME_VERSION "iSCSI Data Mover" 43 44 extern struct mod_ops mod_miscops; 45 extern struct mod_ops mod_miscops; 46 47 static struct modlmisc modlmisc = { 48 &mod_miscops, /* Type of module */ 49 IDM_NAME_VERSION 50 }; 51 52 static struct modlinkage modlinkage = { 53 MODREV_1, (void *)&modlmisc, NULL 54 }; 55 56 extern void idm_wd_thread(void *arg); 57 58 static int _idm_init(void); 59 static int _idm_fini(void); 60 static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf); 61 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf); 62 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf); 63 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf); 64 static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, 65 idm_abort_type_t abort_type); 66 static void idm_task_aborted(idm_task_t *idt, idm_status_t status); 67 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, 68 int sleepflag); 69 70 boolean_t idm_conn_logging = 0; 71 boolean_t idm_svc_logging = 0; 72 #ifdef DEBUG 73 boolean_t idm_pattern_checking = 1; 74 #else 75 boolean_t idm_pattern_checking = 0; 76 #endif 77 78 /* 79 * Potential tuneable for the maximum number of tasks. Default to 80 * IDM_TASKIDS_MAX 81 */ 82 83 uint32_t idm_max_taskids = IDM_TASKIDS_MAX; 84 85 /* 86 * Global list of transport handles 87 * These are listed in preferential order, so we can simply take the 88 * first "it_conn_is_capable" hit. Note also that the order maps to 89 * the order of the idm_transport_type_t list. 90 */ 91 idm_transport_t idm_transport_list[] = { 92 93 /* iSER on InfiniBand transport handle */ 94 {IDM_TRANSPORT_TYPE_ISER, /* type */ 95 "/devices/ib/iser@0:iser", /* device path */ 96 NULL, /* LDI handle */ 97 NULL, /* transport ops */ 98 NULL}, /* transport caps */ 99 100 /* IDM native sockets transport handle */ 101 {IDM_TRANSPORT_TYPE_SOCKETS, /* type */ 102 NULL, /* device path */ 103 NULL, /* LDI handle */ 104 NULL, /* transport ops */ 105 NULL} /* transport caps */ 106 107 }; 108 109 int 110 _init(void) 111 { 112 int rc; 113 114 if ((rc = _idm_init()) != 0) { 115 return (rc); 116 } 117 118 return (mod_install(&modlinkage)); 119 } 120 121 int 122 _fini(void) 123 { 124 int rc; 125 126 if ((rc = _idm_fini()) != 0) { 127 return (rc); 128 } 129 130 if ((rc = mod_remove(&modlinkage)) != 0) { 131 return (rc); 132 } 133 134 return (rc); 135 } 136 137 int 138 _info(struct modinfo *modinfop) 139 { 140 return (mod_info(&modlinkage, modinfop)); 141 } 142 143 /* 144 * idm_transport_register() 145 * 146 * Provides a mechanism for an IDM transport driver to register its 147 * transport ops and caps with the IDM kernel module. Invoked during 148 * a transport driver's attach routine. 149 */ 150 idm_status_t 151 idm_transport_register(idm_transport_attr_t *attr) 152 { 153 ASSERT(attr->it_ops != NULL); 154 ASSERT(attr->it_caps != NULL); 155 156 switch (attr->type) { 157 /* All known non-native transports here; for now, iSER */ 158 case IDM_TRANSPORT_TYPE_ISER: 159 idm_transport_list[attr->type].it_ops = attr->it_ops; 160 idm_transport_list[attr->type].it_caps = attr->it_caps; 161 return (IDM_STATUS_SUCCESS); 162 163 default: 164 cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in " 165 "idm_transport_register", attr->type); 166 return (IDM_STATUS_SUCCESS); 167 } 168 } 169 170 /* 171 * idm_ini_conn_create 172 * 173 * This function is invoked by the iSCSI layer to create a connection context. 174 * This does not actually establish the socket connection. 175 * 176 * cr - Connection request parameters 177 * new_con - Output parameter that contains the new request if successful 178 * 179 */ 180 idm_status_t 181 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con) 182 { 183 idm_transport_t *it; 184 idm_conn_t *ic; 185 int rc; 186 187 it = idm_transport_lookup(cr); 188 189 retry: 190 ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type, 191 &cr->icr_conn_ops); 192 193 bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr, 194 sizeof (cr->cr_ini_dst_addr)); 195 196 /* create the transport-specific connection components */ 197 rc = it->it_ops->it_ini_conn_create(cr, ic); 198 if (rc != IDM_STATUS_SUCCESS) { 199 /* cleanup the failed connection */ 200 idm_conn_destroy_common(ic); 201 202 /* 203 * It is possible for an IB client to connect to 204 * an ethernet-only client via an IB-eth gateway. 205 * Therefore, if we are attempting to use iSER and 206 * fail, retry with sockets before ultimately 207 * failing the connection. 208 */ 209 if (it->it_type == IDM_TRANSPORT_TYPE_ISER) { 210 it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]; 211 goto retry; 212 } 213 214 return (IDM_STATUS_FAIL); 215 } 216 217 *new_con = ic; 218 219 mutex_enter(&idm.idm_global_mutex); 220 list_insert_tail(&idm.idm_ini_conn_list, ic); 221 mutex_exit(&idm.idm_global_mutex); 222 223 return (IDM_STATUS_SUCCESS); 224 } 225 226 /* 227 * idm_ini_conn_destroy 228 * 229 * Releases any resources associated with the connection. This is the 230 * complement to idm_ini_conn_create. 231 * ic - idm_conn_t structure representing the relevant connection 232 * 233 */ 234 void 235 idm_ini_conn_destroy_task(void *ic_void) 236 { 237 idm_conn_t *ic = ic_void; 238 239 ic->ic_transport_ops->it_ini_conn_destroy(ic); 240 idm_conn_destroy_common(ic); 241 } 242 243 void 244 idm_ini_conn_destroy(idm_conn_t *ic) 245 { 246 /* 247 * It's reasonable for the initiator to call idm_ini_conn_destroy 248 * from within the context of the CN_CONNECT_DESTROY notification. 249 * That's a problem since we want to destroy the taskq for the 250 * state machine associated with the connection. Remove the 251 * connection from the list right away then handle the remaining 252 * work via the idm_global_taskq. 253 */ 254 mutex_enter(&idm.idm_global_mutex); 255 list_remove(&idm.idm_ini_conn_list, ic); 256 mutex_exit(&idm.idm_global_mutex); 257 258 if (taskq_dispatch(idm.idm_global_taskq, 259 &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == TASKQID_INVALID) { 260 cmn_err(CE_WARN, 261 "idm_ini_conn_destroy: Couldn't dispatch task"); 262 } 263 } 264 265 /* 266 * idm_ini_conn_connect 267 * 268 * Establish connection to the remote system identified in idm_conn_t. 269 * The connection parameters including the remote IP address were established 270 * in the call to idm_ini_conn_create. The IDM state machine will 271 * perform client notifications as necessary to prompt the initiator through 272 * the login process. IDM also keeps a timer running so that if the login 273 * process doesn't complete in a timely manner it will fail. 274 * 275 * ic - idm_conn_t structure representing the relevant connection 276 * 277 * Returns success if the connection was established, otherwise some kind 278 * of meaningful error code. 279 * 280 * Upon return the login has either failed or is loggin in (ffp) 281 */ 282 idm_status_t 283 idm_ini_conn_connect(idm_conn_t *ic) 284 { 285 idm_status_t rc; 286 287 rc = idm_conn_sm_init(ic); 288 if (rc != IDM_STATUS_SUCCESS) { 289 return (ic->ic_conn_sm_status); 290 } 291 292 /* Hold connection until we return */ 293 idm_conn_hold(ic); 294 295 /* Kick state machine */ 296 idm_conn_event(ic, CE_CONNECT_REQ, (uintptr_t)NULL); 297 298 /* Wait for login flag */ 299 mutex_enter(&ic->ic_state_mutex); 300 while (!(ic->ic_state_flags & CF_LOGIN_READY) && 301 !(ic->ic_state_flags & CF_ERROR)) { 302 cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex); 303 } 304 305 /* 306 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to 307 * idm_notify_client has already been generated by the idm conn 308 * state machine. If connection fails any time after this 309 * check, we will detect it in iscsi_login. 310 */ 311 if (ic->ic_state_flags & CF_ERROR) { 312 rc = ic->ic_conn_sm_status; 313 } 314 mutex_exit(&ic->ic_state_mutex); 315 idm_conn_rele(ic); 316 317 return (rc); 318 } 319 320 /* 321 * idm_ini_conn_disconnect 322 * 323 * Forces a connection (previously established using idm_ini_conn_connect) 324 * to perform a controlled shutdown, cleaning up any outstanding requests. 325 * 326 * ic - idm_conn_t structure representing the relevant connection 327 * 328 * This is asynchronous and will return before the connection is properly 329 * shutdown 330 */ 331 /* ARGSUSED */ 332 void 333 idm_ini_conn_disconnect(idm_conn_t *ic) 334 { 335 idm_conn_event(ic, CE_TRANSPORT_FAIL, (uintptr_t)NULL); 336 } 337 338 /* 339 * idm_ini_conn_disconnect_wait 340 * 341 * Forces a connection (previously established using idm_ini_conn_connect) 342 * to perform a controlled shutdown. Blocks until the connection is 343 * disconnected. 344 * 345 * ic - idm_conn_t structure representing the relevant connection 346 */ 347 /* ARGSUSED */ 348 void 349 idm_ini_conn_disconnect_sync(idm_conn_t *ic) 350 { 351 mutex_enter(&ic->ic_state_mutex); 352 if ((ic->ic_state != CS_S9_INIT_ERROR) && 353 (ic->ic_state != CS_S11_COMPLETE)) { 354 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, (uintptr_t)NULL, 355 CT_NONE); 356 while ((ic->ic_state != CS_S9_INIT_ERROR) && 357 (ic->ic_state != CS_S11_COMPLETE)) 358 cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex); 359 } 360 mutex_exit(&ic->ic_state_mutex); 361 } 362 363 /* 364 * idm_tgt_svc_create 365 * 366 * The target calls this service to obtain a service context for each available 367 * transport, starting a service of each type related to the IP address and port 368 * passed. The idm_svc_req_t contains the service parameters. 369 */ 370 idm_status_t 371 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc) 372 { 373 idm_transport_type_t type; 374 idm_transport_t *it; 375 idm_svc_t *is; 376 int rc; 377 378 *new_svc = NULL; 379 is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP); 380 381 /* Initialize transport-agnostic components of the service handle */ 382 is->is_svc_req = *sr; 383 mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL); 384 cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL); 385 mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL); 386 cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL); 387 idm_refcnt_init(&is->is_refcnt, is); 388 389 /* 390 * Make sure all available transports are setup. We call this now 391 * instead of at initialization time in case IB has become available 392 * since we started (hotplug, etc). 393 */ 394 idm_transport_setup(sr->sr_li, B_FALSE); 395 396 /* 397 * Loop through the transports, configuring the transport-specific 398 * components of each one. 399 */ 400 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 401 402 it = &idm_transport_list[type]; 403 /* 404 * If it_ops is NULL then the transport is unconfigured 405 * and we shouldn't try to start the service. 406 */ 407 if (it->it_ops == NULL) { 408 continue; 409 } 410 411 rc = it->it_ops->it_tgt_svc_create(sr, is); 412 if (rc != IDM_STATUS_SUCCESS) { 413 /* Teardown any configured services */ 414 while (type--) { 415 it = &idm_transport_list[type]; 416 if (it->it_ops == NULL) { 417 continue; 418 } 419 it->it_ops->it_tgt_svc_destroy(is); 420 } 421 /* Free the svc context and return */ 422 kmem_free(is, sizeof (idm_svc_t)); 423 return (rc); 424 } 425 } 426 427 *new_svc = is; 428 429 mutex_enter(&idm.idm_global_mutex); 430 list_insert_tail(&idm.idm_tgt_svc_list, is); 431 mutex_exit(&idm.idm_global_mutex); 432 433 return (IDM_STATUS_SUCCESS); 434 } 435 436 /* 437 * idm_tgt_svc_destroy 438 * 439 * is - idm_svc_t returned by the call to idm_tgt_svc_create 440 * 441 * Cleanup any resources associated with the idm_svc_t. 442 */ 443 void 444 idm_tgt_svc_destroy(idm_svc_t *is) 445 { 446 idm_transport_type_t type; 447 idm_transport_t *it; 448 449 mutex_enter(&idm.idm_global_mutex); 450 /* remove this service from the global list */ 451 list_remove(&idm.idm_tgt_svc_list, is); 452 /* wakeup any waiters for service change */ 453 cv_broadcast(&idm.idm_tgt_svc_cv); 454 mutex_exit(&idm.idm_global_mutex); 455 456 /* teardown each transport-specific service */ 457 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 458 it = &idm_transport_list[type]; 459 if (it->it_ops == NULL) { 460 continue; 461 } 462 463 it->it_ops->it_tgt_svc_destroy(is); 464 } 465 466 /* tear down the svc resources */ 467 idm_refcnt_destroy(&is->is_refcnt); 468 cv_destroy(&is->is_count_cv); 469 mutex_destroy(&is->is_count_mutex); 470 cv_destroy(&is->is_cv); 471 mutex_destroy(&is->is_mutex); 472 473 /* free the svc handle */ 474 kmem_free(is, sizeof (idm_svc_t)); 475 } 476 477 void 478 idm_tgt_svc_hold(idm_svc_t *is) 479 { 480 idm_refcnt_hold(&is->is_refcnt); 481 } 482 483 void 484 idm_tgt_svc_rele_and_destroy(idm_svc_t *is) 485 { 486 idm_refcnt_rele_and_destroy(&is->is_refcnt, 487 (idm_refcnt_cb_t *)&idm_tgt_svc_destroy); 488 } 489 490 /* 491 * idm_tgt_svc_online 492 * 493 * is - idm_svc_t returned by the call to idm_tgt_svc_create 494 * 495 * Online each transport service, as we want this target to be accessible 496 * via any configured transport. 497 * 498 * When the initiator establishes a new connection to the target, IDM will 499 * call the "new connect" callback defined in the idm_svc_req_t structure 500 * and it will pass an idm_conn_t structure representing that new connection. 501 */ 502 idm_status_t 503 idm_tgt_svc_online(idm_svc_t *is) 504 { 505 506 idm_transport_type_t type, last_type; 507 idm_transport_t *it; 508 int rc = IDM_STATUS_SUCCESS; 509 510 mutex_enter(&is->is_mutex); 511 if (is->is_online == 0) { 512 /* Walk through each of the transports and online them */ 513 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 514 it = &idm_transport_list[type]; 515 if (it->it_ops == NULL) { 516 /* transport is not registered */ 517 continue; 518 } 519 520 mutex_exit(&is->is_mutex); 521 rc = it->it_ops->it_tgt_svc_online(is); 522 mutex_enter(&is->is_mutex); 523 if (rc != IDM_STATUS_SUCCESS) { 524 last_type = type; 525 break; 526 } 527 } 528 if (rc != IDM_STATUS_SUCCESS) { 529 /* 530 * The last transport failed to online. 531 * Offline any transport onlined above and 532 * do not online the target. 533 */ 534 for (type = 0; type < last_type; type++) { 535 it = &idm_transport_list[type]; 536 if (it->it_ops == NULL) { 537 /* transport is not registered */ 538 continue; 539 } 540 541 mutex_exit(&is->is_mutex); 542 it->it_ops->it_tgt_svc_offline(is); 543 mutex_enter(&is->is_mutex); 544 } 545 } else { 546 /* Target service now online */ 547 is->is_online = 1; 548 } 549 } else { 550 /* Target service already online, just bump the count */ 551 is->is_online++; 552 } 553 mutex_exit(&is->is_mutex); 554 555 return (rc); 556 } 557 558 /* 559 * idm_tgt_svc_offline 560 * 561 * is - idm_svc_t returned by the call to idm_tgt_svc_create 562 * 563 * Shutdown any online target services. 564 */ 565 void 566 idm_tgt_svc_offline(idm_svc_t *is) 567 { 568 idm_transport_type_t type; 569 idm_transport_t *it; 570 571 mutex_enter(&is->is_mutex); 572 is->is_online--; 573 if (is->is_online == 0) { 574 /* Walk through each of the transports and offline them */ 575 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 576 it = &idm_transport_list[type]; 577 if (it->it_ops == NULL) { 578 /* transport is not registered */ 579 continue; 580 } 581 582 mutex_exit(&is->is_mutex); 583 it->it_ops->it_tgt_svc_offline(is); 584 mutex_enter(&is->is_mutex); 585 } 586 } 587 mutex_exit(&is->is_mutex); 588 } 589 590 /* 591 * idm_tgt_svc_lookup 592 * 593 * Lookup a service instance listening on the specified port 594 */ 595 596 idm_svc_t * 597 idm_tgt_svc_lookup(uint16_t port) 598 { 599 idm_svc_t *result; 600 601 retry: 602 mutex_enter(&idm.idm_global_mutex); 603 for (result = list_head(&idm.idm_tgt_svc_list); 604 result != NULL; 605 result = list_next(&idm.idm_tgt_svc_list, result)) { 606 if (result->is_svc_req.sr_port == port) { 607 if (result->is_online == 0) { 608 /* 609 * A service exists on this port, but it 610 * is going away, wait for it to cleanup. 611 */ 612 cv_wait(&idm.idm_tgt_svc_cv, 613 &idm.idm_global_mutex); 614 mutex_exit(&idm.idm_global_mutex); 615 goto retry; 616 } 617 idm_tgt_svc_hold(result); 618 mutex_exit(&idm.idm_global_mutex); 619 return (result); 620 } 621 } 622 mutex_exit(&idm.idm_global_mutex); 623 624 return (NULL); 625 } 626 627 /* 628 * idm_negotiate_key_values() 629 * Give IDM level a chance to negotiate any login parameters it should own. 630 * -- leave unhandled parameters alone on request_nvl 631 * -- move all handled parameters to response_nvl with an appropriate response 632 * -- also add an entry to negotiated_nvl for any accepted parameters 633 */ 634 kv_status_t 635 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl, 636 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 637 { 638 ASSERT(ic->ic_transport_ops != NULL); 639 return (ic->ic_transport_ops->it_negotiate_key_values(ic, 640 request_nvl, response_nvl, negotiated_nvl)); 641 } 642 643 /* 644 * idm_notice_key_values() 645 * Activate at the IDM level any parameters that have been negotiated. 646 * Passes the set of key value pairs to the transport for activation. 647 * This will be invoked as the connection is entering full-feature mode. 648 */ 649 void 650 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl) 651 { 652 ASSERT(ic->ic_transport_ops != NULL); 653 ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl); 654 } 655 656 /* 657 * idm_declare_key_values() 658 * Activate an operational set of declarative parameters from the config_nvl, 659 * and return the selected values in the outgoing_nvl. 660 */ 661 kv_status_t 662 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl, 663 nvlist_t *outgoing_nvl) 664 { 665 ASSERT(ic->ic_transport_ops != NULL); 666 return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl, 667 outgoing_nvl)); 668 } 669 670 /* 671 * idm_buf_tx_to_ini 672 * 673 * This is IDM's implementation of the 'Put_Data' operational primitive. 674 * 675 * This function is invoked by a target iSCSI layer to request its local 676 * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer 677 * on the remote iSCSI node. The I/O buffer represented by 'idb' is 678 * transferred to the initiator associated with task 'idt'. The connection 679 * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS, 680 * and the callback (idb->idb_buf_cb) at transfer completion are 681 * provided as input. 682 * 683 * This data transfer takes place transparently to the remote iSCSI layer, 684 * i.e. without its participation. 685 * 686 * Using sockets, IDM implements the data transfer by segmenting the data 687 * buffer into appropriately sized iSCSI PDUs and transmitting them to the 688 * initiator. iSER performs the transfer using RDMA write. 689 * 690 */ 691 idm_status_t 692 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb, 693 uint32_t offset, uint32_t xfer_len, 694 idm_buf_cb_t idb_buf_cb, void *cb_arg) 695 { 696 idm_status_t rc; 697 698 idb->idb_bufoffset = offset; 699 idb->idb_xfer_len = xfer_len; 700 idb->idb_buf_cb = idb_buf_cb; 701 idb->idb_cb_arg = cb_arg; 702 gethrestime(&idb->idb_xfer_start); 703 704 /* 705 * Buffer should not contain the pattern. If the pattern is 706 * present then we've been asked to transmit initialized data 707 */ 708 IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT); 709 710 mutex_enter(&idt->idt_mutex); 711 switch (idt->idt_state) { 712 case TASK_ACTIVE: 713 idt->idt_tx_to_ini_start++; 714 idm_task_hold(idt); 715 idm_buf_bind_in_locked(idt, idb); 716 idb->idb_in_transport = B_TRUE; 717 rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini) 718 (idt, idb); 719 return (rc); 720 721 case TASK_SUSPENDING: 722 case TASK_SUSPENDED: 723 /* 724 * Bind buffer but don't start a transfer since the task 725 * is suspended 726 */ 727 idm_buf_bind_in_locked(idt, idb); 728 mutex_exit(&idt->idt_mutex); 729 return (IDM_STATUS_SUCCESS); 730 731 case TASK_ABORTING: 732 case TASK_ABORTED: 733 /* 734 * Once the task is aborted, any buffers added to the 735 * idt_inbufv will never get cleaned up, so just return 736 * SUCCESS. The buffer should get cleaned up by the 737 * client or framework once task_aborted has completed. 738 */ 739 mutex_exit(&idt->idt_mutex); 740 return (IDM_STATUS_SUCCESS); 741 742 default: 743 ASSERT(0); 744 break; 745 } 746 mutex_exit(&idt->idt_mutex); 747 748 return (IDM_STATUS_FAIL); 749 } 750 751 /* 752 * idm_buf_rx_from_ini 753 * 754 * This is IDM's implementation of the 'Get_Data' operational primitive. 755 * 756 * This function is invoked by a target iSCSI layer to request its local 757 * Datamover layer to retrieve certain data identified by the R2T PDU from the 758 * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be 759 * mapped to the respective buffer by the task tags (ITT & TTT). 760 * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and 761 * the callback (idb->idb_buf_cb) notification for data transfer completion are 762 * are provided as input. 763 * 764 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local 765 * Datamover layer, the local and remote Datamover layers transparently bring 766 * about the data transfer requested by the R2T PDU, without the participation 767 * of the iSCSI layers. 768 * 769 * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out() 770 * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read. 771 * 772 */ 773 idm_status_t 774 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb, 775 uint32_t offset, uint32_t xfer_len, 776 idm_buf_cb_t idb_buf_cb, void *cb_arg) 777 { 778 idm_status_t rc; 779 780 idb->idb_bufoffset = offset; 781 idb->idb_xfer_len = xfer_len; 782 idb->idb_buf_cb = idb_buf_cb; 783 idb->idb_cb_arg = cb_arg; 784 gethrestime(&idb->idb_xfer_start); 785 786 /* 787 * "In" buf list is for "Data In" PDU's, "Out" buf list is for 788 * "Data Out" PDU's 789 */ 790 mutex_enter(&idt->idt_mutex); 791 switch (idt->idt_state) { 792 case TASK_ACTIVE: 793 idt->idt_rx_from_ini_start++; 794 idm_task_hold(idt); 795 idm_buf_bind_out_locked(idt, idb); 796 idb->idb_in_transport = B_TRUE; 797 rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini) 798 (idt, idb); 799 return (rc); 800 case TASK_SUSPENDING: 801 case TASK_SUSPENDED: 802 case TASK_ABORTING: 803 case TASK_ABORTED: 804 /* 805 * Bind buffer but don't start a transfer since the task 806 * is suspended 807 */ 808 idm_buf_bind_out_locked(idt, idb); 809 mutex_exit(&idt->idt_mutex); 810 return (IDM_STATUS_SUCCESS); 811 default: 812 ASSERT(0); 813 break; 814 } 815 mutex_exit(&idt->idt_mutex); 816 817 return (IDM_STATUS_FAIL); 818 } 819 820 /* 821 * idm_buf_tx_to_ini_done 822 * 823 * The transport calls this after it has completed a transfer requested by 824 * a call to transport_buf_tx_to_ini 825 * 826 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning. 827 * idt may be freed after the call to idb->idb_buf_cb. 828 */ 829 void 830 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status) 831 { 832 ASSERT(mutex_owned(&idt->idt_mutex)); 833 idb->idb_in_transport = B_FALSE; 834 idb->idb_tx_thread = B_FALSE; 835 idt->idt_tx_to_ini_done++; 836 gethrestime(&idb->idb_xfer_done); 837 838 /* 839 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or 840 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes 841 * to 0. 842 */ 843 idm_task_rele(idt); 844 idb->idb_status = status; 845 846 switch (idt->idt_state) { 847 case TASK_ACTIVE: 848 idt->idt_ic->ic_timestamp = ddi_get_lbolt(); 849 idm_buf_unbind_in_locked(idt, idb); 850 mutex_exit(&idt->idt_mutex); 851 (*idb->idb_buf_cb)(idb, status); 852 return; 853 case TASK_SUSPENDING: 854 case TASK_SUSPENDED: 855 case TASK_ABORTING: 856 case TASK_ABORTED: 857 /* 858 * To keep things simple we will ignore the case where the 859 * transfer was successful and leave all buffers bound to the 860 * task. This allows us to also ignore the case where we've 861 * been asked to abort a task but the last transfer of the 862 * task has completed. IDM has no idea whether this was, in 863 * fact, the last transfer of the task so it would be difficult 864 * to handle this case. Everything should get sorted out again 865 * after task reassignment is complete. 866 * 867 * In the case of TASK_ABORTING we could conceivably call the 868 * buffer callback here but the timing of when the client's 869 * client_task_aborted callback is invoked vs. when the client's 870 * buffer callback gets invoked gets sticky. We don't want 871 * the client to here from us again after the call to 872 * client_task_aborted() but we don't want to give it a bunch 873 * of failed buffer transfers until we've called 874 * client_task_aborted(). Instead we'll just leave all the 875 * buffers bound and allow the client to cleanup. 876 */ 877 break; 878 default: 879 ASSERT(0); 880 } 881 mutex_exit(&idt->idt_mutex); 882 } 883 884 /* 885 * idm_buf_rx_from_ini_done 886 * 887 * The transport calls this after it has completed a transfer requested by 888 * a call totransport_buf_tx_to_ini 889 * 890 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning. 891 * idt may be freed after the call to idb->idb_buf_cb. 892 */ 893 void 894 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status) 895 { 896 ASSERT(mutex_owned(&idt->idt_mutex)); 897 idb->idb_in_transport = B_FALSE; 898 idt->idt_rx_from_ini_done++; 899 gethrestime(&idb->idb_xfer_done); 900 901 /* 902 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or 903 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes 904 * to 0. 905 */ 906 idm_task_rele(idt); 907 idb->idb_status = status; 908 909 if (status == IDM_STATUS_SUCCESS) { 910 /* 911 * Buffer should not contain the pattern. If it does then 912 * we did not get the data from the remote host. 913 */ 914 IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT); 915 } 916 917 switch (idt->idt_state) { 918 case TASK_ACTIVE: 919 idt->idt_ic->ic_timestamp = ddi_get_lbolt(); 920 idm_buf_unbind_out_locked(idt, idb); 921 mutex_exit(&idt->idt_mutex); 922 (*idb->idb_buf_cb)(idb, status); 923 return; 924 case TASK_SUSPENDING: 925 case TASK_SUSPENDED: 926 case TASK_ABORTING: 927 case TASK_ABORTED: 928 /* 929 * To keep things simple we will ignore the case where the 930 * transfer was successful and leave all buffers bound to the 931 * task. This allows us to also ignore the case where we've 932 * been asked to abort a task but the last transfer of the 933 * task has completed. IDM has no idea whether this was, in 934 * fact, the last transfer of the task so it would be difficult 935 * to handle this case. Everything should get sorted out again 936 * after task reassignment is complete. 937 * 938 * In the case of TASK_ABORTING we could conceivably call the 939 * buffer callback here but the timing of when the client's 940 * client_task_aborted callback is invoked vs. when the client's 941 * buffer callback gets invoked gets sticky. We don't want 942 * the client to here from us again after the call to 943 * client_task_aborted() but we don't want to give it a bunch 944 * of failed buffer transfers until we've called 945 * client_task_aborted(). Instead we'll just leave all the 946 * buffers bound and allow the client to cleanup. 947 */ 948 break; 949 default: 950 ASSERT(0); 951 } 952 mutex_exit(&idt->idt_mutex); 953 } 954 955 /* 956 * idm_buf_alloc 957 * 958 * Allocates a buffer handle and registers it for use with the transport 959 * layer. If a buffer is not passed on bufptr, the buffer will be allocated 960 * as well as the handle. 961 * 962 * ic - connection on which the buffer will be transferred 963 * bufptr - allocate memory for buffer if NULL, else assign to buffer 964 * buflen - length of buffer 965 * 966 * Returns idm_buf_t handle if successful, otherwise NULL 967 */ 968 idm_buf_t * 969 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen) 970 { 971 idm_buf_t *buf = NULL; 972 int rc; 973 974 ASSERT(ic != NULL); 975 ASSERT(idm.idm_buf_cache != NULL); 976 ASSERT(buflen > 0); 977 978 /* Don't allocate new buffers if we are not in FFP */ 979 mutex_enter(&ic->ic_state_mutex); 980 if (!ic->ic_ffp) { 981 mutex_exit(&ic->ic_state_mutex); 982 return (NULL); 983 } 984 985 986 idm_conn_hold(ic); 987 mutex_exit(&ic->ic_state_mutex); 988 989 buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP); 990 if (buf == NULL) { 991 idm_conn_rele(ic); 992 return (NULL); 993 } 994 995 buf->idb_ic = ic; 996 buf->idb_buflen = buflen; 997 buf->idb_exp_offset = 0; 998 buf->idb_bufoffset = 0; 999 buf->idb_xfer_len = 0; 1000 buf->idb_magic = IDM_BUF_MAGIC; 1001 buf->idb_in_transport = B_FALSE; 1002 buf->idb_bufbcopy = B_FALSE; 1003 1004 /* 1005 * If bufptr is NULL, we have an implicit request to allocate 1006 * memory for this IDM buffer handle and register it for use 1007 * with the transport. To simplify this, and to give more freedom 1008 * to the transport layer for it's own buffer management, both of 1009 * these actions will take place in the transport layer. 1010 * If bufptr is set, then the caller has allocated memory (or more 1011 * likely it's been passed from an upper layer), and we need only 1012 * register the buffer for use with the transport layer. 1013 */ 1014 if (bufptr == NULL) { 1015 /* 1016 * Allocate a buffer from the transport layer (which 1017 * will also register the buffer for use). 1018 */ 1019 rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen); 1020 if (rc != 0) { 1021 idm_conn_rele(ic); 1022 kmem_cache_free(idm.idm_buf_cache, buf); 1023 return (NULL); 1024 } 1025 /* Set the bufalloc'd flag */ 1026 buf->idb_bufalloc = B_TRUE; 1027 } else { 1028 /* 1029 * For large transfers, Set the passed bufptr into 1030 * the buf handle, and register the handle with the 1031 * transport layer. As memory registration with the 1032 * transport layer is a time/cpu intensive operation, 1033 * for small transfers (up to a pre-defined bcopy 1034 * threshold), use pre-registered memory buffers 1035 * and bcopy data at the appropriate time. 1036 */ 1037 buf->idb_buf = bufptr; 1038 1039 rc = ic->ic_transport_ops->it_buf_setup(buf); 1040 if (rc != 0) { 1041 idm_conn_rele(ic); 1042 kmem_cache_free(idm.idm_buf_cache, buf); 1043 return (NULL); 1044 } 1045 /* 1046 * The transport layer is now expected to set the idb_bufalloc 1047 * correctly to indicate if resources have been allocated. 1048 */ 1049 } 1050 1051 IDM_BUFPAT_SET(buf); 1052 1053 return (buf); 1054 } 1055 1056 /* 1057 * idm_buf_free 1058 * 1059 * Release a buffer handle along with the associated buffer that was allocated 1060 * or assigned with idm_buf_alloc 1061 */ 1062 void 1063 idm_buf_free(idm_buf_t *buf) 1064 { 1065 idm_conn_t *ic = buf->idb_ic; 1066 1067 1068 buf->idb_task_binding = NULL; 1069 1070 if (buf->idb_bufalloc) { 1071 ic->ic_transport_ops->it_buf_free(buf); 1072 } else { 1073 ic->ic_transport_ops->it_buf_teardown(buf); 1074 } 1075 kmem_cache_free(idm.idm_buf_cache, buf); 1076 idm_conn_rele(ic); 1077 } 1078 1079 /* 1080 * idm_buf_bind_in 1081 * 1082 * This function associates a buffer with a task. This is only for use by the 1083 * iSCSI initiator that will have only one buffer per transfer direction 1084 * 1085 */ 1086 void 1087 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf) 1088 { 1089 mutex_enter(&idt->idt_mutex); 1090 idm_buf_bind_in_locked(idt, buf); 1091 mutex_exit(&idt->idt_mutex); 1092 } 1093 1094 static void 1095 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf) 1096 { 1097 buf->idb_task_binding = idt; 1098 buf->idb_ic = idt->idt_ic; 1099 idm_listbuf_insert(&idt->idt_inbufv, buf); 1100 } 1101 1102 void 1103 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf) 1104 { 1105 /* 1106 * For small transfers, the iSER transport delegates the IDM 1107 * layer to bcopy the SCSI Write data for faster IOPS. 1108 */ 1109 if (buf->idb_bufbcopy == B_TRUE) { 1110 1111 bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen); 1112 } 1113 mutex_enter(&idt->idt_mutex); 1114 idm_buf_bind_out_locked(idt, buf); 1115 mutex_exit(&idt->idt_mutex); 1116 } 1117 1118 static void 1119 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf) 1120 { 1121 buf->idb_task_binding = idt; 1122 buf->idb_ic = idt->idt_ic; 1123 idm_listbuf_insert(&idt->idt_outbufv, buf); 1124 } 1125 1126 void 1127 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf) 1128 { 1129 /* 1130 * For small transfers, the iSER transport delegates the IDM 1131 * layer to bcopy the SCSI Read data into the read buufer 1132 * for faster IOPS. 1133 */ 1134 if (buf->idb_bufbcopy == B_TRUE) { 1135 bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen); 1136 } 1137 mutex_enter(&idt->idt_mutex); 1138 idm_buf_unbind_in_locked(idt, buf); 1139 mutex_exit(&idt->idt_mutex); 1140 } 1141 1142 static void 1143 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf) 1144 { 1145 list_remove(&idt->idt_inbufv, buf); 1146 } 1147 1148 void 1149 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf) 1150 { 1151 mutex_enter(&idt->idt_mutex); 1152 idm_buf_unbind_out_locked(idt, buf); 1153 mutex_exit(&idt->idt_mutex); 1154 } 1155 1156 static void 1157 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf) 1158 { 1159 list_remove(&idt->idt_outbufv, buf); 1160 } 1161 1162 /* 1163 * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the 1164 * iSCSI PDU 1165 */ 1166 idm_buf_t * 1167 idm_buf_find(void *lbuf, size_t data_offset) 1168 { 1169 idm_buf_t *idb; 1170 list_t *lst = (list_t *)lbuf; 1171 1172 /* iterate through the list to find the buffer */ 1173 for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) { 1174 1175 ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) || 1176 (idb->idb_bufoffset == 0)); 1177 1178 if ((data_offset >= idb->idb_bufoffset) && 1179 (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) { 1180 1181 return (idb); 1182 } 1183 } 1184 1185 return (NULL); 1186 } 1187 1188 void 1189 idm_bufpat_set(idm_buf_t *idb) 1190 { 1191 idm_bufpat_t *bufpat; 1192 int len, i; 1193 1194 len = idb->idb_buflen; 1195 len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t); 1196 1197 bufpat = idb->idb_buf; 1198 for (i = 0; i < len; i += sizeof (idm_bufpat_t)) { 1199 bufpat->bufpat_idb = idb; 1200 bufpat->bufpat_bufmagic = IDM_BUF_MAGIC; 1201 bufpat->bufpat_offset = i; 1202 bufpat++; 1203 } 1204 } 1205 1206 boolean_t 1207 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type) 1208 { 1209 idm_bufpat_t *bufpat; 1210 int len, i; 1211 1212 len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len; 1213 len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t); 1214 ASSERT(len <= idb->idb_buflen); 1215 bufpat = idb->idb_buf; 1216 1217 /* 1218 * Don't check the pattern in buffers that came from outside IDM 1219 * (these will be buffers from the initiator that we opted not 1220 * to double-buffer) 1221 */ 1222 if (!idb->idb_bufalloc) 1223 return (B_FALSE); 1224 1225 /* 1226 * Return true if we find the pattern anywhere in the buffer 1227 */ 1228 for (i = 0; i < len; i += sizeof (idm_bufpat_t)) { 1229 if (BUFPAT_MATCH(bufpat, idb)) { 1230 IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: " 1231 "idb %p bufpat %p " 1232 "bufpat_idb=%p bufmagic=%08x offset=%08x", 1233 (void *)idb, (void *)bufpat, bufpat->bufpat_idb, 1234 bufpat->bufpat_bufmagic, bufpat->bufpat_offset); 1235 DTRACE_PROBE2(bufpat__pattern__found, 1236 idm_buf_t *, idb, idm_bufpat_t *, bufpat); 1237 if (type == BP_CHECK_ASSERT) { 1238 ASSERT(0); 1239 } 1240 return (B_TRUE); 1241 } 1242 bufpat++; 1243 } 1244 1245 return (B_FALSE); 1246 } 1247 1248 /* 1249 * idm_task_alloc 1250 * 1251 * This function will allocate a idm_task_t structure. A task tag is also 1252 * generated and saved in idt_tt. The task is not active. 1253 */ 1254 idm_task_t * 1255 idm_task_alloc(idm_conn_t *ic) 1256 { 1257 idm_task_t *idt; 1258 1259 ASSERT(ic != NULL); 1260 1261 /* Don't allocate new tasks if we are not in FFP */ 1262 if (!ic->ic_ffp) { 1263 return (NULL); 1264 } 1265 idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP); 1266 if (idt == NULL) { 1267 return (NULL); 1268 } 1269 1270 ASSERT(list_is_empty(&idt->idt_inbufv)); 1271 ASSERT(list_is_empty(&idt->idt_outbufv)); 1272 1273 mutex_enter(&ic->ic_state_mutex); 1274 if (!ic->ic_ffp) { 1275 mutex_exit(&ic->ic_state_mutex); 1276 kmem_cache_free(idm.idm_task_cache, idt); 1277 return (NULL); 1278 } 1279 idm_conn_hold(ic); 1280 mutex_exit(&ic->ic_state_mutex); 1281 1282 idt->idt_state = TASK_IDLE; 1283 idt->idt_ic = ic; 1284 idt->idt_private = NULL; 1285 idt->idt_exp_datasn = 0; 1286 idt->idt_exp_rttsn = 0; 1287 idt->idt_flags = 0; 1288 return (idt); 1289 } 1290 1291 /* 1292 * idm_task_start 1293 * 1294 * Mark the task active and initialize some stats. The caller 1295 * sets up the idm_task_t structure with a prior call to idm_task_alloc(). 1296 * The task service does not function as a task/work engine, it is the 1297 * responsibility of the initiator to start the data transfer and free the 1298 * resources. 1299 */ 1300 void 1301 idm_task_start(idm_task_t *idt, uintptr_t handle) 1302 { 1303 ASSERT(idt != NULL); 1304 1305 /* mark the task as ACTIVE */ 1306 idt->idt_state = TASK_ACTIVE; 1307 idt->idt_client_handle = handle; 1308 idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done = 1309 idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done = 1310 idt->idt_tx_bytes = idt->idt_rx_bytes = 0; 1311 } 1312 1313 /* 1314 * idm_task_done 1315 * 1316 * This function sets the state to indicate that the task is no longer active. 1317 */ 1318 void 1319 idm_task_done(idm_task_t *idt) 1320 { 1321 ASSERT(idt != NULL); 1322 1323 mutex_enter(&idt->idt_mutex); 1324 idt->idt_state = TASK_IDLE; 1325 mutex_exit(&idt->idt_mutex); 1326 1327 /* 1328 * Although unlikely it is possible for a reference to come in after 1329 * the client has decided the task is over but before we've marked 1330 * the task idle. One specific unavoidable scenario is the case where 1331 * received PDU with the matching ITT/TTT results in a successful 1332 * lookup of this task. We are at the mercy of the remote node in 1333 * that case so we need to handle it. Now that the task state 1334 * has changed no more references will occur so a simple call to 1335 * idm_refcnt_wait_ref should deal with the situation. 1336 */ 1337 idm_refcnt_wait_ref(&idt->idt_refcnt); 1338 idm_refcnt_reset(&idt->idt_refcnt); 1339 } 1340 1341 /* 1342 * idm_task_free 1343 * 1344 * This function will free the Task Tag and the memory allocated for the task 1345 * idm_task_done should be called prior to this call 1346 */ 1347 void 1348 idm_task_free(idm_task_t *idt) 1349 { 1350 idm_conn_t *ic; 1351 1352 ASSERT(idt != NULL); 1353 ASSERT(idt->idt_refcnt.ir_refcnt == 0); 1354 ASSERT(idt->idt_state == TASK_IDLE); 1355 1356 ic = idt->idt_ic; 1357 1358 /* 1359 * It's possible for items to still be in the idt_inbufv list if 1360 * they were added after idm_free_task_rsrc was called. We rely on 1361 * STMF to free all buffers associated with the task however STMF 1362 * doesn't know that we have this reference to the buffers. 1363 * Use list_create so that we don't end up with stale references 1364 * to these buffers. 1365 */ 1366 list_create(&idt->idt_inbufv, sizeof (idm_buf_t), 1367 offsetof(idm_buf_t, idb_buflink)); 1368 list_create(&idt->idt_outbufv, sizeof (idm_buf_t), 1369 offsetof(idm_buf_t, idb_buflink)); 1370 1371 kmem_cache_free(idm.idm_task_cache, idt); 1372 1373 idm_conn_rele(ic); 1374 } 1375 1376 /* 1377 * idm_task_find_common 1378 * common code for idm_task_find() and idm_task_find_and_complete() 1379 */ 1380 /*ARGSUSED*/ 1381 static idm_task_t * 1382 idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt, 1383 boolean_t complete) 1384 { 1385 uint32_t tt, client_handle; 1386 idm_task_t *idt; 1387 1388 /* 1389 * Must match both itt and ttt. The table is indexed by itt 1390 * for initiator connections and ttt for target connections. 1391 */ 1392 if (IDM_CONN_ISTGT(ic)) { 1393 tt = ttt; 1394 client_handle = itt; 1395 } else { 1396 tt = itt; 1397 client_handle = ttt; 1398 } 1399 1400 rw_enter(&idm.idm_taskid_table_lock, RW_READER); 1401 if (tt >= idm.idm_taskid_max) { 1402 rw_exit(&idm.idm_taskid_table_lock); 1403 return (NULL); 1404 } 1405 1406 idt = idm.idm_taskid_table[tt]; 1407 1408 if (idt != NULL) { 1409 mutex_enter(&idt->idt_mutex); 1410 if ((idt->idt_state != TASK_ACTIVE) || 1411 (idt->idt_ic != ic) || 1412 (IDM_CONN_ISTGT(ic) && 1413 (idt->idt_client_handle != client_handle))) { 1414 /* 1415 * Task doesn't match or task is aborting and 1416 * we don't want any more references. 1417 */ 1418 if ((idt->idt_ic != ic) && 1419 (idt->idt_state == TASK_ACTIVE) && 1420 (IDM_CONN_ISINI(ic) || idt->idt_client_handle == 1421 client_handle)) { 1422 IDM_CONN_LOG(CE_WARN, 1423 "idm_task_find: wrong connection %p != %p", 1424 (void *)ic, (void *)idt->idt_ic); 1425 } 1426 mutex_exit(&idt->idt_mutex); 1427 rw_exit(&idm.idm_taskid_table_lock); 1428 return (NULL); 1429 } 1430 idm_task_hold(idt); 1431 /* 1432 * Set the task state to TASK_COMPLETE so it can no longer 1433 * be found or aborted. 1434 */ 1435 if (B_TRUE == complete) 1436 idt->idt_state = TASK_COMPLETE; 1437 mutex_exit(&idt->idt_mutex); 1438 } 1439 rw_exit(&idm.idm_taskid_table_lock); 1440 1441 return (idt); 1442 } 1443 1444 /* 1445 * This function looks up a task by task tag. 1446 */ 1447 idm_task_t * 1448 idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt) 1449 { 1450 return (idm_task_find_common(ic, itt, ttt, B_FALSE)); 1451 } 1452 1453 /* 1454 * This function looks up a task by task tag. If found, the task state 1455 * is atomically set to TASK_COMPLETE so it can longer be found or aborted. 1456 */ 1457 idm_task_t * 1458 idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt) 1459 { 1460 return (idm_task_find_common(ic, itt, ttt, B_TRUE)); 1461 } 1462 1463 /* 1464 * idm_task_find_by_handle 1465 * 1466 * This function looks up a task by the client-private idt_client_handle. 1467 * 1468 * This function should NEVER be called in the performance path. It is 1469 * intended strictly for error recovery/task management. 1470 */ 1471 /*ARGSUSED*/ 1472 void * 1473 idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle) 1474 { 1475 idm_task_t *idt = NULL; 1476 int idx = 0; 1477 1478 rw_enter(&idm.idm_taskid_table_lock, RW_READER); 1479 1480 for (idx = 0; idx < idm.idm_taskid_max; idx++) { 1481 idt = idm.idm_taskid_table[idx]; 1482 1483 if (idt == NULL) 1484 continue; 1485 1486 mutex_enter(&idt->idt_mutex); 1487 1488 if (idt->idt_state != TASK_ACTIVE) { 1489 /* 1490 * Task is either in suspend, abort, or already 1491 * complete. 1492 */ 1493 mutex_exit(&idt->idt_mutex); 1494 continue; 1495 } 1496 1497 if (idt->idt_client_handle == handle) { 1498 idm_task_hold(idt); 1499 mutex_exit(&idt->idt_mutex); 1500 break; 1501 } 1502 1503 mutex_exit(&idt->idt_mutex); 1504 } 1505 1506 rw_exit(&idm.idm_taskid_table_lock); 1507 1508 if ((idt == NULL) || (idx == idm.idm_taskid_max)) 1509 return (NULL); 1510 1511 return (idt->idt_private); 1512 } 1513 1514 void 1515 idm_task_hold(idm_task_t *idt) 1516 { 1517 idm_refcnt_hold(&idt->idt_refcnt); 1518 } 1519 1520 void 1521 idm_task_rele(idm_task_t *idt) 1522 { 1523 idm_refcnt_rele(&idt->idt_refcnt); 1524 } 1525 1526 void 1527 idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) 1528 { 1529 idm_task_t *task; 1530 int idx; 1531 1532 /* 1533 * Passing NULL as the task indicates that all tasks 1534 * for this connection should be aborted. 1535 */ 1536 if (idt == NULL) { 1537 /* 1538 * Only the connection state machine should ask for 1539 * all tasks to abort and this should never happen in FFP. 1540 */ 1541 ASSERT(!ic->ic_ffp); 1542 rw_enter(&idm.idm_taskid_table_lock, RW_READER); 1543 for (idx = 0; idx < idm.idm_taskid_max; idx++) { 1544 task = idm.idm_taskid_table[idx]; 1545 if (task == NULL) 1546 continue; 1547 mutex_enter(&task->idt_mutex); 1548 if ((task->idt_state != TASK_IDLE) && 1549 (task->idt_state != TASK_COMPLETE) && 1550 (task->idt_ic == ic)) { 1551 rw_exit(&idm.idm_taskid_table_lock); 1552 idm_task_abort_one(ic, task, abort_type); 1553 rw_enter(&idm.idm_taskid_table_lock, RW_READER); 1554 } else 1555 mutex_exit(&task->idt_mutex); 1556 } 1557 rw_exit(&idm.idm_taskid_table_lock); 1558 } else { 1559 mutex_enter(&idt->idt_mutex); 1560 idm_task_abort_one(ic, idt, abort_type); 1561 } 1562 } 1563 1564 static void 1565 idm_task_abort_unref_cb(void *ref) 1566 { 1567 idm_task_t *idt = ref; 1568 1569 mutex_enter(&idt->idt_mutex); 1570 switch (idt->idt_state) { 1571 case TASK_SUSPENDING: 1572 idt->idt_state = TASK_SUSPENDED; 1573 mutex_exit(&idt->idt_mutex); 1574 idm_task_aborted(idt, IDM_STATUS_SUSPENDED); 1575 return; 1576 case TASK_ABORTING: 1577 idt->idt_state = TASK_ABORTED; 1578 mutex_exit(&idt->idt_mutex); 1579 idm_task_aborted(idt, IDM_STATUS_ABORTED); 1580 return; 1581 default: 1582 mutex_exit(&idt->idt_mutex); 1583 ASSERT(0); 1584 break; 1585 } 1586 } 1587 1588 /* 1589 * Abort the idm task. 1590 * Caller must hold the task mutex, which will be released before return 1591 */ 1592 static void 1593 idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type) 1594 { 1595 /* Caller must hold connection mutex */ 1596 ASSERT(mutex_owned(&idt->idt_mutex)); 1597 switch (idt->idt_state) { 1598 case TASK_ACTIVE: 1599 switch (abort_type) { 1600 case AT_INTERNAL_SUSPEND: 1601 /* Call transport to release any resources */ 1602 idt->idt_state = TASK_SUSPENDING; 1603 mutex_exit(&idt->idt_mutex); 1604 ic->ic_transport_ops->it_free_task_rsrc(idt); 1605 1606 /* 1607 * Wait for outstanding references. When all 1608 * references are released the callback will call 1609 * idm_task_aborted(). 1610 */ 1611 idm_refcnt_async_wait_ref(&idt->idt_refcnt, 1612 &idm_task_abort_unref_cb); 1613 return; 1614 case AT_INTERNAL_ABORT: 1615 case AT_TASK_MGMT_ABORT: 1616 idt->idt_state = TASK_ABORTING; 1617 mutex_exit(&idt->idt_mutex); 1618 ic->ic_transport_ops->it_free_task_rsrc(idt); 1619 1620 /* 1621 * Wait for outstanding references. When all 1622 * references are released the callback will call 1623 * idm_task_aborted(). 1624 */ 1625 idm_refcnt_async_wait_ref(&idt->idt_refcnt, 1626 &idm_task_abort_unref_cb); 1627 return; 1628 default: 1629 ASSERT(0); 1630 } 1631 break; 1632 case TASK_SUSPENDING: 1633 /* Already called transport_free_task_rsrc(); */ 1634 switch (abort_type) { 1635 case AT_INTERNAL_SUSPEND: 1636 /* Already doing it */ 1637 break; 1638 case AT_INTERNAL_ABORT: 1639 case AT_TASK_MGMT_ABORT: 1640 idt->idt_state = TASK_ABORTING; 1641 break; 1642 default: 1643 ASSERT(0); 1644 } 1645 break; 1646 case TASK_SUSPENDED: 1647 /* Already called transport_free_task_rsrc(); */ 1648 switch (abort_type) { 1649 case AT_INTERNAL_SUSPEND: 1650 /* Already doing it */ 1651 break; 1652 case AT_INTERNAL_ABORT: 1653 case AT_TASK_MGMT_ABORT: 1654 idt->idt_state = TASK_ABORTING; 1655 mutex_exit(&idt->idt_mutex); 1656 1657 /* 1658 * We could probably call idm_task_aborted directly 1659 * here but we may be holding the conn lock. It's 1660 * easier to just switch contexts. Even though 1661 * we shouldn't really have any references we'll 1662 * set the state to TASK_ABORTING instead of 1663 * TASK_ABORTED so we can use the same code path. 1664 */ 1665 idm_refcnt_async_wait_ref(&idt->idt_refcnt, 1666 &idm_task_abort_unref_cb); 1667 return; 1668 default: 1669 ASSERT(0); 1670 } 1671 break; 1672 case TASK_ABORTING: 1673 case TASK_ABORTED: 1674 switch (abort_type) { 1675 case AT_INTERNAL_SUSPEND: 1676 /* We're already past this point... */ 1677 case AT_INTERNAL_ABORT: 1678 case AT_TASK_MGMT_ABORT: 1679 /* Already doing it */ 1680 break; 1681 default: 1682 ASSERT(0); 1683 } 1684 break; 1685 case TASK_COMPLETE: 1686 /* 1687 * In this case, let it go. The status has already been 1688 * sent (which may or may not get successfully transmitted) 1689 * and we don't want to end up in a race between completing 1690 * the status PDU and marking the task suspended. 1691 */ 1692 break; 1693 default: 1694 ASSERT(0); 1695 } 1696 mutex_exit(&idt->idt_mutex); 1697 } 1698 1699 static void 1700 idm_task_aborted(idm_task_t *idt, idm_status_t status) 1701 { 1702 (*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status); 1703 } 1704 1705 /* 1706 * idm_pdu_tx 1707 * 1708 * This is IDM's implementation of the 'Send_Control' operational primitive. 1709 * This function is invoked by an initiator iSCSI layer requesting the transfer 1710 * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a 1711 * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover 1712 * layer to the peer iSCSI layer in the remote iSCSI node. The connection info 1713 * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size 1714 * are provided as input. 1715 * 1716 */ 1717 void 1718 idm_pdu_tx(idm_pdu_t *pdu) 1719 { 1720 idm_conn_t *ic = pdu->isp_ic; 1721 iscsi_async_evt_hdr_t *async_evt; 1722 1723 /* 1724 * If we are in full-featured mode then route SCSI-related 1725 * commands to the appropriate function vector without checking 1726 * the connection state. We will only be in full-feature mode 1727 * when we are in an acceptable state for SCSI PDU's. 1728 * 1729 * We also need to ensure that there are no PDU events outstanding 1730 * on the state machine. Any non-SCSI PDU's received in full-feature 1731 * mode will result in PDU events and until these have been handled 1732 * we need to route all PDU's through the state machine as PDU 1733 * events to maintain ordering. 1734 * 1735 * Note that IDM cannot enter FFP mode until it processes in 1736 * its state machine the last xmit of the login process. 1737 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be 1738 * superfluous. 1739 */ 1740 mutex_enter(&ic->ic_state_mutex); 1741 if (ic->ic_ffp && (ic->ic_pdu_events == 0)) { 1742 mutex_exit(&ic->ic_state_mutex); 1743 switch (IDM_PDU_OPCODE(pdu)) { 1744 case ISCSI_OP_SCSI_RSP: 1745 /* Target only */ 1746 DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic, 1747 iscsi_scsi_rsp_hdr_t *, 1748 (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr); 1749 idm_pdu_tx_forward(ic, pdu); 1750 return; 1751 case ISCSI_OP_SCSI_TASK_MGT_RSP: 1752 /* Target only */ 1753 DTRACE_ISCSI_2(task__response, idm_conn_t *, ic, 1754 iscsi_text_rsp_hdr_t *, 1755 (iscsi_text_rsp_hdr_t *)pdu->isp_hdr); 1756 idm_pdu_tx_forward(ic, pdu); 1757 return; 1758 case ISCSI_OP_SCSI_DATA_RSP: 1759 /* Target only */ 1760 DTRACE_ISCSI_2(data__send, idm_conn_t *, ic, 1761 iscsi_data_rsp_hdr_t *, 1762 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr); 1763 idm_pdu_tx_forward(ic, pdu); 1764 return; 1765 case ISCSI_OP_RTT_RSP: 1766 /* Target only */ 1767 DTRACE_ISCSI_2(data__request, idm_conn_t *, ic, 1768 iscsi_rtt_hdr_t *, 1769 (iscsi_rtt_hdr_t *)pdu->isp_hdr); 1770 idm_pdu_tx_forward(ic, pdu); 1771 return; 1772 case ISCSI_OP_NOOP_IN: 1773 /* Target only */ 1774 DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic, 1775 iscsi_nop_in_hdr_t *, 1776 (iscsi_nop_in_hdr_t *)pdu->isp_hdr); 1777 idm_pdu_tx_forward(ic, pdu); 1778 return; 1779 case ISCSI_OP_TEXT_RSP: 1780 /* Target only */ 1781 DTRACE_ISCSI_2(text__response, idm_conn_t *, ic, 1782 iscsi_text_rsp_hdr_t *, 1783 (iscsi_text_rsp_hdr_t *)pdu->isp_hdr); 1784 idm_pdu_tx_forward(ic, pdu); 1785 return; 1786 case ISCSI_OP_TEXT_CMD: 1787 case ISCSI_OP_NOOP_OUT: 1788 case ISCSI_OP_SCSI_CMD: 1789 case ISCSI_OP_SCSI_DATA: 1790 case ISCSI_OP_SCSI_TASK_MGT_MSG: 1791 /* Initiator only */ 1792 idm_pdu_tx_forward(ic, pdu); 1793 return; 1794 default: 1795 break; 1796 } 1797 1798 mutex_enter(&ic->ic_state_mutex); 1799 } 1800 1801 /* 1802 * Any PDU's processed outside of full-feature mode and non-SCSI 1803 * PDU's in full-feature mode are handled by generating an 1804 * event to the connection state machine. The state machine 1805 * will validate the PDU against the current state and either 1806 * transmit the PDU if the opcode is allowed or handle an 1807 * error if the PDU is not allowed. 1808 * 1809 * This code-path will also generate any events that are implied 1810 * by the PDU opcode. For example a "login response" with success 1811 * status generates a CE_LOGOUT_SUCCESS_SND event. 1812 */ 1813 switch (IDM_PDU_OPCODE(pdu)) { 1814 case ISCSI_OP_LOGIN_CMD: 1815 idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu); 1816 break; 1817 case ISCSI_OP_LOGIN_RSP: 1818 DTRACE_ISCSI_2(login__response, idm_conn_t *, ic, 1819 iscsi_login_rsp_hdr_t *, 1820 (iscsi_login_rsp_hdr_t *)pdu->isp_hdr); 1821 idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE); 1822 break; 1823 case ISCSI_OP_LOGOUT_CMD: 1824 idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE); 1825 break; 1826 case ISCSI_OP_LOGOUT_RSP: 1827 DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic, 1828 iscsi_logout_rsp_hdr_t *, 1829 (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr); 1830 idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE); 1831 break; 1832 case ISCSI_OP_ASYNC_EVENT: 1833 DTRACE_ISCSI_2(async__send, idm_conn_t *, ic, 1834 iscsi_async_evt_hdr_t *, 1835 (iscsi_async_evt_hdr_t *)pdu->isp_hdr); 1836 async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr; 1837 switch (async_evt->async_event) { 1838 case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT: 1839 idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND, 1840 (uintptr_t)pdu); 1841 break; 1842 case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION: 1843 idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND, 1844 (uintptr_t)pdu); 1845 break; 1846 case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS: 1847 idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND, 1848 (uintptr_t)pdu); 1849 break; 1850 case ISCSI_ASYNC_EVENT_SCSI_EVENT: 1851 case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION: 1852 default: 1853 idm_conn_tx_pdu_event(ic, CE_MISC_TX, 1854 (uintptr_t)pdu); 1855 break; 1856 } 1857 break; 1858 case ISCSI_OP_SCSI_RSP: 1859 /* Target only */ 1860 DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic, 1861 iscsi_scsi_rsp_hdr_t *, 1862 (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr); 1863 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1864 break; 1865 case ISCSI_OP_SCSI_TASK_MGT_RSP: 1866 /* Target only */ 1867 DTRACE_ISCSI_2(task__response, idm_conn_t *, ic, 1868 iscsi_scsi_task_mgt_rsp_hdr_t *, 1869 (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr); 1870 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1871 break; 1872 case ISCSI_OP_SCSI_DATA_RSP: 1873 /* Target only */ 1874 DTRACE_ISCSI_2(data__send, idm_conn_t *, ic, 1875 iscsi_data_rsp_hdr_t *, 1876 (iscsi_data_rsp_hdr_t *)pdu->isp_hdr); 1877 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1878 break; 1879 case ISCSI_OP_RTT_RSP: 1880 /* Target only */ 1881 DTRACE_ISCSI_2(data__request, idm_conn_t *, ic, 1882 iscsi_rtt_hdr_t *, 1883 (iscsi_rtt_hdr_t *)pdu->isp_hdr); 1884 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1885 break; 1886 case ISCSI_OP_NOOP_IN: 1887 /* Target only */ 1888 DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic, 1889 iscsi_nop_in_hdr_t *, 1890 (iscsi_nop_in_hdr_t *)pdu->isp_hdr); 1891 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1892 break; 1893 case ISCSI_OP_TEXT_RSP: 1894 /* Target only */ 1895 DTRACE_ISCSI_2(text__response, idm_conn_t *, ic, 1896 iscsi_text_rsp_hdr_t *, 1897 (iscsi_text_rsp_hdr_t *)pdu->isp_hdr); 1898 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1899 break; 1900 /* Initiator only */ 1901 case ISCSI_OP_SCSI_CMD: 1902 case ISCSI_OP_SCSI_TASK_MGT_MSG: 1903 case ISCSI_OP_SCSI_DATA: 1904 case ISCSI_OP_NOOP_OUT: 1905 case ISCSI_OP_TEXT_CMD: 1906 case ISCSI_OP_SNACK_CMD: 1907 case ISCSI_OP_REJECT_MSG: 1908 default: 1909 /* 1910 * Connection state machine will validate these PDU's against 1911 * the current state. A PDU not allowed in the current 1912 * state will cause a protocol error. 1913 */ 1914 idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu); 1915 break; 1916 } 1917 mutex_exit(&ic->ic_state_mutex); 1918 } 1919 1920 /* 1921 * Common allocation of a PDU along with memory for header and data. 1922 */ 1923 static idm_pdu_t * 1924 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag) 1925 { 1926 idm_pdu_t *result; 1927 1928 /* 1929 * IDM clients should cache these structures for performance 1930 * critical paths. We can't cache effectively in IDM because we 1931 * don't know the correct header and data size. 1932 * 1933 * Valid header length is assumed to be hdrlen and valid data 1934 * length is assumed to be datalen. isp_hdrlen and isp_datalen 1935 * can be adjusted after the PDU is returned if necessary. 1936 */ 1937 result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag); 1938 if (result != NULL) { 1939 /* For idm_pdu_free sanity check */ 1940 result->isp_flags |= IDM_PDU_ALLOC; 1941 /* pointer arithmetic */ 1942 result->isp_hdr = (iscsi_hdr_t *)(result + 1); 1943 result->isp_hdrlen = hdrlen; 1944 result->isp_hdrbuflen = hdrlen; 1945 result->isp_transport_hdrlen = 0; 1946 if (datalen != 0) 1947 result->isp_data = (uint8_t *)result->isp_hdr + hdrlen; 1948 result->isp_datalen = datalen; 1949 result->isp_databuflen = datalen; 1950 result->isp_magic = IDM_PDU_MAGIC; 1951 } 1952 1953 return (result); 1954 } 1955 1956 /* 1957 * Typical idm_pdu_alloc invocation, will block for resources. 1958 */ 1959 idm_pdu_t * 1960 idm_pdu_alloc(uint_t hdrlen, uint_t datalen) 1961 { 1962 return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP)); 1963 } 1964 1965 /* 1966 * Non-blocking idm_pdu_alloc implementation, returns NULL if resources 1967 * are not available. Needed for transport-layer allocations which may 1968 * be invoking in interrupt context. 1969 */ 1970 idm_pdu_t * 1971 idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen) 1972 { 1973 return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP)); 1974 } 1975 1976 /* 1977 * Free a PDU previously allocated with idm_pdu_alloc() including any 1978 * header and data space allocated as part of the original request. 1979 * Additional memory regions referenced by subsequent modification of 1980 * the isp_hdr and/or isp_data fields will not be freed. 1981 */ 1982 void 1983 idm_pdu_free(idm_pdu_t *pdu) 1984 { 1985 /* Make sure the structure was allocated using idm_pdu_alloc() */ 1986 ASSERT(pdu->isp_flags & IDM_PDU_ALLOC); 1987 kmem_free(pdu, 1988 sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen); 1989 } 1990 1991 /* 1992 * Initialize the connection, private and callback fields in a PDU. 1993 */ 1994 void 1995 idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb) 1996 { 1997 /* 1998 * idm_pdu_complete() will call idm_pdu_free if the callback is 1999 * NULL. This will only work if the PDU was originally allocated 2000 * with idm_pdu_alloc(). 2001 */ 2002 ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) || 2003 (cb != NULL)); 2004 pdu->isp_magic = IDM_PDU_MAGIC; 2005 pdu->isp_ic = ic; 2006 pdu->isp_private = private; 2007 pdu->isp_callback = cb; 2008 } 2009 2010 /* 2011 * Initialize the header and header length field. This function should 2012 * not be used to adjust the header length in a buffer allocated via 2013 * pdu_pdu_alloc since it overwrites the existing header pointer. 2014 */ 2015 void 2016 idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen) 2017 { 2018 pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr); 2019 pdu->isp_hdrlen = hdrlen; 2020 } 2021 2022 /* 2023 * Initialize the data and data length fields. This function should 2024 * not be used to adjust the data length of a buffer allocated via 2025 * idm_pdu_alloc since it overwrites the existing data pointer. 2026 */ 2027 void 2028 idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen) 2029 { 2030 pdu->isp_data = data; 2031 pdu->isp_datalen = datalen; 2032 } 2033 2034 void 2035 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status) 2036 { 2037 if (pdu->isp_callback) { 2038 pdu->isp_status = status; 2039 (*pdu->isp_callback)(pdu, status); 2040 } else { 2041 idm_pdu_free(pdu); 2042 } 2043 } 2044 2045 /* 2046 * State machine auditing 2047 */ 2048 2049 void 2050 idm_sm_audit_init(sm_audit_buf_t *audit_buf) 2051 { 2052 bzero(audit_buf, sizeof (sm_audit_buf_t)); 2053 audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1; 2054 } 2055 2056 static 2057 sm_audit_record_t * 2058 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type, 2059 sm_audit_sm_type_t sm_type, 2060 int current_state) 2061 { 2062 sm_audit_record_t *sar; 2063 2064 sar = audit_buf->sab_records; 2065 sar += audit_buf->sab_index; 2066 audit_buf->sab_index++; 2067 audit_buf->sab_index &= audit_buf->sab_max_index; 2068 2069 sar->sar_type = r_type; 2070 gethrestime(&sar->sar_timestamp); 2071 sar->sar_sm_type = sm_type; 2072 sar->sar_state = current_state; 2073 2074 return (sar); 2075 } 2076 2077 void 2078 idm_sm_audit_event(sm_audit_buf_t *audit_buf, 2079 sm_audit_sm_type_t sm_type, int current_state, 2080 int event, uintptr_t event_info) 2081 { 2082 sm_audit_record_t *sar; 2083 2084 sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT, 2085 sm_type, current_state); 2086 sar->sar_event = event; 2087 sar->sar_event_info = event_info; 2088 } 2089 2090 void 2091 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf, 2092 sm_audit_sm_type_t sm_type, int current_state, int new_state) 2093 { 2094 sm_audit_record_t *sar; 2095 2096 sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE, 2097 sm_type, current_state); 2098 sar->sar_new_state = new_state; 2099 } 2100 2101 2102 /* 2103 * Object reference tracking 2104 */ 2105 2106 void 2107 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj) 2108 { 2109 bzero(refcnt, sizeof (*refcnt)); 2110 idm_refcnt_reset(refcnt); 2111 refcnt->ir_referenced_obj = referenced_obj; 2112 bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t)); 2113 refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1; 2114 mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL); 2115 cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL); 2116 } 2117 2118 void 2119 idm_refcnt_destroy(idm_refcnt_t *refcnt) 2120 { 2121 /* 2122 * Grab the mutex to there are no other lingering threads holding 2123 * the mutex before we destroy it (e.g. idm_refcnt_rele just after 2124 * the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC) 2125 */ 2126 mutex_enter(&refcnt->ir_mutex); 2127 ASSERT(refcnt->ir_refcnt == 0); 2128 cv_destroy(&refcnt->ir_cv); 2129 mutex_destroy(&refcnt->ir_mutex); 2130 } 2131 2132 void 2133 idm_refcnt_reset(idm_refcnt_t *refcnt) 2134 { 2135 refcnt->ir_waiting = REF_NOWAIT; 2136 refcnt->ir_refcnt = 0; 2137 } 2138 2139 void 2140 idm_refcnt_hold(idm_refcnt_t *refcnt) 2141 { 2142 /* 2143 * Nothing should take a hold on an object after a call to 2144 * idm_refcnt_wait_ref or idm_refcnd_async_wait_ref 2145 */ 2146 ASSERT(refcnt->ir_waiting == REF_NOWAIT); 2147 2148 mutex_enter(&refcnt->ir_mutex); 2149 refcnt->ir_refcnt++; 2150 REFCNT_AUDIT(refcnt); 2151 mutex_exit(&refcnt->ir_mutex); 2152 } 2153 2154 static void 2155 idm_refcnt_unref_task(void *refcnt_void) 2156 { 2157 idm_refcnt_t *refcnt = refcnt_void; 2158 2159 REFCNT_AUDIT(refcnt); 2160 (*refcnt->ir_cb)(refcnt->ir_referenced_obj); 2161 } 2162 2163 void 2164 idm_refcnt_rele(idm_refcnt_t *refcnt) 2165 { 2166 mutex_enter(&refcnt->ir_mutex); 2167 ASSERT(refcnt->ir_refcnt > 0); 2168 refcnt->ir_refcnt--; 2169 REFCNT_AUDIT(refcnt); 2170 if (refcnt->ir_waiting == REF_NOWAIT) { 2171 /* No one is waiting on this object */ 2172 mutex_exit(&refcnt->ir_mutex); 2173 return; 2174 } 2175 2176 /* 2177 * Someone is waiting for this object to go idle so check if 2178 * refcnt is 0. Waiting on an object then later grabbing another 2179 * reference is not allowed so we don't need to handle that case. 2180 */ 2181 if (refcnt->ir_refcnt == 0) { 2182 if (refcnt->ir_waiting == REF_WAIT_ASYNC) { 2183 if (taskq_dispatch(idm.idm_global_taskq, 2184 &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == 2185 TASKQID_INVALID) { 2186 cmn_err(CE_WARN, 2187 "idm_refcnt_rele: Couldn't dispatch task"); 2188 } 2189 } else if (refcnt->ir_waiting == REF_WAIT_SYNC) { 2190 cv_signal(&refcnt->ir_cv); 2191 } 2192 } 2193 mutex_exit(&refcnt->ir_mutex); 2194 } 2195 2196 void 2197 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func) 2198 { 2199 mutex_enter(&refcnt->ir_mutex); 2200 ASSERT(refcnt->ir_refcnt > 0); 2201 refcnt->ir_refcnt--; 2202 REFCNT_AUDIT(refcnt); 2203 2204 /* 2205 * Someone is waiting for this object to go idle so check if 2206 * refcnt is 0. Waiting on an object then later grabbing another 2207 * reference is not allowed so we don't need to handle that case. 2208 */ 2209 if (refcnt->ir_refcnt == 0) { 2210 refcnt->ir_cb = cb_func; 2211 refcnt->ir_waiting = REF_WAIT_ASYNC; 2212 if (taskq_dispatch(idm.idm_global_taskq, 2213 &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == 2214 TASKQID_INVALID) { 2215 cmn_err(CE_WARN, 2216 "idm_refcnt_rele: Couldn't dispatch task"); 2217 } 2218 } 2219 mutex_exit(&refcnt->ir_mutex); 2220 } 2221 2222 void 2223 idm_refcnt_wait_ref(idm_refcnt_t *refcnt) 2224 { 2225 mutex_enter(&refcnt->ir_mutex); 2226 refcnt->ir_waiting = REF_WAIT_SYNC; 2227 REFCNT_AUDIT(refcnt); 2228 while (refcnt->ir_refcnt != 0) 2229 cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex); 2230 mutex_exit(&refcnt->ir_mutex); 2231 } 2232 2233 void 2234 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func) 2235 { 2236 mutex_enter(&refcnt->ir_mutex); 2237 refcnt->ir_waiting = REF_WAIT_ASYNC; 2238 refcnt->ir_cb = cb_func; 2239 REFCNT_AUDIT(refcnt); 2240 /* 2241 * It's possible we don't have any references. To make things easier 2242 * on the caller use a taskq to call the callback instead of 2243 * calling it synchronously 2244 */ 2245 if (refcnt->ir_refcnt == 0) { 2246 if (taskq_dispatch(idm.idm_global_taskq, 2247 &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == 2248 TASKQID_INVALID) { 2249 cmn_err(CE_WARN, 2250 "idm_refcnt_async_wait_ref: " 2251 "Couldn't dispatch task"); 2252 } 2253 } 2254 mutex_exit(&refcnt->ir_mutex); 2255 } 2256 2257 void 2258 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt, 2259 idm_refcnt_cb_t *cb_func) 2260 { 2261 mutex_enter(&refcnt->ir_mutex); 2262 if (refcnt->ir_refcnt == 0) { 2263 mutex_exit(&refcnt->ir_mutex); 2264 (*cb_func)(refcnt->ir_referenced_obj); 2265 return; 2266 } 2267 mutex_exit(&refcnt->ir_mutex); 2268 } 2269 2270 void 2271 idm_conn_hold(idm_conn_t *ic) 2272 { 2273 idm_refcnt_hold(&ic->ic_refcnt); 2274 } 2275 2276 void 2277 idm_conn_rele(idm_conn_t *ic) 2278 { 2279 idm_refcnt_rele(&ic->ic_refcnt); 2280 } 2281 2282 void 2283 idm_conn_set_target_name(idm_conn_t *ic, char *target_name) 2284 { 2285 (void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1); 2286 } 2287 2288 void 2289 idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name) 2290 { 2291 (void) strlcpy(ic->ic_initiator_name, initiator_name, 2292 ISCSI_MAX_NAME_LEN + 1); 2293 } 2294 2295 void 2296 idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN]) 2297 { 2298 (void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1, 2299 "%02x%02x%02x%02x%02x%02x", 2300 isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]); 2301 } 2302 2303 static int 2304 _idm_init(void) 2305 { 2306 /* Initialize the rwlock for the taskid table */ 2307 rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL); 2308 2309 /* Initialize the global mutex and taskq */ 2310 mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL); 2311 2312 cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL); 2313 cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL); 2314 2315 /* 2316 * The maximum allocation needs to be high here since there can be 2317 * many concurrent tasks using the global taskq. 2318 */ 2319 idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri, 2320 128, 16384, TASKQ_PREPOPULATE); 2321 if (idm.idm_global_taskq == NULL) { 2322 cv_destroy(&idm.idm_wd_cv); 2323 cv_destroy(&idm.idm_tgt_svc_cv); 2324 mutex_destroy(&idm.idm_global_mutex); 2325 rw_destroy(&idm.idm_taskid_table_lock); 2326 return (ENOMEM); 2327 } 2328 2329 /* Start watchdog thread */ 2330 idm.idm_wd_thread = thread_create(NULL, 0, 2331 idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri); 2332 if (idm.idm_wd_thread == NULL) { 2333 /* Couldn't create the watchdog thread */ 2334 taskq_destroy(idm.idm_global_taskq); 2335 cv_destroy(&idm.idm_wd_cv); 2336 cv_destroy(&idm.idm_tgt_svc_cv); 2337 mutex_destroy(&idm.idm_global_mutex); 2338 rw_destroy(&idm.idm_taskid_table_lock); 2339 return (ENOMEM); 2340 } 2341 2342 /* Pause until the watchdog thread is running */ 2343 mutex_enter(&idm.idm_global_mutex); 2344 while (!idm.idm_wd_thread_running) 2345 cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex); 2346 mutex_exit(&idm.idm_global_mutex); 2347 2348 /* 2349 * Allocate the task ID table and set "next" to 0. 2350 */ 2351 2352 idm.idm_taskid_max = idm_max_taskids; 2353 idm.idm_taskid_table = (idm_task_t **) 2354 kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP); 2355 idm.idm_taskid_next = 0; 2356 2357 /* Create the global buffer and task kmem caches */ 2358 idm.idm_buf_cache = kmem_cache_create("idm_buf_cache", 2359 sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP); 2360 2361 /* 2362 * Note, we're explicitly allocating an additional iSER header- 2363 * sized chunk for each of these elements. See idm_task_constructor(). 2364 */ 2365 idm.idm_task_cache = kmem_cache_create("idm_task_cache", 2366 sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8, 2367 &idm_task_constructor, &idm_task_destructor, 2368 NULL, NULL, NULL, KM_SLEEP); 2369 2370 /* Create the service and connection context lists */ 2371 list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t), 2372 offsetof(idm_svc_t, is_list_node)); 2373 list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t), 2374 offsetof(idm_conn_t, ic_list_node)); 2375 list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t), 2376 offsetof(idm_conn_t, ic_list_node)); 2377 2378 /* Initialize the native sockets transport */ 2379 idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]); 2380 2381 /* Create connection ID pool */ 2382 (void) idm_idpool_create(&idm.idm_conn_id_pool); 2383 2384 return (DDI_SUCCESS); 2385 } 2386 2387 static int 2388 _idm_fini(void) 2389 { 2390 if (!list_is_empty(&idm.idm_ini_conn_list) || 2391 !list_is_empty(&idm.idm_tgt_conn_list) || 2392 !list_is_empty(&idm.idm_tgt_svc_list)) { 2393 return (EBUSY); 2394 } 2395 2396 mutex_enter(&idm.idm_global_mutex); 2397 idm.idm_wd_thread_running = B_FALSE; 2398 cv_signal(&idm.idm_wd_cv); 2399 mutex_exit(&idm.idm_global_mutex); 2400 2401 thread_join(idm.idm_wd_thread_did); 2402 2403 idm_idpool_destroy(&idm.idm_conn_id_pool); 2404 2405 /* Close any LDI handles we have open on transport drivers */ 2406 mutex_enter(&idm.idm_global_mutex); 2407 idm_transport_teardown(); 2408 mutex_exit(&idm.idm_global_mutex); 2409 2410 /* Teardown the native sockets transport */ 2411 idm_so_fini(); 2412 2413 list_destroy(&idm.idm_ini_conn_list); 2414 list_destroy(&idm.idm_tgt_conn_list); 2415 list_destroy(&idm.idm_tgt_svc_list); 2416 kmem_cache_destroy(idm.idm_task_cache); 2417 kmem_cache_destroy(idm.idm_buf_cache); 2418 kmem_free(idm.idm_taskid_table, 2419 idm.idm_taskid_max * sizeof (idm_task_t *)); 2420 mutex_destroy(&idm.idm_global_mutex); 2421 cv_destroy(&idm.idm_wd_cv); 2422 cv_destroy(&idm.idm_tgt_svc_cv); 2423 rw_destroy(&idm.idm_taskid_table_lock); 2424 2425 return (0); 2426 } 2427