/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpuvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>

#include <sys/socket.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>

#include <sys/socketvar.h>
#include <netinet/in.h>

#include <sys/idm/idm.h>
#include <sys/idm/idm_so.h>

#define IDM_NAME_VERSION "iSCSI Data Mover"

extern struct mod_ops mod_miscops;

static struct modlmisc modlmisc = {
    &mod_miscops,    /* Type of module */
    IDM_NAME_VERSION
};

static struct modlinkage modlinkage = {
    MODREV_1, (void *)&modlmisc, NULL
};

extern int idm_task_compare(const void *t1, const void *t2);
extern void idm_wd_thread(void *arg);

static int _idm_init(void);
static int _idm_fini(void);
static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
    idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
    int sleepflag);

boolean_t idm_conn_logging = 0;
boolean_t idm_svc_logging = 0;
#ifdef DEBUG
boolean_t idm_pattern_checking = 1;
#else
boolean_t idm_pattern_checking = 0;
#endif

/*
 * Potential tunable for the maximum number of tasks. Default to
 * IDM_TASKIDS_MAX.
 */
uint32_t idm_max_taskids = IDM_TASKIDS_MAX;

/*
 * Global list of transport handles
 * These are listed in preferential order, so we can simply take the
 * first "it_conn_is_capable" hit. Note also that the order maps to
 * the order of the idm_transport_type_t list.
 */
idm_transport_t idm_transport_list[] = {

    /* iSER on InfiniBand transport handle */
    {IDM_TRANSPORT_TYPE_ISER,       /* type */
    "/devices/ib/iser@0:iser",      /* device path */
    NULL,                           /* LDI handle */
    NULL,                           /* transport ops */
    NULL},                          /* transport caps */

    /* IDM native sockets transport handle */
    {IDM_TRANSPORT_TYPE_SOCKETS,    /* type */
    NULL,                           /* device path */
    NULL,                           /* LDI handle */
    NULL,                           /* transport ops */
    NULL}                           /* transport caps */

};
int
_init(void)
{
    int rc;

    if ((rc = _idm_init()) != 0) {
        return (rc);
    }

    return (mod_install(&modlinkage));
}

int
_fini(void)
{
    int rc;

    if ((rc = _idm_fini()) != 0) {
        return (rc);
    }

    if ((rc = mod_remove(&modlinkage)) != 0) {
        return (rc);
    }

    return (rc);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}

/*
 * idm_transport_register()
 *
 * Provides a mechanism for an IDM transport driver to register its
 * transport ops and caps with the IDM kernel module. Invoked during
 * a transport driver's attach routine.
 */
idm_status_t
idm_transport_register(idm_transport_attr_t *attr)
{
    ASSERT(attr->it_ops != NULL);
    ASSERT(attr->it_caps != NULL);

    switch (attr->type) {
    /* All known non-native transports here; for now, iSER */
    case IDM_TRANSPORT_TYPE_ISER:
        idm_transport_list[attr->type].it_ops = attr->it_ops;
        idm_transport_list[attr->type].it_caps = attr->it_caps;
        return (IDM_STATUS_SUCCESS);

    default:
        cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
            "idm_transport_register", attr->type);
        return (IDM_STATUS_SUCCESS);
    }
}
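
/*
 * Example (editor's illustrative sketch, never compiled: the
 * IDM_EXAMPLE_CODE guard is not defined anywhere): how a transport
 * driver's attach(9E) path might register with IDM. The variable names
 * and the idm_transport_ops_t/idm_transport_caps_t type names are
 * assumptions inferred from the attr fields used above.
 */
#ifdef IDM_EXAMPLE_CODE
extern idm_transport_ops_t example_iser_ops;    /* hypothetical */
extern idm_transport_caps_t example_iser_caps;  /* hypothetical */

static int
example_transport_attach(void)
{
    idm_transport_attr_t attr;

    attr.type = IDM_TRANSPORT_TYPE_ISER;
    attr.it_ops = &example_iser_ops;
    attr.it_caps = &example_iser_caps;

    return ((idm_transport_register(&attr) == IDM_STATUS_SUCCESS) ?
        DDI_SUCCESS : DDI_FAILURE);
}
#endif /* IDM_EXAMPLE_CODE */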
/*
 * idm_ini_conn_create
 *
 * This function is invoked by the iSCSI layer to create a connection context.
 * This does not actually establish the socket connection.
 *
 * cr - Connection request parameters
 * new_con - Output parameter that contains the new connection if successful
 *
 */
idm_status_t
idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
{
    idm_transport_t *it;
    idm_conn_t *ic;
    int rc;

    it = idm_transport_lookup(cr);

retry:
    ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
        &cr->icr_conn_ops);

    bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
        sizeof (cr->cr_ini_dst_addr));

    /* create the transport-specific connection components */
    rc = it->it_ops->it_ini_conn_create(cr, ic);
    if (rc != IDM_STATUS_SUCCESS) {
        /* cleanup the failed connection */
        idm_conn_destroy_common(ic);

        /*
         * It is possible for an IB client to connect to
         * an ethernet-only client via an IB-eth gateway.
         * Therefore, if we are attempting to use iSER and
         * fail, retry with sockets before ultimately
         * failing the connection.
         */
        if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
            it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
            goto retry;
        }

        return (IDM_STATUS_FAIL);
    }

    *new_con = ic;

    mutex_enter(&idm.idm_global_mutex);
    list_insert_tail(&idm.idm_ini_conn_list, ic);
    mutex_exit(&idm.idm_global_mutex);

    return (IDM_STATUS_SUCCESS);
}

/*
 * idm_ini_conn_destroy
 *
 * Releases any resources associated with the connection. This is the
 * complement to idm_ini_conn_create.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 */
void
idm_ini_conn_destroy_task(void *ic_void)
{
    idm_conn_t *ic = ic_void;

    ic->ic_transport_ops->it_ini_conn_destroy(ic);
    idm_conn_destroy_common(ic);
}

void
idm_ini_conn_destroy(idm_conn_t *ic)
{
    /*
     * It's reasonable for the initiator to call idm_ini_conn_destroy
     * from within the context of the CN_CONNECT_DESTROY notification.
     * That's a problem since we want to destroy the taskq for the
     * state machine associated with the connection. Remove the
     * connection from the list right away then handle the remaining
     * work via the idm_global_taskq.
     */
    mutex_enter(&idm.idm_global_mutex);
    list_remove(&idm.idm_ini_conn_list, ic);
    mutex_exit(&idm.idm_global_mutex);

    if (taskq_dispatch(idm.idm_global_taskq,
        &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
        cmn_err(CE_WARN,
            "idm_ini_conn_destroy: Couldn't dispatch task");
    }
}

/*
 * idm_ini_conn_connect
 *
 * Establish connection to the remote system identified in idm_conn_t.
 * The connection parameters including the remote IP address were established
 * in the call to idm_ini_conn_create. The IDM state machine will
 * perform client notifications as necessary to prompt the initiator through
 * the login process. IDM also keeps a timer running so that if the login
 * process doesn't complete in a timely manner it will fail.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 * Returns success if the connection was established, otherwise some kind
 * of meaningful error code.
 *
 * Upon return the login has either failed or is logging in (ffp)
 */
idm_status_t
idm_ini_conn_connect(idm_conn_t *ic)
{
    idm_status_t rc;

    rc = idm_conn_sm_init(ic);
    if (rc != IDM_STATUS_SUCCESS) {
        return (ic->ic_conn_sm_status);
    }

    /* Hold connection until we return */
    idm_conn_hold(ic);

    /* Kick state machine */
    idm_conn_event(ic, CE_CONNECT_REQ, NULL);

    /* Wait for login flag */
    mutex_enter(&ic->ic_state_mutex);
    while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
        !(ic->ic_state_flags & CF_ERROR)) {
        cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
    }

    /*
     * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to
     * idm_notify_client has already been generated by the idm conn
     * state machine. If connection fails any time after this
     * check, we will detect it in iscsi_login.
     */
    if (ic->ic_state_flags & CF_ERROR) {
        rc = ic->ic_conn_sm_status;
    }
    mutex_exit(&ic->ic_state_mutex);
    idm_conn_rele(ic);

    return (rc);
}

/*
 * idm_ini_conn_disconnect
 *
 * Forces a connection (previously established using idm_ini_conn_connect)
 * to perform a controlled shutdown, cleaning up any outstanding requests.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 * This is asynchronous and will return before the connection is properly
 * shutdown
 */
/* ARGSUSED */
void
idm_ini_conn_disconnect(idm_conn_t *ic)
{
    idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
}
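
/*
 * Example (editor's sketch, never compiled): the initiator-side
 * connection lifecycle using the functions above. Error handling is
 * abbreviated and the connection request "cr" is assumed to be fully
 * initialized by the caller.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_ini_conn_lifecycle(idm_conn_req_t *cr)
{
    idm_conn_t *ic;

    if (idm_ini_conn_create(cr, &ic) != IDM_STATUS_SUCCESS)
        return;

    if (idm_ini_conn_connect(ic) != IDM_STATUS_SUCCESS) {
        idm_ini_conn_destroy(ic);
        return;
    }

    /* ... iSCSI login, then full-feature-mode I/O ... */

    idm_ini_conn_disconnect_sync(ic);
    idm_ini_conn_destroy(ic);
}
#endif /* IDM_EXAMPLE_CODE */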
/*
 * idm_ini_conn_disconnect_sync
 *
 * Forces a connection (previously established using idm_ini_conn_connect)
 * to perform a controlled shutdown. Blocks until the connection is
 * disconnected.
 *
 * ic - idm_conn_t structure representing the relevant connection
 */
/* ARGSUSED */
void
idm_ini_conn_disconnect_sync(idm_conn_t *ic)
{
    mutex_enter(&ic->ic_state_mutex);
    if ((ic->ic_state != CS_S9_INIT_ERROR) &&
        (ic->ic_state != CS_S11_COMPLETE)) {
        idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
        while ((ic->ic_state != CS_S9_INIT_ERROR) &&
            (ic->ic_state != CS_S11_COMPLETE))
            cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
    }
    mutex_exit(&ic->ic_state_mutex);
}

/*
 * idm_tgt_svc_create
 *
 * The target calls this service to obtain a service context for each available
 * transport, starting a service of each type related to the IP address and port
 * passed. The idm_svc_req_t contains the service parameters.
 */
idm_status_t
idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
{
    idm_transport_type_t type;
    idm_transport_t *it;
    idm_svc_t *is;
    int rc;

    *new_svc = NULL;
    is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);

    /* Initialize transport-agnostic components of the service handle */
    is->is_svc_req = *sr;
    mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
    cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
    mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
    cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
    idm_refcnt_init(&is->is_refcnt, is);

    /*
     * Make sure all available transports are setup. We call this now
     * instead of at initialization time in case IB has become available
     * since we started (hotplug, etc).
     */
    idm_transport_setup(sr->sr_li);

    /*
     * Loop through the transports, configuring the transport-specific
     * components of each one.
     */
    for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {

        it = &idm_transport_list[type];
        /*
         * If it_ops is NULL then the transport is unconfigured
         * and we shouldn't try to start the service.
         */
        if (it->it_ops == NULL) {
            continue;
        }

        rc = it->it_ops->it_tgt_svc_create(sr, is);
        if (rc != IDM_STATUS_SUCCESS) {
            /* Teardown any configured services */
            while (type--) {
                it = &idm_transport_list[type];
                if (it->it_ops == NULL) {
                    continue;
                }
                it->it_ops->it_tgt_svc_destroy(is);
            }
            /* Free the svc context and return */
            kmem_free(is, sizeof (idm_svc_t));
            return (rc);
        }
    }

    *new_svc = is;

    mutex_enter(&idm.idm_global_mutex);
    list_insert_tail(&idm.idm_tgt_svc_list, is);
    mutex_exit(&idm.idm_global_mutex);

    return (IDM_STATUS_SUCCESS);
}
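
/*
 * Example (editor's sketch, never compiled): creating and onlining a
 * target service, then tearing it down. The idm_svc_req_t setup (port,
 * listen info, "new connect" callback) is assumed done by the caller.
 * Consumers that publish the handle to other threads should pair
 * idm_tgt_svc_hold() with idm_tgt_svc_rele_and_destroy() rather than
 * destroying directly as shown here.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_tgt_svc_lifecycle(idm_svc_req_t *sr)
{
    idm_svc_t *is;

    if (idm_tgt_svc_create(sr, &is) != IDM_STATUS_SUCCESS)
        return;

    if (idm_tgt_svc_online(is) != IDM_STATUS_SUCCESS) {
        idm_tgt_svc_destroy(is);
        return;
    }

    /* ... service accepts initiator connections ... */

    idm_tgt_svc_offline(is);
    idm_tgt_svc_destroy(is);
}
#endif /* IDM_EXAMPLE_CODE */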
/*
 * idm_tgt_svc_destroy
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Cleanup any resources associated with the idm_svc_t.
 */
void
idm_tgt_svc_destroy(idm_svc_t *is)
{
    idm_transport_type_t type;
    idm_transport_t *it;

    mutex_enter(&idm.idm_global_mutex);
    /* remove this service from the global list */
    list_remove(&idm.idm_tgt_svc_list, is);
    /* wakeup any waiters for service change */
    cv_broadcast(&idm.idm_tgt_svc_cv);
    mutex_exit(&idm.idm_global_mutex);

    /* teardown each transport-specific service */
    for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
        it = &idm_transport_list[type];
        if (it->it_ops == NULL) {
            continue;
        }

        it->it_ops->it_tgt_svc_destroy(is);
    }

    /* tear down the svc resources */
    idm_refcnt_destroy(&is->is_refcnt);
    cv_destroy(&is->is_count_cv);
    mutex_destroy(&is->is_count_mutex);
    cv_destroy(&is->is_cv);
    mutex_destroy(&is->is_mutex);

    /* free the svc handle */
    kmem_free(is, sizeof (idm_svc_t));
}

void
idm_tgt_svc_hold(idm_svc_t *is)
{
    idm_refcnt_hold(&is->is_refcnt);
}

void
idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
{
    idm_refcnt_rele_and_destroy(&is->is_refcnt,
        (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
}

/*
 * idm_tgt_svc_online
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Online each transport service, as we want this target to be accessible
 * via any configured transport.
 *
 * When the initiator establishes a new connection to the target, IDM will
 * call the "new connect" callback defined in the idm_svc_req_t structure
 * and it will pass an idm_conn_t structure representing that new connection.
 */
idm_status_t
idm_tgt_svc_online(idm_svc_t *is)
{

    idm_transport_type_t type, last_type;
    idm_transport_t *it;
    int rc = IDM_STATUS_SUCCESS;

    mutex_enter(&is->is_mutex);
    if (is->is_online == 0) {
        /* Walk through each of the transports and online them */
        for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
            it = &idm_transport_list[type];
            if (it->it_ops == NULL) {
                /* transport is not registered */
                continue;
            }

            mutex_exit(&is->is_mutex);
            rc = it->it_ops->it_tgt_svc_online(is);
            mutex_enter(&is->is_mutex);
            if (rc != IDM_STATUS_SUCCESS) {
                last_type = type;
                break;
            }
        }
        if (rc != IDM_STATUS_SUCCESS) {
            /*
             * The last transport failed to online.
             * Offline any transport onlined above and
             * do not online the target.
             */
            for (type = 0; type < last_type; type++) {
                it = &idm_transport_list[type];
                if (it->it_ops == NULL) {
                    /* transport is not registered */
                    continue;
                }

                mutex_exit(&is->is_mutex);
                it->it_ops->it_tgt_svc_offline(is);
                mutex_enter(&is->is_mutex);
            }
        } else {
            /* Target service now online */
            is->is_online = 1;
        }
    } else {
        /* Target service already online, just bump the count */
        is->is_online++;
    }
    mutex_exit(&is->is_mutex);

    return (rc);
}
/*
 * idm_tgt_svc_offline
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Shutdown any online target services.
 */
void
idm_tgt_svc_offline(idm_svc_t *is)
{
    idm_transport_type_t type;
    idm_transport_t *it;

    mutex_enter(&is->is_mutex);
    is->is_online--;
    if (is->is_online == 0) {
        /* Walk through each of the transports and offline them */
        for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
            it = &idm_transport_list[type];
            if (it->it_ops == NULL) {
                /* transport is not registered */
                continue;
            }

            mutex_exit(&is->is_mutex);
            it->it_ops->it_tgt_svc_offline(is);
            mutex_enter(&is->is_mutex);
        }
    }
    mutex_exit(&is->is_mutex);
}

/*
 * idm_tgt_svc_lookup
 *
 * Lookup a service instance listening on the specified port
 */

idm_svc_t *
idm_tgt_svc_lookup(uint16_t port)
{
    idm_svc_t *result;

retry:
    mutex_enter(&idm.idm_global_mutex);
    for (result = list_head(&idm.idm_tgt_svc_list);
        result != NULL;
        result = list_next(&idm.idm_tgt_svc_list, result)) {
        if (result->is_svc_req.sr_port == port) {
            if (result->is_online == 0) {
                /*
                 * A service exists on this port, but it
                 * is going away, wait for it to cleanup.
                 */
                cv_wait(&idm.idm_tgt_svc_cv,
                    &idm.idm_global_mutex);
                mutex_exit(&idm.idm_global_mutex);
                goto retry;
            }
            idm_tgt_svc_hold(result);
            mutex_exit(&idm.idm_global_mutex);
            return (result);
        }
    }
    mutex_exit(&idm.idm_global_mutex);

    return (NULL);
}

/*
 * idm_negotiate_key_values()
 * Give IDM level a chance to negotiate any login parameters it should own.
 *  -- leave unhandled parameters alone on request_nvl
 *  -- move all handled parameters to response_nvl with an appropriate response
 *  -- also add an entry to negotiated_nvl for any accepted parameters
 */
kv_status_t
idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
    ASSERT(ic->ic_transport_ops != NULL);
    return (ic->ic_transport_ops->it_negotiate_key_values(ic,
        request_nvl, response_nvl, negotiated_nvl));
}

/*
 * idm_notice_key_values()
 * Activate at the IDM level any parameters that have been negotiated.
 * Passes the set of key value pairs to the transport for activation.
 * This will be invoked as the connection is entering full-feature mode.
 */
void
idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
{
    ASSERT(ic->ic_transport_ops != NULL);
    ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
}

/*
 * idm_declare_key_values()
 * Activate an operational set of declarative parameters from the config_nvl,
 * and return the selected values in the outgoing_nvl.
 */
kv_status_t
idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
    nvlist_t *outgoing_nvl)
{
    ASSERT(ic->ic_transport_ops != NULL);
    return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
        outgoing_nvl));
}
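
/*
 * Example (editor's sketch, never compiled): driving IDM key negotiation
 * during login. The kv_status_t success value tested here (KV_HANDLED)
 * is an assumption; callers should test whatever value their login code
 * expects. idm_notice_key_values() would normally be deferred until the
 * connection enters full-feature mode.
 */
#ifdef IDM_EXAMPLE_CODE
static kv_status_t
example_negotiate(idm_conn_t *ic, nvlist_t *request_nvl)
{
    nvlist_t *response_nvl, *negotiated_nvl;
    kv_status_t kvrc;

    (void) nvlist_alloc(&response_nvl, NV_UNIQUE_NAME, KM_SLEEP);
    (void) nvlist_alloc(&negotiated_nvl, NV_UNIQUE_NAME, KM_SLEEP);

    /* IDM consumes the keys it owns, answering on response_nvl */
    kvrc = idm_negotiate_key_values(ic, request_nvl, response_nvl,
        negotiated_nvl);

    if (kvrc == KV_HANDLED)
        idm_notice_key_values(ic, negotiated_nvl);

    nvlist_free(response_nvl);
    nvlist_free(negotiated_nvl);
    return (kvrc);
}
#endif /* IDM_EXAMPLE_CODE */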
/*
 * idm_buf_tx_to_ini
 *
 * This is IDM's implementation of the 'Put_Data' operational primitive.
 *
 * This function is invoked by a target iSCSI layer to request its local
 * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
 * on the remote iSCSI node. The I/O buffer represented by 'idb' is
 * transferred to the initiator associated with task 'idt'. The connection
 * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
 * and the callback (idb->idb_buf_cb) at transfer completion are
 * provided as input.
 *
 * This data transfer takes place transparently to the remote iSCSI layer,
 * i.e. without its participation.
 *
 * Using sockets, IDM implements the data transfer by segmenting the data
 * buffer into appropriately sized iSCSI PDUs and transmitting them to the
 * initiator. iSER performs the transfer using RDMA write.
 *
 */
idm_status_t
idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
    idm_status_t rc;

    idb->idb_bufoffset = offset;
    idb->idb_xfer_len = xfer_len;
    idb->idb_buf_cb = idb_buf_cb;
    idb->idb_cb_arg = cb_arg;
    gethrestime(&idb->idb_xfer_start);

    /*
     * Buffer should not contain the pattern. If the pattern is
     * present then we've been asked to transmit initialized data
     */
    IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);

    mutex_enter(&idt->idt_mutex);
    switch (idt->idt_state) {
    case TASK_ACTIVE:
        idt->idt_tx_to_ini_start++;
        idm_task_hold(idt);
        idm_buf_bind_in_locked(idt, idb);
        idb->idb_in_transport = B_TRUE;
        /* the transport op is responsible for releasing idt_mutex */
        rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
            (idt, idb);
        return (rc);

    case TASK_SUSPENDING:
    case TASK_SUSPENDED:
        /*
         * Bind buffer but don't start a transfer since the task
         * is suspended
         */
        idm_buf_bind_in_locked(idt, idb);
        mutex_exit(&idt->idt_mutex);
        return (IDM_STATUS_SUCCESS);

    case TASK_ABORTING:
    case TASK_ABORTED:
        /*
         * Once the task is aborted, any buffers added to the
         * idt_inbufv will never get cleaned up, so just return
         * SUCCESS. The buffer should get cleaned up by the
         * client or framework once task_aborted has completed.
         */
        mutex_exit(&idt->idt_mutex);
        return (IDM_STATUS_SUCCESS);

    default:
        ASSERT(0);
        break;
    }
    mutex_exit(&idt->idt_mutex);

    return (IDM_STATUS_FAIL);
}
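
/*
 * Example (editor's sketch, never compiled): a target-side 'Put_Data'
 * invocation for a SCSI read. "example_tx_done" is a hypothetical
 * completion callback matching idm_buf_cb_t.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_tx_done(idm_buf_t *idb, idm_status_t status)
{
    /* data reached the socket/RDMA layer; complete the SCSI read */
}

static void
example_put_data(idm_task_t *idt, idm_buf_t *idb, uint32_t len)
{
    if (idm_buf_tx_to_ini(idt, idb, 0, len, example_tx_done, NULL) !=
        IDM_STATUS_SUCCESS) {
        /* transfer didn't start; abort/clean up the task */
    }
}
#endif /* IDM_EXAMPLE_CODE */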
/*
 * idm_buf_rx_from_ini
 *
 * This is IDM's implementation of the 'Get_Data' operational primitive.
 *
 * This function is invoked by a target iSCSI layer to request its local
 * Datamover layer to retrieve certain data identified by the R2T PDU from the
 * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
 * mapped to the respective buffer by the task tags (ITT & TTT).
 * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
 * the callback (idb->idb_buf_cb) notification for data transfer completion
 * are provided as input.
 *
 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local
 * and remote Datamover layers transparently bring about the data transfer
 * requested by the R2T PDU, without the participation of the iSCSI layers.
 *
 * Using sockets, IDM transmits an R2T PDU for each buffer and the
 * rx_data_out() assembles the Data-Out PDUs into the buffer. iSER uses
 * RDMA read.
 *
 */
idm_status_t
idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
    idm_status_t rc;

    idb->idb_bufoffset = offset;
    idb->idb_xfer_len = xfer_len;
    idb->idb_buf_cb = idb_buf_cb;
    idb->idb_cb_arg = cb_arg;
    gethrestime(&idb->idb_xfer_start);

    /*
     * "In" buf list is for "Data In" PDU's, "Out" buf list is for
     * "Data Out" PDU's
     */
    mutex_enter(&idt->idt_mutex);
    switch (idt->idt_state) {
    case TASK_ACTIVE:
        idt->idt_rx_from_ini_start++;
        idm_task_hold(idt);
        idm_buf_bind_out_locked(idt, idb);
        idb->idb_in_transport = B_TRUE;
        /* the transport op is responsible for releasing idt_mutex */
        rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
            (idt, idb);
        return (rc);
    case TASK_SUSPENDING:
    case TASK_SUSPENDED:
    case TASK_ABORTING:
    case TASK_ABORTED:
        /*
         * Bind buffer but don't start a transfer since the task
         * is suspended
         */
        idm_buf_bind_out_locked(idt, idb);
        mutex_exit(&idt->idt_mutex);
        return (IDM_STATUS_SUCCESS);
    default:
        ASSERT(0);
        break;
    }
    mutex_exit(&idt->idt_mutex);

    return (IDM_STATUS_FAIL);
}
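
/*
 * Example (editor's sketch, never compiled): the matching 'Get_Data'
 * call for SCSI write data. IDM issues the R2T (sockets) or RDMA read
 * (iSER) on the caller's behalf; "example_rx_done" is hypothetical.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_rx_done(idm_buf_t *idb, idm_status_t status)
{
    /* on IDM_STATUS_SUCCESS the write data is in idb->idb_buf */
}

static void
example_get_data(idm_task_t *idt, idm_buf_t *idb, uint32_t off, uint32_t len)
{
    (void) idm_buf_rx_from_ini(idt, idb, off, len, example_rx_done, NULL);
}
#endif /* IDM_EXAMPLE_CODE */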
/*
 * idm_buf_tx_to_ini_done
 *
 * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_tx_to_ini
 *
 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 * idt may be freed after the call to idb->idb_buf_cb.
 */
void
idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
    ASSERT(mutex_owned(&idt->idt_mutex));
    idb->idb_in_transport = B_FALSE;
    idb->idb_tx_thread = B_FALSE;
    idt->idt_tx_to_ini_done++;
    gethrestime(&idb->idb_xfer_done);

    /*
     * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
     * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
     * to 0.
     */
    idm_task_rele(idt);
    idb->idb_status = status;

    switch (idt->idt_state) {
    case TASK_ACTIVE:
        idt->idt_ic->ic_timestamp = ddi_get_lbolt();
        idm_buf_unbind_in_locked(idt, idb);
        mutex_exit(&idt->idt_mutex);
        (*idb->idb_buf_cb)(idb, status);
        return;
    case TASK_SUSPENDING:
    case TASK_SUSPENDED:
    case TASK_ABORTING:
    case TASK_ABORTED:
        /*
         * To keep things simple we will ignore the case where the
         * transfer was successful and leave all buffers bound to the
         * task. This allows us to also ignore the case where we've
         * been asked to abort a task but the last transfer of the
         * task has completed. IDM has no idea whether this was, in
         * fact, the last transfer of the task so it would be difficult
         * to handle this case. Everything should get sorted out again
         * after task reassignment is complete.
         *
         * In the case of TASK_ABORTING we could conceivably call the
         * buffer callback here but the timing of when the client's
         * client_task_aborted callback is invoked vs. when the client's
         * buffer callback gets invoked gets sticky. We don't want
         * the client to hear from us again after the call to
         * client_task_aborted() but we don't want to give it a bunch
         * of failed buffer transfers until we've called
         * client_task_aborted(). Instead we'll just leave all the
         * buffers bound and allow the client to cleanup.
         */
        break;
    default:
        ASSERT(0);
    }
    mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_rx_from_ini_done
 *
 * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_rx_from_ini
 *
 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 * idt may be freed after the call to idb->idb_buf_cb.
 */
void
idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
    ASSERT(mutex_owned(&idt->idt_mutex));
    idb->idb_in_transport = B_FALSE;
    idt->idt_rx_from_ini_done++;
    gethrestime(&idb->idb_xfer_done);

    /*
     * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
     * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
     * to 0.
     */
    idm_task_rele(idt);
    idb->idb_status = status;

    if (status == IDM_STATUS_SUCCESS) {
        /*
         * Buffer should not contain the pattern. If it does then
         * we did not get the data from the remote host.
         */
        IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
    }

    switch (idt->idt_state) {
    case TASK_ACTIVE:
        idt->idt_ic->ic_timestamp = ddi_get_lbolt();
        idm_buf_unbind_out_locked(idt, idb);
        mutex_exit(&idt->idt_mutex);
        (*idb->idb_buf_cb)(idb, status);
        return;
    case TASK_SUSPENDING:
    case TASK_SUSPENDED:
    case TASK_ABORTING:
    case TASK_ABORTED:
        /*
         * To keep things simple we will ignore the case where the
         * transfer was successful and leave all buffers bound to the
         * task. This allows us to also ignore the case where we've
         * been asked to abort a task but the last transfer of the
         * task has completed. IDM has no idea whether this was, in
         * fact, the last transfer of the task so it would be difficult
         * to handle this case. Everything should get sorted out again
         * after task reassignment is complete.
         *
         * In the case of TASK_ABORTING we could conceivably call the
         * buffer callback here but the timing of when the client's
         * client_task_aborted callback is invoked vs. when the client's
         * buffer callback gets invoked gets sticky. We don't want
         * the client to hear from us again after the call to
         * client_task_aborted() but we don't want to give it a bunch
         * of failed buffer transfers until we've called
         * client_task_aborted(). Instead we'll just leave all the
         * buffers bound and allow the client to cleanup.
         */
        break;
    default:
        ASSERT(0);
    }
    mutex_exit(&idt->idt_mutex);
}
/*
 * idm_buf_alloc
 *
 * Allocates a buffer handle and registers it for use with the transport
 * layer. If a buffer is not passed on bufptr, the buffer will be allocated
 * as well as the handle.
 *
 * ic - connection on which the buffer will be transferred
 * bufptr - allocate memory for buffer if NULL, else assign to buffer
 * buflen - length of buffer
 *
 * Returns idm_buf_t handle if successful, otherwise NULL
 */
idm_buf_t *
idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
{
    idm_buf_t *buf = NULL;
    int rc;

    ASSERT(ic != NULL);
    ASSERT(idm.idm_buf_cache != NULL);
    ASSERT(buflen > 0);

    /* Don't allocate new buffers if we are not in FFP */
    mutex_enter(&ic->ic_state_mutex);
    if (!ic->ic_ffp) {
        mutex_exit(&ic->ic_state_mutex);
        return (NULL);
    }

    idm_conn_hold(ic);
    mutex_exit(&ic->ic_state_mutex);

    buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
    if (buf == NULL) {
        idm_conn_rele(ic);
        return (NULL);
    }

    buf->idb_ic = ic;
    buf->idb_buflen = buflen;
    buf->idb_exp_offset = 0;
    buf->idb_bufoffset = 0;
    buf->idb_xfer_len = 0;
    buf->idb_magic = IDM_BUF_MAGIC;
    buf->idb_in_transport = B_FALSE;
    buf->idb_bufbcopy = B_FALSE;

    /*
     * If bufptr is NULL, we have an implicit request to allocate
     * memory for this IDM buffer handle and register it for use
     * with the transport. To simplify this, and to give more freedom
     * to the transport layer for its own buffer management, both of
     * these actions will take place in the transport layer.
     * If bufptr is set, then the caller has allocated memory (or more
     * likely it's been passed from an upper layer), and we need only
     * register the buffer for use with the transport layer.
     */
    if (bufptr == NULL) {
        /*
         * Allocate a buffer from the transport layer (which
         * will also register the buffer for use).
         */
        rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
        if (rc != 0) {
            idm_conn_rele(ic);
            kmem_cache_free(idm.idm_buf_cache, buf);
            return (NULL);
        }
        /* Set the bufalloc'd flag */
        buf->idb_bufalloc = B_TRUE;
    } else {
        /*
         * For large transfers, set the passed bufptr into
         * the buf handle, and register the handle with the
         * transport layer. As memory registration with the
         * transport layer is a time/cpu intensive operation,
         * for small transfers (up to a pre-defined bcopy
         * threshold), use pre-registered memory buffers
         * and bcopy data at the appropriate time.
         */
        buf->idb_buf = bufptr;

        rc = ic->ic_transport_ops->it_buf_setup(buf);
        if (rc != 0) {
            idm_conn_rele(ic);
            kmem_cache_free(idm.idm_buf_cache, buf);
            return (NULL);
        }
        /*
         * The transport layer is now expected to set the idb_bufalloc
         * flag correctly to indicate if resources have been allocated.
         */
    }

    IDM_BUFPAT_SET(buf);

    return (buf);
}

/*
 * idm_buf_free
 *
 * Release a buffer handle along with the associated buffer that was allocated
 * or assigned with idm_buf_alloc
 */
void
idm_buf_free(idm_buf_t *buf)
{
    idm_conn_t *ic = buf->idb_ic;

    buf->idb_task_binding = NULL;

    if (buf->idb_bufalloc) {
        ic->ic_transport_ops->it_buf_free(buf);
    } else {
        ic->ic_transport_ops->it_buf_teardown(buf);
    }
    kmem_cache_free(idm.idm_buf_cache, buf);
    idm_conn_rele(ic);
}
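
/*
 * Example (editor's sketch, never compiled): the two idm_buf_alloc()
 * modes described above -- transport-allocated memory versus registering
 * memory supplied by an upper layer.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_buf_modes(idm_conn_t *ic, void *upper_layer_buf, uint64_t len)
{
    idm_buf_t *a, *b;

    a = idm_buf_alloc(ic, NULL, len);               /* transport allocates */
    b = idm_buf_alloc(ic, upper_layer_buf, len);    /* register only */

    if (a != NULL)
        idm_buf_free(a);
    if (b != NULL)
        idm_buf_free(b);
}
#endif /* IDM_EXAMPLE_CODE */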
This is only for use by the 1084 * iSCSI initiator that will have only one buffer per transfer direction 1085 * 1086 */ 1087 void 1088 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf) 1089 { 1090 mutex_enter(&idt->idt_mutex); 1091 idm_buf_bind_in_locked(idt, buf); 1092 mutex_exit(&idt->idt_mutex); 1093 } 1094 1095 static void 1096 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf) 1097 { 1098 buf->idb_task_binding = idt; 1099 buf->idb_ic = idt->idt_ic; 1100 idm_listbuf_insert(&idt->idt_inbufv, buf); 1101 } 1102 1103 void 1104 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf) 1105 { 1106 /* 1107 * For small transfers, the iSER transport delegates the IDM 1108 * layer to bcopy the SCSI Write data for faster IOPS. 1109 */ 1110 if (buf->idb_bufbcopy == B_TRUE) { 1111 1112 bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen); 1113 } 1114 mutex_enter(&idt->idt_mutex); 1115 idm_buf_bind_out_locked(idt, buf); 1116 mutex_exit(&idt->idt_mutex); 1117 } 1118 1119 static void 1120 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf) 1121 { 1122 buf->idb_task_binding = idt; 1123 buf->idb_ic = idt->idt_ic; 1124 idm_listbuf_insert(&idt->idt_outbufv, buf); 1125 } 1126 1127 void 1128 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf) 1129 { 1130 /* 1131 * For small transfers, the iSER transport delegates the IDM 1132 * layer to bcopy the SCSI Read data into the read buufer 1133 * for faster IOPS. 1134 */ 1135 if (buf->idb_bufbcopy == B_TRUE) { 1136 bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen); 1137 } 1138 mutex_enter(&idt->idt_mutex); 1139 idm_buf_unbind_in_locked(idt, buf); 1140 mutex_exit(&idt->idt_mutex); 1141 } 1142 1143 static void 1144 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf) 1145 { 1146 list_remove(&idt->idt_inbufv, buf); 1147 } 1148 1149 void 1150 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf) 1151 { 1152 mutex_enter(&idt->idt_mutex); 1153 idm_buf_unbind_out_locked(idt, buf); 1154 mutex_exit(&idt->idt_mutex); 1155 } 1156 1157 static void 1158 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf) 1159 { 1160 list_remove(&idt->idt_outbufv, buf); 1161 } 1162 1163 /* 1164 * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the 1165 * iSCSI PDU 1166 */ 1167 idm_buf_t * 1168 idm_buf_find(void *lbuf, size_t data_offset) 1169 { 1170 idm_buf_t *idb; 1171 list_t *lst = (list_t *)lbuf; 1172 1173 /* iterate through the list to find the buffer */ 1174 for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) { 1175 1176 ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) || 1177 (idb->idb_bufoffset == 0)); 1178 1179 if ((data_offset >= idb->idb_bufoffset) && 1180 (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) { 1181 1182 return (idb); 1183 } 1184 } 1185 1186 return (NULL); 1187 } 1188 1189 void 1190 idm_bufpat_set(idm_buf_t *idb) 1191 { 1192 idm_bufpat_t *bufpat; 1193 int len, i; 1194 1195 len = idb->idb_buflen; 1196 len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t); 1197 1198 bufpat = idb->idb_buf; 1199 for (i = 0; i < len; i += sizeof (idm_bufpat_t)) { 1200 bufpat->bufpat_idb = idb; 1201 bufpat->bufpat_bufmagic = IDM_BUF_MAGIC; 1202 bufpat->bufpat_offset = i; 1203 bufpat++; 1204 } 1205 } 1206 1207 boolean_t 1208 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type) 1209 { 1210 idm_bufpat_t *bufpat; 1211 int len, i; 1212 1213 len = (type == BP_CHECK_QUICK) ? 
boolean_t
idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
{
    idm_bufpat_t *bufpat;
    int len, i;

    len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
    len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
    ASSERT(len <= idb->idb_buflen);
    bufpat = idb->idb_buf;

    /*
     * Don't check the pattern in buffers that came from outside IDM
     * (these will be buffers from the initiator that we opted not
     * to double-buffer)
     */
    if (!idb->idb_bufalloc)
        return (B_FALSE);

    /*
     * Return true if we find the pattern anywhere in the buffer
     */
    for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
        if (BUFPAT_MATCH(bufpat, idb)) {
            IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
                "idb %p bufpat %p "
                "bufpat_idb=%p bufmagic=%08x offset=%08x",
                (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
                bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
            DTRACE_PROBE2(bufpat__pattern__found,
                idm_buf_t *, idb, idm_bufpat_t *, bufpat);
            if (type == BP_CHECK_ASSERT) {
                ASSERT(0);
            }
            return (B_TRUE);
        }
        bufpat++;
    }

    return (B_FALSE);
}

/*
 * idm_task_alloc
 *
 * This function will allocate an idm_task_t structure. A task tag is also
 * generated and saved in idt_tt. The task is not active.
 */
idm_task_t *
idm_task_alloc(idm_conn_t *ic)
{
    idm_task_t *idt;

    ASSERT(ic != NULL);

    /* Don't allocate new tasks if we are not in FFP */
    mutex_enter(&ic->ic_state_mutex);
    if (!ic->ic_ffp) {
        mutex_exit(&ic->ic_state_mutex);
        return (NULL);
    }
    idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
    if (idt == NULL) {
        mutex_exit(&ic->ic_state_mutex);
        return (NULL);
    }

    ASSERT(list_is_empty(&idt->idt_inbufv));
    ASSERT(list_is_empty(&idt->idt_outbufv));

    idm_conn_hold(ic);
    mutex_exit(&ic->ic_state_mutex);

    idt->idt_state = TASK_IDLE;
    idt->idt_ic = ic;
    idt->idt_private = NULL;
    idt->idt_exp_datasn = 0;
    idt->idt_exp_rttsn = 0;

    return (idt);
}

/*
 * idm_task_start
 *
 * Mark the task active and initialize some stats. The caller
 * sets up the idm_task_t structure with a prior call to idm_task_alloc().
 * The task service does not function as a task/work engine; it is the
 * responsibility of the initiator to start the data transfer and free the
 * resources.
 */
void
idm_task_start(idm_task_t *idt, uintptr_t handle)
{
    ASSERT(idt != NULL);

    /* mark the task as ACTIVE */
    idt->idt_state = TASK_ACTIVE;
    idt->idt_client_handle = handle;
    idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
        idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
        idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
}
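
/*
 * Example (editor's sketch, never compiled): the task lifecycle implied
 * by idm_task_alloc/idm_task_start and the completion calls defined
 * below (idm_task_done/idm_task_free).
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_task_lifecycle(idm_conn_t *ic, uintptr_t client_handle)
{
    idm_task_t *idt;

    /* returns NULL unless the connection is in full-feature mode */
    idt = idm_task_alloc(ic);
    if (idt == NULL)
        return;

    idm_task_start(idt, client_handle);

    /* ... bind buffers and run transfers ... */

    idm_task_done(idt);
    idm_task_free(idt);
}
#endif /* IDM_EXAMPLE_CODE */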
/*
 * idm_task_done
 *
 * This function sets the state to indicate that the task is no longer active.
 */
void
idm_task_done(idm_task_t *idt)
{
    ASSERT(idt != NULL);

    mutex_enter(&idt->idt_mutex);
    idt->idt_state = TASK_IDLE;
    mutex_exit(&idt->idt_mutex);

    /*
     * Although unlikely it is possible for a reference to come in after
     * the client has decided the task is over but before we've marked
     * the task idle. One specific unavoidable scenario is the case where
     * a received PDU with the matching ITT/TTT results in a successful
     * lookup of this task. We are at the mercy of the remote node in
     * that case so we need to handle it. Now that the task state
     * has changed no more references will occur so a simple call to
     * idm_refcnt_wait_ref should deal with the situation.
     */
    idm_refcnt_wait_ref(&idt->idt_refcnt);
    idm_refcnt_reset(&idt->idt_refcnt);
}

/*
 * idm_task_free
 *
 * This function will free the task tag and the memory allocated for the task.
 * idm_task_done should be called prior to this call.
 */
void
idm_task_free(idm_task_t *idt)
{
    idm_conn_t *ic;

    ASSERT(idt != NULL);
    ASSERT(idt->idt_refcnt.ir_refcnt == 0);
    ASSERT(idt->idt_state == TASK_IDLE);

    ic = idt->idt_ic;

    /*
     * It's possible for items to still be in the idt_inbufv list if
     * they were added after idm_task_cleanup was called. We rely on
     * STMF to free all buffers associated with the task however STMF
     * doesn't know that we have this reference to the buffers.
     * Use list_create so that we don't end up with stale references
     * to these buffers.
     */
    list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
        offsetof(idm_buf_t, idb_buflink));
    list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
        offsetof(idm_buf_t, idb_buflink));

    kmem_cache_free(idm.idm_task_cache, idt);

    idm_conn_rele(ic);
}

/*
 * idm_task_find_common
 * common code for idm_task_find() and idm_task_find_and_complete()
 */
/*ARGSUSED*/
static idm_task_t *
idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
    boolean_t complete)
{
    uint32_t tt, client_handle;
    idm_task_t *idt;

    /*
     * Must match both itt and ttt. The table is indexed by itt
     * for initiator connections and ttt for target connections.
     */
    if (IDM_CONN_ISTGT(ic)) {
        tt = ttt;
        client_handle = itt;
    } else {
        tt = itt;
        client_handle = ttt;
    }

    rw_enter(&idm.idm_taskid_table_lock, RW_READER);
    if (tt >= idm.idm_taskid_max) {
        rw_exit(&idm.idm_taskid_table_lock);
        return (NULL);
    }

    idt = idm.idm_taskid_table[tt];

    if (idt != NULL) {
        mutex_enter(&idt->idt_mutex);
        if ((idt->idt_state != TASK_ACTIVE) ||
            (idt->idt_ic != ic) ||
            (IDM_CONN_ISTGT(ic) &&
            (idt->idt_client_handle != client_handle))) {
            /*
             * Task doesn't match or task is aborting and
             * we don't want any more references.
             */
            if ((idt->idt_ic != ic) &&
                (idt->idt_state == TASK_ACTIVE) &&
                (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
                client_handle)) {
                IDM_CONN_LOG(CE_WARN,
                    "idm_task_find: wrong connection %p != %p",
                    (void *)ic, (void *)idt->idt_ic);
            }
            mutex_exit(&idt->idt_mutex);
            rw_exit(&idm.idm_taskid_table_lock);
            return (NULL);
        }
        idm_task_hold(idt);
        /*
         * Set the task state to TASK_COMPLETE so it can no longer
         * be found or aborted.
         */
        if (B_TRUE == complete)
            idt->idt_state = TASK_COMPLETE;
        mutex_exit(&idt->idt_mutex);
    }
    rw_exit(&idm.idm_taskid_table_lock);

    return (idt);
}

/*
 * This function looks up a task by task tag.
 */
idm_task_t *
idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
    return (idm_task_find_common(ic, itt, ttt, B_FALSE));
}
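
/*
 * Example (editor's sketch, never compiled): resolving an inbound PDU's
 * tags to a task. The lookup takes a hold on success, which the caller
 * must release with idm_task_rele() (defined below).
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_lookup(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
    idm_task_t *idt;

    idt = idm_task_find(ic, itt, ttt);
    if (idt == NULL)
        return; /* unknown/stale tags, or task not active */

    /* ... locate the bound buffer and move the data ... */

    idm_task_rele(idt);
}
#endif /* IDM_EXAMPLE_CODE */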
/*
 * This function looks up a task by task tag. If found, the task state
 * is atomically set to TASK_COMPLETE so it can no longer be found or aborted.
 */
idm_task_t *
idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
    return (idm_task_find_common(ic, itt, ttt, B_TRUE));
}

/*
 * idm_task_find_by_handle
 *
 * This function looks up a task by the client-private idt_client_handle.
 *
 * This function should NEVER be called in the performance path. It is
 * intended strictly for error recovery/task management.
 */
/*ARGSUSED*/
void *
idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
{
    idm_task_t *idt = NULL;
    int idx = 0;

    rw_enter(&idm.idm_taskid_table_lock, RW_READER);

    for (idx = 0; idx < idm.idm_taskid_max; idx++) {
        idt = idm.idm_taskid_table[idx];

        if (idt == NULL)
            continue;

        mutex_enter(&idt->idt_mutex);

        if (idt->idt_state != TASK_ACTIVE) {
            /*
             * Task is either in suspend, abort, or already
             * complete.
             */
            mutex_exit(&idt->idt_mutex);
            continue;
        }

        if (idt->idt_client_handle == handle) {
            idm_task_hold(idt);
            mutex_exit(&idt->idt_mutex);
            break;
        }

        mutex_exit(&idt->idt_mutex);
    }

    rw_exit(&idm.idm_taskid_table_lock);

    if ((idt == NULL) || (idx == idm.idm_taskid_max))
        return (NULL);

    return (idt->idt_private);
}

void
idm_task_hold(idm_task_t *idt)
{
    idm_refcnt_hold(&idt->idt_refcnt);
}

void
idm_task_rele(idm_task_t *idt)
{
    idm_refcnt_rele(&idt->idt_refcnt);
}

void
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
    idm_task_t *task;
    int idx;

    /*
     * Passing NULL as the task indicates that all tasks
     * for this connection should be aborted.
     */
    if (idt == NULL) {
        /*
         * Only the connection state machine should ask for
         * all tasks to abort and this should never happen in FFP.
         */
        ASSERT(!ic->ic_ffp);
        rw_enter(&idm.idm_taskid_table_lock, RW_READER);
        for (idx = 0; idx < idm.idm_taskid_max; idx++) {
            task = idm.idm_taskid_table[idx];
            if (task == NULL)
                continue;
            mutex_enter(&task->idt_mutex);
            if ((task->idt_state != TASK_IDLE) &&
                (task->idt_state != TASK_COMPLETE) &&
                (task->idt_ic == ic)) {
                rw_exit(&idm.idm_taskid_table_lock);
                idm_task_abort_one(ic, task, abort_type);
                rw_enter(&idm.idm_taskid_table_lock, RW_READER);
            } else
                mutex_exit(&task->idt_mutex);
        }
        rw_exit(&idm.idm_taskid_table_lock);
    } else {
        mutex_enter(&idt->idt_mutex);
        idm_task_abort_one(ic, idt, abort_type);
    }
}

static void
idm_task_abort_unref_cb(void *ref)
{
    idm_task_t *idt = ref;

    mutex_enter(&idt->idt_mutex);
    switch (idt->idt_state) {
    case TASK_SUSPENDING:
        idt->idt_state = TASK_SUSPENDED;
        mutex_exit(&idt->idt_mutex);
        idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
        return;
    case TASK_ABORTING:
        idt->idt_state = TASK_ABORTED;
        mutex_exit(&idt->idt_mutex);
        idm_task_aborted(idt, IDM_STATUS_ABORTED);
        return;
    default:
        mutex_exit(&idt->idt_mutex);
        ASSERT(0);
        break;
    }
}
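
/*
 * Example (editor's sketch, never compiled): the NULL-task convention of
 * idm_task_abort(), as the connection state machine might use it when a
 * connection fails outside full-feature mode.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_abort_all(idm_conn_t *ic)
{
    /* NULL task: abort every task still associated with ic */
    idm_task_abort(ic, NULL, AT_INTERNAL_ABORT);
}
#endif /* IDM_EXAMPLE_CODE */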
/*
 * Abort the idm task.
 * Caller must hold the task mutex, which will be released before return
 */
static void
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
    /* Caller must hold the task mutex */
    ASSERT(mutex_owned(&idt->idt_mutex));
    switch (idt->idt_state) {
    case TASK_ACTIVE:
        switch (abort_type) {
        case AT_INTERNAL_SUSPEND:
            /* Call transport to release any resources */
            idt->idt_state = TASK_SUSPENDING;
            mutex_exit(&idt->idt_mutex);
            ic->ic_transport_ops->it_free_task_rsrc(idt);

            /*
             * Wait for outstanding references. When all
             * references are released the callback will call
             * idm_task_aborted().
             */
            idm_refcnt_async_wait_ref(&idt->idt_refcnt,
                &idm_task_abort_unref_cb);
            return;
        case AT_INTERNAL_ABORT:
        case AT_TASK_MGMT_ABORT:
            idt->idt_state = TASK_ABORTING;
            mutex_exit(&idt->idt_mutex);
            ic->ic_transport_ops->it_free_task_rsrc(idt);

            /*
             * Wait for outstanding references. When all
             * references are released the callback will call
             * idm_task_aborted().
             */
            idm_refcnt_async_wait_ref(&idt->idt_refcnt,
                &idm_task_abort_unref_cb);
            return;
        default:
            ASSERT(0);
        }
        break;
    case TASK_SUSPENDING:
        /* Already called transport_free_task_rsrc(); */
        switch (abort_type) {
        case AT_INTERNAL_SUSPEND:
            /* Already doing it */
            break;
        case AT_INTERNAL_ABORT:
        case AT_TASK_MGMT_ABORT:
            idt->idt_state = TASK_ABORTING;
            break;
        default:
            ASSERT(0);
        }
        break;
    case TASK_SUSPENDED:
        /* Already called transport_free_task_rsrc(); */
        switch (abort_type) {
        case AT_INTERNAL_SUSPEND:
            /* Already doing it */
            break;
        case AT_INTERNAL_ABORT:
        case AT_TASK_MGMT_ABORT:
            idt->idt_state = TASK_ABORTING;
            mutex_exit(&idt->idt_mutex);

            /*
             * We could probably call idm_task_aborted directly
             * here but we may be holding the conn lock. It's
             * easier to just switch contexts. Even though
             * we shouldn't really have any references we'll
             * set the state to TASK_ABORTING instead of
             * TASK_ABORTED so we can use the same code path.
             */
            idm_refcnt_async_wait_ref(&idt->idt_refcnt,
                &idm_task_abort_unref_cb);
            return;
        default:
            ASSERT(0);
        }
        break;
    case TASK_ABORTING:
    case TASK_ABORTED:
        switch (abort_type) {
        case AT_INTERNAL_SUSPEND:
            /* We're already past this point... */
        case AT_INTERNAL_ABORT:
        case AT_TASK_MGMT_ABORT:
            /* Already doing it */
            break;
        default:
            ASSERT(0);
        }
        break;
    case TASK_COMPLETE:
        /*
         * In this case, let it go. The status has already been
         * sent (which may or may not get successfully transmitted)
         * and we don't want to end up in a race between completing
         * the status PDU and marking the task suspended.
         */
        break;
    default:
        ASSERT(0);
    }
    mutex_exit(&idt->idt_mutex);
}
static void
idm_task_aborted(idm_task_t *idt, idm_status_t status)
{
    (*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
}

void
idm_task_cleanup(idm_task_t *idt)
{
    idm_buf_t *idb, *next_idb;
    list_t tmp_buflist;

    ASSERT((idt->idt_state == TASK_SUSPENDED) ||
        (idt->idt_state == TASK_ABORTED));

    list_create(&tmp_buflist, sizeof (idm_buf_t),
        offsetof(idm_buf_t, idb_buflink));

    /*
     * Remove all the buffers from the task and add them to a
     * temporary local list -- we do this so that we can hold
     * the task lock and prevent the task from going away if
     * the client decides to call idm_task_done/idm_task_free.
     * This could happen during abort in iscsit.
     */
    mutex_enter(&idt->idt_mutex);
    for (idb = list_head(&idt->idt_inbufv);
        idb != NULL;
        idb = next_idb) {
        next_idb = list_next(&idt->idt_inbufv, idb);
        idm_buf_unbind_in_locked(idt, idb);
        list_insert_tail(&tmp_buflist, idb);
    }

    for (idb = list_head(&idt->idt_outbufv);
        idb != NULL;
        idb = next_idb) {
        next_idb = list_next(&idt->idt_outbufv, idb);
        idm_buf_unbind_out_locked(idt, idb);
        list_insert_tail(&tmp_buflist, idb);
    }
    mutex_exit(&idt->idt_mutex);

    for (idb = list_head(&tmp_buflist); idb != NULL; idb = next_idb) {
        next_idb = list_next(&tmp_buflist, idb);
        list_remove(&tmp_buflist, idb);
        (*idb->idb_buf_cb)(idb, IDM_STATUS_ABORTED);
    }
    list_destroy(&tmp_buflist);
}
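
/*
 * Example (editor's sketch, never compiled): a client buffer callback
 * that tolerates the IDM_STATUS_ABORTED completions delivered by
 * idm_task_cleanup() above.
 */
#ifdef IDM_EXAMPLE_CODE
static void
example_buf_cb(idm_buf_t *idb, idm_status_t status)
{
    if (status == IDM_STATUS_ABORTED) {
        /* task torn down; release only client-side state */
        return;
    }

    /* normal completion handling */
}
#endif /* IDM_EXAMPLE_CODE */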
/*
 * idm_pdu_tx
 *
 * This is IDM's implementation of the 'Send_Control' operational primitive.
 * This function is invoked by an initiator iSCSI layer requesting the transfer
 * of an iSCSI command PDU or a target iSCSI layer requesting the transfer of
 * an iSCSI response PDU. The PDU will be transmitted as-is by the local
 * Datamover layer to the peer iSCSI layer in the remote iSCSI node. The
 * connection info and iSCSI PDU-specific qualifiers namely BHS, AHS,
 * DataDescriptor and Size are provided as input.
 *
 */
void
idm_pdu_tx(idm_pdu_t *pdu)
{
    idm_conn_t *ic = pdu->isp_ic;
    iscsi_async_evt_hdr_t *async_evt;

    /*
     * If we are in full-featured mode then route SCSI-related
     * commands to the appropriate function vector without checking
     * the connection state. We will only be in full-feature mode
     * when we are in an acceptable state for SCSI PDU's.
     *
     * We also need to ensure that there are no PDU events outstanding
     * on the state machine. Any non-SCSI PDU's received in full-feature
     * mode will result in PDU events and until these have been handled
     * we need to route all PDU's through the state machine as PDU
     * events to maintain ordering.
     *
     * Note that IDM cannot enter FFP mode until it processes in
     * its state machine the last xmit of the login process.
     * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
     * superfluous.
     */
    mutex_enter(&ic->ic_state_mutex);
    if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
        mutex_exit(&ic->ic_state_mutex);
        switch (IDM_PDU_OPCODE(pdu)) {
        case ISCSI_OP_SCSI_RSP:
            /* Target only */
            DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
                iscsi_scsi_rsp_hdr_t *,
                (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
            idm_pdu_tx_forward(ic, pdu);
            return;
        case ISCSI_OP_SCSI_TASK_MGT_RSP:
            /* Target only */
            DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
                iscsi_scsi_task_mgt_rsp_hdr_t *,
                (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
            idm_pdu_tx_forward(ic, pdu);
            return;
        case ISCSI_OP_SCSI_DATA_RSP:
            /* Target only */
            DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
                iscsi_data_rsp_hdr_t *,
                (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
            idm_pdu_tx_forward(ic, pdu);
            return;
        case ISCSI_OP_RTT_RSP:
            /* Target only */
            DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
                iscsi_rtt_hdr_t *,
                (iscsi_rtt_hdr_t *)pdu->isp_hdr);
            idm_pdu_tx_forward(ic, pdu);
            return;
        case ISCSI_OP_NOOP_IN:
            /* Target only */
            DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
                iscsi_nop_in_hdr_t *,
                (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
            idm_pdu_tx_forward(ic, pdu);
            return;
        case ISCSI_OP_TEXT_RSP:
            /* Target only */
            DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
                iscsi_text_rsp_hdr_t *,
                (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
            idm_pdu_tx_forward(ic, pdu);
            return;
        case ISCSI_OP_TEXT_CMD:
        case ISCSI_OP_NOOP_OUT:
        case ISCSI_OP_SCSI_CMD:
        case ISCSI_OP_SCSI_DATA:
        case ISCSI_OP_SCSI_TASK_MGT_MSG:
            /* Initiator only */
            idm_pdu_tx_forward(ic, pdu);
            return;
        default:
            break;
        }

        mutex_enter(&ic->ic_state_mutex);
    }

    /*
     * Any PDU's processed outside of full-feature mode and non-SCSI
     * PDU's in full-feature mode are handled by generating an
     * event to the connection state machine. The state machine
     * will validate the PDU against the current state and either
     * transmit the PDU if the opcode is allowed or handle an
     * error if the PDU is not allowed.
     *
     * This code-path will also generate any events that are implied
     * by the PDU opcode. For example a "logout response" with success
     * status generates a CE_LOGOUT_SUCCESS_SND event.
     */
    switch (IDM_PDU_OPCODE(pdu)) {
    case ISCSI_OP_LOGIN_CMD:
        idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
        break;
    case ISCSI_OP_LOGIN_RSP:
        DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
            iscsi_login_rsp_hdr_t *,
            (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
        idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
        break;
    case ISCSI_OP_LOGOUT_CMD:
        idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
        break;
    case ISCSI_OP_LOGOUT_RSP:
        DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
            iscsi_logout_rsp_hdr_t *,
            (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
        idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
        break;
    case ISCSI_OP_ASYNC_EVENT:
        DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
            iscsi_async_evt_hdr_t *,
            (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
        async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
        switch (async_evt->async_event) {
        case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
            idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
                (uintptr_t)pdu);
            break;
        case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
            idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
                (uintptr_t)pdu);
            break;
        case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
            idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
                (uintptr_t)pdu);
            break;
        case ISCSI_ASYNC_EVENT_SCSI_EVENT:
        case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
        default:
            idm_conn_tx_pdu_event(ic, CE_MISC_TX,
                (uintptr_t)pdu);
            break;
        }
        break;
    case ISCSI_OP_SCSI_RSP:
        /* Target only */
        DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
            iscsi_scsi_rsp_hdr_t *,
            (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    case ISCSI_OP_SCSI_TASK_MGT_RSP:
        /* Target only */
        DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
            iscsi_scsi_task_mgt_rsp_hdr_t *,
            (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    case ISCSI_OP_SCSI_DATA_RSP:
        /* Target only */
        DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
            iscsi_data_rsp_hdr_t *,
            (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    case ISCSI_OP_RTT_RSP:
        /* Target only */
        DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
            iscsi_rtt_hdr_t *,
            (iscsi_rtt_hdr_t *)pdu->isp_hdr);
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    case ISCSI_OP_NOOP_IN:
        /* Target only */
        DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
            iscsi_nop_in_hdr_t *,
            (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    case ISCSI_OP_TEXT_RSP:
        /* Target only */
        DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
            iscsi_text_rsp_hdr_t *,
            (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    /* Initiator only */
    case ISCSI_OP_SCSI_CMD:
    case ISCSI_OP_SCSI_TASK_MGT_MSG:
    case ISCSI_OP_SCSI_DATA:
    case ISCSI_OP_NOOP_OUT:
    case ISCSI_OP_TEXT_CMD:
    case ISCSI_OP_SNACK_CMD:
    case ISCSI_OP_REJECT_MSG:
    default:
        /*
         * Connection state machine will validate these PDU's against
         * the current state. A PDU not allowed in the current
         * state will cause a protocol error.
         */
        idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
        break;
    }
    mutex_exit(&ic->ic_state_mutex);
}
/*
 * Common allocation of a PDU along with memory for header and data.
 */
static idm_pdu_t *
idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
{
	idm_pdu_t *result;

	/*
	 * IDM clients should cache these structures for performance
	 * critical paths.  We can't cache effectively in IDM because we
	 * don't know the correct header and data size.
	 *
	 * Valid header length is assumed to be hdrlen and valid data
	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
	 * can be adjusted after the PDU is returned if necessary.
	 */
	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen,
	    sleepflag);
	if (result != NULL) {
		/* For idm_pdu_free sanity check */
		result->isp_flags |= IDM_PDU_ALLOC;
		/* pointer arithmetic */
		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
		result->isp_hdrlen = hdrlen;
		result->isp_hdrbuflen = hdrlen;
		result->isp_transport_hdrlen = 0;
		if (datalen != 0)
			result->isp_data = (uint8_t *)result->isp_hdr +
			    hdrlen;
		result->isp_datalen = datalen;
		result->isp_databuflen = datalen;
		result->isp_magic = IDM_PDU_MAGIC;
	}

	return (result);
}

/*
 * Typical idm_pdu_alloc invocation; will block for resources.
 */
idm_pdu_t *
idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
}

/*
 * Non-blocking idm_pdu_alloc implementation; returns NULL if resources
 * are not available.  Needed for transport-layer allocations, which may
 * be invoked in interrupt context.
 */
idm_pdu_t *
idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
}
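/*
 * Usage sketch (illustrative only): unlike idm_pdu_alloc(), callers of
 * the non-blocking variant must be prepared for allocation failure.
 * The error handling shown is a hypothetical fragment.
 *
 *	idm_pdu_t *pdu;
 *
 *	pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0);
 *	if (pdu == NULL)
 *		return (IDM_STATUS_FAIL);	(drop or retry later)
 */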
/*
 * Free a PDU previously allocated with idm_pdu_alloc(), including any
 * header and data space allocated as part of the original request.
 * Additional memory regions referenced by subsequent modification of
 * the isp_hdr and/or isp_data fields will not be freed.
 */
void
idm_pdu_free(idm_pdu_t *pdu)
{
	/* Make sure the structure was allocated using idm_pdu_alloc() */
	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
	kmem_free(pdu,
	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
}

/*
 * Initialize the connection, private and callback fields in a PDU.
 */
void
idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
{
	/*
	 * idm_pdu_complete() will call idm_pdu_free() if the callback is
	 * NULL.  This will only work if the PDU was originally allocated
	 * with idm_pdu_alloc().
	 */
	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
	    (cb != NULL));
	pdu->isp_magic = IDM_PDU_MAGIC;
	pdu->isp_ic = ic;
	pdu->isp_private = private;
	pdu->isp_callback = cb;
}

/*
 * Initialize the header pointer and header length fields.  This function
 * should not be used to adjust the header length in a buffer allocated
 * via idm_pdu_alloc(), since it overwrites the existing header pointer.
 */
void
idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
{
	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
	pdu->isp_hdrlen = hdrlen;
}

/*
 * Initialize the data pointer and data length fields.  This function
 * should not be used to adjust the data length of a buffer allocated
 * via idm_pdu_alloc(), since it overwrites the existing data pointer.
 */
void
idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
{
	pdu->isp_data = data;
	pdu->isp_datalen = datalen;
}

void
idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
{
	if (pdu->isp_callback) {
		pdu->isp_status = status;
		(*pdu->isp_callback)(pdu, status);
	} else {
		idm_pdu_free(pdu);
	}
}

/*
 * State machine auditing
 */

void
idm_sm_audit_init(sm_audit_buf_t *audit_buf)
{
	bzero(audit_buf, sizeof (sm_audit_buf_t));
	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
}

static sm_audit_record_t *
idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
    sm_audit_sm_type_t sm_type, int current_state)
{
	sm_audit_record_t *sar;

	/*
	 * Claim the next slot in the ring buffer; the mask-based wrap
	 * assumes SM_AUDIT_BUF_MAX_REC is a power of two.
	 */
	sar = audit_buf->sab_records;
	sar += audit_buf->sab_index;
	audit_buf->sab_index++;
	audit_buf->sab_index &= audit_buf->sab_max_index;

	sar->sar_type = r_type;
	gethrestime(&sar->sar_timestamp);
	sar->sar_sm_type = sm_type;
	sar->sar_state = current_state;

	return (sar);
}

void
idm_sm_audit_event(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state,
    int event, uintptr_t event_info)
{
	sm_audit_record_t *sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
	    sm_type, current_state);
	sar->sar_event = event;
	sar->sar_event_info = event_info;
}

void
idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state, int new_state)
{
	sm_audit_record_t *sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
	    sm_type, current_state);
	sar->sar_new_state = new_state;
}
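/*
 * Usage sketch (illustrative only): a state machine records each event
 * and each transition into its audit ring so the recent history can be
 * inspected from a crash dump.  The ic_state_audit field, SAS_IDM_CONN
 * type, and the state/event values shown are assumptions for the sake
 * of the example.
 *
 *	idm_sm_audit_init(&ic->ic_state_audit);
 *	...
 *	idm_sm_audit_event(&ic->ic_state_audit, SAS_IDM_CONN,
 *	    (int)ic->ic_state, (int)event, event_info);
 *	...
 *	idm_sm_audit_state_change(&ic->ic_state_audit, SAS_IDM_CONN,
 *	    (int)ic->ic_state, (int)next_state);
 */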
/*
 * Object reference tracking
 */

void
idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
{
	bzero(refcnt, sizeof (*refcnt));
	idm_refcnt_reset(refcnt);
	refcnt->ir_referenced_obj = referenced_obj;
	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
}

void
idm_refcnt_destroy(idm_refcnt_t *refcnt)
{
	/*
	 * Grab the mutex to ensure there are no other lingering threads
	 * holding the mutex before we destroy it (e.g. idm_refcnt_rele
	 * just after the refcnt goes to zero if ir_waiting ==
	 * REF_WAIT_ASYNC).
	 */
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt == 0);
	cv_destroy(&refcnt->ir_cv);
	mutex_exit(&refcnt->ir_mutex);
	mutex_destroy(&refcnt->ir_mutex);
}

void
idm_refcnt_reset(idm_refcnt_t *refcnt)
{
	refcnt->ir_waiting = REF_NOWAIT;
	refcnt->ir_refcnt = 0;
}

void
idm_refcnt_hold(idm_refcnt_t *refcnt)
{
	/*
	 * Nothing should take a hold on an object after a call to
	 * idm_refcnt_wait_ref or idm_refcnt_async_wait_ref.
	 */
	ASSERT(refcnt->ir_waiting == REF_NOWAIT);

	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_refcnt++;
	REFCNT_AUDIT(refcnt);
	mutex_exit(&refcnt->ir_mutex);
}

static void
idm_refcnt_unref_task(void *refcnt_void)
{
	idm_refcnt_t *refcnt = refcnt_void;

	REFCNT_AUDIT(refcnt);
	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
}

void
idm_refcnt_rele(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);
	if (refcnt->ir_waiting == REF_NOWAIT) {
		/* No one is waiting on this object */
		mutex_exit(&refcnt->ir_mutex);
		return;
	}

	/*
	 * Someone is waiting for this object to go idle, so check if
	 * refcnt is 0.  Waiting on an object and then later grabbing
	 * another reference is not allowed, so we don't need to handle
	 * that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
			if (taskq_dispatch(idm.idm_global_taskq,
			    &idm_refcnt_unref_task, refcnt,
			    TQ_SLEEP) == NULL) {
				cmn_err(CE_WARN,
				    "idm_refcnt_rele: Couldn't dispatch task");
			}
		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
			cv_signal(&refcnt->ir_cv);
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);

	/*
	 * If this was the last hold, arrange for cb_func to destroy the
	 * object from the global taskq.  Waiting on an object and then
	 * later grabbing another reference is not allowed, so we don't
	 * need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		refcnt->ir_cb = cb_func;
		refcnt->ir_waiting = REF_WAIT_ASYNC;
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
			cmn_err(CE_WARN,
			    "idm_refcnt_rele_and_destroy: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_SYNC;
	REFCNT_AUDIT(refcnt);
	while (refcnt->ir_refcnt != 0)
		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_ASYNC;
	refcnt->ir_cb = cb_func;
	REFCNT_AUDIT(refcnt);
	/*
	 * It's possible we don't have any references.  To make things
	 * easier on the caller, use a taskq to call the callback instead
	 * of calling it synchronously.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
			cmn_err(CE_WARN,
			    "idm_refcnt_async_wait_ref: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}
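/*
 * Usage sketch (illustrative only): the typical life cycle of a
 * reference-counted object.  "obj", its "o_refcnt" field, and
 * "example_obj_destroy" are hypothetical names.
 *
 *	idm_refcnt_init(&obj->o_refcnt, obj);
 *
 *	idm_refcnt_hold(&obj->o_refcnt);	(before each use)
 *	...
 *	idm_refcnt_rele(&obj->o_refcnt);	(after each use)
 *
 *	idm_refcnt_wait_ref(&obj->o_refcnt);	(block until idle)
 *	idm_refcnt_destroy(&obj->o_refcnt);
 *
 * Alternatively, idm_refcnt_async_wait_ref(&obj->o_refcnt,
 * &example_obj_destroy) arranges for the destructor to run from the
 * global taskq once the last hold is released, without blocking the
 * caller.
 */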
/*
 * Invoke the unref callback synchronously if there are no remaining
 * references; otherwise return without invoking it.
 */
void
idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
    idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	if (refcnt->ir_refcnt == 0) {
		mutex_exit(&refcnt->ir_mutex);
		(*cb_func)(refcnt->ir_referenced_obj);
		return;
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_conn_hold(idm_conn_t *ic)
{
	idm_refcnt_hold(&ic->ic_refcnt);
}

void
idm_conn_rele(idm_conn_t *ic)
{
	idm_refcnt_rele(&ic->ic_refcnt);
}

void
idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
{
	(void) strlcpy(ic->ic_target_name, target_name,
	    ISCSI_MAX_NAME_LEN + 1);
}

void
idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
{
	(void) strlcpy(ic->ic_initiator_name, initiator_name,
	    ISCSI_MAX_NAME_LEN + 1);
}

/*
 * Format the 6-byte binary ISID as a 12-character hex string.
 */
void
idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
{
	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
	    "%02x%02x%02x%02x%02x%02x",
	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
}
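/*
 * Usage sketch (illustrative only): a thread walking a global
 * connection list takes a hold before dropping the list lock, so the
 * connection cannot be destroyed while it is in use.  The
 * "ic_matches" predicate is hypothetical.
 *
 *	mutex_enter(&idm.idm_global_mutex);
 *	for (ic = list_head(&idm.idm_tgt_conn_list); ic != NULL;
 *	    ic = list_next(&idm.idm_tgt_conn_list, ic)) {
 *		if (ic_matches(ic)) {
 *			idm_conn_hold(ic);
 *			break;
 *		}
 *	}
 *	mutex_exit(&idm.idm_global_mutex);
 *	...
 *	idm_conn_rele(ic);
 */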
static int
_idm_init(void)
{
	/* Initialize the rwlock for the taskid table */
	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);

	/* Initialize the global mutex and taskq */
	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * The maximum allocation needs to be high here since there can be
	 * many concurrent tasks using the global taskq.
	 */
	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1,
	    minclsyspri, 128, 16384, TASKQ_PREPOPULATE);
	if (idm.idm_global_taskq == NULL) {
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Start watchdog thread */
	idm.idm_wd_thread = thread_create(NULL, 0,
	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
	if (idm.idm_wd_thread == NULL) {
		/* Couldn't create the watchdog thread */
		taskq_destroy(idm.idm_global_taskq);
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Pause until the watchdog thread is running */
	mutex_enter(&idm.idm_global_mutex);
	while (!idm.idm_wd_thread_running)
		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
	mutex_exit(&idm.idm_global_mutex);

	/* Allocate the task ID table and set "next" to 0 */
	idm.idm_taskid_max = idm_max_taskids;
	idm.idm_taskid_table = (idm_task_t **)
	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
	idm.idm_taskid_next = 0;

	/* Create the global buffer and task kmem caches */
	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);

	/*
	 * Note, we're explicitly allocating an additional iSER header-
	 * sized chunk for each of these elements.  See
	 * idm_task_constructor().
	 */
	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
	    &idm_task_constructor, &idm_task_destructor,
	    NULL, NULL, NULL, KM_SLEEP);

	/* Create the service and connection context lists */
	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
	    offsetof(idm_svc_t, is_list_node));
	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));
	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));

	/* Initialize the native sockets transport */
	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);

	/* Create connection ID pool */
	(void) idm_idpool_create(&idm.idm_conn_id_pool);

	return (DDI_SUCCESS);
}

static int
_idm_fini(void)
{
	/* Refuse to unload while any client contexts still exist */
	if (!list_is_empty(&idm.idm_ini_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_svc_list)) {
		return (EBUSY);
	}

	mutex_enter(&idm.idm_global_mutex);
	idm.idm_wd_thread_running = B_FALSE;
	cv_signal(&idm.idm_wd_cv);
	mutex_exit(&idm.idm_global_mutex);

	thread_join(idm.idm_wd_thread_did);

	idm_idpool_destroy(&idm.idm_conn_id_pool);

	/* Close any LDI handles we have open on transport drivers */
	mutex_enter(&idm.idm_global_mutex);
	idm_transport_teardown();
	mutex_exit(&idm.idm_global_mutex);

	/* Teardown the native sockets transport */
	idm_so_fini();

	list_destroy(&idm.idm_ini_conn_list);
	list_destroy(&idm.idm_tgt_conn_list);
	list_destroy(&idm.idm_tgt_svc_list);
	kmem_cache_destroy(idm.idm_task_cache);
	kmem_cache_destroy(idm.idm_buf_cache);
	kmem_free(idm.idm_taskid_table,
	    idm.idm_taskid_max * sizeof (idm_task_t *));
	mutex_destroy(&idm.idm_global_mutex);
	cv_destroy(&idm.idm_wd_cv);
	cv_destroy(&idm.idm_tgt_svc_cv);
	rw_destroy(&idm.idm_taskid_table_lock);

	return (0);
}
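/*
 * Teardown sketch (illustrative only): _idm_fini() returns EBUSY, and
 * modunload therefore fails, until every client has torn down its
 * connection and service contexts.  For example, an initiator-side
 * client is expected to have done something like:
 *
 *	idm_ini_conn_disconnect(ic);
 *	idm_ini_conn_destroy(ic);	(removes ic from idm_ini_conn_list)
 *
 * before the module can be unloaded.
 */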