1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 
24 */ 25 26 #include <sys/cpuvar.h> 27 #include <sys/conf.h> 28 #include <sys/file.h> 29 #include <sys/ddi.h> 30 #include <sys/sunddi.h> 31 #include <sys/modctl.h> 32 33 #include <sys/socket.h> 34 #include <sys/strsubr.h> 35 #include <sys/sysmacros.h> 36 37 #include <sys/socketvar.h> 38 #include <netinet/in.h> 39 40 #include <sys/idm/idm.h> 41 #include <sys/idm/idm_so.h> 42 43 #define IDM_NAME_VERSION "iSCSI Data Mover" 44 45 extern struct mod_ops mod_miscops; 46 extern struct mod_ops mod_miscops; 47 48 static struct modlmisc modlmisc = { 49 &mod_miscops, /* Type of module */ 50 IDM_NAME_VERSION 51 }; 52 53 static struct modlinkage modlinkage = { 54 MODREV_1, (void *)&modlmisc, NULL 55 }; 56 57 extern void idm_wd_thread(void *arg); 58 59 static int _idm_init(void); 60 static int _idm_fini(void); 61 static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf); 62 static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf); 63 static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf); 64 static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf); 65 static stmf_status_t idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, 66 idm_abort_type_t abort_type); 67 static void idm_task_aborted(idm_task_t *idt, idm_status_t status); 68 static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, 69 int sleepflag); 70 71 boolean_t idm_conn_logging = 0; 72 boolean_t idm_svc_logging = 0; 73 #ifdef DEBUG 74 boolean_t idm_pattern_checking = 1; 75 #else 76 boolean_t idm_pattern_checking = 0; 77 #endif 78 79 /* 80 * Potential tuneable for the maximum number of tasks. Default to 81 * IDM_TASKIDS_MAX 82 */ 83 84 uint32_t idm_max_taskids = IDM_TASKIDS_MAX; 85 86 /* 87 * Global list of transport handles 88 * These are listed in preferential order, so we can simply take the 89 * first "it_conn_is_capable" hit. Note also that the order maps to 90 * the order of the idm_transport_type_t list. 
91 */ 92 idm_transport_t idm_transport_list[] = { 93 94 /* iSER on InfiniBand transport handle */ 95 {IDM_TRANSPORT_TYPE_ISER, /* type */ 96 "/devices/ib/iser@0:iser", /* device path */ 97 NULL, /* LDI handle */ 98 NULL, /* transport ops */ 99 NULL}, /* transport caps */ 100 101 /* IDM native sockets transport handle */ 102 {IDM_TRANSPORT_TYPE_SOCKETS, /* type */ 103 NULL, /* device path */ 104 NULL, /* LDI handle */ 105 NULL, /* transport ops */ 106 NULL} /* transport caps */ 107 108 }; 109 110 idm_global_t idm; /* Global state */ 111 112 int 113 _init(void) 114 { 115 int rc; 116 117 if ((rc = _idm_init()) != 0) { 118 return (rc); 119 } 120 121 return (mod_install(&modlinkage)); 122 } 123 124 int 125 _fini(void) 126 { 127 int rc; 128 129 if ((rc = _idm_fini()) != 0) { 130 return (rc); 131 } 132 133 if ((rc = mod_remove(&modlinkage)) != 0) { 134 return (rc); 135 } 136 137 return (rc); 138 } 139 140 int 141 _info(struct modinfo *modinfop) 142 { 143 return (mod_info(&modlinkage, modinfop)); 144 } 145 146 /* 147 * idm_transport_register() 148 * 149 * Provides a mechanism for an IDM transport driver to register its 150 * transport ops and caps with the IDM kernel module. Invoked during 151 * a transport driver's attach routine. 152 */ 153 idm_status_t 154 idm_transport_register(idm_transport_attr_t *attr) 155 { 156 ASSERT(attr->it_ops != NULL); 157 ASSERT(attr->it_caps != NULL); 158 159 switch (attr->type) { 160 /* All known non-native transports here; for now, iSER */ 161 case IDM_TRANSPORT_TYPE_ISER: 162 idm_transport_list[attr->type].it_ops = attr->it_ops; 163 idm_transport_list[attr->type].it_caps = attr->it_caps; 164 return (IDM_STATUS_SUCCESS); 165 166 default: 167 cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in " 168 "idm_transport_register", attr->type); 169 return (IDM_STATUS_SUCCESS); 170 } 171 } 172 173 /* 174 * idm_ini_conn_create 175 * 176 * This function is invoked by the iSCSI layer to create a connection context. 
177 * This does not actually establish the socket connection. 178 * 179 * cr - Connection request parameters 180 * new_con - Output parameter that contains the new request if successful 181 * 182 */ 183 idm_status_t 184 idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con) 185 { 186 idm_transport_t *it; 187 idm_conn_t *ic; 188 int rc; 189 190 it = idm_transport_lookup(cr); 191 192 retry: 193 ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type, 194 &cr->icr_conn_ops); 195 196 bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr, 197 sizeof (cr->cr_ini_dst_addr)); 198 199 /* create the transport-specific connection components */ 200 rc = it->it_ops->it_ini_conn_create(cr, ic); 201 if (rc != IDM_STATUS_SUCCESS) { 202 /* cleanup the failed connection */ 203 idm_conn_destroy_common(ic); 204 205 /* 206 * It is possible for an IB client to connect to 207 * an ethernet-only client via an IB-eth gateway. 208 * Therefore, if we are attempting to use iSER and 209 * fail, retry with sockets before ultimately 210 * failing the connection. 211 */ 212 if (it->it_type == IDM_TRANSPORT_TYPE_ISER) { 213 it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]; 214 goto retry; 215 } 216 217 return (IDM_STATUS_FAIL); 218 } 219 220 *new_con = ic; 221 222 mutex_enter(&idm.idm_global_mutex); 223 list_insert_tail(&idm.idm_ini_conn_list, ic); 224 mutex_exit(&idm.idm_global_mutex); 225 226 return (IDM_STATUS_SUCCESS); 227 } 228 229 /* 230 * idm_ini_conn_destroy 231 * 232 * Releases any resources associated with the connection. This is the 233 * complement to idm_ini_conn_create. 
234 * ic - idm_conn_t structure representing the relevant connection 235 * 236 */ 237 void 238 idm_ini_conn_destroy_task(void *ic_void) 239 { 240 idm_conn_t *ic = ic_void; 241 242 ic->ic_transport_ops->it_ini_conn_destroy(ic); 243 idm_conn_destroy_common(ic); 244 } 245 246 void 247 idm_ini_conn_destroy(idm_conn_t *ic) 248 { 249 /* 250 * It's reasonable for the initiator to call idm_ini_conn_destroy 251 * from within the context of the CN_CONNECT_DESTROY notification. 252 * That's a problem since we want to destroy the taskq for the 253 * state machine associated with the connection. Remove the 254 * connection from the list right away then handle the remaining 255 * work via the idm_global_taskq. 256 */ 257 mutex_enter(&idm.idm_global_mutex); 258 list_remove(&idm.idm_ini_conn_list, ic); 259 mutex_exit(&idm.idm_global_mutex); 260 261 if (taskq_dispatch(idm.idm_global_taskq, 262 &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == TASKQID_INVALID) { 263 cmn_err(CE_WARN, 264 "idm_ini_conn_destroy: Couldn't dispatch task"); 265 } 266 } 267 268 /* 269 * idm_ini_conn_connect 270 * 271 * Establish connection to the remote system identified in idm_conn_t. 272 * The connection parameters including the remote IP address were established 273 * in the call to idm_ini_conn_create. The IDM state machine will 274 * perform client notifications as necessary to prompt the initiator through 275 * the login process. IDM also keeps a timer running so that if the login 276 * process doesn't complete in a timely manner it will fail. 277 * 278 * ic - idm_conn_t structure representing the relevant connection 279 * 280 * Returns success if the connection was established, otherwise some kind 281 * of meaningful error code. 
282 * 283 * Upon return the login has either failed or is loggin in (ffp) 284 */ 285 idm_status_t 286 idm_ini_conn_connect(idm_conn_t *ic) 287 { 288 idm_status_t rc; 289 290 rc = idm_conn_sm_init(ic); 291 if (rc != IDM_STATUS_SUCCESS) { 292 return (ic->ic_conn_sm_status); 293 } 294 295 /* Hold connection until we return */ 296 idm_conn_hold(ic); 297 298 /* Kick state machine */ 299 idm_conn_event(ic, CE_CONNECT_REQ, (uintptr_t)NULL); 300 301 /* Wait for login flag */ 302 mutex_enter(&ic->ic_state_mutex); 303 while (!(ic->ic_state_flags & CF_LOGIN_READY) && 304 !(ic->ic_state_flags & CF_ERROR)) { 305 cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex); 306 } 307 308 /* 309 * The CN_READY_TO_LOGIN and/or the CN_CONNECT_FAIL call to 310 * idm_notify_client has already been generated by the idm conn 311 * state machine. If connection fails any time after this 312 * check, we will detect it in iscsi_login. 313 */ 314 if (ic->ic_state_flags & CF_ERROR) { 315 rc = ic->ic_conn_sm_status; 316 } 317 mutex_exit(&ic->ic_state_mutex); 318 idm_conn_rele(ic); 319 320 return (rc); 321 } 322 323 /* 324 * idm_ini_conn_disconnect 325 * 326 * Forces a connection (previously established using idm_ini_conn_connect) 327 * to perform a controlled shutdown, cleaning up any outstanding requests. 328 * 329 * ic - idm_conn_t structure representing the relevant connection 330 * 331 * This is asynchronous and will return before the connection is properly 332 * shutdown 333 */ 334 /* ARGSUSED */ 335 void 336 idm_ini_conn_disconnect(idm_conn_t *ic) 337 { 338 idm_conn_event(ic, CE_TRANSPORT_FAIL, (uintptr_t)NULL); 339 } 340 341 /* 342 * idm_ini_conn_disconnect_wait 343 * 344 * Forces a connection (previously established using idm_ini_conn_connect) 345 * to perform a controlled shutdown. Blocks until the connection is 346 * disconnected. 
347 * 348 * ic - idm_conn_t structure representing the relevant connection 349 */ 350 /* ARGSUSED */ 351 void 352 idm_ini_conn_disconnect_sync(idm_conn_t *ic) 353 { 354 mutex_enter(&ic->ic_state_mutex); 355 if ((ic->ic_state != CS_S9_INIT_ERROR) && 356 (ic->ic_state != CS_S11_COMPLETE)) { 357 idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, (uintptr_t)NULL, 358 CT_NONE); 359 while ((ic->ic_state != CS_S9_INIT_ERROR) && 360 (ic->ic_state != CS_S11_COMPLETE)) 361 cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex); 362 } 363 mutex_exit(&ic->ic_state_mutex); 364 } 365 366 /* 367 * idm_tgt_svc_create 368 * 369 * The target calls this service to obtain a service context for each available 370 * transport, starting a service of each type related to the IP address and port 371 * passed. The idm_svc_req_t contains the service parameters. 372 */ 373 idm_status_t 374 idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc) 375 { 376 idm_transport_type_t type; 377 idm_transport_t *it; 378 idm_svc_t *is; 379 int rc; 380 381 *new_svc = NULL; 382 is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP); 383 384 /* Initialize transport-agnostic components of the service handle */ 385 is->is_svc_req = *sr; 386 mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL); 387 cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL); 388 mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL); 389 cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL); 390 idm_refcnt_init(&is->is_refcnt, is); 391 392 /* 393 * Make sure all available transports are setup. We call this now 394 * instead of at initialization time in case IB has become available 395 * since we started (hotplug, etc). 396 */ 397 idm_transport_setup(sr->sr_li, B_FALSE); 398 399 /* 400 * Loop through the transports, configuring the transport-specific 401 * components of each one. 
402 */ 403 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 404 405 it = &idm_transport_list[type]; 406 /* 407 * If it_ops is NULL then the transport is unconfigured 408 * and we shouldn't try to start the service. 409 */ 410 if (it->it_ops == NULL) { 411 continue; 412 } 413 414 rc = it->it_ops->it_tgt_svc_create(sr, is); 415 if (rc != IDM_STATUS_SUCCESS) { 416 /* Teardown any configured services */ 417 while (type--) { 418 it = &idm_transport_list[type]; 419 if (it->it_ops == NULL) { 420 continue; 421 } 422 it->it_ops->it_tgt_svc_destroy(is); 423 } 424 /* Free the svc context and return */ 425 kmem_free(is, sizeof (idm_svc_t)); 426 return (rc); 427 } 428 } 429 430 *new_svc = is; 431 432 mutex_enter(&idm.idm_global_mutex); 433 list_insert_tail(&idm.idm_tgt_svc_list, is); 434 mutex_exit(&idm.idm_global_mutex); 435 436 return (IDM_STATUS_SUCCESS); 437 } 438 439 /* 440 * idm_tgt_svc_destroy 441 * 442 * is - idm_svc_t returned by the call to idm_tgt_svc_create 443 * 444 * Cleanup any resources associated with the idm_svc_t. 
445 */ 446 void 447 idm_tgt_svc_destroy(idm_svc_t *is) 448 { 449 idm_transport_type_t type; 450 idm_transport_t *it; 451 452 mutex_enter(&idm.idm_global_mutex); 453 /* remove this service from the global list */ 454 list_remove(&idm.idm_tgt_svc_list, is); 455 /* wakeup any waiters for service change */ 456 cv_broadcast(&idm.idm_tgt_svc_cv); 457 mutex_exit(&idm.idm_global_mutex); 458 459 /* teardown each transport-specific service */ 460 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 461 it = &idm_transport_list[type]; 462 if (it->it_ops == NULL) { 463 continue; 464 } 465 466 it->it_ops->it_tgt_svc_destroy(is); 467 } 468 469 /* tear down the svc resources */ 470 idm_refcnt_destroy(&is->is_refcnt); 471 cv_destroy(&is->is_count_cv); 472 mutex_destroy(&is->is_count_mutex); 473 cv_destroy(&is->is_cv); 474 mutex_destroy(&is->is_mutex); 475 476 /* free the svc handle */ 477 kmem_free(is, sizeof (idm_svc_t)); 478 } 479 480 void 481 idm_tgt_svc_hold(idm_svc_t *is) 482 { 483 idm_refcnt_hold(&is->is_refcnt); 484 } 485 486 void 487 idm_tgt_svc_rele_and_destroy(idm_svc_t *is) 488 { 489 idm_refcnt_rele_and_destroy(&is->is_refcnt, 490 (idm_refcnt_cb_t *)&idm_tgt_svc_destroy); 491 } 492 493 /* 494 * idm_tgt_svc_online 495 * 496 * is - idm_svc_t returned by the call to idm_tgt_svc_create 497 * 498 * Online each transport service, as we want this target to be accessible 499 * via any configured transport. 500 * 501 * When the initiator establishes a new connection to the target, IDM will 502 * call the "new connect" callback defined in the idm_svc_req_t structure 503 * and it will pass an idm_conn_t structure representing that new connection. 
504 */ 505 idm_status_t 506 idm_tgt_svc_online(idm_svc_t *is) 507 { 508 509 idm_transport_type_t type, last_type; 510 idm_transport_t *it; 511 int rc = IDM_STATUS_SUCCESS; 512 513 mutex_enter(&is->is_mutex); 514 if (is->is_online == 0) { 515 /* Walk through each of the transports and online them */ 516 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 517 it = &idm_transport_list[type]; 518 if (it->it_ops == NULL) { 519 /* transport is not registered */ 520 continue; 521 } 522 523 mutex_exit(&is->is_mutex); 524 rc = it->it_ops->it_tgt_svc_online(is); 525 mutex_enter(&is->is_mutex); 526 if (rc != IDM_STATUS_SUCCESS) { 527 last_type = type; 528 break; 529 } 530 } 531 if (rc != IDM_STATUS_SUCCESS) { 532 /* 533 * The last transport failed to online. 534 * Offline any transport onlined above and 535 * do not online the target. 536 */ 537 for (type = 0; type < last_type; type++) { 538 it = &idm_transport_list[type]; 539 if (it->it_ops == NULL) { 540 /* transport is not registered */ 541 continue; 542 } 543 544 mutex_exit(&is->is_mutex); 545 it->it_ops->it_tgt_svc_offline(is); 546 mutex_enter(&is->is_mutex); 547 } 548 } else { 549 /* Target service now online */ 550 is->is_online = 1; 551 } 552 } else { 553 /* Target service already online, just bump the count */ 554 is->is_online++; 555 } 556 mutex_exit(&is->is_mutex); 557 558 return (rc); 559 } 560 561 /* 562 * idm_tgt_svc_offline 563 * 564 * is - idm_svc_t returned by the call to idm_tgt_svc_create 565 * 566 * Shutdown any online target services. 
567 */ 568 void 569 idm_tgt_svc_offline(idm_svc_t *is) 570 { 571 idm_transport_type_t type; 572 idm_transport_t *it; 573 574 mutex_enter(&is->is_mutex); 575 is->is_online--; 576 if (is->is_online == 0) { 577 /* Walk through each of the transports and offline them */ 578 for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) { 579 it = &idm_transport_list[type]; 580 if (it->it_ops == NULL) { 581 /* transport is not registered */ 582 continue; 583 } 584 585 mutex_exit(&is->is_mutex); 586 it->it_ops->it_tgt_svc_offline(is); 587 mutex_enter(&is->is_mutex); 588 } 589 } 590 mutex_exit(&is->is_mutex); 591 } 592 593 /* 594 * idm_tgt_svc_lookup 595 * 596 * Lookup a service instance listening on the specified port 597 */ 598 599 idm_svc_t * 600 idm_tgt_svc_lookup(uint16_t port) 601 { 602 idm_svc_t *result; 603 604 retry: 605 mutex_enter(&idm.idm_global_mutex); 606 for (result = list_head(&idm.idm_tgt_svc_list); 607 result != NULL; 608 result = list_next(&idm.idm_tgt_svc_list, result)) { 609 if (result->is_svc_req.sr_port == port) { 610 if (result->is_online == 0) { 611 /* 612 * A service exists on this port, but it 613 * is going away, wait for it to cleanup. 614 */ 615 cv_wait(&idm.idm_tgt_svc_cv, 616 &idm.idm_global_mutex); 617 mutex_exit(&idm.idm_global_mutex); 618 goto retry; 619 } 620 idm_tgt_svc_hold(result); 621 mutex_exit(&idm.idm_global_mutex); 622 return (result); 623 } 624 } 625 mutex_exit(&idm.idm_global_mutex); 626 627 return (NULL); 628 } 629 630 /* 631 * idm_negotiate_key_values() 632 * Give IDM level a chance to negotiate any login parameters it should own. 
633 * -- leave unhandled parameters alone on request_nvl 634 * -- move all handled parameters to response_nvl with an appropriate response 635 * -- also add an entry to negotiated_nvl for any accepted parameters 636 */ 637 kv_status_t 638 idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl, 639 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 640 { 641 ASSERT(ic->ic_transport_ops != NULL); 642 return (ic->ic_transport_ops->it_negotiate_key_values(ic, 643 request_nvl, response_nvl, negotiated_nvl)); 644 } 645 646 /* 647 * idm_notice_key_values() 648 * Activate at the IDM level any parameters that have been negotiated. 649 * Passes the set of key value pairs to the transport for activation. 650 * This will be invoked as the connection is entering full-feature mode. 651 */ 652 void 653 idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl) 654 { 655 ASSERT(ic->ic_transport_ops != NULL); 656 ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl); 657 } 658 659 /* 660 * idm_declare_key_values() 661 * Activate an operational set of declarative parameters from the config_nvl, 662 * and return the selected values in the outgoing_nvl. 663 */ 664 kv_status_t 665 idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl, 666 nvlist_t *outgoing_nvl) 667 { 668 ASSERT(ic->ic_transport_ops != NULL); 669 return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl, 670 outgoing_nvl)); 671 } 672 673 /* 674 * idm_buf_tx_to_ini 675 * 676 * This is IDM's implementation of the 'Put_Data' operational primitive. 677 * 678 * This function is invoked by a target iSCSI layer to request its local 679 * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer 680 * on the remote iSCSI node. The I/O buffer represented by 'idb' is 681 * transferred to the initiator associated with task 'idt'. 
The connection 682 * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS, 683 * and the callback (idb->idb_buf_cb) at transfer completion are 684 * provided as input. 685 * 686 * This data transfer takes place transparently to the remote iSCSI layer, 687 * i.e. without its participation. 688 * 689 * Using sockets, IDM implements the data transfer by segmenting the data 690 * buffer into appropriately sized iSCSI PDUs and transmitting them to the 691 * initiator. iSER performs the transfer using RDMA write. 692 * 693 */ 694 idm_status_t 695 idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb, 696 uint32_t offset, uint32_t xfer_len, 697 idm_buf_cb_t idb_buf_cb, void *cb_arg) 698 { 699 idm_status_t rc; 700 701 idb->idb_bufoffset = offset; 702 idb->idb_xfer_len = xfer_len; 703 idb->idb_buf_cb = idb_buf_cb; 704 idb->idb_cb_arg = cb_arg; 705 gethrestime(&idb->idb_xfer_start); 706 707 /* 708 * Buffer should not contain the pattern. If the pattern is 709 * present then we've been asked to transmit initialized data 710 */ 711 IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT); 712 713 mutex_enter(&idt->idt_mutex); 714 switch (idt->idt_state) { 715 case TASK_ACTIVE: 716 idt->idt_tx_to_ini_start++; 717 idm_task_hold(idt); 718 idm_buf_bind_in_locked(idt, idb); 719 idb->idb_in_transport = B_TRUE; 720 rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini) 721 (idt, idb); 722 return (rc); 723 724 case TASK_SUSPENDING: 725 case TASK_SUSPENDED: 726 /* 727 * Bind buffer but don't start a transfer since the task 728 * is suspended 729 */ 730 idm_buf_bind_in_locked(idt, idb); 731 mutex_exit(&idt->idt_mutex); 732 return (IDM_STATUS_SUCCESS); 733 734 case TASK_ABORTING: 735 case TASK_ABORTED: 736 /* 737 * Once the task is aborted, any buffers added to the 738 * idt_inbufv will never get cleaned up, so just return 739 * SUCCESS. The buffer should get cleaned up by the 740 * client or framework once task_aborted has completed. 
741 */ 742 mutex_exit(&idt->idt_mutex); 743 return (IDM_STATUS_SUCCESS); 744 745 default: 746 ASSERT(0); 747 break; 748 } 749 mutex_exit(&idt->idt_mutex); 750 751 return (IDM_STATUS_FAIL); 752 } 753 754 /* 755 * idm_buf_rx_from_ini 756 * 757 * This is IDM's implementation of the 'Get_Data' operational primitive. 758 * 759 * This function is invoked by a target iSCSI layer to request its local 760 * Datamover layer to retrieve certain data identified by the R2T PDU from the 761 * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be 762 * mapped to the respective buffer by the task tags (ITT & TTT). 763 * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and 764 * the callback (idb->idb_buf_cb) notification for data transfer completion are 765 * are provided as input. 766 * 767 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local 768 * Datamover layer, the local and remote Datamover layers transparently bring 769 * about the data transfer requested by the R2T PDU, without the participation 770 * of the iSCSI layers. 771 * 772 * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out() 773 * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read. 
774 * 775 */ 776 idm_status_t 777 idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb, 778 uint32_t offset, uint32_t xfer_len, 779 idm_buf_cb_t idb_buf_cb, void *cb_arg) 780 { 781 idm_status_t rc; 782 783 idb->idb_bufoffset = offset; 784 idb->idb_xfer_len = xfer_len; 785 idb->idb_buf_cb = idb_buf_cb; 786 idb->idb_cb_arg = cb_arg; 787 gethrestime(&idb->idb_xfer_start); 788 789 /* 790 * "In" buf list is for "Data In" PDU's, "Out" buf list is for 791 * "Data Out" PDU's 792 */ 793 mutex_enter(&idt->idt_mutex); 794 switch (idt->idt_state) { 795 case TASK_ACTIVE: 796 idt->idt_rx_from_ini_start++; 797 idm_task_hold(idt); 798 idm_buf_bind_out_locked(idt, idb); 799 idb->idb_in_transport = B_TRUE; 800 rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini) 801 (idt, idb); 802 return (rc); 803 case TASK_SUSPENDING: 804 case TASK_SUSPENDED: 805 case TASK_ABORTING: 806 case TASK_ABORTED: 807 /* 808 * Bind buffer but don't start a transfer since the task 809 * is suspended 810 */ 811 idm_buf_bind_out_locked(idt, idb); 812 mutex_exit(&idt->idt_mutex); 813 return (IDM_STATUS_SUCCESS); 814 default: 815 ASSERT(0); 816 break; 817 } 818 mutex_exit(&idt->idt_mutex); 819 820 return (IDM_STATUS_FAIL); 821 } 822 823 /* 824 * idm_buf_tx_to_ini_done 825 * 826 * The transport calls this after it has completed a transfer requested by 827 * a call to transport_buf_tx_to_ini 828 * 829 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning. 830 * idt may be freed after the call to idb->idb_buf_cb. 831 */ 832 void 833 idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status) 834 { 835 ASSERT(mutex_owned(&idt->idt_mutex)); 836 idb->idb_in_transport = B_FALSE; 837 idb->idb_tx_thread = B_FALSE; 838 idt->idt_tx_to_ini_done++; 839 gethrestime(&idb->idb_xfer_done); 840 841 /* 842 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or 843 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes 844 * to 0. 
845 */ 846 idm_task_rele(idt); 847 idb->idb_status = status; 848 849 switch (idt->idt_state) { 850 case TASK_ACTIVE: 851 idt->idt_ic->ic_timestamp = ddi_get_lbolt(); 852 idm_buf_unbind_in_locked(idt, idb); 853 mutex_exit(&idt->idt_mutex); 854 (*idb->idb_buf_cb)(idb, status); 855 return; 856 case TASK_SUSPENDING: 857 case TASK_SUSPENDED: 858 case TASK_ABORTING: 859 case TASK_ABORTED: 860 /* 861 * To keep things simple we will ignore the case where the 862 * transfer was successful and leave all buffers bound to the 863 * task. This allows us to also ignore the case where we've 864 * been asked to abort a task but the last transfer of the 865 * task has completed. IDM has no idea whether this was, in 866 * fact, the last transfer of the task so it would be difficult 867 * to handle this case. Everything should get sorted out again 868 * after task reassignment is complete. 869 * 870 * In the case of TASK_ABORTING we could conceivably call the 871 * buffer callback here but the timing of when the client's 872 * client_task_aborted callback is invoked vs. when the client's 873 * buffer callback gets invoked gets sticky. We don't want 874 * the client to here from us again after the call to 875 * client_task_aborted() but we don't want to give it a bunch 876 * of failed buffer transfers until we've called 877 * client_task_aborted(). Instead we'll just leave all the 878 * buffers bound and allow the client to cleanup. 879 */ 880 break; 881 default: 882 ASSERT(0); 883 } 884 mutex_exit(&idt->idt_mutex); 885 } 886 887 /* 888 * idm_buf_rx_from_ini_done 889 * 890 * The transport calls this after it has completed a transfer requested by 891 * a call totransport_buf_tx_to_ini 892 * 893 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning. 894 * idt may be freed after the call to idb->idb_buf_cb. 
895 */ 896 void 897 idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status) 898 { 899 ASSERT(mutex_owned(&idt->idt_mutex)); 900 idb->idb_in_transport = B_FALSE; 901 idt->idt_rx_from_ini_done++; 902 gethrestime(&idb->idb_xfer_done); 903 904 /* 905 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or 906 * TASK_ABORTING --> TASK_ABORTED transistion if the refcount goes 907 * to 0. 908 */ 909 idm_task_rele(idt); 910 idb->idb_status = status; 911 912 if (status == IDM_STATUS_SUCCESS) { 913 /* 914 * Buffer should not contain the pattern. If it does then 915 * we did not get the data from the remote host. 916 */ 917 IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT); 918 } 919 920 switch (idt->idt_state) { 921 case TASK_ACTIVE: 922 idt->idt_ic->ic_timestamp = ddi_get_lbolt(); 923 idm_buf_unbind_out_locked(idt, idb); 924 mutex_exit(&idt->idt_mutex); 925 (*idb->idb_buf_cb)(idb, status); 926 return; 927 case TASK_SUSPENDING: 928 case TASK_SUSPENDED: 929 case TASK_ABORTING: 930 case TASK_ABORTED: 931 /* 932 * To keep things simple we will ignore the case where the 933 * transfer was successful and leave all buffers bound to the 934 * task. This allows us to also ignore the case where we've 935 * been asked to abort a task but the last transfer of the 936 * task has completed. IDM has no idea whether this was, in 937 * fact, the last transfer of the task so it would be difficult 938 * to handle this case. Everything should get sorted out again 939 * after task reassignment is complete. 940 * 941 * In the case of TASK_ABORTING we could conceivably call the 942 * buffer callback here but the timing of when the client's 943 * client_task_aborted callback is invoked vs. when the client's 944 * buffer callback gets invoked gets sticky. 
We don't want 945 * the client to here from us again after the call to 946 * client_task_aborted() but we don't want to give it a bunch 947 * of failed buffer transfers until we've called 948 * client_task_aborted(). Instead we'll just leave all the 949 * buffers bound and allow the client to cleanup. 950 */ 951 break; 952 default: 953 ASSERT(0); 954 } 955 mutex_exit(&idt->idt_mutex); 956 } 957 958 /* 959 * idm_buf_alloc 960 * 961 * Allocates a buffer handle and registers it for use with the transport 962 * layer. If a buffer is not passed on bufptr, the buffer will be allocated 963 * as well as the handle. 964 * 965 * ic - connection on which the buffer will be transferred 966 * bufptr - allocate memory for buffer if NULL, else assign to buffer 967 * buflen - length of buffer 968 * 969 * Returns idm_buf_t handle if successful, otherwise NULL 970 */ 971 idm_buf_t * 972 idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen) 973 { 974 idm_buf_t *buf = NULL; 975 int rc; 976 977 ASSERT(ic != NULL); 978 ASSERT(idm.idm_buf_cache != NULL); 979 ASSERT(buflen > 0); 980 981 /* Don't allocate new buffers if we are not in FFP */ 982 mutex_enter(&ic->ic_state_mutex); 983 if (!ic->ic_ffp) { 984 mutex_exit(&ic->ic_state_mutex); 985 return (NULL); 986 } 987 988 989 idm_conn_hold(ic); 990 mutex_exit(&ic->ic_state_mutex); 991 992 buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP); 993 if (buf == NULL) { 994 idm_conn_rele(ic); 995 return (NULL); 996 } 997 998 buf->idb_ic = ic; 999 buf->idb_buflen = buflen; 1000 buf->idb_exp_offset = 0; 1001 buf->idb_bufoffset = 0; 1002 buf->idb_xfer_len = 0; 1003 buf->idb_magic = IDM_BUF_MAGIC; 1004 buf->idb_in_transport = B_FALSE; 1005 buf->idb_bufbcopy = B_FALSE; 1006 1007 /* 1008 * If bufptr is NULL, we have an implicit request to allocate 1009 * memory for this IDM buffer handle and register it for use 1010 * with the transport. 
To simplify this, and to give more freedom 1011 * to the transport layer for it's own buffer management, both of 1012 * these actions will take place in the transport layer. 1013 * If bufptr is set, then the caller has allocated memory (or more 1014 * likely it's been passed from an upper layer), and we need only 1015 * register the buffer for use with the transport layer. 1016 */ 1017 if (bufptr == NULL) { 1018 /* 1019 * Allocate a buffer from the transport layer (which 1020 * will also register the buffer for use). 1021 */ 1022 rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen); 1023 if (rc != 0) { 1024 idm_conn_rele(ic); 1025 kmem_cache_free(idm.idm_buf_cache, buf); 1026 return (NULL); 1027 } 1028 /* Set the bufalloc'd flag */ 1029 buf->idb_bufalloc = B_TRUE; 1030 } else { 1031 /* 1032 * For large transfers, Set the passed bufptr into 1033 * the buf handle, and register the handle with the 1034 * transport layer. As memory registration with the 1035 * transport layer is a time/cpu intensive operation, 1036 * for small transfers (up to a pre-defined bcopy 1037 * threshold), use pre-registered memory buffers 1038 * and bcopy data at the appropriate time. 1039 */ 1040 buf->idb_buf = bufptr; 1041 1042 rc = ic->ic_transport_ops->it_buf_setup(buf); 1043 if (rc != 0) { 1044 idm_conn_rele(ic); 1045 kmem_cache_free(idm.idm_buf_cache, buf); 1046 return (NULL); 1047 } 1048 /* 1049 * The transport layer is now expected to set the idb_bufalloc 1050 * correctly to indicate if resources have been allocated. 
		 */
	}

	/* Stamp the debug fill pattern into the buffer (DEBUG builds only) */
	IDM_BUFPAT_SET(buf);

	return (buf);
}

/*
 * idm_buf_free
 *
 * Release a buffer handle along with the associated buffer that was allocated
 * or assigned with idm_buf_alloc
 */
void
idm_buf_free(idm_buf_t *buf)
{
	idm_conn_t *ic = buf->idb_ic;


	/* Clear any stale task association before releasing the buffer */
	buf->idb_task_binding = NULL;

	/*
	 * idb_bufalloc tells us whether the transport layer allocated the
	 * buffer (it_buf_alloc) or merely registered caller-supplied memory
	 * (it_buf_setup); release it the matching way.
	 */
	if (buf->idb_bufalloc) {
		ic->ic_transport_ops->it_buf_free(buf);
	} else {
		ic->ic_transport_ops->it_buf_teardown(buf);
	}
	kmem_cache_free(idm.idm_buf_cache, buf);
	idm_conn_rele(ic);
}

/*
 * idm_buf_bind_in
 *
 * This function associates a buffer with a task. This is only for use by the
 * iSCSI initiator that will have only one buffer per transfer direction
 *
 */
void
idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
{
	mutex_enter(&idt->idt_mutex);
	idm_buf_bind_in_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_bind_in_locked
 *
 * Bind a receive buffer to the task; caller must hold idt->idt_mutex.
 */
static void
idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
{
	buf->idb_task_binding = idt;
	buf->idb_ic = idt->idt_ic;
	idm_listbuf_insert(&idt->idt_inbufv, buf);
}

void
idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
{
	/*
	 * For small transfers, the iSER transport delegates the IDM
	 * layer to bcopy the SCSI Write data for faster IOPS.
	 */
	if (buf->idb_bufbcopy == B_TRUE) {

		bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
	}
	mutex_enter(&idt->idt_mutex);
	idm_buf_bind_out_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_bind_out_locked
 *
 * Bind a transmit buffer to the task; caller must hold idt->idt_mutex.
 */
static void
idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
{
	buf->idb_task_binding = idt;
	buf->idb_ic = idt->idt_ic;
	idm_listbuf_insert(&idt->idt_outbufv, buf);
}

void
idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
{
	/*
	 * For small transfers, the iSER transport delegates the IDM
	 * layer to bcopy the SCSI Read data into the read buffer
	 * for faster IOPS.
	 */
	if (buf->idb_bufbcopy == B_TRUE) {
		bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
	}
	mutex_enter(&idt->idt_mutex);
	idm_buf_unbind_in_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_unbind_in_locked
 *
 * Remove a receive buffer from the task; caller must hold idt->idt_mutex.
 */
static void
idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
{
	list_remove(&idt->idt_inbufv, buf);
}

void
idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
{
	mutex_enter(&idt->idt_mutex);
	idm_buf_unbind_out_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_unbind_out_locked
 *
 * Remove a transmit buffer from the task; caller must hold idt->idt_mutex.
 */
static void
idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
{
	list_remove(&idt->idt_outbufv, buf);
}

/*
 * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
 * iSCSI PDU
 */
idm_buf_t *
idm_buf_find(void *lbuf, size_t data_offset)
{
	idm_buf_t	*idb;
	list_t		*lst = (list_t *)lbuf;

	/* iterate through the list to find the buffer */
	for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {

		/*
		 * Initiator connections bind only one buffer per direction,
		 * at offset zero; only targets may use non-zero offsets.
		 */
		ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
		    (idb->idb_bufoffset == 0));

		if ((data_offset >= idb->idb_bufoffset) &&
		    (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {

			return (idb);
		}
	}

	return (NULL);
}

/*
 * idm_bufpat_set
 *
 * Fill the buffer with the IDM debug pattern, one idm_bufpat_t record per
 * sizeof (idm_bufpat_t) chunk; a partial trailing chunk is left untouched.
 */
void
idm_bufpat_set(idm_buf_t *idb)
{
	idm_bufpat_t	*bufpat;
	int		len, i;

	/* Round the length down to a whole number of pattern records */
	len = idb->idb_buflen;
	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);

	bufpat = idb->idb_buf;
	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
		bufpat->bufpat_idb = idb;
		bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
		bufpat->bufpat_offset = i;
		bufpat++;
	}
}

/*
 * idm_bufpat_check
 *
 * Return B_TRUE if the debug pattern is still present anywhere in the first
 * check_len bytes of the buffer (i.e. the buffer was not fully overwritten
 * by transferred data). BP_CHECK_QUICK examines only the first pattern
 * record; BP_CHECK_ASSERT additionally asserts on a match.
 */
boolean_t
idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
{
	idm_bufpat_t	*bufpat;
	int		len, i;

	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
	ASSERT(len <= idb->idb_buflen);
	bufpat = idb->idb_buf;

	/*
	 * Don't check the pattern in buffers that came from outside IDM
	 * (these will be buffers from the initiator that we opted not
	 * to double-buffer)
	 */
	if (!idb->idb_bufalloc)
		return (B_FALSE);

	/*
	 * Return true if we find the pattern anywhere in the buffer
	 */
	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
		if (BUFPAT_MATCH(bufpat, idb)) {
			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
			    "idb %p bufpat %p "
			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
			DTRACE_PROBE2(bufpat__pattern__found,
			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
			if (type == BP_CHECK_ASSERT) {
				ASSERT(0);
			}
			return (B_TRUE);
		}
		bufpat++;
	}

	return (B_FALSE);
}

/*
 * idm_task_alloc
 *
 * This function will allocate a idm_task_t structure. A task tag is also
 * generated and saved in idt_tt. The task is not active.
 */
idm_task_t *
idm_task_alloc(idm_conn_t *ic)
{
	idm_task_t	*idt;

	ASSERT(ic != NULL);

	/* Don't allocate new tasks if we are not in FFP */
	if (!ic->ic_ffp) {
		return (NULL);
	}
	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
	if (idt == NULL) {
		return (NULL);
	}

	ASSERT(list_is_empty(&idt->idt_inbufv));
	ASSERT(list_is_empty(&idt->idt_outbufv));

	/*
	 * Recheck FFP under ic_state_mutex so the connection hold is only
	 * taken while the connection is still in full-feature phase.
	 */
	mutex_enter(&ic->ic_state_mutex);
	if (!ic->ic_ffp) {
		mutex_exit(&ic->ic_state_mutex);
		kmem_cache_free(idm.idm_task_cache, idt);
		return (NULL);
	}
	idm_conn_hold(ic);
	mutex_exit(&ic->ic_state_mutex);

	idt->idt_state = TASK_IDLE;
	idt->idt_ic = ic;
	idt->idt_private = NULL;
	idt->idt_exp_datasn = 0;
	idt->idt_exp_rttsn = 0;
	idt->idt_flags = 0;
	return (idt);
}

/*
 * idm_task_start
 *
 * Mark the task active and initialize some stats. The caller
 * sets up the idm_task_t structure with a prior call to idm_task_alloc().
 * The task service does not function as a task/work engine, it is the
 * responsibility of the initiator to start the data transfer and free the
 * resources.
 */
void
idm_task_start(idm_task_t *idt, uintptr_t handle)
{
	ASSERT(idt != NULL);

	/* mark the task as ACTIVE */
	idt->idt_state = TASK_ACTIVE;
	idt->idt_client_handle = handle;
	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
	    idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
}

/*
 * idm_task_done
 *
 * This function sets the state to indicate that the task is no longer active.
 */
void
idm_task_done(idm_task_t *idt)
{
	ASSERT(idt != NULL);

	mutex_enter(&idt->idt_mutex);
	idt->idt_state = TASK_IDLE;
	mutex_exit(&idt->idt_mutex);

	/*
	 * Although unlikely it is possible for a reference to come in after
	 * the client has decided the task is over but before we've marked
	 * the task idle. One specific unavoidable scenario is the case where
	 * received PDU with the matching ITT/TTT results in a successful
	 * lookup of this task. We are at the mercy of the remote node in
	 * that case so we need to handle it. Now that the task state
	 * has changed no more references will occur so a simple call to
	 * idm_refcnt_wait_ref should deal with the situation.
	 */
	idm_refcnt_wait_ref(&idt->idt_refcnt);
	idm_refcnt_reset(&idt->idt_refcnt);
}

/*
 * idm_task_free
 *
 * This function will free the Task Tag and the memory allocated for the task
 * idm_task_done should be called prior to this call
 */
void
idm_task_free(idm_task_t *idt)
{
	idm_conn_t *ic;

	ASSERT(idt != NULL);
	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
	ASSERT(idt->idt_state == TASK_IDLE);

	ic = idt->idt_ic;

	/*
	 * It's possible for items to still be in the idt_inbufv list if
	 * they were added after idm_free_task_rsrc was called. We rely on
	 * STMF to free all buffers associated with the task however STMF
	 * doesn't know that we have this reference to the buffers.
	 * Use list_create so that we don't end up with stale references
	 * to these buffers.
	 */
	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));
	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));

	kmem_cache_free(idm.idm_task_cache, idt);

	/* Drop the connection hold taken in idm_task_alloc() */
	idm_conn_rele(ic);
}

/*
 * idm_task_find_common
 *	common code for idm_task_find() and idm_task_find_and_complete()
 */
/*ARGSUSED*/
static idm_task_t *
idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
    boolean_t complete)
{
	uint32_t	tt, client_handle;
	idm_task_t	*idt;

	/*
	 * Must match both itt and ttt. The table is indexed by itt
	 * for initiator connections and ttt for target connections.
	 */
	if (IDM_CONN_ISTGT(ic)) {
		tt = ttt;
		client_handle = itt;
	} else {
		tt = itt;
		client_handle = ttt;
	}

	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
	if (tt >= idm.idm_taskid_max) {
		rw_exit(&idm.idm_taskid_table_lock);
		return (NULL);
	}

	idt = idm.idm_taskid_table[tt];

	if (idt != NULL) {
		mutex_enter(&idt->idt_mutex);
		if ((idt->idt_state != TASK_ACTIVE) ||
		    (idt->idt_ic != ic) ||
		    (IDM_CONN_ISTGT(ic) &&
		    (idt->idt_client_handle != client_handle))) {
			/*
			 * Task doesn't match or task is aborting and
			 * we don't want any more references.
			 */
			if ((idt->idt_ic != ic) &&
			    (idt->idt_state == TASK_ACTIVE) &&
			    (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
			    client_handle)) {
				IDM_CONN_LOG(CE_WARN,
				    "idm_task_find: wrong connection %p != %p",
				    (void *)ic, (void *)idt->idt_ic);
			}
			mutex_exit(&idt->idt_mutex);
			rw_exit(&idm.idm_taskid_table_lock);
			return (NULL);
		}
		idm_task_hold(idt);
		/*
		 * Set the task state to TASK_COMPLETE so it can no longer
		 * be found or aborted.
		 */
		if (B_TRUE == complete)
			idt->idt_state = TASK_COMPLETE;
		mutex_exit(&idt->idt_mutex);
	}
	rw_exit(&idm.idm_taskid_table_lock);

	return (idt);
}

/*
 * This function looks up a task by task tag.
 */
idm_task_t *
idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
	return (idm_task_find_common(ic, itt, ttt, B_FALSE));
}

/*
 * This function looks up a task by task tag. If found, the task state
 * is atomically set to TASK_COMPLETE so it can no longer be found or aborted.
 */
idm_task_t *
idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
	return (idm_task_find_common(ic, itt, ttt, B_TRUE));
}

/*
 * idm_task_find_by_handle
 *
 * This function looks up a task by the client-private idt_client_handle.
 *
 * This function should NEVER be called in the performance path. It is
 * intended strictly for error recovery/task management.
 */
/*ARGSUSED*/
void *
idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
{
	idm_task_t	*idt = NULL;
	int		idx = 0;

	rw_enter(&idm.idm_taskid_table_lock, RW_READER);

	/* Linear scan of the entire task table */
	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
		idt = idm.idm_taskid_table[idx];

		if (idt == NULL)
			continue;

		mutex_enter(&idt->idt_mutex);

		if (idt->idt_state != TASK_ACTIVE) {
			/*
			 * Task is either in suspend, abort, or already
			 * complete.
			 */
			mutex_exit(&idt->idt_mutex);
			continue;
		}

		if (idt->idt_client_handle == handle) {
			/* Found it; the hold is dropped by the caller */
			idm_task_hold(idt);
			mutex_exit(&idt->idt_mutex);
			break;
		}

		mutex_exit(&idt->idt_mutex);
	}

	rw_exit(&idm.idm_taskid_table_lock);

	/* idx == idm.idm_taskid_max means the scan found no match */
	if ((idt == NULL) || (idx == idm.idm_taskid_max))
		return (NULL);

	return (idt->idt_private);
}

/*
 * idm_task_hold
 *
 * Take a reference on the task.
 */
void
idm_task_hold(idm_task_t *idt)
{
	idm_refcnt_hold(&idt->idt_refcnt);
}

/*
 * idm_task_rele
 *
 * Release a reference taken with idm_task_hold().
 */
void
idm_task_rele(idm_task_t *idt)
{
	idm_refcnt_rele(&idt->idt_refcnt);
}

/*
 * idm_task_abort
 *
 * Abort or suspend a single task (idt != NULL) or, when idt is NULL,
 * every task on the connection that is not idle or complete.
 */
stmf_status_t
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	idm_task_t	*task;
	int		idx;
	stmf_status_t	s = STMF_SUCCESS;

	/*
	 * Passing NULL as the task indicates that all tasks
	 * for this connection should be aborted.
	 */
	if (idt == NULL) {
		/*
		 * Only the connection state machine should ask for
		 * all tasks to abort and this should never happen in FFP.
		 */
		ASSERT(!ic->ic_ffp);
		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
			task = idm.idm_taskid_table[idx];
			if (task == NULL)
				continue;
			mutex_enter(&task->idt_mutex);
			if ((task->idt_state != TASK_IDLE) &&
			    (task->idt_state != TASK_COMPLETE) &&
			    (task->idt_ic == ic)) {
				/*
				 * idm_task_abort_one() drops idt_mutex, so
				 * release the table lock across the call and
				 * re-acquire it before continuing the scan.
				 */
				rw_exit(&idm.idm_taskid_table_lock);
				s = idm_task_abort_one(ic, task, abort_type);
				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
			} else
				mutex_exit(&task->idt_mutex);
		}
		rw_exit(&idm.idm_taskid_table_lock);
	} else {
		mutex_enter(&idt->idt_mutex);
		s = idm_task_abort_one(ic, idt, abort_type);
	}
	return (s);
}

/*
 * idm_task_abort_unref_cb
 *
 * Reference-drain callback: move the task from its transitional state
 * (TASK_SUSPENDING/TASK_ABORTING) to its final state and notify the
 * client via idm_task_aborted().
 */
static void
idm_task_abort_unref_cb(void *ref)
{
	idm_task_t *idt = ref;

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_SUSPENDING:
		idt->idt_state = TASK_SUSPENDED;
		mutex_exit(&idt->idt_mutex);
		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
		return;
	case TASK_ABORTING:
		idt->idt_state = TASK_ABORTED;
		mutex_exit(&idt->idt_mutex);
		idm_task_aborted(idt, IDM_STATUS_ABORTED);
		return;
	default:
		mutex_exit(&idt->idt_mutex);
		ASSERT(0);
		break;
	}
}

/*
 * Abort the idm task.
 * Caller must hold the task mutex, which will be released before return
 */
static stmf_status_t
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	stmf_status_t	s = STMF_SUCCESS;

	/* Caller must hold connection mutex */
	ASSERT(mutex_owned(&idt->idt_mutex));
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Call transport to release any resources */
			idt->idt_state = TASK_SUSPENDING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references. When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return (s);
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references. When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return (s);
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDING:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDED:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);

			/*
			 * We could probably call idm_task_aborted directly
			 * here but we may be holding the conn lock. It's
			 * easier to just switch contexts. Even though
			 * we shouldn't really have any references we'll
			 * set the state to TASK_ABORTING instead of
			 * TASK_ABORTED so we can use the same code path.
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return (s);
		default:
			ASSERT(0);
		}
		break;
	case TASK_ABORTING:
	case TASK_ABORTED:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* We're already past this point... */
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			/* Already doing it */
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_COMPLETE:
		/*
		 * Task already finished; let any remaining references
		 * drain and report the abort as successful.
		 */
		idm_refcnt_wait_ref(&idt->idt_refcnt);
		s = STMF_ABORT_SUCCESS;
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);

	return (s);
}

/*
 * idm_task_aborted
 *
 * Notify the IDM client that the task has been aborted or suspended.
 */
static void
idm_task_aborted(idm_task_t *idt, idm_status_t status)
{
	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
}

/*
 * idm_pdu_tx
 *
 * This is IDM's implementation of the 'Send_Control' operational primitive.
 * This function is invoked by an initiator iSCSI layer requesting the transfer
 * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
 * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
 * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
 * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
 * are provided as input.
 *
 */
void
idm_pdu_tx(idm_pdu_t *pdu)
{
	idm_conn_t *ic = pdu->isp_ic;
	iscsi_async_evt_hdr_t *async_evt;

	/*
	 * If we are in full-featured mode then route SCSI-related
	 * commands to the appropriate function vector without checking
	 * the connection state. We will only be in full-feature mode
	 * when we are in an acceptable state for SCSI PDU's.
	 *
	 * We also need to ensure that there are no PDU events outstanding
	 * on the state machine. Any non-SCSI PDU's received in full-feature
	 * mode will result in PDU events and until these have been handled
	 * we need to route all PDU's through the state machine as PDU
	 * events to maintain ordering.
	 *
	 * Note that IDM cannot enter FFP mode until it processes in
	 * its state machine the last xmit of the login process.
	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
	 * superfluous.
	 */
	mutex_enter(&ic->ic_state_mutex);
	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
		/* Fast path: safe to bypass the state machine */
		mutex_exit(&ic->ic_state_mutex);
		switch (IDM_PDU_OPCODE(pdu)) {
		case ISCSI_OP_SCSI_RSP:
			/* Target only */
			DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
			    iscsi_scsi_rsp_hdr_t *,
			    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_SCSI_TASK_MGT_RSP:
			/* Target only */
			DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
			    iscsi_text_rsp_hdr_t *,
			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_SCSI_DATA_RSP:
			/* Target only */
			DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
			    iscsi_data_rsp_hdr_t *,
			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_RTT_RSP:
			/* Target only */
			DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
			    iscsi_rtt_hdr_t *,
			    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_NOOP_IN:
			/* Target only */
			DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
			    iscsi_nop_in_hdr_t *,
			    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_TEXT_RSP:
			/* Target only */
			DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
			    iscsi_text_rsp_hdr_t *,
			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_TEXT_CMD:
		case ISCSI_OP_NOOP_OUT:
		case ISCSI_OP_SCSI_CMD:
		case ISCSI_OP_SCSI_DATA:
		case ISCSI_OP_SCSI_TASK_MGT_MSG:
			/* Initiator only */
			idm_pdu_tx_forward(ic, pdu);
			return;
		default:
			break;
		}

		/* Non-SCSI PDU in FFP; fall back to the state machine path */
		mutex_enter(&ic->ic_state_mutex);
	}

	/*
	 * Any PDU's processed outside of full-feature mode and non-SCSI
	 * PDU's in full-feature mode are handled by generating an
	 * event to the connection state machine. The state machine
	 * will validate the PDU against the current state and either
	 * transmit the PDU if the opcode is allowed or handle an
	 * error if the PDU is not allowed.
	 *
	 * This code-path will also generate any events that are implied
	 * by the PDU opcode. For example a "login response" with success
	 * status generates a CE_LOGOUT_SUCCESS_SND event.
	 */
	switch (IDM_PDU_OPCODE(pdu)) {
	case ISCSI_OP_LOGIN_CMD:
		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
		break;
	case ISCSI_OP_LOGIN_RSP:
		DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
		    iscsi_login_rsp_hdr_t *,
		    (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
		break;
	case ISCSI_OP_LOGOUT_CMD:
		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
		break;
	case ISCSI_OP_LOGOUT_RSP:
		DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
		    iscsi_logout_rsp_hdr_t *,
		    (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
		break;
	case ISCSI_OP_ASYNC_EVENT:
		DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
		    iscsi_async_evt_hdr_t *,
		    (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
		switch (async_evt->async_event) {
		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
		default:
			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
			    (uintptr_t)pdu);
			break;
		}
		break;
	case ISCSI_OP_SCSI_RSP:
		/* Target only */
		DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
		    iscsi_scsi_rsp_hdr_t *,
		    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_SCSI_TASK_MGT_RSP:
		/* Target only */
		DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
		    iscsi_scsi_task_mgt_rsp_hdr_t *,
		    (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_SCSI_DATA_RSP:
		/* Target only */
		DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
		    iscsi_data_rsp_hdr_t *,
		    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_RTT_RSP:
		/* Target only */
		DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
		    iscsi_rtt_hdr_t *,
		    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_NOOP_IN:
		/* Target only */
		DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
		    iscsi_nop_in_hdr_t *,
		    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_TEXT_RSP:
		/* Target only */
		DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
		    iscsi_text_rsp_hdr_t *,
		    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	/* Initiator only */
	case ISCSI_OP_SCSI_CMD:
	case ISCSI_OP_SCSI_TASK_MGT_MSG:
	case ISCSI_OP_SCSI_DATA:
	case ISCSI_OP_NOOP_OUT:
	case ISCSI_OP_TEXT_CMD:
	case ISCSI_OP_SNACK_CMD:
	case ISCSI_OP_REJECT_MSG:
	default:
		/*
		 * Connection state machine will validate these PDU's against
		 * the current state. A PDU not allowed in the current
		 * state will cause a protocol error.
		 */
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	}
	mutex_exit(&ic->ic_state_mutex);
}

/*
 * Common allocation of a PDU along with memory for header and data.
 */
static idm_pdu_t *
idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
{
	idm_pdu_t *result;

	/*
	 * IDM clients should cache these structures for performance
	 * critical paths. We can't cache effectively in IDM because we
	 * don't know the correct header and data size.
	 *
	 * Valid header length is assumed to be hdrlen and valid data
	 * length is assumed to be datalen. isp_hdrlen and isp_datalen
	 * can be adjusted after the PDU is returned if necessary.
	 */
	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
	if (result != NULL) {
		/* For idm_pdu_free sanity check */
		result->isp_flags |= IDM_PDU_ALLOC;
		/* pointer arithmetic */
		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
		result->isp_hdrlen = hdrlen;
		result->isp_hdrbuflen = hdrlen;
		result->isp_transport_hdrlen = 0;
		if (datalen != 0)
			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
		result->isp_datalen = datalen;
		result->isp_databuflen = datalen;
		result->isp_magic = IDM_PDU_MAGIC;
	}

	return (result);
}

/*
 * Typical idm_pdu_alloc invocation, will block for resources.
 */
idm_pdu_t *
idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
}

/*
 * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
 * are not available. Needed for transport-layer allocations which may
 * be invoking in interrupt context.
 */
idm_pdu_t *
idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
}

/*
 * Free a PDU previously allocated with idm_pdu_alloc() including any
 * header and data space allocated as part of the original request.
 * Additional memory regions referenced by subsequent modification of
 * the isp_hdr and/or isp_data fields will not be freed.
 */
void
idm_pdu_free(idm_pdu_t *pdu)
{
	/* Make sure the structure was allocated using idm_pdu_alloc() */
	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
	kmem_free(pdu,
	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
}

/*
 * Initialize the connection, private and callback fields in a PDU.
 */
void
idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
{
	/*
	 * idm_pdu_complete() will call idm_pdu_free if the callback is
	 * NULL. This will only work if the PDU was originally allocated
	 * with idm_pdu_alloc().
	 */
	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
	    (cb != NULL));
	pdu->isp_magic = IDM_PDU_MAGIC;
	pdu->isp_ic = ic;
	pdu->isp_private = private;
	pdu->isp_callback = cb;
}

/*
 * Initialize the header and header length field. This function should
 * not be used to adjust the header length in a buffer allocated via
 * pdu_pdu_alloc since it overwrites the existing header pointer.
 */
void
idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
{
	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
	pdu->isp_hdrlen = hdrlen;
}

/*
 * Initialize the data and data length fields. This function should
 * not be used to adjust the data length of a buffer allocated via
 * idm_pdu_alloc since it overwrites the existing data pointer.
 */
void
idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
{
	pdu->isp_data = data;
	pdu->isp_datalen = datalen;
}

/*
 * idm_pdu_complete
 *
 * Invoke the PDU completion callback if one was set, otherwise free the
 * PDU (which must then have come from idm_pdu_alloc()).
 */
void
idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
{
	if (pdu->isp_callback) {
		pdu->isp_status = status;
		(*pdu->isp_callback)(pdu, status);
	} else {
		idm_pdu_free(pdu);
	}
}

/*
 * State machine auditing
 */

void
idm_sm_audit_init(sm_audit_buf_t *audit_buf)
{
	bzero(audit_buf, sizeof (sm_audit_buf_t));
	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
}

/*
 * idm_sm_audit_common
 *
 * Claim the next slot in the circular audit buffer and fill in the
 * fields common to all audit record types.
 */
static
sm_audit_record_t *
idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
    sm_audit_sm_type_t sm_type,
    int current_state)
{
	sm_audit_record_t *sar;

	sar = audit_buf->sab_records;
	sar += audit_buf->sab_index;
	audit_buf->sab_index++;
	/*
	 * Wrap the circular index with a mask; assumes SM_AUDIT_BUF_MAX_REC
	 * is a power of two so sab_max_index is an all-ones mask.
	 */
	audit_buf->sab_index &= audit_buf->sab_max_index;

	sar->sar_type = r_type;
	gethrestime(&sar->sar_timestamp);
	sar->sar_sm_type = sm_type;
	sar->sar_state = current_state;

	return (sar);
}

/*
 * idm_sm_audit_event
 *
 * Record a state machine event in the audit buffer.
 */
void
idm_sm_audit_event(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state,
    int event, uintptr_t event_info)
{
	sm_audit_record_t *sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
	    sm_type, current_state);
	sar->sar_event = event;
	sar->sar_event_info = event_info;
}

/*
 * idm_sm_audit_state_change
 *
 * Record a state machine state transition in the audit buffer.
 */
void
idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state, int new_state)
{
	sm_audit_record_t *sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
	    sm_type, current_state);
	sar->sar_new_state = new_state;
}


/*
 * Object reference tracking
 */

void
idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
{
	bzero(refcnt, sizeof (*refcnt));
	idm_refcnt_reset(refcnt);
	refcnt->ir_referenced_obj = referenced_obj;
	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
}

void
idm_refcnt_destroy(idm_refcnt_t *refcnt)
{
	/*
	 * Grab the mutex so there are no other lingering threads holding
	 * the mutex before we destroy it (e.g. idm_refcnt_rele just after
	 * the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC)
	 */
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt == 0);
	cv_destroy(&refcnt->ir_cv);
	mutex_destroy(&refcnt->ir_mutex);
}

/*
 * idm_refcnt_reset
 *
 * Return the refcnt to its initial state (no references, no waiter).
 */
void
idm_refcnt_reset(idm_refcnt_t *refcnt)
{
	refcnt->ir_waiting = REF_NOWAIT;
	refcnt->ir_refcnt = 0;
}

void
idm_refcnt_hold(idm_refcnt_t *refcnt)
{
	/*
	 * Nothing should take a hold on an object after a call to
	 * idm_refcnt_wait_ref or idm_refcnt_async_wait_ref
	 */
	ASSERT(refcnt->ir_waiting == REF_NOWAIT);

	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_refcnt++;
	REFCNT_AUDIT(refcnt);
	mutex_exit(&refcnt->ir_mutex);
}

/*
 * idm_refcnt_unref_task
 *
 * Taskq wrapper that invokes the client's unref callback on the
 * referenced object.
 */
static void
idm_refcnt_unref_task(void *refcnt_void)
{
	idm_refcnt_t *refcnt = refcnt_void;

	REFCNT_AUDIT(refcnt);
	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
}

void
idm_refcnt_rele(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);
	if (refcnt->ir_waiting == REF_NOWAIT) {
		/* No one is waiting on this object */
		mutex_exit(&refcnt->ir_mutex);
		return;
	}

	/*
	 * Someone is waiting for this object to go idle so check if
	 * refcnt is 0.  Waiting on an object then later grabbing another
	 * reference is not allowed so we don't need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
			if (taskq_dispatch(idm.idm_global_taskq,
			    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) ==
			    TASKQID_INVALID) {
				cmn_err(CE_WARN,
				    "idm_refcnt_rele: Couldn't dispatch task");
			}
		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
			cv_signal(&refcnt->ir_cv);
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

/*
 * idm_refcnt_rele_and_destroy
 *
 * Drop a reference and arrange for cb_func to run (via the global taskq)
 * once the count reaches zero.
 */
void
idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);

	/*
	 * Someone is waiting for this object to go idle so check if
	 * refcnt is 0. Waiting on an object then later grabbing another
	 * reference is not allowed so we don't need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		refcnt->ir_cb = cb_func;
		refcnt->ir_waiting = REF_WAIT_ASYNC;
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) ==
		    TASKQID_INVALID) {
			cmn_err(CE_WARN,
			    "idm_refcnt_rele: Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

/*
 * idm_refcnt_wait_ref
 *
 * Block until the reference count reaches zero.
 */
void
idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_SYNC;
	REFCNT_AUDIT(refcnt);
	while (refcnt->ir_refcnt != 0)
		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
	mutex_exit(&refcnt->ir_mutex);
}

/*
 * idm_refcnt_async_wait_ref
 *
 * Arrange for cb_func to run (via the global taskq) once the reference
 * count reaches zero; returns without blocking.
 */
void
idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_ASYNC;
	refcnt->ir_cb = cb_func;
	REFCNT_AUDIT(refcnt);
	/*
	 * It's possible we don't have any references.  To make things easier
To make things easier 2247 * on the caller use a taskq to call the callback instead of 2248 * calling it synchronously 2249 */ 2250 if (refcnt->ir_refcnt == 0) { 2251 if (taskq_dispatch(idm.idm_global_taskq, 2252 &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == 2253 TASKQID_INVALID) { 2254 cmn_err(CE_WARN, 2255 "idm_refcnt_async_wait_ref: " 2256 "Couldn't dispatch task"); 2257 } 2258 } 2259 mutex_exit(&refcnt->ir_mutex); 2260 } 2261 2262 void 2263 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt, 2264 idm_refcnt_cb_t *cb_func) 2265 { 2266 mutex_enter(&refcnt->ir_mutex); 2267 if (refcnt->ir_refcnt == 0) { 2268 mutex_exit(&refcnt->ir_mutex); 2269 (*cb_func)(refcnt->ir_referenced_obj); 2270 return; 2271 } 2272 mutex_exit(&refcnt->ir_mutex); 2273 } 2274 2275 /* 2276 * used to determine the status of the refcnt. 2277 * 2278 * if refcnt is 0 return is 0 2279 * if refcnt is negative return is -1 2280 * if refcnt > 0 and no waiters return is 1 2281 * if refcnt > 0 and waiters return is 2 2282 */ 2283 int 2284 idm_refcnt_is_held(idm_refcnt_t *refcnt) 2285 { 2286 if (refcnt->ir_refcnt < 0) 2287 return (-1); 2288 2289 if (refcnt->ir_refcnt == 0) 2290 return (0); 2291 2292 if (refcnt->ir_waiting == REF_NOWAIT && refcnt->ir_refcnt > 0) 2293 return (1); 2294 2295 return (2); 2296 } 2297 2298 void 2299 idm_conn_hold(idm_conn_t *ic) 2300 { 2301 idm_refcnt_hold(&ic->ic_refcnt); 2302 } 2303 2304 void 2305 idm_conn_rele(idm_conn_t *ic) 2306 { 2307 idm_refcnt_rele(&ic->ic_refcnt); 2308 } 2309 2310 void 2311 idm_conn_set_target_name(idm_conn_t *ic, char *target_name) 2312 { 2313 (void) strlcpy(ic->ic_target_name, target_name, ISCSI_MAX_NAME_LEN + 1); 2314 } 2315 2316 void 2317 idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name) 2318 { 2319 (void) strlcpy(ic->ic_initiator_name, initiator_name, 2320 ISCSI_MAX_NAME_LEN + 1); 2321 } 2322 2323 void 2324 idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN]) 2325 { 2326 (void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1, 
2327 "%02x%02x%02x%02x%02x%02x", 2328 isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]); 2329 } 2330 2331 static int 2332 _idm_init(void) 2333 { 2334 /* Initialize the rwlock for the taskid table */ 2335 rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL); 2336 2337 /* Initialize the global mutex and taskq */ 2338 mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL); 2339 2340 cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL); 2341 cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL); 2342 2343 /* 2344 * The maximum allocation needs to be high here since there can be 2345 * many concurrent tasks using the global taskq. 2346 */ 2347 idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri, 2348 128, 16384, TASKQ_PREPOPULATE); 2349 if (idm.idm_global_taskq == NULL) { 2350 cv_destroy(&idm.idm_wd_cv); 2351 cv_destroy(&idm.idm_tgt_svc_cv); 2352 mutex_destroy(&idm.idm_global_mutex); 2353 rw_destroy(&idm.idm_taskid_table_lock); 2354 return (ENOMEM); 2355 } 2356 2357 /* Start watchdog thread */ 2358 idm.idm_wd_thread = thread_create(NULL, 0, 2359 idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri); 2360 if (idm.idm_wd_thread == NULL) { 2361 /* Couldn't create the watchdog thread */ 2362 taskq_destroy(idm.idm_global_taskq); 2363 cv_destroy(&idm.idm_wd_cv); 2364 cv_destroy(&idm.idm_tgt_svc_cv); 2365 mutex_destroy(&idm.idm_global_mutex); 2366 rw_destroy(&idm.idm_taskid_table_lock); 2367 return (ENOMEM); 2368 } 2369 2370 /* Pause until the watchdog thread is running */ 2371 mutex_enter(&idm.idm_global_mutex); 2372 while (!idm.idm_wd_thread_running) 2373 cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex); 2374 mutex_exit(&idm.idm_global_mutex); 2375 2376 /* 2377 * Allocate the task ID table and set "next" to 0. 
2378 */ 2379 2380 idm.idm_taskid_max = idm_max_taskids; 2381 idm.idm_taskid_table = (idm_task_t **) 2382 kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP); 2383 idm.idm_taskid_next = 0; 2384 2385 /* Create the global buffer and task kmem caches */ 2386 idm.idm_buf_cache = kmem_cache_create("idm_buf_cache", 2387 sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP); 2388 2389 /* 2390 * Note, we're explicitly allocating an additional iSER header- 2391 * sized chunk for each of these elements. See idm_task_constructor(). 2392 */ 2393 idm.idm_task_cache = kmem_cache_create("idm_task_cache", 2394 sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8, 2395 &idm_task_constructor, &idm_task_destructor, 2396 NULL, NULL, NULL, KM_SLEEP); 2397 2398 /* Create the service and connection context lists */ 2399 list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t), 2400 offsetof(idm_svc_t, is_list_node)); 2401 list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t), 2402 offsetof(idm_conn_t, ic_list_node)); 2403 list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t), 2404 offsetof(idm_conn_t, ic_list_node)); 2405 2406 /* Initialize the native sockets transport */ 2407 idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]); 2408 2409 /* Create connection ID pool */ 2410 (void) idm_idpool_create(&idm.idm_conn_id_pool); 2411 2412 return (DDI_SUCCESS); 2413 } 2414 2415 static int 2416 _idm_fini(void) 2417 { 2418 if (!list_is_empty(&idm.idm_ini_conn_list) || 2419 !list_is_empty(&idm.idm_tgt_conn_list) || 2420 !list_is_empty(&idm.idm_tgt_svc_list)) { 2421 return (EBUSY); 2422 } 2423 2424 mutex_enter(&idm.idm_global_mutex); 2425 idm.idm_wd_thread_running = B_FALSE; 2426 cv_signal(&idm.idm_wd_cv); 2427 mutex_exit(&idm.idm_global_mutex); 2428 2429 thread_join(idm.idm_wd_thread_did); 2430 2431 idm_idpool_destroy(&idm.idm_conn_id_pool); 2432 2433 /* Close any LDI handles we have open on transport drivers */ 2434 mutex_enter(&idm.idm_global_mutex); 2435 
idm_transport_teardown(); 2436 mutex_exit(&idm.idm_global_mutex); 2437 2438 /* Teardown the native sockets transport */ 2439 idm_so_fini(); 2440 2441 list_destroy(&idm.idm_ini_conn_list); 2442 list_destroy(&idm.idm_tgt_conn_list); 2443 list_destroy(&idm.idm_tgt_svc_list); 2444 kmem_cache_destroy(idm.idm_task_cache); 2445 kmem_cache_destroy(idm.idm_buf_cache); 2446 kmem_free(idm.idm_taskid_table, 2447 idm.idm_taskid_max * sizeof (idm_task_t *)); 2448 mutex_destroy(&idm.idm_global_mutex); 2449 cv_destroy(&idm.idm_wd_cv); 2450 cv_destroy(&idm.idm_tgt_svc_cv); 2451 rw_destroy(&idm.idm_taskid_table_lock); 2452 2453 return (0); 2454 } 2455