/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpuvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>

#include <sys/socket.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>

#include <sys/socketvar.h>
#include <netinet/in.h>

#include <sys/idm/idm.h>
#include <sys/idm/idm_so.h>

#define	IDM_NAME_VERSION	"iSCSI Data Mover"

extern struct mod_ops mod_miscops;

static struct modlmisc modlmisc = {
	&mod_miscops,	/* Type of module */
	IDM_NAME_VERSION
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

extern int idm_task_compare(const void *t1, const void *t2);
extern void idm_wd_thread(void *arg);

static int _idm_init(void);
static int _idm_fini(void);
static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
    idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
    int sleepflag);

boolean_t idm_conn_logging = 0;
boolean_t idm_svc_logging = 0;
#ifdef DEBUG
boolean_t idm_pattern_checking = 1;
#else
boolean_t idm_pattern_checking = 0;
#endif

/*
 * Potential tuneable for the maximum number of tasks.  Default to
 * IDM_TASKIDS_MAX
 */

uint32_t idm_max_taskids = IDM_TASKIDS_MAX;

/*
 * Global list of transport handles
 *   These are listed in preferential order, so we can simply take the
 *   first "it_conn_is_capable" hit.  Note also that the order maps to
 *   the order of the idm_transport_type_t list.
 */
idm_transport_t idm_transport_list[] = {

	/* iSER on InfiniBand transport handle */
	{IDM_TRANSPORT_TYPE_ISER,	/* type */
	"/devices/ib/iser@0:iser",	/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL},				/* transport caps */

	/* IDM native sockets transport handle */
	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
	NULL,				/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL}				/* transport caps */

};

int
_init(void)
{
	int rc;

	if ((rc = _idm_init()) != 0) {
		return (rc);
	}

	return (mod_install(&modlinkage));
}

int
_fini(void)
{
	int rc;

	if ((rc = _idm_fini()) != 0) {
		return (rc);
	}

	if ((rc = mod_remove(&modlinkage)) != 0) {
		return (rc);
	}

	return (rc);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * idm_transport_register()
 *
 * Provides a mechanism for an IDM transport driver to register its
 * transport ops and caps with the IDM kernel module.  Invoked during
 * a transport driver's attach routine.
 */
idm_status_t
idm_transport_register(idm_transport_attr_t *attr)
{
	ASSERT(attr->it_ops != NULL);
	ASSERT(attr->it_caps != NULL);

	switch (attr->type) {
	/* All known non-native transports here; for now, iSER */
	case IDM_TRANSPORT_TYPE_ISER:
		idm_transport_list[attr->type].it_ops = attr->it_ops;
		idm_transport_list[attr->type].it_caps = attr->it_caps;
		return (IDM_STATUS_SUCCESS);

	default:
		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
		    "idm_transport_register", attr->type);
		return (IDM_STATUS_SUCCESS);
	}
}

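/*
 * Illustrative sketch (not part of this module): how a hypothetical
 * transport driver might register itself from its attach(9E) routine.
 * The iser_attach function and the iser_transport_ops/iser_transport_caps
 * objects are assumptions for the example; only idm_transport_register()
 * and the attr fields used above come from this file.
 *
 *	static int
 *	iser_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 *	{
 *		idm_transport_attr_t	attr;
 *
 *		attr.type = IDM_TRANSPORT_TYPE_ISER;
 *		attr.it_ops = &iser_transport_ops;
 *		attr.it_caps = &iser_transport_caps;
 *		if (idm_transport_register(&attr) != IDM_STATUS_SUCCESS)
 *			return (DDI_FAILURE);
 *		...
 *		return (DDI_SUCCESS);
 *	}
 */
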
/*
 * idm_ini_conn_create
 *
 * This function is invoked by the iSCSI layer to create a connection context.
 * This does not actually establish the socket connection.
 *
 * cr - Connection request parameters
 * new_con - Output parameter that contains the new connection if successful
 *
 */
idm_status_t
idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
{
	idm_transport_t	*it;
	idm_conn_t	*ic;
	int		rc;

	it = idm_transport_lookup(cr);

retry:
	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
	    &cr->icr_conn_ops);

	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
	    sizeof (cr->cr_ini_dst_addr));

	/* create the transport-specific connection components */
	rc = it->it_ops->it_ini_conn_create(cr, ic);
	if (rc != IDM_STATUS_SUCCESS) {
		/* cleanup the failed connection */
		idm_conn_destroy_common(ic);

		/*
		 * It is possible for an IB client to connect to
		 * an ethernet-only client via an IB-eth gateway.
		 * Therefore, if we are attempting to use iSER and
		 * fail, retry with sockets before ultimately
		 * failing the connection.
		 */
		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
			goto retry;
		}

		return (IDM_STATUS_FAIL);
	}

	*new_con = ic;

	mutex_enter(&idm.idm_global_mutex);
	list_insert_tail(&idm.idm_ini_conn_list, ic);
	mutex_exit(&idm.idm_global_mutex);

	return (IDM_STATUS_SUCCESS);
}

/*
 * idm_ini_conn_destroy
 *
 * Releases any resources associated with the connection.  This is the
 * complement to idm_ini_conn_create.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 */
void
idm_ini_conn_destroy_task(void *ic_void)
{
	idm_conn_t *ic = ic_void;

	ic->ic_transport_ops->it_ini_conn_destroy(ic);
	idm_conn_destroy_common(ic);
}

void
idm_ini_conn_destroy(idm_conn_t *ic)
{
	/*
	 * It's reasonable for the initiator to call idm_ini_conn_destroy
	 * from within the context of the CN_CONNECT_DESTROY notification.
	 * That's a problem since we want to destroy the taskq for the
	 * state machine associated with the connection.  Remove the
	 * connection from the list right away then handle the remaining
	 * work via the idm_global_taskq.
	 */
	mutex_enter(&idm.idm_global_mutex);
	list_remove(&idm.idm_ini_conn_list, ic);
	mutex_exit(&idm.idm_global_mutex);

	if (taskq_dispatch(idm.idm_global_taskq,
	    &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
		cmn_err(CE_WARN,
		    "idm_ini_conn_destroy: Couldn't dispatch task");
	}
}

/*
 * idm_ini_conn_connect
 *
 * Establish connection to the remote system identified in idm_conn_t.
 * The connection parameters including the remote IP address were established
 * in the call to idm_ini_conn_create.  The IDM state machine will
 * perform client notifications as necessary to prompt the initiator through
 * the login process.  IDM also keeps a timer running so that if the login
 * process doesn't complete in a timely manner it will fail.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 * Returns success if the connection was established, otherwise some kind
 * of meaningful error code.
 *
 * Upon return the login has either failed or is logging in (FFP)
 */
idm_status_t
idm_ini_conn_connect(idm_conn_t *ic)
{
	idm_status_t	rc = IDM_STATUS_SUCCESS;

	rc = idm_conn_sm_init(ic);
	if (rc != IDM_STATUS_SUCCESS) {
		return (ic->ic_conn_sm_status);
	}

	/* Hold connection until we return */
	idm_conn_hold(ic);

	/* Kick state machine */
	idm_conn_event(ic, CE_CONNECT_REQ, NULL);

	/* Wait for login flag */
	mutex_enter(&ic->ic_state_mutex);
	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
	    !(ic->ic_state_flags & CF_ERROR)) {
		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}
	mutex_exit(&ic->ic_state_mutex);

	if (ic->ic_state_flags & CF_ERROR) {
		/* ic->ic_conn_sm_status will contain failure status */
		idm_conn_rele(ic);
		return (ic->ic_conn_sm_status);
	}

	/* Ready to login */
	ASSERT(ic->ic_state_flags & CF_LOGIN_READY);
	(void) idm_notify_client(ic, CN_READY_FOR_LOGIN, NULL);

	idm_conn_rele(ic);

	return (rc);
}

/*
 * idm_ini_conn_disconnect
 *
 * Forces a connection (previously established using idm_ini_conn_connect)
 * to perform a controlled shutdown, cleaning up any outstanding requests.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 * This is asynchronous and will return before the connection is properly
 * shutdown
 */
/* ARGSUSED */
void
idm_ini_conn_disconnect(idm_conn_t *ic)
{
	idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
}

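/*
 * Illustrative sketch (not part of this module): the initiator-side
 * connection life cycle as a hypothetical client might drive it.  The
 * my_conn_ops vector and the address setup are assumptions for the
 * example; the call sequence follows the functions above and the
 * synchronous disconnect defined below.
 *
 *	idm_conn_req_t	cr;
 *	idm_conn_t	*ic;
 *
 *	bzero(&cr, sizeof (cr));
 *	cr.icr_conn_ops = my_conn_ops;
 *	... initialize cr.cr_ini_dst_addr with the target address ...
 *
 *	if (idm_ini_conn_create(&cr, &ic) != IDM_STATUS_SUCCESS)
 *		return;
 *	if (idm_ini_conn_connect(ic) != IDM_STATUS_SUCCESS) {
 *		idm_ini_conn_destroy(ic);
 *		return;
 *	}
 *	... login and full-feature I/O ...
 *	idm_ini_conn_disconnect_sync(ic);
 *	idm_ini_conn_destroy(ic);
 */
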
/*
 * idm_ini_conn_disconnect_sync
 *
 * Forces a connection (previously established using idm_ini_conn_connect)
 * to perform a controlled shutdown.  Blocks until the connection is
 * disconnected.
 *
 * ic - idm_conn_t structure representing the relevant connection
 */
/* ARGSUSED */
void
idm_ini_conn_disconnect_sync(idm_conn_t *ic)
{
	mutex_enter(&ic->ic_state_mutex);
	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
	    (ic->ic_state != CS_S11_COMPLETE)) {
		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
		    (ic->ic_state != CS_S11_COMPLETE))
			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}
	mutex_exit(&ic->ic_state_mutex);
}

/*
 * idm_tgt_svc_create
 *
 * The target calls this service to obtain a service context for each available
 * transport, starting a service of each type related to the IP address and
 * port passed.  The idm_svc_req_t contains the service parameters.
 */
idm_status_t
idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;
	idm_svc_t		*is;
	int			rc;

	*new_svc = NULL;
	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);

	/* Initialize transport-agnostic components of the service handle */
	is->is_svc_req = *sr;
	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
	idm_refcnt_init(&is->is_refcnt, is);

	/*
	 * Make sure all available transports are setup.  We call this now
	 * instead of at initialization time in case IB has become available
	 * since we started (hotplug, etc).
	 */
	idm_transport_setup(sr->sr_li);

	/*
	 * Loop through the transports, configuring the transport-specific
	 * components of each one.
	 */
	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {

		it = &idm_transport_list[type];
		/*
		 * If it_ops is NULL then the transport is unconfigured
		 * and we shouldn't try to start the service.
		 */
		if (it->it_ops == NULL) {
			continue;
		}

		rc = it->it_ops->it_tgt_svc_create(sr, is);
		if (rc != IDM_STATUS_SUCCESS) {
			/* Teardown any configured services */
			while (type--) {
				it = &idm_transport_list[type];
				if (it->it_ops == NULL) {
					continue;
				}
				it->it_ops->it_tgt_svc_destroy(is);
			}
			/* Free the svc context and return */
			kmem_free(is, sizeof (idm_svc_t));
			return (rc);
		}
	}

	*new_svc = is;

	mutex_enter(&idm.idm_global_mutex);
	list_insert_tail(&idm.idm_tgt_svc_list, is);
	mutex_exit(&idm.idm_global_mutex);

	return (IDM_STATUS_SUCCESS);
}

/*
 * idm_tgt_svc_destroy
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Cleanup any resources associated with the idm_svc_t.
 */
void
idm_tgt_svc_destroy(idm_svc_t *is)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;

	mutex_enter(&idm.idm_global_mutex);
	/* remove this service from the global list */
	list_remove(&idm.idm_tgt_svc_list, is);
	/* wakeup any waiters for service change */
	cv_broadcast(&idm.idm_tgt_svc_cv);
	mutex_exit(&idm.idm_global_mutex);

	/* teardown each transport-specific service */
	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
		it = &idm_transport_list[type];
		if (it->it_ops == NULL) {
			continue;
		}

		it->it_ops->it_tgt_svc_destroy(is);
	}

	/* tear down the svc resources */
	idm_refcnt_destroy(&is->is_refcnt);
	cv_destroy(&is->is_count_cv);
	mutex_destroy(&is->is_count_mutex);
	cv_destroy(&is->is_cv);
	mutex_destroy(&is->is_mutex);

	/* free the svc handle */
	kmem_free(is, sizeof (idm_svc_t));
}

void
idm_tgt_svc_hold(idm_svc_t *is)
{
	idm_refcnt_hold(&is->is_refcnt);
}

void
idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
{
	idm_refcnt_rele_and_destroy(&is->is_refcnt,
	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
}

/*
 * idm_tgt_svc_online
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Online each transport service, as we want this target to be accessible
 * via any configured transport.
 *
 * When the initiator establishes a new connection to the target, IDM will
 * call the "new connect" callback defined in the idm_svc_req_t structure
 * and it will pass an idm_conn_t structure representing that new connection.
 */
idm_status_t
idm_tgt_svc_online(idm_svc_t *is)
{

	idm_transport_type_t	type, last_type;
	idm_transport_t		*it;
	int			rc = IDM_STATUS_SUCCESS;

	mutex_enter(&is->is_mutex);
	if (is->is_online == 0) {
		/* Walk through each of the transports and online them */
		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
			it = &idm_transport_list[type];
			if (it->it_ops == NULL) {
				/* transport is not registered */
				continue;
			}

			mutex_exit(&is->is_mutex);
			rc = it->it_ops->it_tgt_svc_online(is);
			mutex_enter(&is->is_mutex);
			if (rc != IDM_STATUS_SUCCESS) {
				last_type = type;
				break;
			}
		}
		if (rc != IDM_STATUS_SUCCESS) {
			/*
			 * The last transport failed to online.
			 * Offline any transport onlined above and
			 * do not online the target.
			 */
			for (type = 0; type < last_type; type++) {
				it = &idm_transport_list[type];
				if (it->it_ops == NULL) {
					/* transport is not registered */
					continue;
				}

				mutex_exit(&is->is_mutex);
				it->it_ops->it_tgt_svc_offline(is);
				mutex_enter(&is->is_mutex);
			}
		} else {
			/* Target service now online */
			is->is_online = 1;
		}
	} else {
		/* Target service already online, just bump the count */
		is->is_online++;
	}
	mutex_exit(&is->is_mutex);

	return (rc);
}

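/*
 * Illustrative sketch (not part of this module): a hypothetical target
 * bringing a listening service online.  The port number and idm_svc_req_t
 * initialization are assumptions for the example; idm_tgt_svc_offline()
 * is defined below.  A real client must ensure no holds remain before
 * destroying the service (or use idm_tgt_svc_rele_and_destroy()).
 *
 *	idm_svc_req_t	sr;
 *	idm_svc_t	*svc;
 *
 *	bzero(&sr, sizeof (sr));
 *	sr.sr_port = 3260;
 *	... fill in sr.sr_li and the "new connect" callback ...
 *
 *	if (idm_tgt_svc_create(&sr, &svc) != IDM_STATUS_SUCCESS)
 *		return;
 *	if (idm_tgt_svc_online(svc) != IDM_STATUS_SUCCESS)
 *		return;
 *	... accept connections; idm_tgt_svc_lookup() returns the
 *	    service with a hold that is dropped via
 *	    idm_tgt_svc_rele_and_destroy() ...
 *	idm_tgt_svc_offline(svc);
 */
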
/*
 * idm_tgt_svc_offline
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Shutdown any online target services.
 */
void
idm_tgt_svc_offline(idm_svc_t *is)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;

	mutex_enter(&is->is_mutex);
	is->is_online--;
	if (is->is_online == 0) {
		/* Walk through each of the transports and offline them */
		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
			it = &idm_transport_list[type];
			if (it->it_ops == NULL) {
				/* transport is not registered */
				continue;
			}

			mutex_exit(&is->is_mutex);
			it->it_ops->it_tgt_svc_offline(is);
			mutex_enter(&is->is_mutex);
		}
	}
	mutex_exit(&is->is_mutex);
}

/*
 * idm_tgt_svc_lookup
 *
 * Lookup a service instance listening on the specified port
 */

idm_svc_t *
idm_tgt_svc_lookup(uint16_t port)
{
	idm_svc_t *result;

retry:
	mutex_enter(&idm.idm_global_mutex);
	for (result = list_head(&idm.idm_tgt_svc_list);
	    result != NULL;
	    result = list_next(&idm.idm_tgt_svc_list, result)) {
		if (result->is_svc_req.sr_port == port) {
			if (result->is_online == 0) {
				/*
				 * A service exists on this port, but it
				 * is going away, wait for it to cleanup.
				 */
				cv_wait(&idm.idm_tgt_svc_cv,
				    &idm.idm_global_mutex);
				mutex_exit(&idm.idm_global_mutex);
				goto retry;
			}
			idm_tgt_svc_hold(result);
			mutex_exit(&idm.idm_global_mutex);
			return (result);
		}
	}
	mutex_exit(&idm.idm_global_mutex);

	return (NULL);
}

/*
 * idm_negotiate_key_values()
 * Give IDM level a chance to negotiate any login parameters it should own.
 *  -- leave unhandled parameters alone on request_nvl
 *  -- move all handled parameters to response_nvl with an appropriate response
 *  -- also add an entry to negotiated_nvl for any accepted parameters
 */
kv_status_t
idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
	ASSERT(ic->ic_transport_ops != NULL);
	return (ic->ic_transport_ops->it_negotiate_key_values(ic,
	    request_nvl, response_nvl, negotiated_nvl));
}

/*
 * idm_notice_key_values()
 * Activate at the IDM level any parameters that have been negotiated.
 * Passes the set of key value pairs to the transport for activation.
 * This will be invoked as the connection is entering full-feature mode.
 */
void
idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
{
	ASSERT(ic->ic_transport_ops != NULL);
	ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
}

/*
 * idm_declare_key_values()
 * Activate an operational set of declarative parameters from the config_nvl,
 * and return the selected values in the outgoing_nvl.
 */
kv_status_t
idm_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl,
    nvlist_t *outgoing_nvl)
{
	ASSERT(ic->ic_transport_ops != NULL);
	return (ic->ic_transport_ops->it_declare_key_values(ic, config_nvl,
	    outgoing_nvl));
}

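/*
 * Illustrative sketch (not part of this module): how a hypothetical login
 * path might hand IDM its share of key negotiation.  The nvlist names and
 * surrounding flow are assumptions; nvlist_alloc(9F) is the standard
 * kernel interface.
 *
 *	nvlist_t	*req_nvl, *rsp_nvl, *neg_nvl;
 *	kv_status_t	kvrc;
 *
 *	... req_nvl holds the key=value pairs parsed from the login PDU ...
 *	(void) nvlist_alloc(&rsp_nvl, NV_UNIQUE_NAME, KM_SLEEP);
 *	(void) nvlist_alloc(&neg_nvl, NV_UNIQUE_NAME, KM_SLEEP);
 *
 *	kvrc = idm_negotiate_key_values(ic, req_nvl, rsp_nvl, neg_nvl);
 *	... keys left on req_nvl are the client's to negotiate; handled
 *	    keys were moved to rsp_nvl and accepted ones recorded on
 *	    neg_nvl ...
 *
 *	idm_notice_key_values(ic, neg_nvl);	(on entry to FFP)
 */
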
/*
 * idm_buf_tx_to_ini
 *
 * This is IDM's implementation of the 'Put_Data' operational primitive.
 *
 * This function is invoked by a target iSCSI layer to request its local
 * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
 * on the remote iSCSI node.  The I/O buffer represented by 'idb' is
 * transferred to the initiator associated with task 'idt'.  The connection
 * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
 * and the callback (idb->idb_buf_cb) at transfer completion are
 * provided as input.
 *
 * This data transfer takes place transparently to the remote iSCSI layer,
 * i.e. without its participation.
 *
 * Using sockets, IDM implements the data transfer by segmenting the data
 * buffer into appropriately sized iSCSI PDUs and transmitting them to the
 * initiator.  iSER performs the transfer using RDMA write.
 *
 */
idm_status_t
idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * Buffer should not contain the pattern.  If the pattern is
	 * present then we've been asked to transmit uninitialized data.
	 */
	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_tx_to_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_in_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
		    (idt, idb);
		return (rc);

	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended
		 */
		idm_buf_bind_in_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Once the task is aborted, any buffers added to the
		 * idt_inbufv will never get cleaned up, so just return
		 * SUCCESS.  The buffer should get cleaned up by the
		 * client or framework once task_aborted has completed.
		 */
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_FAIL);
}

/*
 * idm_buf_rx_from_ini
 *
 * This is IDM's implementation of the 'Get_Data' operational primitive.
 *
 * This function is invoked by a target iSCSI layer to request its local
 * Datamover layer to retrieve certain data identified by the R2T PDU from the
 * peer iSCSI layer on the remote node.  The retrieved Data-Out PDU will be
 * mapped to the respective buffer by the task tags (ITT & TTT).
 * The connection information, contents of an R2T PDU, DataDescriptor, BHS,
 * and the callback (idb->idb_buf_cb) notification for data transfer
 * completion are provided as input.
 *
 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the
 * local and remote Datamover layers transparently bring about the data
 * transfer requested by the R2T PDU, without the participation of the
 * iSCSI layers.
 *
 * Using sockets, IDM transmits an R2T PDU for each buffer and the
 * rx_data_out() function assembles the Data-Out PDUs into the buffer.
 * iSER uses RDMA read.
 *
 */
idm_status_t
idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
	 * "Data Out" PDU's
	 */
	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_rx_from_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_out_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
		    (idt, idb);
		return (rc);
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended
		 */
		idm_buf_bind_out_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);
	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_FAIL);
}

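/*
 * Illustrative sketch (not part of this module): a hypothetical target
 * using the Put_Data primitive above to return read data.  The completion
 * callback, task setup and data source are assumptions for the example;
 * idm_buf_alloc() and idm_buf_free() are defined below.
 *
 *	static void
 *	my_rd_xfer_done(idm_buf_t *idb, idm_status_t status)
 *	{
 *		... send SCSI status, then idm_buf_free(idb) ...
 *	}
 *
 *	idm_buf_t *idb;
 *
 *	idb = idm_buf_alloc(ic, NULL, xfer_len);
 *	if (idb == NULL)
 *		return (IDM_STATUS_FAIL);
 *	bcopy(read_data, idb->idb_buf, xfer_len);
 *	return (idm_buf_tx_to_ini(idt, idb, 0, xfer_len,
 *	    my_rd_xfer_done, NULL));
 */
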
/*
 * idm_buf_tx_to_ini_done
 *
 * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_tx_to_ini
 *
 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 * idt may be freed after the call to idb->idb_buf_cb.
 */
void
idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
	ASSERT(mutex_owned(&idt->idt_mutex));
	idb->idb_in_transport = B_FALSE;
	idb->idb_tx_thread = B_FALSE;
	idt->idt_tx_to_ini_done++;
	gethrestime(&idb->idb_xfer_done);

	/*
	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
	 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
	 * to 0.
	 */
	idm_task_rele(idt);
	idb->idb_status = status;

	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
		idm_buf_unbind_in_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		(*idb->idb_buf_cb)(idb, status);
		return;
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * To keep things simple we will ignore the case where the
		 * transfer was successful and leave all buffers bound to the
		 * task.  This allows us to also ignore the case where we've
		 * been asked to abort a task but the last transfer of the
		 * task has completed.  IDM has no idea whether this was, in
		 * fact, the last transfer of the task so it would be difficult
		 * to handle this case.  Everything should get sorted out again
		 * after task reassignment is complete.
		 *
		 * In the case of TASK_ABORTING we could conceivably call the
		 * buffer callback here but the timing of when the client's
		 * client_task_aborted callback is invoked vs. when the client's
		 * buffer callback gets invoked gets sticky.  We don't want
		 * the client to hear from us again after the call to
		 * client_task_aborted() but we don't want to give it a bunch
		 * of failed buffer transfers until we've called
		 * client_task_aborted().  Instead we'll just leave all the
		 * buffers bound and allow the client to cleanup.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_rx_from_ini_done
 *
 * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_rx_from_ini
 *
 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 * idt may be freed after the call to idb->idb_buf_cb.
 */
void
idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
	ASSERT(mutex_owned(&idt->idt_mutex));
	idb->idb_in_transport = B_FALSE;
	idt->idt_rx_from_ini_done++;
	gethrestime(&idb->idb_xfer_done);

	/*
	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
	 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
	 * to 0.
	 */
	idm_task_rele(idt);
	idb->idb_status = status;

	if (status == IDM_STATUS_SUCCESS) {
		/*
		 * Buffer should not contain the pattern.  If it does then
		 * we did not get the data from the remote host.
		 */
		IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
	}

	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
		idm_buf_unbind_out_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		(*idb->idb_buf_cb)(idb, status);
		return;
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * To keep things simple we will ignore the case where the
		 * transfer was successful and leave all buffers bound to the
		 * task.  This allows us to also ignore the case where we've
		 * been asked to abort a task but the last transfer of the
		 * task has completed.  IDM has no idea whether this was, in
		 * fact, the last transfer of the task so it would be difficult
		 * to handle this case.  Everything should get sorted out again
		 * after task reassignment is complete.
		 *
		 * In the case of TASK_ABORTING we could conceivably call the
		 * buffer callback here but the timing of when the client's
		 * client_task_aborted callback is invoked vs. when the client's
		 * buffer callback gets invoked gets sticky.  We don't want
		 * the client to hear from us again after the call to
		 * client_task_aborted() but we don't want to give it a bunch
		 * of failed buffer transfers until we've called
		 * client_task_aborted().  Instead we'll just leave all the
		 * buffers bound and allow the client to cleanup.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}

/*
 * idm_buf_alloc
 *
 * Allocates a buffer handle and registers it for use with the transport
 * layer.  If a buffer is not passed on bufptr, the buffer will be allocated
 * as well as the handle.
 *
 * ic	  - connection on which the buffer will be transferred
 * bufptr - allocate memory for buffer if NULL, else assign to buffer
 * buflen - length of buffer
 *
 * Returns idm_buf_t handle if successful, otherwise NULL
 */
idm_buf_t *
idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
{
	idm_buf_t	*buf = NULL;
	int		rc;

	ASSERT(ic != NULL);
	ASSERT(idm.idm_buf_cache != NULL);
	ASSERT(buflen > 0);

	/* Don't allocate new buffers if we are not in FFP */
	mutex_enter(&ic->ic_state_mutex);
	if (!ic->ic_ffp) {
		mutex_exit(&ic->ic_state_mutex);
		return (NULL);
	}

	idm_conn_hold(ic);
	mutex_exit(&ic->ic_state_mutex);

	buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
	if (buf == NULL) {
		idm_conn_rele(ic);
		return (NULL);
	}

	buf->idb_ic = ic;
	buf->idb_buflen = buflen;
	buf->idb_exp_offset = 0;
	buf->idb_bufoffset = 0;
	buf->idb_xfer_len = 0;
	buf->idb_magic = IDM_BUF_MAGIC;
	buf->idb_in_transport = B_FALSE;
	buf->idb_bufbcopy = B_FALSE;

	/*
	 * If bufptr is NULL, we have an implicit request to allocate
	 * memory for this IDM buffer handle and register it for use
	 * with the transport.  To simplify this, and to give more freedom
	 * to the transport layer for its own buffer management, both of
	 * these actions will take place in the transport layer.
	 * If bufptr is set, then the caller has allocated memory (or more
	 * likely it's been passed from an upper layer), and we need only
	 * register the buffer for use with the transport layer.
	 */
	if (bufptr == NULL) {
		/*
		 * Allocate a buffer from the transport layer (which
		 * will also register the buffer for use).
		 */
		rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
		if (rc != 0) {
			idm_conn_rele(ic);
			kmem_cache_free(idm.idm_buf_cache, buf);
			return (NULL);
		}
		/* Set the bufalloc'd flag */
		buf->idb_bufalloc = B_TRUE;
	} else {
		/*
		 * For large transfers, set the passed bufptr into
		 * the buf handle, and register the handle with the
		 * transport layer.  As memory registration with the
		 * transport layer is a time/cpu intensive operation,
		 * for small transfers (up to a pre-defined bcopy
		 * threshold), use pre-registered memory buffers
		 * and bcopy data at the appropriate time.
		 */
		buf->idb_buf = bufptr;

		rc = ic->ic_transport_ops->it_buf_setup(buf);
		if (rc != 0) {
			idm_conn_rele(ic);
			kmem_cache_free(idm.idm_buf_cache, buf);
			return (NULL);
		}
		/*
		 * The transport layer is now expected to set the idb_bufalloc
		 * correctly to indicate if resources have been allocated.
		 */
	}

	IDM_BUFPAT_SET(buf);

	return (buf);
}

/*
 * idm_buf_free
 *
 * Release a buffer handle along with the associated buffer that was allocated
 * or assigned with idm_buf_alloc
 */
void
idm_buf_free(idm_buf_t *buf)
{
	idm_conn_t *ic = buf->idb_ic;

	buf->idb_task_binding = NULL;

	if (buf->idb_bufalloc) {
		ic->ic_transport_ops->it_buf_free(buf);
	} else {
		ic->ic_transport_ops->it_buf_teardown(buf);
	}
	kmem_cache_free(idm.idm_buf_cache, buf);
	idm_conn_rele(ic);
}

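/*
 * A note on the two idm_buf_alloc() modes described above (a sketch, with
 * "data" standing in for an assumed caller-owned region):
 *
 *	idb = idm_buf_alloc(ic, NULL, 8192);
 *		transport allocates and registers the memory; the handle
 *		owns it and idm_buf_free() releases it via it_buf_free
 *
 *	idb = idm_buf_alloc(ic, data, 8192);
 *		caller supplies the memory; IDM only sets up the handle
 *		and registration, torn down via it_buf_teardown on free
 */
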
This is only for use by the 1085 * iSCSI initiator that will have only one buffer per transfer direction 1086 * 1087 */ 1088 void 1089 idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf) 1090 { 1091 mutex_enter(&idt->idt_mutex); 1092 idm_buf_bind_in_locked(idt, buf); 1093 mutex_exit(&idt->idt_mutex); 1094 } 1095 1096 static void 1097 idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf) 1098 { 1099 buf->idb_task_binding = idt; 1100 buf->idb_ic = idt->idt_ic; 1101 idm_listbuf_insert(&idt->idt_inbufv, buf); 1102 } 1103 1104 void 1105 idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf) 1106 { 1107 /* 1108 * For small transfers, the iSER transport delegates the IDM 1109 * layer to bcopy the SCSI Write data for faster IOPS. 1110 */ 1111 if (buf->idb_bufbcopy == B_TRUE) { 1112 1113 bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen); 1114 } 1115 mutex_enter(&idt->idt_mutex); 1116 idm_buf_bind_out_locked(idt, buf); 1117 mutex_exit(&idt->idt_mutex); 1118 } 1119 1120 static void 1121 idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf) 1122 { 1123 buf->idb_task_binding = idt; 1124 buf->idb_ic = idt->idt_ic; 1125 idm_listbuf_insert(&idt->idt_outbufv, buf); 1126 } 1127 1128 void 1129 idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf) 1130 { 1131 /* 1132 * For small transfers, the iSER transport delegates the IDM 1133 * layer to bcopy the SCSI Read data into the read buufer 1134 * for faster IOPS. 1135 */ 1136 if (buf->idb_bufbcopy == B_TRUE) { 1137 bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen); 1138 } 1139 mutex_enter(&idt->idt_mutex); 1140 idm_buf_unbind_in_locked(idt, buf); 1141 mutex_exit(&idt->idt_mutex); 1142 } 1143 1144 static void 1145 idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf) 1146 { 1147 list_remove(&idt->idt_inbufv, buf); 1148 } 1149 1150 void 1151 idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf) 1152 { 1153 mutex_enter(&idt->idt_mutex); 1154 idm_buf_unbind_out_locked(idt, buf); 1155 mutex_exit(&idt->idt_mutex); 1156 } 1157 1158 static void 1159 idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf) 1160 { 1161 list_remove(&idt->idt_outbufv, buf); 1162 } 1163 1164 /* 1165 * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the 1166 * iSCSI PDU 1167 */ 1168 idm_buf_t * 1169 idm_buf_find(void *lbuf, size_t data_offset) 1170 { 1171 idm_buf_t *idb; 1172 list_t *lst = (list_t *)lbuf; 1173 1174 /* iterate through the list to find the buffer */ 1175 for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) { 1176 1177 ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) || 1178 (idb->idb_bufoffset == 0)); 1179 1180 if ((data_offset >= idb->idb_bufoffset) && 1181 (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) { 1182 1183 return (idb); 1184 } 1185 } 1186 1187 return (NULL); 1188 } 1189 1190 void 1191 idm_bufpat_set(idm_buf_t *idb) 1192 { 1193 idm_bufpat_t *bufpat; 1194 int len, i; 1195 1196 len = idb->idb_buflen; 1197 len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t); 1198 1199 bufpat = idb->idb_buf; 1200 for (i = 0; i < len; i += sizeof (idm_bufpat_t)) { 1201 bufpat->bufpat_idb = idb; 1202 bufpat->bufpat_bufmagic = IDM_BUF_MAGIC; 1203 bufpat->bufpat_offset = i; 1204 bufpat++; 1205 } 1206 } 1207 1208 boolean_t 1209 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type) 1210 { 1211 idm_bufpat_t *bufpat; 1212 int len, i; 1213 1214 len = (type == BP_CHECK_QUICK) ? 
boolean_t
idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
{
	idm_bufpat_t	*bufpat;
	int		len, i;

	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
	ASSERT(len <= idb->idb_buflen);
	bufpat = idb->idb_buf;

	/*
	 * Don't check the pattern in buffers that came from outside IDM
	 * (these will be buffers from the initiator that we opted not
	 * to double-buffer)
	 */
	if (!idb->idb_bufalloc)
		return (B_FALSE);

	/*
	 * Return true if we find the pattern anywhere in the buffer
	 */
	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
		if (BUFPAT_MATCH(bufpat, idb)) {
			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
			    "idb %p bufpat %p "
			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
			DTRACE_PROBE2(bufpat__pattern__found,
			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
			if (type == BP_CHECK_ASSERT) {
				ASSERT(0);
			}
			return (B_TRUE);
		}
		bufpat++;
	}

	return (B_FALSE);
}

/*
 * idm_task_alloc
 *
 * This function will allocate an idm_task_t structure.  A task tag is also
 * generated and saved in idt_tt.  The task is not active.
 */
idm_task_t *
idm_task_alloc(idm_conn_t *ic)
{
	idm_task_t	*idt;

	ASSERT(ic != NULL);

	/* Don't allocate new tasks if we are not in FFP */
	mutex_enter(&ic->ic_state_mutex);
	if (!ic->ic_ffp) {
		mutex_exit(&ic->ic_state_mutex);
		return (NULL);
	}
	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
	if (idt == NULL) {
		mutex_exit(&ic->ic_state_mutex);
		return (NULL);
	}

	ASSERT(list_is_empty(&idt->idt_inbufv));
	ASSERT(list_is_empty(&idt->idt_outbufv));

	idm_conn_hold(ic);
	mutex_exit(&ic->ic_state_mutex);

	idt->idt_state = TASK_IDLE;
	idt->idt_ic = ic;
	idt->idt_private = NULL;
	idt->idt_exp_datasn = 0;
	idt->idt_exp_rttsn = 0;

	return (idt);
}

/*
 * idm_task_start
 *
 * Mark the task active and initialize some stats.  The caller
 * sets up the idm_task_t structure with a prior call to idm_task_alloc().
 * The task service does not function as a task/work engine; it is the
 * responsibility of the initiator to start the data transfer and free the
 * resources.
 */
void
idm_task_start(idm_task_t *idt, uintptr_t handle)
{
	ASSERT(idt != NULL);

	/* mark the task as ACTIVE */
	idt->idt_state = TASK_ACTIVE;
	idt->idt_client_handle = handle;
	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
	    idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
}

/*
 * idm_task_done
 *
 * This function sets the state to indicate that the task is no longer active.
 */
void
idm_task_done(idm_task_t *idt)
{
	ASSERT(idt != NULL);

	mutex_enter(&idt->idt_mutex);
	idt->idt_state = TASK_IDLE;
	mutex_exit(&idt->idt_mutex);

	/*
	 * Although unlikely it is possible for a reference to come in after
	 * the client has decided the task is over but before we've marked
	 * the task idle.  One specific unavoidable scenario is the case where
	 * a received PDU with the matching ITT/TTT results in a successful
	 * lookup of this task.  We are at the mercy of the remote node in
	 * that case so we need to handle it.
	 * Now that the task state has changed no more references will occur
	 * so a simple call to idm_refcnt_wait_ref should deal with the
	 * situation.
	 */
	idm_refcnt_wait_ref(&idt->idt_refcnt);
	idm_refcnt_reset(&idt->idt_refcnt);
}

/*
 * idm_task_free
 *
 * This function will free the Task Tag and the memory allocated for the
 * task.  idm_task_done should be called prior to this call.
 */
void
idm_task_free(idm_task_t *idt)
{
	idm_conn_t *ic;

	ASSERT(idt != NULL);
	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
	ASSERT(idt->idt_state == TASK_IDLE);

	ic = idt->idt_ic;

	/*
	 * It's possible for items to still be in the idt_inbufv list if
	 * they were added after idm_task_cleanup was called.  We rely on
	 * STMF to free all buffers associated with the task however STMF
	 * doesn't know that we have this reference to the buffers.
	 * Use list_create so that we don't end up with stale references
	 * to these buffers.
	 */
	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));
	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));

	kmem_cache_free(idm.idm_task_cache, idt);

	idm_conn_rele(ic);
}

/*
 * idm_task_find_common
 *	common code for idm_task_find() and idm_task_find_and_complete()
 */
/*ARGSUSED*/
static idm_task_t *
idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
    boolean_t complete)
{
	uint32_t	tt, client_handle;
	idm_task_t	*idt;

	/*
	 * Must match both itt and ttt.  The table is indexed by itt
	 * for initiator connections and ttt for target connections.
	 */
	if (IDM_CONN_ISTGT(ic)) {
		tt = ttt;
		client_handle = itt;
	} else {
		tt = itt;
		client_handle = ttt;
	}

	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
	if (tt >= idm.idm_taskid_max) {
		rw_exit(&idm.idm_taskid_table_lock);
		return (NULL);
	}

	idt = idm.idm_taskid_table[tt];

	if (idt != NULL) {
		mutex_enter(&idt->idt_mutex);
		if ((idt->idt_state != TASK_ACTIVE) ||
		    (idt->idt_ic != ic) ||
		    (IDM_CONN_ISTGT(ic) &&
		    (idt->idt_client_handle != client_handle))) {
			/*
			 * Task doesn't match or task is aborting and
			 * we don't want any more references.
			 */
			if ((idt->idt_ic != ic) &&
			    (idt->idt_state == TASK_ACTIVE) &&
			    (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
			    client_handle)) {
				IDM_CONN_LOG(CE_WARN,
				    "idm_task_find: wrong connection %p != %p",
				    (void *)ic, (void *)idt->idt_ic);
			}
			mutex_exit(&idt->idt_mutex);
			rw_exit(&idm.idm_taskid_table_lock);
			return (NULL);
		}
		idm_task_hold(idt);
		/*
		 * Set the task state to TASK_COMPLETE so it can no longer
		 * be found or aborted.
		 */
		if (B_TRUE == complete)
			idt->idt_state = TASK_COMPLETE;
		mutex_exit(&idt->idt_mutex);
	}
	rw_exit(&idm.idm_taskid_table_lock);

	return (idt);
}

/*
 * This function looks up a task by task tag.
 */
idm_task_t *
idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
	return (idm_task_find_common(ic, itt, ttt, B_FALSE));
}

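/*
 * Illustrative sketch (not part of this module): the normal task life
 * cycle as a hypothetical client might drive it.  The handle and the
 * transfer details are abbreviated assumptions.
 *
 *	idm_task_t *idt;
 *
 *	idt = idm_task_alloc(ic);
 *	if (idt == NULL)
 *		return;
 *	idm_task_start(idt, (uintptr_t)my_handle);
 *	... data transfers; note that idm_task_find() returns the task
 *	    with a hold that must be dropped with idm_task_rele() ...
 *	idm_task_done(idt);
 *	idm_task_free(idt);
 */
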
/*
 * This function looks up a task by task tag.  If found, the task state
 * is atomically set to TASK_COMPLETE so it can no longer be found or aborted.
 */
idm_task_t *
idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
	return (idm_task_find_common(ic, itt, ttt, B_TRUE));
}

/*
 * idm_task_find_by_handle
 *
 * This function looks up a task by the client-private idt_client_handle.
 *
 * This function should NEVER be called in the performance path.  It is
 * intended strictly for error recovery/task management.
 */
/*ARGSUSED*/
void *
idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
{
	idm_task_t	*idt = NULL;
	int		idx = 0;

	rw_enter(&idm.idm_taskid_table_lock, RW_READER);

	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
		idt = idm.idm_taskid_table[idx];

		if (idt == NULL)
			continue;

		mutex_enter(&idt->idt_mutex);

		if (idt->idt_state != TASK_ACTIVE) {
			/*
			 * Task is either in suspend, abort, or already
			 * complete.
			 */
			mutex_exit(&idt->idt_mutex);
			continue;
		}

		if (idt->idt_client_handle == handle) {
			idm_task_hold(idt);
			mutex_exit(&idt->idt_mutex);
			break;
		}

		mutex_exit(&idt->idt_mutex);
	}

	rw_exit(&idm.idm_taskid_table_lock);

	if ((idt == NULL) || (idx == idm.idm_taskid_max))
		return (NULL);

	return (idt->idt_private);
}

void
idm_task_hold(idm_task_t *idt)
{
	idm_refcnt_hold(&idt->idt_refcnt);
}

void
idm_task_rele(idm_task_t *idt)
{
	idm_refcnt_rele(&idt->idt_refcnt);
}

void
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	idm_task_t	*task;
	int		idx;

	/*
	 * Passing NULL as the task indicates that all tasks
	 * for this connection should be aborted.
	 */
	if (idt == NULL) {
		/*
		 * Only the connection state machine should ask for
		 * all tasks to abort and this should never happen in FFP.
		 */
		ASSERT(!ic->ic_ffp);
		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
			task = idm.idm_taskid_table[idx];
			if (task == NULL)
				continue;
			mutex_enter(&task->idt_mutex);
			if ((task->idt_state != TASK_IDLE) &&
			    (task->idt_state != TASK_COMPLETE) &&
			    (task->idt_ic == ic)) {
				rw_exit(&idm.idm_taskid_table_lock);
				idm_task_abort_one(ic, task, abort_type);
				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
			} else
				mutex_exit(&task->idt_mutex);
		}
		rw_exit(&idm.idm_taskid_table_lock);
	} else {
		mutex_enter(&idt->idt_mutex);
		idm_task_abort_one(ic, idt, abort_type);
	}
}

static void
idm_task_abort_unref_cb(void *ref)
{
	idm_task_t *idt = ref;

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_SUSPENDING:
		idt->idt_state = TASK_SUSPENDED;
		mutex_exit(&idt->idt_mutex);
		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
		return;
	case TASK_ABORTING:
		idt->idt_state = TASK_ABORTED;
		mutex_exit(&idt->idt_mutex);
		idm_task_aborted(idt, IDM_STATUS_ABORTED);
		return;
	default:
		mutex_exit(&idt->idt_mutex);
		ASSERT(0);
		break;
	}
}

/*
 * Abort the idm task.
 * Caller must hold the task mutex, which will be released before return
 */
static void
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	/* Caller must hold the task mutex */
	ASSERT(mutex_owned(&idt->idt_mutex));
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Call transport to release any resources */
			idt->idt_state = TASK_SUSPENDING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references.  When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references.  When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDING:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDED:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);

			/*
			 * We could probably call idm_task_aborted directly
			 * here but we may be holding the conn lock.  It's
			 * easier to just switch contexts.  Even though
			 * we shouldn't really have any references we'll
			 * set the state to TASK_ABORTING instead of
			 * TASK_ABORTED so we can use the same code path.
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		default:
			ASSERT(0);
		}
		break;
	case TASK_ABORTING:
	case TASK_ABORTED:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* We're already past this point... */
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGMT_ABORT:
			/* Already doing it */
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_COMPLETE:
		/*
		 * In this case, let it go.  The status has already been
		 * sent (which may or may not get successfully transmitted)
		 * and we don't want to end up in a race between completing
		 * the status PDU and marking the task suspended.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}

static void
idm_task_aborted(idm_task_t *idt, idm_status_t status)
{
	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
}

void
idm_task_cleanup(idm_task_t *idt)
{
	idm_buf_t *idb, *next_idb;
	list_t tmp_buflist;

	ASSERT((idt->idt_state == TASK_SUSPENDED) ||
	    (idt->idt_state == TASK_ABORTED));

	list_create(&tmp_buflist, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));

	/*
	 * Remove all the buffers from the task and add them to a
	 * temporary local list -- we do this so that we can hold
	 * the task lock and prevent the task from going away if
	 * the client decides to call idm_task_done/idm_task_free.
	 * This could happen during abort in iscsit.
	 */
	mutex_enter(&idt->idt_mutex);
	for (idb = list_head(&idt->idt_inbufv);
	    idb != NULL;
	    idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		idm_buf_unbind_in_locked(idt, idb);
		list_insert_tail(&tmp_buflist, idb);
	}

	for (idb = list_head(&idt->idt_outbufv);
	    idb != NULL;
	    idb = next_idb) {
		next_idb = list_next(&idt->idt_outbufv, idb);
		idm_buf_unbind_out_locked(idt, idb);
		list_insert_tail(&tmp_buflist, idb);
	}
	mutex_exit(&idt->idt_mutex);

	for (idb = list_head(&tmp_buflist); idb != NULL; idb = next_idb) {
		next_idb = list_next(&tmp_buflist, idb);
		list_remove(&tmp_buflist, idb);
		(*idb->idb_buf_cb)(idb, IDM_STATUS_ABORTED);
	}
	list_destroy(&tmp_buflist);
}


/*
 * idm_pdu_tx
 *
 * This is IDM's implementation of the 'Send_Control' operational primitive.
 * This function is invoked by an initiator iSCSI layer requesting the transfer
 * of an iSCSI command PDU or a target iSCSI layer requesting the transfer of
 * an iSCSI response PDU.  The PDU will be transmitted as-is by the local
 * Datamover layer to the peer iSCSI layer in the remote iSCSI node.  The
 * connection info and iSCSI PDU-specific qualifiers namely BHS, AHS,
 * DataDescriptor and Size are provided as input.
 *
 */
void
idm_pdu_tx(idm_pdu_t *pdu)
{
	idm_conn_t *ic = pdu->isp_ic;
	iscsi_async_evt_hdr_t *async_evt;

	/*
	 * If we are in full-featured mode then route SCSI-related
	 * commands to the appropriate function vector without checking
	 * the connection state.  We will only be in full-feature mode
	 * when we are in an acceptable state for SCSI PDU's.
	 *
	 * We also need to ensure that there are no PDU events outstanding
	 * on the state machine.  Any non-SCSI PDU's received in full-feature
	 * mode will result in PDU events and until these have been handled
	 * we need to route all PDU's through the state machine as PDU
	 * events to maintain ordering.
	 *
	 * Note that IDM cannot enter FFP mode until it processes in
	 * its state machine the last xmit of the login process.
	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
	 * superfluous.
	 */
	mutex_enter(&ic->ic_state_mutex);
	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
		mutex_exit(&ic->ic_state_mutex);
		switch (IDM_PDU_OPCODE(pdu)) {
		case ISCSI_OP_SCSI_RSP:
			/* Target only */
			DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
			    iscsi_scsi_rsp_hdr_t *,
			    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_SCSI_TASK_MGT_RSP:
			/* Target only */
			DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
			    iscsi_scsi_task_mgt_rsp_hdr_t *,
			    (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_SCSI_DATA_RSP:
			/* Target only */
			DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
			    iscsi_data_rsp_hdr_t *,
			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_RTT_RSP:
			/* Target only */
			DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
			    iscsi_rtt_hdr_t *,
			    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_NOOP_IN:
			/* Target only */
			DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
			    iscsi_nop_in_hdr_t *,
			    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_TEXT_RSP:
			/* Target only */
			DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
			    iscsi_text_rsp_hdr_t *,
			    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
			idm_pdu_tx_forward(ic, pdu);
			return;
		case ISCSI_OP_TEXT_CMD:
		case ISCSI_OP_NOOP_OUT:
		case ISCSI_OP_SCSI_CMD:
		case ISCSI_OP_SCSI_DATA:
		case ISCSI_OP_SCSI_TASK_MGT_MSG:
			/* Initiator only */
			idm_pdu_tx_forward(ic, pdu);
			return;
		default:
			break;
		}

		mutex_enter(&ic->ic_state_mutex);
	}

	/*
	 * Any PDU's processed outside of full-feature mode and non-SCSI
	 * PDU's in full-feature mode are handled by generating an
	 * event to the connection state machine.  The state machine
	 * will validate the PDU against the current state and either
	 * transmit the PDU if the opcode is allowed or handle an
	 * error if the PDU is not allowed.
	 *
	 * This code-path will also generate any events that are implied
	 * by the PDU opcode.  For example a "logout response" with success
	 * status generates a CE_LOGOUT_SUCCESS_SND event.
	 */
	switch (IDM_PDU_OPCODE(pdu)) {
	case ISCSI_OP_LOGIN_CMD:
		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
		break;
	case ISCSI_OP_LOGIN_RSP:
		DTRACE_ISCSI_2(login__response, idm_conn_t *, ic,
		    iscsi_login_rsp_hdr_t *,
		    (iscsi_login_rsp_hdr_t *)pdu->isp_hdr);
		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
		break;
	case ISCSI_OP_LOGOUT_CMD:
		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
		break;
	case ISCSI_OP_LOGOUT_RSP:
		DTRACE_ISCSI_2(logout__response, idm_conn_t *, ic,
		    iscsi_logout_rsp_hdr_t *,
		    (iscsi_logout_rsp_hdr_t *)pdu->isp_hdr);
		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
		break;
	case ISCSI_OP_ASYNC_EVENT:
		DTRACE_ISCSI_2(async__send, idm_conn_t *, ic,
		    iscsi_async_evt_hdr_t *,
		    (iscsi_async_evt_hdr_t *)pdu->isp_hdr);
		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
		switch (async_evt->async_event) {
		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
			    (uintptr_t)pdu);
			break;
		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
		default:
			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
			    (uintptr_t)pdu);
			break;
		}
		break;
	case ISCSI_OP_SCSI_RSP:
		/* Target only */
		DTRACE_ISCSI_2(scsi__response, idm_conn_t *, ic,
		    iscsi_scsi_rsp_hdr_t *,
		    (iscsi_scsi_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_SCSI_TASK_MGT_RSP:
		/* Target only */
		DTRACE_ISCSI_2(task__response, idm_conn_t *, ic,
		    iscsi_scsi_task_mgt_rsp_hdr_t *,
		    (iscsi_scsi_task_mgt_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_SCSI_DATA_RSP:
		/* Target only */
		DTRACE_ISCSI_2(data__send, idm_conn_t *, ic,
		    iscsi_data_rsp_hdr_t *,
		    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_RTT_RSP:
		/* Target only */
		DTRACE_ISCSI_2(data__request, idm_conn_t *, ic,
		    iscsi_rtt_hdr_t *,
		    (iscsi_rtt_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_NOOP_IN:
		/* Target only */
		DTRACE_ISCSI_2(nop__send, idm_conn_t *, ic,
		    iscsi_nop_in_hdr_t *,
		    (iscsi_nop_in_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	case ISCSI_OP_TEXT_RSP:
		/* Target only */
		DTRACE_ISCSI_2(text__response, idm_conn_t *, ic,
		    iscsi_text_rsp_hdr_t *,
		    (iscsi_text_rsp_hdr_t *)pdu->isp_hdr);
		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
		break;
	/* Initiator only */
	case ISCSI_OP_SCSI_CMD:
	case ISCSI_OP_SCSI_TASK_MGT_MSG:
	case ISCSI_OP_SCSI_DATA:
	case ISCSI_OP_NOOP_OUT:
	case ISCSI_OP_TEXT_CMD:
	case ISCSI_OP_SNACK_CMD:
	case ISCSI_OP_REJECT_MSG:
	default:
		/*
		 * Connection state machine will validate these PDU's against
		 * the current state.  A PDU not allowed in the current
		 * state will cause a protocol error.
/*
 * Common allocation of a PDU along with memory for header and data.
 */
static idm_pdu_t *
idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
{
	idm_pdu_t *result;

	/*
	 * IDM clients should cache these structures for performance-
	 * critical paths.  We can't cache effectively in IDM because we
	 * don't know the correct header and data size.
	 *
	 * Valid header length is assumed to be hdrlen and valid data
	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
	 * can be adjusted after the PDU is returned if necessary.
	 */
	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
	if (result != NULL) {
		/* For idm_pdu_free sanity check */
		result->isp_flags |= IDM_PDU_ALLOC;
		/* pointer arithmetic */
		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
		result->isp_hdrlen = hdrlen;
		result->isp_hdrbuflen = hdrlen;
		result->isp_transport_hdrlen = 0;
		if (datalen != 0)
			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
		result->isp_datalen = datalen;
		result->isp_databuflen = datalen;
		result->isp_magic = IDM_PDU_MAGIC;
	}

	return (result);
}

/*
 * Typical idm_pdu_alloc invocation; will block for resources.
 */
idm_pdu_t *
idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
}

/*
 * Non-blocking idm_pdu_alloc implementation; returns NULL if resources
 * are not available.  Needed for transport-layer allocations, which may
 * be invoked in interrupt context.
 */
idm_pdu_t *
idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
{
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
}
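/*
 * Illustrative sketch only (editorial addition, hypothetical IDM_EXAMPLES
 * guard): callers that may run in interrupt context must use the
 * non-blocking allocator and handle allocation failure explicitly.
 */
#ifdef IDM_EXAMPLES
static idm_pdu_t *
example_intr_pdu_alloc(void)
{
	idm_pdu_t *pdu;

	pdu = idm_pdu_alloc_nosleep(sizeof (iscsi_hdr_t), 0);
	if (pdu == NULL) {
		/* No memory available; the caller must retry or drop */
		return (NULL);
	}
	return (pdu);
}
#endif	/* IDM_EXAMPLES */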
/*
 * Free a PDU previously allocated with idm_pdu_alloc(), including any
 * header and data space allocated as part of the original request.
 * Additional memory regions referenced by subsequent modification of
 * the isp_hdr and/or isp_data fields will not be freed.
 */
void
idm_pdu_free(idm_pdu_t *pdu)
{
	/* Make sure the structure was allocated using idm_pdu_alloc() */
	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
	kmem_free(pdu,
	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
}

/*
 * Initialize the connection, private and callback fields in a PDU.
 */
void
idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
{
	/*
	 * idm_pdu_complete() will call idm_pdu_free() if the callback is
	 * NULL.  This will only work if the PDU was originally allocated
	 * with idm_pdu_alloc().
	 */
	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
	    (cb != NULL));
	pdu->isp_magic = IDM_PDU_MAGIC;
	pdu->isp_ic = ic;
	pdu->isp_private = private;
	pdu->isp_callback = cb;
}

/*
 * Initialize the header and header length field.  This function should
 * not be used to adjust the header length in a buffer allocated via
 * idm_pdu_alloc() since it overwrites the existing header pointer.
 */
void
idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
{
	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
	pdu->isp_hdrlen = hdrlen;
}

/*
 * Initialize the data and data length fields.  This function should
 * not be used to adjust the data length of a buffer allocated via
 * idm_pdu_alloc() since it overwrites the existing data pointer.
 */
void
idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
{
	pdu->isp_data = data;
	pdu->isp_datalen = datalen;
}

void
idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
{
	if (pdu->isp_callback) {
		pdu->isp_status = status;
		(*pdu->isp_callback)(pdu, status);
	} else {
		idm_pdu_free(pdu);
	}
}

/*
 * State machine auditing
 */

void
idm_sm_audit_init(sm_audit_buf_t *audit_buf)
{
	bzero(audit_buf, sizeof (sm_audit_buf_t));
	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
}

static sm_audit_record_t *
idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
    sm_audit_sm_type_t sm_type,
    int current_state)
{
	sm_audit_record_t *sar;

	/*
	 * Records live in a circular buffer; the index wraps by masking
	 * with sab_max_index, which assumes SM_AUDIT_BUF_MAX_REC is a
	 * power of two.
	 */
	sar = audit_buf->sab_records;
	sar += audit_buf->sab_index;
	audit_buf->sab_index++;
	audit_buf->sab_index &= audit_buf->sab_max_index;

	sar->sar_type = r_type;
	gethrestime(&sar->sar_timestamp);
	sar->sar_sm_type = sm_type;
	sar->sar_state = current_state;

	return (sar);
}

void
idm_sm_audit_event(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state,
    int event, uintptr_t event_info)
{
	sm_audit_record_t *sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
	    sm_type, current_state);
	sar->sar_event = event;
	sar->sar_event_info = event_info;
}

void
idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
    sm_audit_sm_type_t sm_type, int current_state, int new_state)
{
	sm_audit_record_t *sar;

	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
	    sm_type, current_state);
	sar->sar_new_state = new_state;
}


/*
 * Object reference tracking
 */
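/*
 * Illustrative sketch only (editorial addition, hypothetical IDM_EXAMPLES
 * guard): the typical pattern pairs idm_refcnt_hold() and idm_refcnt_rele()
 * around each use of an object, then drains all holds before teardown.
 * The example_obj_t type and example_* functions are hypothetical.
 */
#ifdef IDM_EXAMPLES
typedef struct example_obj {
	idm_refcnt_t	eo_refcnt;
	/* ... client-specific state ... */
} example_obj_t;

static void
example_obj_teardown(example_obj_t *eo)
{
	/* Block until every idm_refcnt_hold() has been released */
	idm_refcnt_wait_ref(&eo->eo_refcnt);
	idm_refcnt_destroy(&eo->eo_refcnt);
	kmem_free(eo, sizeof (*eo));
}
#endif	/* IDM_EXAMPLES */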
void
idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
{
	bzero(refcnt, sizeof (*refcnt));
	idm_refcnt_reset(refcnt);
	refcnt->ir_referenced_obj = referenced_obj;
	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
}

void
idm_refcnt_destroy(idm_refcnt_t *refcnt)
{
	/*
	 * Grab the mutex to ensure there are no other lingering threads
	 * holding the mutex before we destroy it (e.g. idm_refcnt_rele
	 * just after the refcnt goes to zero if ir_waiting ==
	 * REF_WAIT_ASYNC).
	 */
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt == 0);
	mutex_exit(&refcnt->ir_mutex);
	cv_destroy(&refcnt->ir_cv);
	mutex_destroy(&refcnt->ir_mutex);
}

void
idm_refcnt_reset(idm_refcnt_t *refcnt)
{
	refcnt->ir_waiting = REF_NOWAIT;
	refcnt->ir_refcnt = 0;
}

void
idm_refcnt_hold(idm_refcnt_t *refcnt)
{
	/*
	 * Nothing should take a hold on an object after a call to
	 * idm_refcnt_wait_ref or idm_refcnt_async_wait_ref.
	 */
	ASSERT(refcnt->ir_waiting == REF_NOWAIT);

	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_refcnt++;
	REFCNT_AUDIT(refcnt);
	mutex_exit(&refcnt->ir_mutex);
}

static void
idm_refcnt_unref_task(void *refcnt_void)
{
	idm_refcnt_t *refcnt = refcnt_void;

	REFCNT_AUDIT(refcnt);
	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
}

void
idm_refcnt_rele(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);
	if (refcnt->ir_waiting == REF_NOWAIT) {
		/* No one is waiting on this object */
		mutex_exit(&refcnt->ir_mutex);
		return;
	}

	/*
	 * Someone is waiting for this object to go idle, so check if
	 * refcnt is 0.  Waiting on an object then later grabbing another
	 * reference is not allowed, so we don't need to handle that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
			if (taskq_dispatch(idm.idm_global_taskq,
			    &idm_refcnt_unref_task, refcnt,
			    TQ_SLEEP) == NULL) {
				cmn_err(CE_WARN,
				    "idm_refcnt_rele: Couldn't dispatch task");
			}
		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
			cv_signal(&refcnt->ir_cv);
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	ASSERT(refcnt->ir_refcnt > 0);
	refcnt->ir_refcnt--;
	REFCNT_AUDIT(refcnt);

	/*
	 * If the refcnt has gone to zero, dispatch the destroy callback
	 * from the global taskq.  Waiting on an object then later grabbing
	 * another reference is not allowed, so we don't need to handle
	 * that case.
	 */
	if (refcnt->ir_refcnt == 0) {
		refcnt->ir_cb = cb_func;
		refcnt->ir_waiting = REF_WAIT_ASYNC;
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
			cmn_err(CE_WARN,
			    "idm_refcnt_rele_and_destroy: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_SYNC;
	REFCNT_AUDIT(refcnt);
	while (refcnt->ir_refcnt != 0)
		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
	mutex_exit(&refcnt->ir_mutex);
}
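/*
 * Illustrative sketch only (editorial addition, hypothetical IDM_EXAMPLES
 * guard): the asynchronous variant below arranges for a callback from the
 * global taskq once the last reference is released, so the caller need not
 * block.  example_obj_t is from the previous sketch; example_obj_unref()
 * is hypothetical and matches the idm_refcnt_cb_t signature.
 */
#ifdef IDM_EXAMPLES
static void
example_obj_unref(void *obj)
{
	example_obj_t *eo = obj;

	idm_refcnt_destroy(&eo->eo_refcnt);
	kmem_free(eo, sizeof (*eo));
}

static void
example_obj_teardown_async(example_obj_t *eo)
{
	/* example_obj_unref() fires when the refcnt reaches zero */
	idm_refcnt_async_wait_ref(&eo->eo_refcnt, example_obj_unref);
}
#endif	/* IDM_EXAMPLES */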
void
idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	refcnt->ir_waiting = REF_WAIT_ASYNC;
	refcnt->ir_cb = cb_func;
	REFCNT_AUDIT(refcnt);
	/*
	 * It's possible we don't have any references.  To make things
	 * easier on the caller, use a taskq to call the callback instead
	 * of calling it synchronously.
	 */
	if (refcnt->ir_refcnt == 0) {
		if (taskq_dispatch(idm.idm_global_taskq,
		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
			cmn_err(CE_WARN,
			    "idm_refcnt_async_wait_ref: "
			    "Couldn't dispatch task");
		}
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
    idm_refcnt_cb_t *cb_func)
{
	mutex_enter(&refcnt->ir_mutex);
	if (refcnt->ir_refcnt == 0) {
		mutex_exit(&refcnt->ir_mutex);
		(*cb_func)(refcnt->ir_referenced_obj);
		return;
	}
	mutex_exit(&refcnt->ir_mutex);
}

void
idm_conn_hold(idm_conn_t *ic)
{
	idm_refcnt_hold(&ic->ic_refcnt);
}

void
idm_conn_rele(idm_conn_t *ic)
{
	idm_refcnt_rele(&ic->ic_refcnt);
}

void
idm_conn_set_target_name(idm_conn_t *ic, char *target_name)
{
	(void) strlcpy(ic->ic_target_name, target_name,
	    ISCSI_MAX_NAME_LEN + 1);
}

void
idm_conn_set_initiator_name(idm_conn_t *ic, char *initiator_name)
{
	(void) strlcpy(ic->ic_initiator_name, initiator_name,
	    ISCSI_MAX_NAME_LEN + 1);
}

void
idm_conn_set_isid(idm_conn_t *ic, uint8_t isid[ISCSI_ISID_LEN])
{
	/* Render the 6-byte binary ISID as 12 hex digits */
	(void) snprintf(ic->ic_isid, ISCSI_MAX_ISID_LEN + 1,
	    "%02x%02x%02x%02x%02x%02x",
	    isid[0], isid[1], isid[2], isid[3], isid[4], isid[5]);
}

static int
_idm_init(void)
{
	/* Initialize the rwlock for the taskid table */
	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);

	/* Initialize the global mutex and taskq */
	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * The maximum allocation needs to be high here since there can be
	 * many concurrent tasks using the global taskq.
	 */
	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
	    128, 16384, TASKQ_PREPOPULATE);
	if (idm.idm_global_taskq == NULL) {
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Start watchdog thread */
	idm.idm_wd_thread = thread_create(NULL, 0,
	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
	if (idm.idm_wd_thread == NULL) {
		/* Couldn't create the watchdog thread */
		taskq_destroy(idm.idm_global_taskq);
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Pause until the watchdog thread is running */
	mutex_enter(&idm.idm_global_mutex);
	while (!idm.idm_wd_thread_running)
		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
	mutex_exit(&idm.idm_global_mutex);
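	/*
	 * Editorial note: idm_max_taskids defaults to IDM_TASKIDS_MAX but
	 * is a plain global, so on Solaris-derived systems it should be
	 * overridable at boot via /etc/system, e.g. (illustrative value
	 * only):
	 *
	 *	set idm:idm_max_taskids = 2048
	 */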
	/*
	 * Allocate the task ID table and set "next" to 0.
	 */
	idm.idm_taskid_max = idm_max_taskids;
	idm.idm_taskid_table = (idm_task_t **)
	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
	idm.idm_taskid_next = 0;

	/* Create the global buffer and task kmem caches */
	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);

	/*
	 * Note that we're explicitly allocating an additional iSER header-
	 * sized chunk for each of these elements.  See
	 * idm_task_constructor().
	 */
	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
	    &idm_task_constructor, &idm_task_destructor,
	    NULL, NULL, NULL, KM_SLEEP);

	/* Create the service and connection context lists */
	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
	    offsetof(idm_svc_t, is_list_node));
	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));
	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));

	/* Initialize the native sockets transport */
	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);

	/* Create connection ID pool */
	(void) idm_idpool_create(&idm.idm_conn_id_pool);

	return (DDI_SUCCESS);
}

static int
_idm_fini(void)
{
	if (!list_is_empty(&idm.idm_ini_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_svc_list)) {
		return (EBUSY);
	}

	mutex_enter(&idm.idm_global_mutex);
	idm.idm_wd_thread_running = B_FALSE;
	cv_signal(&idm.idm_wd_cv);
	mutex_exit(&idm.idm_global_mutex);

	thread_join(idm.idm_wd_thread_did);

	idm_idpool_destroy(&idm.idm_conn_id_pool);

	/* Close any LDI handles we have open on transport drivers */
	mutex_enter(&idm.idm_global_mutex);
	idm_transport_teardown();
	mutex_exit(&idm.idm_global_mutex);

	/* Tear down the native sockets transport */
	idm_so_fini();

	list_destroy(&idm.idm_ini_conn_list);
	list_destroy(&idm.idm_tgt_conn_list);
	list_destroy(&idm.idm_tgt_svc_list);
	kmem_cache_destroy(idm.idm_task_cache);
	kmem_cache_destroy(idm.idm_buf_cache);
	kmem_free(idm.idm_taskid_table,
	    idm.idm_taskid_max * sizeof (idm_task_t *));
	mutex_destroy(&idm.idm_global_mutex);
	cv_destroy(&idm.idm_wd_cv);
	cv_destroy(&idm.idm_tgt_svc_cv);
	rw_destroy(&idm.idm_taskid_table_lock);

	return (0);
}