1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/ddi.h> 27 #include <sys/sunddi.h> 28 29 #include <sys/socket.h> /* networking stuff */ 30 #include <sys/sysmacros.h> /* offsetof */ 31 32 #include <sys/ib/clients/iser/iser.h> 33 #include <sys/ib/clients/iser/iser_idm.h> 34 35 /* 36 * iSER transport routines 37 * 38 * All transport functions except iser_tgt_svc_create() are called through 39 * the ops vector, iser_tgt_svc_create() is called from the async handler 40 * inaddition to being called by the ULP 41 */ 42 43 static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu); 44 45 static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb); 46 static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb); 47 static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic); 48 static idm_status_t iser_ini_enable_datamover(idm_conn_t *ic); 49 static void iser_notice_key_values(struct idm_conn_s *ic, 50 nvlist_t *negotiated_nvl); 51 static idm_status_t iser_free_task_rsrcs(idm_task_t *idt); 52 static kv_status_t iser_negotiate_key_values(idm_conn_t *ic, 53 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 54 static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value, 55 const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value, 56 uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl, 57 nvlist_t *negotiated_nvl); 58 static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value, 59 const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl, 60 nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 61 static kv_status_t iser_handle_digest(nvpair_t *choices, 62 const idm_kv_xlate_t *ikvx, nvlist_t *request_nvl, nvlist_t *response_nvl, 63 nvlist_t *negotiated_nvl); 64 static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx, 65 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 66 static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl, 67 nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 68 static boolean_t iser_conn_is_capable(idm_conn_req_t *ic, 69 idm_transport_caps_t *caps); 70 static idm_status_t iser_buf_alloc(idm_buf_t *idb, uint64_t buflen); 71 static idm_status_t iser_buf_setup(idm_buf_t *idb); 72 static void iser_buf_teardown(idm_buf_t *idb); 73 static void iser_buf_free(idm_buf_t *idb); 74 static void iser_tgt_svc_destroy(struct idm_svc_s *is); 75 static idm_status_t iser_tgt_svc_online(struct idm_svc_s *is); 76 static void iser_tgt_svc_offline(struct idm_svc_s *is); 77 static idm_status_t iser_tgt_conn_connect(struct idm_conn_s *ic); 78 static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr, 79 struct idm_conn_s *ic); 80 static void iser_conn_destroy(struct idm_conn_s *ic); 81 static idm_status_t iser_ini_conn_connect(struct idm_conn_s *ic); 82 static void iser_conn_disconnect(struct idm_conn_s *ic); 83 84 /* 85 * iSER IDM transport operations 86 */ 87 idm_transport_ops_t iser_transport_ops = { 88 &iser_pdu_tx, /* it_tx_pdu */ 89 &iser_buf_tx_to_ini, /* it_buf_tx_to_ini */ 90 &iser_buf_rx_from_ini, /* it_buf_rx_from_ini */ 91 NULL, /* it_rx_datain */ 92 NULL, /* it_rx_rtt */ 93 NULL, /* it_rx_dataout */ 94 NULL, /* it_alloc_conn_rsrc */ 95 NULL, /* it_free_conn_rsrc */ 96 &iser_tgt_enable_datamover, /* it_tgt_enable_datamover */ 97 &iser_ini_enable_datamover, /* it_ini_enable_datamover */ 98 NULL, /* it_conn_terminate */ 99 &iser_free_task_rsrcs, /* it_free_task_rsrc */ 100 &iser_negotiate_key_values, /* it_negotiate_key_values */ 101 &iser_notice_key_values, /* it_notice_key_values */ 102 &iser_conn_is_capable, /* it_conn_is_capable */ 103 &iser_buf_alloc, /* it_buf_alloc */ 104 &iser_buf_free, /* it_buf_free */ 105 &iser_buf_setup, /* it_buf_setup */ 106 &iser_buf_teardown, /* it_buf_teardown */ 107 &iser_tgt_svc_create, /* it_tgt_svc_create */ 108 &iser_tgt_svc_destroy, /* it_tgt_svc_destroy */ 109 &iser_tgt_svc_online, /* it_tgt_svc_online */ 110 &iser_tgt_svc_offline, /* it_tgt_svc_offline */ 111 &iser_conn_destroy, /* it_tgt_conn_destroy */ 112 &iser_tgt_conn_connect, /* it_tgt_conn_connect */ 113 &iser_conn_disconnect, /* it_tgt_conn_disconnect */ 114 &iser_ini_conn_create, /* it_ini_conn_create */ 115 &iser_conn_destroy, /* it_ini_conn_destroy */ 116 &iser_ini_conn_connect, /* it_ini_conn_connect */ 117 &iser_conn_disconnect /* it_ini_conn_disconnect */ 118 }; 119 120 /* 121 * iSER IDM transport capabilities 122 */ 123 idm_transport_caps_t iser_transport_caps = { 124 0 /* flags */ 125 }; 126 127 int 128 iser_idm_register() 129 { 130 idm_transport_attr_t attr; 131 idm_status_t status; 132 133 attr.type = IDM_TRANSPORT_TYPE_ISER; 134 attr.it_ops = &iser_transport_ops; 135 attr.it_caps = &iser_transport_caps; 136 137 status = idm_transport_register(&attr); 138 if (status != IDM_STATUS_SUCCESS) { 139 ISER_LOG(CE_WARN, "Failed to register iSER transport with IDM"); 140 return (DDI_FAILURE); 141 } 142 143 ISER_LOG(CE_NOTE, "Registered iSER transport with IDM"); 144 145 return (DDI_SUCCESS); 146 } 147 148 /* 149 * iser_ini_conn_create() 150 * Allocate an iSER initiator connection context 151 */ 152 static idm_status_t 153 iser_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) 154 { 155 iser_chan_t *iser_chan = NULL; 156 iser_conn_t *iser_conn; 157 158 /* Allocate and set up a connection handle */ 159 iser_conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP); 160 mutex_init(&iser_conn->ic_lock, NULL, MUTEX_DRIVER, NULL); 161 162 /* Allocate and open a channel to the target node */ 163 iser_chan = iser_channel_alloc(NULL, &cr->cr_ini_dst_addr); 164 if (iser_chan == NULL) { 165 ISER_LOG(CE_WARN, "iser: failed to allocate channel"); 166 mutex_destroy(&iser_conn->ic_lock); 167 kmem_free(iser_conn, sizeof (iser_conn_t)); 168 return (IDM_STATUS_FAIL); 169 } 170 171 /* 172 * The local IP and remote IP are filled in iser_channel_alloc. The 173 * remote port needs to be filled in from idm_conn_req_t. The local 174 * port is irrelevant. Internal representation of the port in the 175 * IDM sockaddr structure is in network byte order. IBT expects the 176 * port in host byte order. 177 */ 178 switch (cr->cr_ini_dst_addr.sin.sa_family) { 179 case AF_INET: 180 iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin4.sin_port); 181 break; 182 case AF_INET6: 183 iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin6.sin6_port); 184 break; 185 default: 186 iser_chan->ic_rport = ISCSI_LISTEN_PORT; 187 } 188 iser_chan->ic_lport = 0; 189 190 cv_init(&iser_conn->ic_stage_cv, NULL, CV_DEFAULT, NULL); 191 iser_conn->ic_type = ISER_CONN_TYPE_INI; 192 iser_conn->ic_stage = ISER_CONN_STAGE_ALLOCATED; 193 iser_conn->ic_chan = iser_chan; 194 iser_conn->ic_idmc = ic; 195 196 /* 197 * Set a pointer to the iser_conn in the iser_chan for easy 198 * access during CM event handling 199 */ 200 iser_chan->ic_conn = iser_conn; 201 202 /* Set the iSER conn handle in the IDM conn private handle */ 203 ic->ic_transport_private = (void *)iser_conn; 204 205 /* Set the transport header length */ 206 ic->ic_transport_hdrlen = ISER_HEADER_LENGTH; 207 208 return (IDM_STATUS_SUCCESS); 209 } 210 211 /* 212 * iser_internal_conn_destroy() 213 * Tear down iSER-specific connection resources. This is used below 214 * in iser_conn_destroy(), but also from the CM code when we may have 215 * some of the connection established, but not fully connected. 216 */ 217 void 218 iser_internal_conn_destroy(iser_conn_t *ic) 219 { 220 mutex_enter(&ic->ic_lock); 221 iser_channel_free(ic->ic_chan); 222 if ((ic->ic_type == ISER_CONN_TYPE_TGT) && 223 (ic->ic_stage == ISER_CONN_STAGE_ALLOCATED)) { 224 /* 225 * This is a target connection that has yet to be 226 * established. Free our reference on the target 227 * service handle. 228 */ 229 iser_tgt_svc_rele(ic->ic_idms->is_iser_svc); 230 } 231 cv_destroy(&ic->ic_stage_cv); 232 mutex_exit(&ic->ic_lock); 233 mutex_destroy(&ic->ic_lock); 234 kmem_free(ic, sizeof (iser_conn_t)); 235 } 236 237 /* 238 * iser_conn_destroy() 239 * Tear down an initiator or target connection. 240 */ 241 static void 242 iser_conn_destroy(idm_conn_t *ic) 243 { 244 iser_conn_t *iser_conn; 245 iser_conn = (iser_conn_t *)ic->ic_transport_private; 246 247 iser_internal_conn_destroy(iser_conn); 248 ic->ic_transport_private = NULL; 249 } 250 251 /* 252 * iser_ini_conn_connect() 253 * Establish the connection referred to by the handle previously allocated via 254 * iser_ini_conn_create(). 255 */ 256 static idm_status_t 257 iser_ini_conn_connect(idm_conn_t *ic) 258 { 259 iser_conn_t *iser_conn; 260 iser_status_t status; 261 262 iser_conn = (iser_conn_t *)ic->ic_transport_private; 263 264 status = iser_channel_open(iser_conn->ic_chan); 265 if (status != ISER_STATUS_SUCCESS) { 266 ISER_LOG(CE_WARN, "iser: failed to open channel"); 267 return (IDM_STATUS_FAIL); 268 } 269 270 /* 271 * Set the local and remote addresses in the idm conn handle. 272 */ 273 iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr, 274 &iser_conn->ic_chan->ic_localip, iser_conn->ic_chan->ic_lport); 275 iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr, 276 &iser_conn->ic_chan->ic_remoteip, iser_conn->ic_chan->ic_rport); 277 278 mutex_enter(&iser_conn->ic_lock); 279 /* Hold a reference on the IDM connection handle */ 280 idm_conn_hold(ic); 281 iser_conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED; 282 mutex_exit(&iser_conn->ic_lock); 283 284 return (IDM_STATUS_SUCCESS); 285 } 286 287 /* 288 * iser_conn_disconnect() 289 * Shutdown this iSER connection 290 */ 291 static void 292 iser_conn_disconnect(idm_conn_t *ic) 293 { 294 iser_conn_t *iser_conn; 295 296 iser_conn = (iser_conn_t *)ic->ic_transport_private; 297 298 mutex_enter(&iser_conn->ic_lock); 299 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING; 300 mutex_exit(&iser_conn->ic_lock); 301 302 /* Close the channel */ 303 iser_channel_close(iser_conn->ic_chan); 304 305 /* Free our reference held on the IDM conn handle, and set CLOSED */ 306 mutex_enter(&iser_conn->ic_lock); 307 idm_conn_rele(iser_conn->ic_idmc); 308 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSED; 309 mutex_exit(&iser_conn->ic_lock); 310 } 311 312 /* 313 * iser_tgt_svc_create() 314 * Establish the CM service for inbound iSER service requests on the port 315 * indicated by sr->sr_port. 316 * idm_svc_req_t contains the service parameters. 317 */ 318 idm_status_t 319 iser_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) 320 { 321 iser_svc_t *iser_svc; 322 323 int rc; 324 325 iser_svc = kmem_zalloc(sizeof (iser_svc_t), KM_SLEEP); 326 is->is_iser_svc = (void *)iser_svc; 327 328 idm_refcnt_init(&iser_svc->is_refcnt, iser_svc); 329 330 list_create(&iser_svc->is_sbindlist, sizeof (iser_sbind_t), 331 offsetof(iser_sbind_t, is_list_node)); 332 iser_svc->is_svcid = ibt_get_ip_sid(IPPROTO_TCP, sr->sr_port); 333 334 /* 335 * Register an iSER target service for the requested port 336 * and set the iser_svc structure in the idm_svc handle. 337 */ 338 rc = iser_register_service(is); 339 if (rc != DDI_SUCCESS) { 340 ISER_LOG(CE_NOTE, "iser_tgt_svc_create: iser_register_service " 341 "failed on port (%d): rc (0x%x)", sr->sr_port, rc); 342 ibt_release_ip_sid(iser_svc->is_svcid); 343 list_destroy(&iser_svc->is_sbindlist); 344 idm_refcnt_destroy(&iser_svc->is_refcnt); 345 kmem_free(iser_svc, sizeof (iser_svc_t)); 346 return (IDM_STATUS_FAIL); 347 } 348 349 return (IDM_STATUS_SUCCESS); 350 } 351 352 /* IDM refcnt utilities for the iSER service handle */ 353 void 354 iser_tgt_svc_hold(iser_svc_t *is) 355 { 356 idm_refcnt_hold(&is->is_refcnt); 357 } 358 359 void 360 iser_tgt_svc_rele(iser_svc_t *is) 361 { 362 idm_refcnt_rele(&is->is_refcnt); 363 } 364 365 /* 366 * iser_tgt_svc_destroy() 367 * Teardown resources allocated in iser_tgt_svc_create() 368 */ 369 static void 370 iser_tgt_svc_destroy(idm_svc_t *is) 371 { 372 iser_svc_t *iser_svc; 373 374 iser_svc = (iser_svc_t *)is->is_iser_svc; 375 376 /* 377 * Deregister the iSER target service on this port and free 378 * the iser_svc structure from the idm_svc handle. 379 */ 380 iser_deregister_service(is); 381 382 /* Wait for the iSER service handle's refcnt to zero */ 383 idm_refcnt_wait_ref(&iser_svc->is_refcnt); 384 385 list_destroy(&iser_svc->is_sbindlist); 386 387 idm_refcnt_destroy(&iser_svc->is_refcnt); 388 389 kmem_free(iser_svc, sizeof (iser_svc_t)); 390 } 391 392 /* 393 * iser_tgt_svc_online() 394 * Bind the CM service allocated via iser_tgt_svc_create(). 395 */ 396 static idm_status_t 397 iser_tgt_svc_online(idm_svc_t *is) 398 { 399 iser_status_t status; 400 401 mutex_enter(&is->is_mutex); 402 403 /* 404 * Pass the IDM service handle as the client private data for 405 * later use. 406 */ 407 status = iser_bind_service(is); 408 if (status != ISER_STATUS_SUCCESS) { 409 ISER_LOG(CE_NOTE, "iser_tgt_svc_online: failed bind service"); 410 mutex_exit(&is->is_mutex); 411 return (IDM_STATUS_FAIL); 412 } 413 414 mutex_exit(&is->is_mutex); 415 return (IDM_STATUS_SUCCESS); 416 } 417 418 /* 419 * iser_tgt_svc_offline 420 * Unbind the service on all available HCA ports. 421 */ 422 static void 423 iser_tgt_svc_offline(idm_svc_t *is) 424 { 425 mutex_enter(&is->is_mutex); 426 427 iser_unbind_service(is); 428 mutex_exit(&is->is_mutex); 429 430 } 431 432 /* 433 * iser_tgt_conn_connect() 434 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which 435 * is invoked from the SM as a result of an inbound connection request. 436 */ 437 /* ARGSUSED */ 438 static idm_status_t 439 iser_tgt_conn_connect(idm_conn_t *ic) 440 { 441 /* No action required */ 442 return (IDM_STATUS_SUCCESS); 443 } 444 445 /* 446 * iser_tgt_enable_datamover() sets the transport private data on the 447 * idm_conn_t and move the conn stage to indicate logged in. 448 */ 449 static idm_status_t 450 iser_tgt_enable_datamover(idm_conn_t *ic) 451 { 452 iser_conn_t *iser_conn; 453 454 iser_conn = (iser_conn_t *)ic->ic_transport_private; 455 mutex_enter(&iser_conn->ic_lock); 456 457 iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN; 458 mutex_exit(&iser_conn->ic_lock); 459 460 return (IDM_STATUS_SUCCESS); 461 } 462 463 /* 464 * iser_ini_enable_datamover() is used by the iSCSI initator to request that a 465 * specified iSCSI connection be transitioned to iSER-assisted mode. 466 * In the case of iSER, the RDMA resources for a reliable connection have 467 * already been allocated at this time, and the 'RDMAExtensions' is set to 'Yes' 468 * so no further negotiations are required at this time. 469 * The initiator now sends the first iSER Message - 'Hello' to the target 470 * and waits for the 'HelloReply' Message from the target before directing 471 * the initiator to go into the Full Feature Phase. 472 * 473 * No transport op is required on the target side. 474 */ 475 static idm_status_t 476 iser_ini_enable_datamover(idm_conn_t *ic) 477 { 478 479 iser_conn_t *iser_conn; 480 clock_t delay; 481 int status; 482 483 iser_conn = (iser_conn_t *)ic->ic_transport_private; 484 485 mutex_enter(&iser_conn->ic_lock); 486 iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT; 487 mutex_exit(&iser_conn->ic_lock); 488 489 /* Send the iSER Hello Message to the target */ 490 status = iser_xfer_hello_msg(iser_conn->ic_chan); 491 if (status != ISER_STATUS_SUCCESS) { 492 493 mutex_enter(&iser_conn->ic_lock); 494 iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT_FAIL; 495 mutex_exit(&iser_conn->ic_lock); 496 497 return (IDM_STATUS_FAIL); 498 } 499 500 /* 501 * Acquire the iser_conn->ic_lock and wait for the iSER HelloReply 502 * Message from the target, i.e. iser_conn_stage_t to be set to 503 * ISER_CONN_STAGE_HELLOREPLY_RCV. If the handshake does not 504 * complete within a specified time period (.5s), then return failure. 505 * 506 */ 507 delay = ddi_get_lbolt() + drv_usectohz(500000); 508 509 mutex_enter(&iser_conn->ic_lock); 510 while ((iser_conn->ic_stage != ISER_CONN_STAGE_HELLOREPLY_RCV) && 511 (ddi_get_lbolt() < delay)) { 512 513 (void) cv_timedwait(&iser_conn->ic_stage_cv, 514 &iser_conn->ic_lock, delay); 515 } 516 517 switch (iser_conn->ic_stage) { 518 case ISER_CONN_STAGE_HELLOREPLY_RCV: 519 iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN; 520 mutex_exit(&iser_conn->ic_lock); 521 /* 522 * Return suceess to indicate that the initiator connection can 523 * go to the next phase - FFP 524 */ 525 return (IDM_STATUS_SUCCESS); 526 default: 527 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL; 528 mutex_exit(&iser_conn->ic_lock); 529 return (IDM_STATUS_FAIL); 530 531 } 532 533 /* STATEMENT_NEVER_REACHED */ 534 } 535 536 /* 537 * iser_free_task_rsrcs() 538 * This routine does not currently need to do anything. It is used in 539 * the sockets transport to explicitly complete any buffers on the task, 540 * but we can rely on our RCaP layer to finish up it's work without any 541 * intervention. 542 */ 543 /* ARGSUSED */ 544 idm_status_t 545 iser_free_task_rsrcs(idm_task_t *idt) 546 { 547 return (IDM_STATUS_SUCCESS); 548 } 549 550 /* 551 * iser_negotiate_key_values() validates the key values for this connection 552 */ 553 /* ARGSUSED */ 554 static kv_status_t 555 iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl, 556 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 557 { 558 kv_status_t kvrc = KV_HANDLED; 559 560 /* Process the request nvlist */ 561 kvrc = iser_process_request_nvlist(request_nvl, response_nvl, 562 negotiated_nvl); 563 564 /* We must be using RDMA, so set the flag on the ic handle */ 565 ic->ic_rdma_extensions = B_TRUE; 566 567 return (kvrc); 568 } 569 570 /* Process a list of key=value pairs from a login request */ 571 static kv_status_t 572 iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl, 573 nvlist_t *negotiated_nvl) 574 { 575 const idm_kv_xlate_t *ikvx; 576 char *nvp_name; 577 nvpair_t *nvp; 578 nvpair_t *next_nvp; 579 kv_status_t kvrc = KV_HANDLED; 580 boolean_t transit = B_TRUE; 581 582 /* Process the list */ 583 nvp = nvlist_next_nvpair(request_nvl, NULL); 584 while (nvp != NULL) { 585 next_nvp = nvlist_next_nvpair(request_nvl, nvp); 586 587 nvp_name = nvpair_name(nvp); 588 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 589 590 kvrc = iser_handle_key(nvp, ikvx, request_nvl, response_nvl, 591 negotiated_nvl); 592 if (kvrc != KV_HANDLED) { 593 if (kvrc == KV_HANDLED_NO_TRANSIT) { 594 /* we countered, clear the transit flag */ 595 transit = B_FALSE; 596 } else { 597 /* error, bail out */ 598 break; 599 } 600 } 601 602 nvp = next_nvp; 603 } 604 /* 605 * If the current kv_status_t indicates success, we've handled 606 * the entire list. Explicitly set kvrc to NO_TRANSIT if we've 607 * cleared the transit flag along the way. 608 */ 609 if ((kvrc == KV_HANDLED) && (transit == B_FALSE)) { 610 kvrc = KV_HANDLED_NO_TRANSIT; 611 } 612 613 return (kvrc); 614 } 615 616 /* Handle a given list, boolean or numerical key=value pair */ 617 static kv_status_t 618 iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx, 619 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 620 { 621 kv_status_t kvrc = KV_UNHANDLED; 622 boolean_t bool_val; 623 uint64_t num_val; 624 int nvrc; 625 626 /* Retrieve values for booleans and numericals */ 627 switch (ikvx->ik_key_id) { 628 /* Booleans */ 629 case KI_RDMA_EXTENSIONS: 630 case KI_IMMEDIATE_DATA: 631 case KI_IFMARKER: 632 case KI_OFMARKER: 633 nvrc = nvpair_value_boolean_value(nvp, &bool_val); 634 ASSERT(nvrc == 0); 635 break; 636 /* Numericals */ 637 case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH: 638 case KI_TARGET_RECV_DATA_SEGMENT_LENGTH: 639 case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS: 640 nvrc = nvpair_value_uint64(nvp, &num_val); 641 ASSERT(nvrc == 0); 642 break; 643 default: 644 break; 645 } 646 647 /* Now handle the values according to the key name */ 648 switch (ikvx->ik_key_id) { 649 case KI_HEADER_DIGEST: 650 case KI_DATA_DIGEST: 651 /* Ensure "None" */ 652 kvrc = iser_handle_digest(nvp, ikvx, request_nvl, response_nvl, 653 negotiated_nvl); 654 break; 655 case KI_RDMA_EXTENSIONS: 656 /* Ensure "Yes" */ 657 kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_TRUE, 658 request_nvl, response_nvl, negotiated_nvl); 659 break; 660 case KI_TARGET_RECV_DATA_SEGMENT_LENGTH: 661 /* Validate the proposed value */ 662 kvrc = iser_handle_numerical(nvp, num_val, ikvx, 663 ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN, 664 ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX, 665 ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX, 666 request_nvl, response_nvl, negotiated_nvl); 667 break; 668 case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH: 669 /* Validate the proposed value */ 670 kvrc = iser_handle_numerical(nvp, num_val, ikvx, 671 ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN, 672 ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX, 673 ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX, 674 request_nvl, response_nvl, negotiated_nvl); 675 break; 676 case KI_IMMEDIATE_DATA: 677 case KI_OFMARKER: 678 case KI_IFMARKER: 679 /* Ensure "No" */ 680 kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_FALSE, 681 request_nvl, response_nvl, negotiated_nvl); 682 break; 683 case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS: 684 /* Validate the proposed value */ 685 kvrc = iser_handle_numerical(nvp, num_val, ikvx, 686 ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN, 687 ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX, 688 ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX, 689 request_nvl, response_nvl, negotiated_nvl); 690 break; 691 default: 692 /* 693 * All other keys, including invalid keys, will be 694 * handled at the client layer. 695 */ 696 kvrc = KV_HANDLED; 697 break; 698 } 699 700 return (kvrc); 701 } 702 703 /* Ensure that "None" is an option in the digest list, and select it */ 704 static kv_status_t 705 iser_handle_digest(nvpair_t *choices, const idm_kv_xlate_t *ikvx, 706 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 707 { 708 kv_status_t kvrc = KV_VALUE_ERROR; 709 int nvrc = 0; 710 nvpair_t *digest_choice; 711 char *digest_choice_string; 712 713 /* 714 * Loop through all digest choices. We need to enforce no 715 * "None" for both header and data digest. If we find our 716 * required value, add the value to our negotiated values list 717 * and respond with that value in the login response. If not, 718 * indicate a value error for the iSCSI layer to work with. 719 */ 720 digest_choice = idm_get_next_listvalue(choices, NULL); 721 while (digest_choice != NULL) { 722 nvrc = nvpair_value_string(digest_choice, 723 &digest_choice_string); 724 ASSERT(nvrc == 0); 725 726 if (strcasecmp(digest_choice_string, "none") == 0) { 727 /* Add to negotiated values list */ 728 nvrc = nvlist_add_string(negotiated_nvl, 729 ikvx->ik_key_name, digest_choice_string); 730 kvrc = idm_nvstat_to_kvstat(nvrc); 731 if (nvrc == 0) { 732 /* Add to login response list */ 733 nvrc = nvlist_add_string(response_nvl, 734 ikvx->ik_key_name, digest_choice_string); 735 kvrc = idm_nvstat_to_kvstat(nvrc); 736 /* Remove from the request (we've handled it) */ 737 (void) nvlist_remove_all(request_nvl, 738 ikvx->ik_key_name); 739 } 740 break; 741 } 742 digest_choice = idm_get_next_listvalue(choices, 743 digest_choice); 744 } 745 746 ASSERT(digest_choice != NULL); 747 748 return (kvrc); 749 } 750 751 /* Validate a proposed boolean value, and set the alternate if necessary */ 752 static kv_status_t 753 iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx, 754 boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl, 755 nvlist_t *negotiated_nvl) 756 { 757 kv_status_t kvrc; 758 int nvrc; 759 boolean_t respond; 760 761 if (value != iser_value) { 762 /* 763 * Respond back to initiator with our value, and 764 * set the return value to unset the transit bit. 765 */ 766 value = iser_value; 767 kvrc = KV_HANDLED_NO_TRANSIT; 768 nvrc = 0; 769 respond = B_TRUE; 770 771 } else { 772 /* Add this to our negotiated values */ 773 nvrc = nvlist_add_nvpair(negotiated_nvl, nvp); 774 /* Respond if this is not a declarative */ 775 respond = (ikvx->ik_declarative == B_FALSE); 776 } 777 778 /* Response of Simple-value Negotiation */ 779 if (nvrc == 0 && respond) { 780 nvrc = nvlist_add_boolean_value(response_nvl, 781 ikvx->ik_key_name, value); 782 /* Remove from the request (we've handled it) */ 783 (void) nvlist_remove_all(request_nvl, ikvx->ik_key_name); 784 } 785 786 if (kvrc == KV_HANDLED_NO_TRANSIT) { 787 return (kvrc); 788 } 789 790 return (idm_nvstat_to_kvstat(nvrc)); 791 } 792 793 /* 794 * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and 795 * maximum values, and set an alternate, if necessary. Note that the value 796 * 'iser_max_value" represents our implementation maximum (typically the max). 797 */ 798 static kv_status_t 799 iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx, 800 uint64_t min_value, uint64_t max_value, uint64_t iser_max_value, 801 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 802 { 803 kv_status_t kvrc; 804 int nvrc; 805 boolean_t respond; 806 807 /* Validate against standard */ 808 if ((value < min_value) || (value > max_value)) { 809 kvrc = KV_VALUE_ERROR; 810 } else { 811 if (value > iser_max_value) { 812 /* 813 * Respond back to initiator with our value, and 814 * set the return value to unset the transit bit. 815 */ 816 value = iser_max_value; 817 kvrc = KV_HANDLED_NO_TRANSIT; 818 nvrc = 0; 819 respond = B_TRUE; 820 } else { 821 /* Add this to our negotiated values */ 822 nvrc = nvlist_add_nvpair(negotiated_nvl, nvp); 823 /* Respond if this is not a declarative */ 824 respond = (ikvx->ik_declarative == B_FALSE); 825 } 826 827 /* Response of Simple-value Negotiation */ 828 if (nvrc == 0 && respond) { 829 nvrc = nvlist_add_uint64(response_nvl, 830 ikvx->ik_key_name, value); 831 /* Remove from the request (we've handled it) */ 832 (void) nvlist_remove_all(request_nvl, 833 ikvx->ik_key_name); 834 } 835 } 836 837 if (kvrc == KV_HANDLED_NO_TRANSIT) { 838 return (kvrc); 839 } 840 841 return (idm_nvstat_to_kvstat(nvrc)); 842 } 843 844 /* 845 * iser_notice_key_values() activates the negotiated key values for 846 * this connection. 847 */ 848 static void 849 iser_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl) 850 { 851 iser_conn_t *iser_conn; 852 boolean_t boolean_val; 853 uint64_t uint64_val; 854 int nvrc; 855 856 iser_conn = (iser_conn_t *)ic->ic_transport_private; 857 858 /* 859 * Validate the final negotiated operational parameters, 860 * and save a copy. 861 */ 862 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 863 "HeaderDigest", &boolean_val)) != ENOENT) { 864 ASSERT(nvrc == 0); 865 iser_conn->ic_op_params.op_header_digest = boolean_val; 866 } 867 868 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 869 "DataDigest", &boolean_val)) != ENOENT) { 870 ASSERT(nvrc == 0); 871 iser_conn->ic_op_params.op_data_digest = boolean_val; 872 } 873 874 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 875 "RDMAExtensions", &boolean_val)) != ENOENT) { 876 ASSERT(nvrc == 0); 877 iser_conn->ic_op_params.op_rdma_extensions = boolean_val; 878 } 879 880 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 881 "OFMarker", &boolean_val)) != ENOENT) { 882 ASSERT(nvrc == 0); 883 iser_conn->ic_op_params.op_ofmarker = boolean_val; 884 } 885 886 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 887 "IFMarker", &boolean_val)) != ENOENT) { 888 ASSERT(nvrc == 0); 889 iser_conn->ic_op_params.op_ifmarker = boolean_val; 890 } 891 892 if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, 893 "TargetRecvDataSegmentLength", &uint64_val)) != ENOENT) { 894 ASSERT(nvrc == 0); 895 iser_conn->ic_op_params.op_target_recv_data_segment_length = 896 uint64_val; 897 } 898 899 if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, 900 "InitiatorRecvDataSegmentLength", &uint64_val)) != ENOENT) { 901 ASSERT(nvrc == 0); 902 iser_conn->ic_op_params.op_initiator_recv_data_segment_length = 903 uint64_val; 904 } 905 906 if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, 907 "MaxOutstandingUnexpectedPDUs", &uint64_val)) != ENOENT) { 908 ASSERT(nvrc == 0); 909 iser_conn->ic_op_params.op_max_outstanding_unexpected_pdus = 910 uint64_val; 911 } 912 913 /* Test boolean values which are required by RFC 5046 */ 914 #ifdef ISER_DEBUG 915 ASSERT(iser_conn->ic_op_params.op_rdma_extensions == B_TRUE); 916 ASSERT(iser_conn->ic_op_params.op_header_digest == B_FALSE); 917 ASSERT(iser_conn->ic_op_params.op_data_digest == B_FALSE); 918 ASSERT(iser_conn->ic_op_params.op_ofmarker == B_FALSE); 919 ASSERT(iser_conn->ic_op_params.op_ifmarker == B_FALSE); 920 #endif 921 } 922 923 924 /* 925 * iser_conn_is_capable() verifies that the passed connection is provided 926 * for by an iSER-capable link. 927 * NOTE: When utilizing InfiniBand RC as an RCaP, this routine will check 928 * if the link is on IPoIB. This only indicates a chance that the link is 929 * on an RCaP, and thus iSER-capable, since we may be running on an IB-Eth 930 * gateway, or other IB but non-RCaP link. Rather than fully establishing the 931 * link to verify RCaP here, we instead will return B_TRUE 932 * indicating the link is iSER-capable, if the link is IPoIB. If then in 933 * iser_ini_conn_create() the link proves not be RCaP, IDM will fall back 934 * to using the IDM Sockets transport. 935 */ 936 /* ARGSUSED */ 937 static boolean_t 938 iser_conn_is_capable(idm_conn_req_t *cr, idm_transport_caps_t *caps) 939 { 940 /* A NULL value for laddr indicates implicit source */ 941 return (iser_path_exists(NULL, &cr->cr_ini_dst_addr)); 942 } 943 944 /* 945 * iser_pdu_tx() transmits a Control PDU via the iSER channel. We pull the 946 * channel out of the idm_conn_t passed in, and pass it and the pdu to the 947 * iser_xfer routine. 948 */ 949 static void 950 iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu) 951 { 952 iser_conn_t *iser_conn; 953 iser_status_t iser_status; 954 955 iser_conn = (iser_conn_t *)ic->ic_transport_private; 956 957 iser_status = iser_xfer_ctrlpdu(iser_conn->ic_chan, pdu); 958 if (iser_status != ISER_STATUS_SUCCESS) { 959 ISER_LOG(CE_WARN, "iser_pdu_tx: failed iser_xfer_ctrlpdu: " 960 "ic (0x%p) pdu (0x%p)", (void *) ic, (void *) pdu); 961 /* Fail this PDU transmission */ 962 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 963 } 964 965 /* 966 * We successfully posted this PDU for transmission. 967 * The completion handler will invoke idm_pdu_complete() 968 * with the completion status. See iser_cq.c for more 969 * information. 970 */ 971 } 972 973 /* 974 * iser_buf_tx_to_ini() transmits the data buffer encoded in idb to the 975 * initiator to fulfill SCSI Read commands. An iser_xfer routine is invoked 976 * to implement the RDMA operations. 977 * 978 * Caller holds idt->idt_mutex. 979 */ 980 static idm_status_t 981 iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) 982 { 983 iser_status_t iser_status; 984 idm_status_t idm_status = IDM_STATUS_SUCCESS; 985 986 ASSERT(mutex_owned(&idt->idt_mutex)); 987 988 iser_status = iser_xfer_buf_to_ini(idt, idb); 989 990 if (iser_status != ISER_STATUS_SUCCESS) { 991 ISER_LOG(CE_WARN, "iser_buf_tx_to_ini: failed " 992 "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)", 993 (void *) idt, (void *) idb); 994 idm_status = IDM_STATUS_FAIL; 995 } 996 997 /* 998 * iSCSIt's Data Completion Notify callback is invoked from 999 * the Work Request Send completion Handler 1000 */ 1001 1002 mutex_exit(&idt->idt_mutex); 1003 return (idm_status); 1004 } 1005 1006 /* 1007 * iser_buf_tx_from_ini() transmits data from the initiator into the buffer 1008 * in idb to fulfill SCSI Write commands. An iser_xfer routine is invoked 1009 * to implement the RDMA operations. 1010 * 1011 * Caller holds idt->idt_mutex. 1012 */ 1013 static idm_status_t 1014 iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) 1015 { 1016 iser_status_t iser_status; 1017 idm_status_t idm_status = IDM_STATUS_SUCCESS; 1018 1019 ASSERT(mutex_owned(&idt->idt_mutex)); 1020 1021 iser_status = iser_xfer_buf_from_ini(idt, idb); 1022 1023 if (iser_status != ISER_STATUS_SUCCESS) { 1024 ISER_LOG(CE_WARN, "iser_buf_tx_from_ini: failed " 1025 "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)", 1026 (void *) idt, (void *) idb); 1027 idm_status = IDM_STATUS_FAIL; 1028 } 1029 1030 /* 1031 * iSCSIt's Data Completion Notify callback is invoked from 1032 * the Work Request Send completion Handler 1033 */ 1034 1035 mutex_exit(&idt->idt_mutex); 1036 return (idm_status); 1037 } 1038 1039 /* 1040 * iser_buf_alloc() allocates a buffer and registers it with the IBTF for 1041 * use with iSER. Each HCA has it's own kmem cache for establishing a pool 1042 * of registered buffers, when once initially allocated, will remain 1043 * registered with the HCA. This routine is invoked only on the target, 1044 * where we have the requirement to pre-allocate buffers for the upper layers. 1045 * Note: buflen is compared to ISER_DEFAULT_BUFLEN, and allocation is failed 1046 * if the requested buflen is larger than our default. 1047 */ 1048 /* ARGSUSED */ 1049 static idm_status_t 1050 iser_buf_alloc(idm_buf_t *idb, uint64_t buflen) 1051 { 1052 iser_conn_t *iser_conn; 1053 iser_hca_t *iser_hca; 1054 iser_buf_t *iser_buf; 1055 1056 if (buflen > ISER_DEFAULT_BUFLEN) { 1057 return (IDM_STATUS_FAIL); 1058 } 1059 1060 iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private; 1061 iser_hca = iser_conn->ic_chan->ic_hca; 1062 1063 /* 1064 * Allocate a buffer from this HCA's cache. Once initialized, these 1065 * will remain allocated and registered (see above). 1066 */ 1067 iser_buf = kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP); 1068 if (iser_buf == NULL) { 1069 ISER_LOG(CE_NOTE, "iser_buf_alloc: alloc failed"); 1070 return (IDM_STATUS_FAIL); 1071 } 1072 1073 /* Set the allocated data buffer pointer in the IDM buf handle */ 1074 idb->idb_buf = iser_buf->buf; 1075 1076 /* Set the private buf and reg handles in the IDM buf handle */ 1077 idb->idb_buf_private = (void *)iser_buf; 1078 idb->idb_reg_private = (void *)iser_buf->iser_mr; 1079 1080 return (IDM_STATUS_SUCCESS); 1081 } 1082 1083 /* 1084 * iser_buf_free() frees the buffer handle passed in. Note that the cached 1085 * kmem object has an HCA-registered buffer in it which will not be freed. 1086 * This allows us to build up a cache of pre-allocated and registered 1087 * buffers for use on the target. 1088 */ 1089 static void 1090 iser_buf_free(idm_buf_t *buf) 1091 { 1092 iser_buf_t *iser_buf; 1093 1094 iser_buf = buf->idb_buf_private; 1095 kmem_cache_free(iser_buf->cache, iser_buf); 1096 } 1097 1098 /* 1099 * iser_buf_setup() is invoked on the initiator in order to register memory 1100 * on demand for use with the iSER layer. 1101 */ 1102 static idm_status_t 1103 iser_buf_setup(idm_buf_t *idb) 1104 { 1105 iser_conn_t *iser_conn; 1106 iser_chan_t *iser_chan; 1107 iser_hca_t *iser_hca; 1108 iser_buf_t *iser_buf; 1109 int status; 1110 1111 ASSERT(idb->idb_buf != NULL); 1112 1113 iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private; 1114 ASSERT(iser_conn != NULL); 1115 1116 iser_hca = iser_conn->ic_chan->ic_hca; 1117 1118 iser_chan = iser_conn->ic_chan; 1119 ASSERT(iser_chan != NULL); 1120 1121 /* 1122 * Memory registration is known to be slow, so for small 1123 * transfers, use pre-registered memory buffers and just 1124 * copy the data into/from them at the appropriate time 1125 */ 1126 if (idb->idb_buflen < ISER_BCOPY_THRESHOLD) { 1127 iser_buf = 1128 kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP); 1129 1130 if (iser_buf == NULL) { 1131 1132 /* Fail over to dynamic registration */ 1133 status = iser_reg_rdma_mem(iser_chan->ic_hca, idb); 1134 idb->idb_bufalloc = B_FALSE; 1135 return (status); 1136 } 1137 1138 /* 1139 * Set the allocated data buffer pointer in the IDM buf handle 1140 * Data is to be copied from/to this buffer using bcopy 1141 */ 1142 idb->idb_bufptr = idb->idb_buf; 1143 idb->idb_bufbcopy = B_TRUE; 1144 1145 idb->idb_buf = iser_buf->buf; 1146 1147 /* Set the private buf and reg handles in the IDM buf handle */ 1148 idb->idb_buf_private = (void *)iser_buf; 1149 idb->idb_reg_private = (void *)iser_buf->iser_mr; 1150 1151 /* Ensure bufalloc'd flag is set */ 1152 idb->idb_bufalloc = B_TRUE; 1153 1154 return (IDM_STATUS_SUCCESS); 1155 1156 } else { 1157 1158 /* Dynamically register the memory passed in on the idb */ 1159 status = iser_reg_rdma_mem(iser_chan->ic_hca, idb); 1160 1161 /* Ensure bufalloc'd flag is unset */ 1162 idb->idb_bufalloc = B_FALSE; 1163 1164 return (status); 1165 } 1166 } 1167 1168 /* 1169 * iser_buf_teardown() is invoked on the initiator in order to register memory 1170 * on demand for use with the iSER layer. 1171 */ 1172 static void 1173 iser_buf_teardown(idm_buf_t *idb) 1174 { 1175 iser_conn_t *iser_conn; 1176 1177 iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private; 1178 1179 /* Deregister the memory passed in on the idb */ 1180 iser_dereg_rdma_mem(iser_conn->ic_chan->ic_hca, idb); 1181 } 1182