1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/ddi.h> 26 #include <sys/sunddi.h> 27 28 #include <sys/socket.h> /* networking stuff */ 29 #include <sys/sysmacros.h> /* offsetof */ 30 31 #include <sys/ib/clients/iser/iser.h> 32 #include <sys/ib/clients/iser/iser_idm.h> 33 34 /* 35 * iSER transport routines 36 * 37 * All transport functions except iser_tgt_svc_create() are called through 38 * the ops vector, iser_tgt_svc_create() is called from the async handler 39 * inaddition to being called by the ULP 40 */ 41 42 static void iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu); 43 44 static idm_status_t iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb); 45 static idm_status_t iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb); 46 static idm_status_t iser_tgt_enable_datamover(idm_conn_t *ic); 47 static idm_status_t iser_ini_enable_datamover(idm_conn_t *ic); 48 static void iser_notice_key_values(struct idm_conn_s *ic, 49 nvlist_t *negotiated_nvl); 50 static kv_status_t iser_declare_key_values(struct idm_conn_s *ic, 51 nvlist_t *config_nvl, nvlist_t *outgoing_nvl); 52 static idm_status_t iser_free_task_rsrcs(idm_task_t *idt); 53 static kv_status_t iser_negotiate_key_values(idm_conn_t *ic, 54 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 55 static kv_status_t iser_handle_numerical(nvpair_t *nvp, uint64_t value, 56 const idm_kv_xlate_t *ikvx, uint64_t min_value, uint64_t max_value, 57 uint64_t iser_max_value, nvlist_t *request_nvl, nvlist_t *response_nvl, 58 nvlist_t *negotiated_nvl); 59 static kv_status_t iser_handle_boolean(nvpair_t *nvp, boolean_t value, 60 const idm_kv_xlate_t *ikvx, boolean_t iser_value, nvlist_t *request_nvl, 61 nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 62 static kv_status_t iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx, 63 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 64 static kv_status_t iser_process_request_nvlist(nvlist_t *request_nvl, 65 nvlist_t *response_nvl, nvlist_t *negotiated_nvl); 66 static boolean_t iser_conn_is_capable(idm_conn_req_t *ic, 67 idm_transport_caps_t *caps); 68 static idm_status_t iser_buf_alloc(idm_buf_t *idb, uint64_t buflen); 69 static idm_status_t iser_buf_setup(idm_buf_t *idb); 70 static void iser_buf_teardown(idm_buf_t *idb); 71 static void iser_buf_free(idm_buf_t *idb); 72 static void iser_tgt_svc_destroy(struct idm_svc_s *is); 73 static idm_status_t iser_tgt_svc_online(struct idm_svc_s *is); 74 static void iser_tgt_svc_offline(struct idm_svc_s *is); 75 static idm_status_t iser_tgt_conn_connect(struct idm_conn_s *ic); 76 static idm_status_t iser_ini_conn_create(idm_conn_req_t *cr, 77 struct idm_conn_s *ic); 78 static void iser_conn_destroy(struct idm_conn_s *ic); 79 static idm_status_t iser_ini_conn_connect(struct idm_conn_s *ic); 80 static void iser_conn_disconnect(struct idm_conn_s *ic); 81 82 /* 83 * iSER IDM transport operations 84 */ 85 idm_transport_ops_t iser_transport_ops = { 86 &iser_pdu_tx, /* it_tx_pdu */ 87 &iser_buf_tx_to_ini, /* it_buf_tx_to_ini */ 88 &iser_buf_rx_from_ini, /* it_buf_rx_from_ini */ 89 NULL, /* it_rx_datain */ 90 NULL, /* it_rx_rtt */ 91 NULL, /* it_rx_dataout */ 92 NULL, /* it_alloc_conn_rsrc */ 93 NULL, /* it_free_conn_rsrc */ 94 &iser_tgt_enable_datamover, /* it_tgt_enable_datamover */ 95 &iser_ini_enable_datamover, /* it_ini_enable_datamover */ 96 NULL, /* it_conn_terminate */ 97 &iser_free_task_rsrcs, /* it_free_task_rsrc */ 98 &iser_negotiate_key_values, /* it_negotiate_key_values */ 99 &iser_notice_key_values, /* it_notice_key_values */ 100 &iser_conn_is_capable, /* it_conn_is_capable */ 101 &iser_buf_alloc, /* it_buf_alloc */ 102 &iser_buf_free, /* it_buf_free */ 103 &iser_buf_setup, /* it_buf_setup */ 104 &iser_buf_teardown, /* it_buf_teardown */ 105 &iser_tgt_svc_create, /* it_tgt_svc_create */ 106 &iser_tgt_svc_destroy, /* it_tgt_svc_destroy */ 107 &iser_tgt_svc_online, /* it_tgt_svc_online */ 108 &iser_tgt_svc_offline, /* it_tgt_svc_offline */ 109 &iser_conn_destroy, /* it_tgt_conn_destroy */ 110 &iser_tgt_conn_connect, /* it_tgt_conn_connect */ 111 &iser_conn_disconnect, /* it_tgt_conn_disconnect */ 112 &iser_ini_conn_create, /* it_ini_conn_create */ 113 &iser_conn_destroy, /* it_ini_conn_destroy */ 114 &iser_ini_conn_connect, /* it_ini_conn_connect */ 115 &iser_conn_disconnect, /* it_ini_conn_disconnect */ 116 &iser_declare_key_values /* it_declare_key_values */ 117 }; 118 119 /* 120 * iSER IDM transport capabilities 121 */ 122 idm_transport_caps_t iser_transport_caps = { 123 0 /* flags */ 124 }; 125 126 int 127 iser_idm_register() 128 { 129 idm_transport_attr_t attr; 130 idm_status_t status; 131 132 attr.type = IDM_TRANSPORT_TYPE_ISER; 133 attr.it_ops = &iser_transport_ops; 134 attr.it_caps = &iser_transport_caps; 135 136 status = idm_transport_register(&attr); 137 if (status != IDM_STATUS_SUCCESS) { 138 ISER_LOG(CE_WARN, "Failed to register iSER transport with IDM"); 139 return (DDI_FAILURE); 140 } 141 142 ISER_LOG(CE_NOTE, "Registered iSER transport with IDM"); 143 144 return (DDI_SUCCESS); 145 } 146 147 /* 148 * iser_ini_conn_create() 149 * Allocate an iSER initiator connection context 150 */ 151 static idm_status_t 152 iser_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic) 153 { 154 iser_chan_t *iser_chan = NULL; 155 iser_conn_t *iser_conn; 156 157 /* Allocate and set up a connection handle */ 158 iser_conn = kmem_zalloc(sizeof (iser_conn_t), KM_SLEEP); 159 mutex_init(&iser_conn->ic_lock, NULL, MUTEX_DRIVER, NULL); 160 161 /* Allocate and open a channel to the target node */ 162 iser_chan = iser_channel_alloc(NULL, &cr->cr_ini_dst_addr); 163 if (iser_chan == NULL) { 164 ISER_LOG(CE_WARN, "iser: failed to allocate channel"); 165 mutex_destroy(&iser_conn->ic_lock); 166 kmem_free(iser_conn, sizeof (iser_conn_t)); 167 return (IDM_STATUS_FAIL); 168 } 169 170 /* 171 * The local IP and remote IP are filled in iser_channel_alloc. The 172 * remote port needs to be filled in from idm_conn_req_t. The local 173 * port is irrelevant. Internal representation of the port in the 174 * IDM sockaddr structure is in network byte order. IBT expects the 175 * port in host byte order. 176 */ 177 switch (cr->cr_ini_dst_addr.sin.sa_family) { 178 case AF_INET: 179 iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin4.sin_port); 180 break; 181 case AF_INET6: 182 iser_chan->ic_rport = ntohs(cr->cr_ini_dst_addr.sin6.sin6_port); 183 break; 184 default: 185 iser_chan->ic_rport = ISCSI_LISTEN_PORT; 186 } 187 iser_chan->ic_lport = 0; 188 189 cv_init(&iser_conn->ic_stage_cv, NULL, CV_DEFAULT, NULL); 190 iser_conn->ic_type = ISER_CONN_TYPE_INI; 191 iser_conn->ic_stage = ISER_CONN_STAGE_ALLOCATED; 192 iser_conn->ic_chan = iser_chan; 193 iser_conn->ic_idmc = ic; 194 195 /* 196 * Set a pointer to the iser_conn in the iser_chan for easy 197 * access during CM event handling 198 */ 199 iser_chan->ic_conn = iser_conn; 200 201 /* Set the iSER conn handle in the IDM conn private handle */ 202 ic->ic_transport_private = (void *)iser_conn; 203 204 /* Set the transport header length */ 205 ic->ic_transport_hdrlen = ISER_HEADER_LENGTH; 206 207 return (IDM_STATUS_SUCCESS); 208 } 209 210 /* 211 * iser_internal_conn_destroy() 212 * Tear down iSER-specific connection resources. This is used below 213 * in iser_conn_destroy(), but also from the CM code when we may have 214 * some of the connection established, but not fully connected. 215 */ 216 void 217 iser_internal_conn_destroy(iser_conn_t *ic) 218 { 219 mutex_enter(&ic->ic_lock); 220 iser_channel_free(ic->ic_chan); 221 if ((ic->ic_type == ISER_CONN_TYPE_TGT) && 222 (ic->ic_stage == ISER_CONN_STAGE_ALLOCATED)) { 223 /* 224 * This is a target connection that has yet to be 225 * established. Free our reference on the target 226 * service handle. 227 */ 228 iser_tgt_svc_rele(ic->ic_idms->is_iser_svc); 229 } 230 cv_destroy(&ic->ic_stage_cv); 231 mutex_exit(&ic->ic_lock); 232 mutex_destroy(&ic->ic_lock); 233 kmem_free(ic, sizeof (iser_conn_t)); 234 } 235 236 /* 237 * iser_conn_destroy() 238 * Tear down an initiator or target connection. 239 */ 240 static void 241 iser_conn_destroy(idm_conn_t *ic) 242 { 243 iser_conn_t *iser_conn; 244 iser_conn = (iser_conn_t *)ic->ic_transport_private; 245 246 iser_internal_conn_destroy(iser_conn); 247 ic->ic_transport_private = NULL; 248 } 249 250 /* 251 * iser_ini_conn_connect() 252 * Establish the connection referred to by the handle previously allocated via 253 * iser_ini_conn_create(). 254 */ 255 static idm_status_t 256 iser_ini_conn_connect(idm_conn_t *ic) 257 { 258 iser_conn_t *iser_conn; 259 iser_status_t status; 260 261 iser_conn = (iser_conn_t *)ic->ic_transport_private; 262 263 status = iser_channel_open(iser_conn->ic_chan); 264 if (status != ISER_STATUS_SUCCESS) { 265 ISER_LOG(CE_WARN, "iser: failed to open channel"); 266 return (IDM_STATUS_FAIL); 267 } 268 269 /* 270 * Set the local and remote addresses in the idm conn handle. 271 */ 272 iser_ib_conv_ibtaddr2sockaddr(&ic->ic_laddr, 273 &iser_conn->ic_chan->ic_localip, iser_conn->ic_chan->ic_lport); 274 iser_ib_conv_ibtaddr2sockaddr(&ic->ic_raddr, 275 &iser_conn->ic_chan->ic_remoteip, iser_conn->ic_chan->ic_rport); 276 277 mutex_enter(&iser_conn->ic_lock); 278 /* Hold a reference on the IDM connection handle */ 279 idm_conn_hold(ic); 280 iser_conn->ic_stage = ISER_CONN_STAGE_IC_CONNECTED; 281 mutex_exit(&iser_conn->ic_lock); 282 283 return (IDM_STATUS_SUCCESS); 284 } 285 286 /* 287 * iser_conn_disconnect() 288 * Shutdown this iSER connection 289 */ 290 static void 291 iser_conn_disconnect(idm_conn_t *ic) 292 { 293 iser_conn_t *iser_conn; 294 295 iser_conn = (iser_conn_t *)ic->ic_transport_private; 296 297 mutex_enter(&iser_conn->ic_lock); 298 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSING; 299 mutex_exit(&iser_conn->ic_lock); 300 301 /* Close the channel */ 302 iser_channel_close(iser_conn->ic_chan); 303 304 /* Free our reference held on the IDM conn handle, and set CLOSED */ 305 mutex_enter(&iser_conn->ic_lock); 306 idm_conn_rele(iser_conn->ic_idmc); 307 iser_conn->ic_stage = ISER_CONN_STAGE_CLOSED; 308 mutex_exit(&iser_conn->ic_lock); 309 } 310 311 /* 312 * iser_tgt_svc_create() 313 * Establish the CM service for inbound iSER service requests on the port 314 * indicated by sr->sr_port. 315 * idm_svc_req_t contains the service parameters. 316 */ 317 idm_status_t 318 iser_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is) 319 { 320 iser_svc_t *iser_svc; 321 322 int rc; 323 324 iser_svc = kmem_zalloc(sizeof (iser_svc_t), KM_SLEEP); 325 is->is_iser_svc = (void *)iser_svc; 326 327 idm_refcnt_init(&iser_svc->is_refcnt, iser_svc); 328 329 list_create(&iser_svc->is_sbindlist, sizeof (iser_sbind_t), 330 offsetof(iser_sbind_t, is_list_node)); 331 iser_svc->is_svcid = ibt_get_ip_sid(IPPROTO_TCP, sr->sr_port); 332 333 /* 334 * Register an iSER target service for the requested port 335 * and set the iser_svc structure in the idm_svc handle. 336 */ 337 rc = iser_register_service(is); 338 if (rc != DDI_SUCCESS) { 339 ISER_LOG(CE_NOTE, "iser_tgt_svc_create: iser_register_service " 340 "failed on port (%d): rc (0x%x)", sr->sr_port, rc); 341 (void) ibt_release_ip_sid(iser_svc->is_svcid); 342 list_destroy(&iser_svc->is_sbindlist); 343 idm_refcnt_destroy(&iser_svc->is_refcnt); 344 kmem_free(iser_svc, sizeof (iser_svc_t)); 345 return (IDM_STATUS_FAIL); 346 } 347 348 return (IDM_STATUS_SUCCESS); 349 } 350 351 /* IDM refcnt utilities for the iSER service handle */ 352 void 353 iser_tgt_svc_hold(iser_svc_t *is) 354 { 355 idm_refcnt_hold(&is->is_refcnt); 356 } 357 358 void 359 iser_tgt_svc_rele(iser_svc_t *is) 360 { 361 idm_refcnt_rele(&is->is_refcnt); 362 } 363 364 /* 365 * iser_tgt_svc_destroy() 366 * Teardown resources allocated in iser_tgt_svc_create() 367 */ 368 static void 369 iser_tgt_svc_destroy(idm_svc_t *is) 370 { 371 iser_svc_t *iser_svc; 372 373 iser_svc = (iser_svc_t *)is->is_iser_svc; 374 375 /* 376 * Deregister the iSER target service on this port and free 377 * the iser_svc structure from the idm_svc handle. 378 */ 379 iser_deregister_service(is); 380 381 /* Wait for the iSER service handle's refcnt to zero */ 382 idm_refcnt_wait_ref(&iser_svc->is_refcnt); 383 384 list_destroy(&iser_svc->is_sbindlist); 385 386 idm_refcnt_destroy(&iser_svc->is_refcnt); 387 388 kmem_free(iser_svc, sizeof (iser_svc_t)); 389 } 390 391 /* 392 * iser_tgt_svc_online() 393 * Bind the CM service allocated via iser_tgt_svc_create(). 394 */ 395 static idm_status_t 396 iser_tgt_svc_online(idm_svc_t *is) 397 { 398 iser_status_t status; 399 400 mutex_enter(&is->is_mutex); 401 402 /* 403 * Pass the IDM service handle as the client private data for 404 * later use. 405 */ 406 status = iser_bind_service(is); 407 if (status != ISER_STATUS_SUCCESS) { 408 ISER_LOG(CE_NOTE, "iser_tgt_svc_online: failed bind service"); 409 mutex_exit(&is->is_mutex); 410 return (IDM_STATUS_FAIL); 411 } 412 413 mutex_exit(&is->is_mutex); 414 return (IDM_STATUS_SUCCESS); 415 } 416 417 /* 418 * iser_tgt_svc_offline 419 * Unbind the service on all available HCA ports. 420 */ 421 static void 422 iser_tgt_svc_offline(idm_svc_t *is) 423 { 424 mutex_enter(&is->is_mutex); 425 426 iser_unbind_service(is); 427 mutex_exit(&is->is_mutex); 428 429 } 430 431 /* 432 * iser_tgt_conn_connect() 433 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which 434 * is invoked from the SM as a result of an inbound connection request. 435 */ 436 /* ARGSUSED */ 437 static idm_status_t 438 iser_tgt_conn_connect(idm_conn_t *ic) 439 { 440 /* No action required */ 441 return (IDM_STATUS_SUCCESS); 442 } 443 444 /* 445 * iser_tgt_enable_datamover() sets the transport private data on the 446 * idm_conn_t and move the conn stage to indicate logged in. 447 */ 448 static idm_status_t 449 iser_tgt_enable_datamover(idm_conn_t *ic) 450 { 451 iser_conn_t *iser_conn; 452 453 iser_conn = (iser_conn_t *)ic->ic_transport_private; 454 mutex_enter(&iser_conn->ic_lock); 455 456 iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN; 457 mutex_exit(&iser_conn->ic_lock); 458 459 return (IDM_STATUS_SUCCESS); 460 } 461 462 /* 463 * iser_ini_enable_datamover() is used by the iSCSI initator to request that a 464 * specified iSCSI connection be transitioned to iSER-assisted mode. 465 * In the case of iSER, the RDMA resources for a reliable connection have 466 * already been allocated at this time, and the 'RDMAExtensions' is set to 'Yes' 467 * so no further negotiations are required at this time. 468 * The initiator now sends the first iSER Message - 'Hello' to the target 469 * and waits for the 'HelloReply' Message from the target before directing 470 * the initiator to go into the Full Feature Phase. 471 * 472 * No transport op is required on the target side. 473 */ 474 static idm_status_t 475 iser_ini_enable_datamover(idm_conn_t *ic) 476 { 477 478 iser_conn_t *iser_conn; 479 clock_t delay; 480 int status; 481 482 iser_conn = (iser_conn_t *)ic->ic_transport_private; 483 484 mutex_enter(&iser_conn->ic_lock); 485 iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT; 486 mutex_exit(&iser_conn->ic_lock); 487 488 /* Send the iSER Hello Message to the target */ 489 status = iser_xfer_hello_msg(iser_conn->ic_chan); 490 if (status != ISER_STATUS_SUCCESS) { 491 492 mutex_enter(&iser_conn->ic_lock); 493 iser_conn->ic_stage = ISER_CONN_STAGE_HELLO_SENT_FAIL; 494 mutex_exit(&iser_conn->ic_lock); 495 496 return (IDM_STATUS_FAIL); 497 } 498 499 /* 500 * Acquire the iser_conn->ic_lock and wait for the iSER HelloReply 501 * Message from the target, i.e. iser_conn_stage_t to be set to 502 * ISER_CONN_STAGE_HELLOREPLY_RCV. If the handshake does not 503 * complete within a specified time period (.5s), then return failure. 504 * 505 */ 506 delay = ddi_get_lbolt() + drv_usectohz(500000); 507 508 mutex_enter(&iser_conn->ic_lock); 509 while ((iser_conn->ic_stage != ISER_CONN_STAGE_HELLOREPLY_RCV) && 510 (ddi_get_lbolt() < delay)) { 511 512 (void) cv_timedwait(&iser_conn->ic_stage_cv, 513 &iser_conn->ic_lock, delay); 514 } 515 516 switch (iser_conn->ic_stage) { 517 case ISER_CONN_STAGE_HELLOREPLY_RCV: 518 iser_conn->ic_stage = ISER_CONN_STAGE_LOGGED_IN; 519 mutex_exit(&iser_conn->ic_lock); 520 /* 521 * Return suceess to indicate that the initiator connection can 522 * go to the next phase - FFP 523 */ 524 return (IDM_STATUS_SUCCESS); 525 default: 526 iser_conn->ic_stage = ISER_CONN_STAGE_HELLOREPLY_RCV_FAIL; 527 mutex_exit(&iser_conn->ic_lock); 528 return (IDM_STATUS_FAIL); 529 530 } 531 532 /* STATEMENT_NEVER_REACHED */ 533 } 534 535 /* 536 * iser_free_task_rsrcs() 537 * This routine does not currently need to do anything. It is used in 538 * the sockets transport to explicitly complete any buffers on the task, 539 * but we can rely on our RCaP layer to finish up it's work without any 540 * intervention. 541 */ 542 /* ARGSUSED */ 543 idm_status_t 544 iser_free_task_rsrcs(idm_task_t *idt) 545 { 546 return (IDM_STATUS_SUCCESS); 547 } 548 549 /* 550 * iser_negotiate_key_values() validates the key values for this connection 551 */ 552 /* ARGSUSED */ 553 static kv_status_t 554 iser_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl, 555 nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 556 { 557 kv_status_t kvrc = KV_HANDLED; 558 559 /* Process the request nvlist */ 560 kvrc = iser_process_request_nvlist(request_nvl, response_nvl, 561 negotiated_nvl); 562 563 /* We must be using RDMA, so set the flag on the ic handle */ 564 ic->ic_rdma_extensions = B_TRUE; 565 566 return (kvrc); 567 } 568 569 /* Process a list of key=value pairs from a login request */ 570 static kv_status_t 571 iser_process_request_nvlist(nvlist_t *request_nvl, nvlist_t *response_nvl, 572 nvlist_t *negotiated_nvl) 573 { 574 const idm_kv_xlate_t *ikvx; 575 char *nvp_name; 576 nvpair_t *nvp; 577 nvpair_t *next_nvp; 578 kv_status_t kvrc = KV_HANDLED; 579 boolean_t transit = B_TRUE; 580 581 /* Process the list */ 582 nvp = nvlist_next_nvpair(request_nvl, NULL); 583 while (nvp != NULL) { 584 next_nvp = nvlist_next_nvpair(request_nvl, nvp); 585 586 nvp_name = nvpair_name(nvp); 587 ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name)); 588 589 kvrc = iser_handle_key(nvp, ikvx, request_nvl, response_nvl, 590 negotiated_nvl); 591 if (kvrc != KV_HANDLED) { 592 if (kvrc == KV_HANDLED_NO_TRANSIT) { 593 /* we countered, clear the transit flag */ 594 transit = B_FALSE; 595 } else { 596 /* error, bail out */ 597 break; 598 } 599 } 600 601 nvp = next_nvp; 602 } 603 /* 604 * If the current kv_status_t indicates success, we've handled 605 * the entire list. Explicitly set kvrc to NO_TRANSIT if we've 606 * cleared the transit flag along the way. 607 */ 608 if ((kvrc == KV_HANDLED) && (transit == B_FALSE)) { 609 kvrc = KV_HANDLED_NO_TRANSIT; 610 } 611 612 return (kvrc); 613 } 614 615 /* Handle a given list, boolean or numerical key=value pair */ 616 static kv_status_t 617 iser_handle_key(nvpair_t *nvp, const idm_kv_xlate_t *ikvx, 618 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 619 { 620 kv_status_t kvrc = KV_UNHANDLED; 621 boolean_t bool_val; 622 uint64_t num_val; 623 int nvrc; 624 625 /* Retrieve values for booleans and numericals */ 626 switch (ikvx->ik_key_id) { 627 /* Booleans */ 628 case KI_RDMA_EXTENSIONS: 629 case KI_IMMEDIATE_DATA: 630 nvrc = nvpair_value_boolean_value(nvp, &bool_val); 631 ASSERT(nvrc == 0); 632 break; 633 /* Numericals */ 634 case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH: 635 case KI_TARGET_RECV_DATA_SEGMENT_LENGTH: 636 case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS: 637 nvrc = nvpair_value_uint64(nvp, &num_val); 638 ASSERT(nvrc == 0); 639 break; 640 default: 641 break; 642 } 643 644 /* 645 * Now handle the values according to the key name. Keys not 646 * specifically handled here will be negotiated by the iscsi 647 * target. Negotiated values take effect when 648 * iser_notice_key_values gets called. 649 */ 650 switch (ikvx->ik_key_id) { 651 case KI_RDMA_EXTENSIONS: 652 /* Ensure "Yes" */ 653 kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_TRUE, 654 request_nvl, response_nvl, negotiated_nvl); 655 break; 656 case KI_TARGET_RECV_DATA_SEGMENT_LENGTH: 657 /* Validate the proposed value */ 658 kvrc = iser_handle_numerical(nvp, num_val, ikvx, 659 ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MIN, 660 ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_MAX, 661 ISER_TARGET_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX, 662 request_nvl, response_nvl, negotiated_nvl); 663 break; 664 case KI_INITIATOR_RECV_DATA_SEGMENT_LENGTH: 665 /* Validate the proposed value */ 666 kvrc = iser_handle_numerical(nvp, num_val, ikvx, 667 ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MIN, 668 ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_MAX, 669 ISER_INITIATOR_RECV_DATA_SEGMENT_LENGTH_IMPL_MAX, 670 request_nvl, response_nvl, negotiated_nvl); 671 break; 672 case KI_IMMEDIATE_DATA: 673 /* Ensure "No" */ 674 kvrc = iser_handle_boolean(nvp, bool_val, ikvx, B_FALSE, 675 request_nvl, response_nvl, negotiated_nvl); 676 break; 677 case KI_MAX_OUTSTANDING_UNEXPECTED_PDUS: 678 /* Validate the proposed value */ 679 kvrc = iser_handle_numerical(nvp, num_val, ikvx, 680 ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MIN, 681 ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_MAX, 682 ISER_MAX_OUTSTANDING_UNEXPECTED_PDUS_IMPL_MAX, 683 request_nvl, response_nvl, negotiated_nvl); 684 break; 685 default: 686 /* 687 * All other keys, including invalid keys, will be 688 * handled at the client layer. 689 */ 690 kvrc = KV_HANDLED; 691 break; 692 } 693 694 return (kvrc); 695 } 696 697 698 /* Validate a proposed boolean value, and set the alternate if necessary */ 699 static kv_status_t 700 iser_handle_boolean(nvpair_t *nvp, boolean_t value, const idm_kv_xlate_t *ikvx, 701 boolean_t iser_value, nvlist_t *request_nvl, nvlist_t *response_nvl, 702 nvlist_t *negotiated_nvl) 703 { 704 kv_status_t kvrc = KV_UNHANDLED; 705 int nvrc; 706 boolean_t respond = B_FALSE; 707 708 if (value != iser_value) { 709 /* 710 * Respond back to initiator with our value, and 711 * set the return value to unset the transit bit. 712 */ 713 value = iser_value; 714 nvrc = nvlist_add_boolean_value(negotiated_nvl, 715 ikvx->ik_key_name, value); 716 if (nvrc == 0) { 717 kvrc = KV_HANDLED_NO_TRANSIT; 718 respond = B_TRUE; 719 } 720 721 } else { 722 /* Add this to our negotiated values */ 723 nvrc = nvlist_add_nvpair(negotiated_nvl, nvp); 724 /* Respond if this is not a declarative */ 725 respond = (ikvx->ik_declarative == B_FALSE); 726 } 727 728 /* Response of Simple-value Negotiation */ 729 if (nvrc == 0 && respond) { 730 nvrc = nvlist_add_boolean_value(response_nvl, 731 ikvx->ik_key_name, value); 732 /* Remove from the request (we've handled it) */ 733 (void) nvlist_remove_all(request_nvl, ikvx->ik_key_name); 734 } 735 736 if (kvrc == KV_HANDLED_NO_TRANSIT) { 737 return (kvrc); 738 } 739 740 return (idm_nvstat_to_kvstat(nvrc)); 741 } 742 743 /* 744 * Validate a proposed value against the iSER and/or iSCSI RFC's minimum and 745 * maximum values, and set an alternate, if necessary. Note that the value 746 * 'iser_max_value" represents our implementation maximum (typically the max). 747 */ 748 static kv_status_t 749 iser_handle_numerical(nvpair_t *nvp, uint64_t value, const idm_kv_xlate_t *ikvx, 750 uint64_t min_value, uint64_t max_value, uint64_t iser_max_value, 751 nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl) 752 { 753 kv_status_t kvrc = KV_UNHANDLED; 754 int nvrc; 755 boolean_t respond = B_FALSE; 756 757 /* Validate against standard */ 758 if ((value < min_value) || (value > max_value)) { 759 kvrc = KV_VALUE_ERROR; 760 } else { 761 if (value > iser_max_value) { 762 /* 763 * Respond back to initiator with our value, and 764 * set the return value to unset the transit bit. 765 */ 766 value = iser_max_value; 767 nvrc = nvlist_add_uint64(negotiated_nvl, 768 ikvx->ik_key_name, value); 769 if (nvrc == 0) { 770 kvrc = KV_HANDLED_NO_TRANSIT; 771 respond = B_TRUE; 772 } 773 } else { 774 /* Add this to our negotiated values */ 775 nvrc = nvlist_add_nvpair(negotiated_nvl, nvp); 776 /* Respond if this is not a declarative */ 777 respond = (ikvx->ik_declarative == B_FALSE); 778 } 779 780 /* Response of Simple-value Negotiation */ 781 if (nvrc == 0 && respond) { 782 nvrc = nvlist_add_uint64(response_nvl, 783 ikvx->ik_key_name, value); 784 /* Remove from the request (we've handled it) */ 785 (void) nvlist_remove_all(request_nvl, 786 ikvx->ik_key_name); 787 } 788 } 789 790 if (kvrc == KV_HANDLED_NO_TRANSIT) { 791 return (kvrc); 792 } 793 794 return (idm_nvstat_to_kvstat(nvrc)); 795 } 796 797 /* 798 * iser_declare_key_values() declares the declarative key values for 799 * this connection. 800 */ 801 /* ARGSUSED */ 802 static kv_status_t 803 iser_declare_key_values(idm_conn_t *ic, nvlist_t *config_nvl, 804 nvlist_t *outgoing_nvl) 805 { 806 kv_status_t kvrc; 807 int nvrc = 0; 808 int rc; 809 uint64_t uint64_val; 810 811 if ((rc = nvlist_lookup_uint64(config_nvl, 812 ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, &uint64_val)) != ENOENT) { 813 ASSERT(rc == 0); 814 if (outgoing_nvl) { 815 nvrc = nvlist_add_uint64(outgoing_nvl, 816 ISER_KV_KEY_NAME_MAX_OUTSTANDING_PDU, uint64_val); 817 } 818 } 819 kvrc = idm_nvstat_to_kvstat(nvrc); 820 return (kvrc); 821 } 822 823 /* 824 * iser_notice_key_values() activates the negotiated key values for 825 * this connection. 826 */ 827 static void 828 iser_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl) 829 { 830 iser_conn_t *iser_conn; 831 boolean_t boolean_val; 832 uint64_t uint64_val; 833 int nvrc; 834 char *digest_choice_string; 835 836 iser_conn = (iser_conn_t *)ic->ic_transport_private; 837 838 /* 839 * Validate the final negotiated operational parameters, 840 * and save a copy. 841 */ 842 if ((nvrc = nvlist_lookup_string(negotiated_nvl, 843 "HeaderDigest", &digest_choice_string)) != ENOENT) { 844 ASSERT(nvrc == 0); 845 846 /* 847 * Per the iSER RFC, override the negotiated value with "None" 848 */ 849 iser_conn->ic_op_params.op_header_digest = B_FALSE; 850 } 851 852 if ((nvrc = nvlist_lookup_string(negotiated_nvl, 853 "DataDigest", &digest_choice_string)) != ENOENT) { 854 ASSERT(nvrc == 0); 855 856 /* 857 * Per the iSER RFC, override the negotiated value with "None" 858 */ 859 iser_conn->ic_op_params.op_data_digest = B_FALSE; 860 } 861 862 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 863 "RDMAExtensions", &boolean_val)) != ENOENT) { 864 ASSERT(nvrc == 0); 865 iser_conn->ic_op_params.op_rdma_extensions = boolean_val; 866 } 867 868 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 869 "OFMarker", &boolean_val)) != ENOENT) { 870 ASSERT(nvrc == 0); 871 /* 872 * Per the iSER RFC, override the negotiated value with "No" 873 */ 874 iser_conn->ic_op_params.op_ofmarker = B_FALSE; 875 } 876 877 if ((nvrc = nvlist_lookup_boolean_value(negotiated_nvl, 878 "IFMarker", &boolean_val)) != ENOENT) { 879 ASSERT(nvrc == 0); 880 /* 881 * Per the iSER RFC, override the negotiated value with "No" 882 */ 883 iser_conn->ic_op_params.op_ifmarker = B_FALSE; 884 } 885 886 if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, 887 "TargetRecvDataSegmentLength", &uint64_val)) != ENOENT) { 888 ASSERT(nvrc == 0); 889 iser_conn->ic_op_params.op_target_recv_data_segment_length = 890 uint64_val; 891 } 892 893 if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, 894 "InitiatorRecvDataSegmentLength", &uint64_val)) != ENOENT) { 895 ASSERT(nvrc == 0); 896 iser_conn->ic_op_params.op_initiator_recv_data_segment_length = 897 uint64_val; 898 } 899 900 if ((nvrc = nvlist_lookup_uint64(negotiated_nvl, 901 "MaxOutstandingUnexpectedPDUs", &uint64_val)) != ENOENT) { 902 ASSERT(nvrc == 0); 903 iser_conn->ic_op_params.op_max_outstanding_unexpected_pdus = 904 uint64_val; 905 } 906 907 /* Test boolean values which are required by RFC 5046 */ 908 #ifdef ISER_DEBUG 909 ASSERT(iser_conn->ic_op_params.op_rdma_extensions == B_TRUE); 910 ASSERT(iser_conn->ic_op_params.op_header_digest == B_FALSE); 911 ASSERT(iser_conn->ic_op_params.op_data_digest == B_FALSE); 912 ASSERT(iser_conn->ic_op_params.op_ofmarker == B_FALSE); 913 ASSERT(iser_conn->ic_op_params.op_ifmarker == B_FALSE); 914 #endif 915 } 916 917 918 /* 919 * iser_conn_is_capable() verifies that the passed connection is provided 920 * for by an iSER-capable link. 921 * NOTE: When utilizing InfiniBand RC as an RCaP, this routine will check 922 * if the link is on IPoIB. This only indicates a chance that the link is 923 * on an RCaP, and thus iSER-capable, since we may be running on an IB-Eth 924 * gateway, or other IB but non-RCaP link. Rather than fully establishing the 925 * link to verify RCaP here, we instead will return B_TRUE 926 * indicating the link is iSER-capable, if the link is IPoIB. If then in 927 * iser_ini_conn_create() the link proves not be RCaP, IDM will fall back 928 * to using the IDM Sockets transport. 929 */ 930 /* ARGSUSED */ 931 static boolean_t 932 iser_conn_is_capable(idm_conn_req_t *cr, idm_transport_caps_t *caps) 933 { 934 /* A NULL value for laddr indicates implicit source */ 935 return (iser_path_exists(NULL, &cr->cr_ini_dst_addr)); 936 } 937 938 /* 939 * iser_pdu_tx() transmits a Control PDU via the iSER channel. We pull the 940 * channel out of the idm_conn_t passed in, and pass it and the pdu to the 941 * iser_xfer routine. 942 */ 943 static void 944 iser_pdu_tx(idm_conn_t *ic, idm_pdu_t *pdu) 945 { 946 iser_conn_t *iser_conn; 947 iser_status_t iser_status; 948 949 iser_conn = (iser_conn_t *)ic->ic_transport_private; 950 951 iser_status = iser_xfer_ctrlpdu(iser_conn->ic_chan, pdu); 952 if (iser_status != ISER_STATUS_SUCCESS) { 953 ISER_LOG(CE_WARN, "iser_pdu_tx: failed iser_xfer_ctrlpdu: " 954 "ic (0x%p) pdu (0x%p)", (void *) ic, (void *) pdu); 955 /* Fail this PDU transmission */ 956 idm_pdu_complete(pdu, IDM_STATUS_FAIL); 957 } 958 959 /* 960 * We successfully posted this PDU for transmission. 961 * The completion handler will invoke idm_pdu_complete() 962 * with the completion status. See iser_cq.c for more 963 * information. 964 */ 965 } 966 967 /* 968 * iser_buf_tx_to_ini() transmits the data buffer encoded in idb to the 969 * initiator to fulfill SCSI Read commands. An iser_xfer routine is invoked 970 * to implement the RDMA operations. 971 * 972 * Caller holds idt->idt_mutex. 973 */ 974 static idm_status_t 975 iser_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb) 976 { 977 iser_status_t iser_status; 978 idm_status_t idm_status = IDM_STATUS_SUCCESS; 979 980 ASSERT(mutex_owned(&idt->idt_mutex)); 981 982 iser_status = iser_xfer_buf_to_ini(idt, idb); 983 984 if (iser_status != ISER_STATUS_SUCCESS) { 985 ISER_LOG(CE_WARN, "iser_buf_tx_to_ini: failed " 986 "iser_xfer_buf_to_ini: idt (0x%p) idb (0x%p)", 987 (void *) idt, (void *) idb); 988 idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED); 989 return (IDM_STATUS_FAIL); 990 } 991 992 /* 993 * iSCSIt's Data Completion Notify callback is invoked from 994 * the Work Request Send completion Handler 995 */ 996 997 mutex_exit(&idt->idt_mutex); 998 return (idm_status); 999 } 1000 1001 /* 1002 * iser_buf_tx_from_ini() transmits data from the initiator into the buffer 1003 * in idb to fulfill SCSI Write commands. An iser_xfer routine is invoked 1004 * to implement the RDMA operations. 1005 * 1006 * Caller holds idt->idt_mutex. 1007 */ 1008 static idm_status_t 1009 iser_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb) 1010 { 1011 iser_status_t iser_status; 1012 idm_status_t idm_status = IDM_STATUS_SUCCESS; 1013 1014 ASSERT(mutex_owned(&idt->idt_mutex)); 1015 1016 iser_status = iser_xfer_buf_from_ini(idt, idb); 1017 1018 if (iser_status != ISER_STATUS_SUCCESS) { 1019 ISER_LOG(CE_WARN, "iser_buf_rx_from_ini: failed " 1020 "iser_xfer_buf_from_ini: idt (0x%p) idb (0x%p)", 1021 (void *) idt, (void *) idb); 1022 idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED); 1023 return (IDM_STATUS_FAIL); 1024 } 1025 1026 /* 1027 * iSCSIt's Data Completion Notify callback is invoked from 1028 * the Work Request Send completion Handler 1029 */ 1030 1031 mutex_exit(&idt->idt_mutex); 1032 return (idm_status); 1033 } 1034 1035 /* 1036 * iser_buf_alloc() allocates a buffer and registers it with the IBTF for 1037 * use with iSER. Each HCA has it's own kmem cache for establishing a pool 1038 * of registered buffers, when once initially allocated, will remain 1039 * registered with the HCA. This routine is invoked only on the target, 1040 * where we have the requirement to pre-allocate buffers for the upper layers. 1041 * Note: buflen is compared to ISER_DEFAULT_BUFLEN, and allocation is failed 1042 * if the requested buflen is larger than our default. 1043 */ 1044 /* ARGSUSED */ 1045 static idm_status_t 1046 iser_buf_alloc(idm_buf_t *idb, uint64_t buflen) 1047 { 1048 iser_conn_t *iser_conn; 1049 iser_hca_t *iser_hca; 1050 iser_buf_t *iser_buf; 1051 1052 if (buflen > ISER_DEFAULT_BUFLEN) { 1053 return (IDM_STATUS_FAIL); 1054 } 1055 1056 iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private; 1057 iser_hca = iser_conn->ic_chan->ic_hca; 1058 1059 /* 1060 * Allocate a buffer from this HCA's cache. Once initialized, these 1061 * will remain allocated and registered (see above). 1062 */ 1063 iser_buf = kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP); 1064 if (iser_buf == NULL) { 1065 ISER_LOG(CE_NOTE, "iser_buf_alloc: alloc failed"); 1066 return (IDM_STATUS_FAIL); 1067 } 1068 1069 /* Set the allocated data buffer pointer in the IDM buf handle */ 1070 idb->idb_buf = iser_buf->buf; 1071 1072 /* Set the private buf and reg handles in the IDM buf handle */ 1073 idb->idb_buf_private = (void *)iser_buf; 1074 idb->idb_reg_private = (void *)iser_buf->iser_mr; 1075 1076 return (IDM_STATUS_SUCCESS); 1077 } 1078 1079 /* 1080 * iser_buf_free() frees the buffer handle passed in. Note that the cached 1081 * kmem object has an HCA-registered buffer in it which will not be freed. 1082 * This allows us to build up a cache of pre-allocated and registered 1083 * buffers for use on the target. 1084 */ 1085 static void 1086 iser_buf_free(idm_buf_t *buf) 1087 { 1088 iser_buf_t *iser_buf; 1089 1090 iser_buf = buf->idb_buf_private; 1091 kmem_cache_free(iser_buf->cache, iser_buf); 1092 } 1093 1094 /* 1095 * iser_buf_setup() is invoked on the initiator in order to register memory 1096 * on demand for use with the iSER layer. 1097 */ 1098 static idm_status_t 1099 iser_buf_setup(idm_buf_t *idb) 1100 { 1101 iser_conn_t *iser_conn; 1102 iser_chan_t *iser_chan; 1103 iser_hca_t *iser_hca; 1104 iser_buf_t *iser_buf; 1105 int status; 1106 1107 ASSERT(idb->idb_buf != NULL); 1108 1109 iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private; 1110 ASSERT(iser_conn != NULL); 1111 1112 iser_hca = iser_conn->ic_chan->ic_hca; 1113 1114 iser_chan = iser_conn->ic_chan; 1115 ASSERT(iser_chan != NULL); 1116 1117 /* 1118 * Memory registration is known to be slow, so for small 1119 * transfers, use pre-registered memory buffers and just 1120 * copy the data into/from them at the appropriate time 1121 */ 1122 if (idb->idb_buflen < ISER_BCOPY_THRESHOLD) { 1123 iser_buf = 1124 kmem_cache_alloc(iser_hca->iser_buf_cache, KM_NOSLEEP); 1125 1126 if (iser_buf == NULL) { 1127 1128 /* Fail over to dynamic registration */ 1129 status = iser_reg_rdma_mem(iser_chan->ic_hca, idb); 1130 idb->idb_bufalloc = B_FALSE; 1131 return (status); 1132 } 1133 1134 /* 1135 * Set the allocated data buffer pointer in the IDM buf handle 1136 * Data is to be copied from/to this buffer using bcopy 1137 */ 1138 idb->idb_bufptr = idb->idb_buf; 1139 idb->idb_bufbcopy = B_TRUE; 1140 1141 idb->idb_buf = iser_buf->buf; 1142 1143 /* Set the private buf and reg handles in the IDM buf handle */ 1144 idb->idb_buf_private = (void *)iser_buf; 1145 idb->idb_reg_private = (void *)iser_buf->iser_mr; 1146 1147 /* Ensure bufalloc'd flag is set */ 1148 idb->idb_bufalloc = B_TRUE; 1149 1150 return (IDM_STATUS_SUCCESS); 1151 1152 } else { 1153 1154 /* Dynamically register the memory passed in on the idb */ 1155 status = iser_reg_rdma_mem(iser_chan->ic_hca, idb); 1156 1157 /* Ensure bufalloc'd flag is unset */ 1158 idb->idb_bufalloc = B_FALSE; 1159 1160 return (status); 1161 } 1162 } 1163 1164 /* 1165 * iser_buf_teardown() is invoked on the initiator in order to register memory 1166 * on demand for use with the iSER layer. 1167 */ 1168 static void 1169 iser_buf_teardown(idm_buf_t *idb) 1170 { 1171 iser_conn_t *iser_conn; 1172 1173 iser_conn = (iser_conn_t *)idb->idb_ic->ic_transport_private; 1174 1175 /* Deregister the memory passed in on the idb */ 1176 iser_dereg_rdma_mem(iser_conn->ic_chan->ic_hca, idb); 1177 } 1178