/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
/*
 * Sun elects to include this software in Sun product
 * under the OpenIB BSD license.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
73 */ 74 75 #include <sys/ib/clients/rds/rdsib_cm.h> 76 #include <sys/ib/clients/rds/rdsib_ib.h> 77 #include <sys/ib/clients/rds/rdsib_buf.h> 78 #include <sys/ib/clients/rds/rdsib_ep.h> 79 80 /* 81 * This file contains CM related work: 82 * 83 * Service registration/deregistration 84 * Path lookup 85 * CM connection callbacks 86 * CM active and passive connection establishment 87 * Connection failover 88 */ 89 90 #define SRCIP src_addr.un.ip4addr 91 #define DSTIP dst_addr.un.ip4addr 92 93 /* 94 * Handle an incoming CM REQ 95 */ 96 /* ARGSUSED */ 97 static ibt_cm_status_t 98 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 99 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 100 { 101 ibt_cm_req_rcv_t *reqp; 102 ib_gid_t lgid, rgid; 103 rds_cm_private_data_t cmp; 104 rds_session_t *sp; 105 rds_ep_t *ep; 106 ibt_channel_hdl_t chanhdl; 107 ibt_ip_cm_info_t ipcm_info; 108 uint8_t save_state, save_type; 109 int ret; 110 111 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 112 113 reqp = &evp->cm_event.req; 114 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 115 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 116 117 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 118 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 119 120 /* 121 * CM private data brings IP information 122 * Private data received is a stream of bytes and may not be properly 123 * aligned. So, bcopy the data onto the stack before accessing it. 
124 */ 125 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 126 sizeof (rds_cm_private_data_t)); 127 128 /* extract the CM IP info */ 129 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data, 130 &ipcm_info); 131 if (ret != IBT_SUCCESS) { 132 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d", 133 ret); 134 return (IBT_CM_REJECT); 135 } 136 137 RDS_DPRINTF2("rds_handle_cm_req", 138 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 139 ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype); 140 141 if (cmp.cmp_version != RDS_VERSION) { 142 RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d " 143 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 144 return (IBT_CM_REJECT); 145 } 146 147 /* RDS supports V4 addresses only */ 148 if ((ipcm_info.src_addr.family != AF_INET) || 149 (ipcm_info.dst_addr.family != AF_INET)) { 150 RDS_DPRINTF2(LABEL, "Unsupported Address Family: " 151 "src: %d dst: %d", ipcm_info.src_addr.family, 152 ipcm_info.dst_addr.family); 153 return (IBT_CM_REJECT); 154 } 155 156 if (cmp.cmp_arch != RDS_THIS_ARCH) { 157 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 158 cmp.cmp_arch, RDS_THIS_ARCH); 159 return (IBT_CM_REJECT); 160 } 161 162 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 163 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 164 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 165 return (IBT_CM_REJECT); 166 } 167 168 /* user_buffer_size should be same on all nodes */ 169 if (cmp.cmp_user_buffer_size != UserBufferSize) { 170 RDS_DPRINTF2(LABEL, 171 "UserBufferSize Mismatch, this node: %d remote node: %d", 172 UserBufferSize, cmp.cmp_user_buffer_size); 173 return (IBT_CM_REJECT); 174 } 175 176 /* 177 * RDS needs more time to process a failover REQ so send an MRA. 178 * Otherwise, the remote may retry the REQ and fail the connection. 
179 */ 180 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 181 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 182 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 183 10000000 /* 10 sec */, NULL, 0); 184 } 185 186 /* Is there a session to the destination node? */ 187 rw_enter(&statep->rds_sessionlock, RW_READER); 188 sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid); 189 rw_exit(&statep->rds_sessionlock); 190 191 if (sp == NULL) { 192 /* 193 * currently there is no session to the destination 194 * remote ip in the private data is the local ip and vice 195 * versa 196 */ 197 sp = rds_session_create(statep, ipcm_info.DSTIP, 198 ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE); 199 if (sp == NULL) { 200 /* Check the list anyway. */ 201 rw_enter(&statep->rds_sessionlock, RW_READER); 202 sp = rds_session_lkup(statep, ipcm_info.SRCIP, 203 rgid.gid_guid); 204 rw_exit(&statep->rds_sessionlock); 205 if (sp == NULL) { 206 /* 207 * The only way this can fail is due to lack 208 * of kernel resources 209 */ 210 return (IBT_CM_REJECT); 211 } 212 } 213 } 214 215 rw_enter(&sp->session_lock, RW_WRITER); 216 217 /* catch peer-to-peer case as soon as possible */ 218 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 219 (sp->session_state == RDS_SESSION_STATE_INIT)) { 220 /* Check possible peer-to-peer case here */ 221 if (sp->session_type != RDS_SESSION_PASSIVE) { 222 RDS_DPRINTF2("rds_handle_cm_req", 223 "SP(%p) Peer-peer connection handling", sp); 224 if (lgid.gid_guid > rgid.gid_guid) { 225 /* this node is active so reject this request */ 226 rw_exit(&sp->session_lock); 227 return (IBT_CM_REJECT); 228 } else { 229 /* this node is passive, change the session */ 230 sp->session_type = RDS_SESSION_PASSIVE; 231 sp->session_lgid = lgid; 232 sp->session_rgid = rgid; 233 } 234 } 235 } 236 237 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 238 save_state = sp->session_state; 239 save_type = sp->session_type; 240 
241 switch (sp->session_state) { 242 case RDS_SESSION_STATE_CONNECTED: 243 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 244 sp->session_state = RDS_SESSION_STATE_ERROR; 245 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 246 "RDS_SESSION_STATE_ERROR", sp); 247 248 /* FALLTHRU */ 249 case RDS_SESSION_STATE_ERROR: 250 case RDS_SESSION_STATE_PASSIVE_CLOSING: 251 /* 252 * Some other thread must be processing this session, 253 * this thread must wait until the other thread finishes. 254 */ 255 sp->session_type = RDS_SESSION_PASSIVE; 256 rw_exit(&sp->session_lock); 257 258 /* Handling this will take some time, so send an MRA */ 259 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 260 10000000 /* 10 sec */, NULL, 0); 261 262 /* 263 * Any pending completions don't get flushed until the channel 264 * is closed. So, passing 0 here will not wait for pending 265 * completions in rds_session_close before closing the channel 266 */ 267 rds_session_close(sp, IBT_NOCALLBACKS, 0); 268 269 rw_enter(&sp->session_lock, RW_WRITER); 270 271 /* 272 * If the session was in ERROR, then either a failover thread 273 * or event_failure thread would be processing this session. 274 * This thread should wait for event_failure thread to 275 * complete. This need not wait for failover thread. 276 */ 277 if ((save_state != RDS_SESSION_STATE_CONNECTED) && 278 (save_type == RDS_SESSION_PASSIVE)) { 279 /* 280 * The other thread is event_failure thread, 281 * wait until it finishes. 
282 */ 283 while (!((sp->session_state == 284 RDS_SESSION_STATE_FAILED) || 285 (sp->session_state == 286 RDS_SESSION_STATE_FINI))) { 287 rw_exit(&sp->session_lock); 288 delay(drv_usectohz(1000000)); 289 rw_enter(&sp->session_lock, RW_WRITER); 290 } 291 } 292 293 /* move the session to init state */ 294 if ((sp->session_state == RDS_SESSION_STATE_ERROR) || 295 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)) { 296 ret = rds_session_reinit(sp, lgid); 297 sp->session_myip = ipcm_info.DSTIP; 298 sp->session_lgid = lgid; 299 sp->session_rgid = rgid; 300 if (ret != 0) { 301 rds_session_fini(sp); 302 sp->session_state = RDS_SESSION_STATE_FAILED; 303 RDS_DPRINTF3("rds_handle_cm_req", 304 "SP(%p) State RDS_SESSION_STATE_FAILED", 305 sp); 306 rw_exit(&sp->session_lock); 307 return (IBT_CM_REJECT); 308 } else { 309 sp->session_state = RDS_SESSION_STATE_INIT; 310 RDS_DPRINTF3("rds_handle_cm_req", 311 "SP(%p) State RDS_SESSION_STATE_INIT", sp); 312 } 313 314 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 315 ep = &sp->session_ctrlep; 316 } else { 317 ep = &sp->session_dataep; 318 } 319 break; 320 } 321 322 /* FALLTHRU */ 323 case RDS_SESSION_STATE_CREATED: 324 case RDS_SESSION_STATE_FAILED: 325 case RDS_SESSION_STATE_FINI: 326 /* 327 * Initialize both channels, we accept this connection 328 * only if both channels are initialized 329 */ 330 sp->session_type = RDS_SESSION_PASSIVE; 331 sp->session_lgid = lgid; 332 sp->session_rgid = rgid; 333 sp->session_state = RDS_SESSION_STATE_CREATED; 334 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 335 "RDS_SESSION_STATE_CREATED", sp); 336 ret = rds_session_init(sp); 337 if (ret != 0) { 338 /* Seems like there are not enough resources */ 339 sp->session_state = RDS_SESSION_STATE_FAILED; 340 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 341 "RDS_SESSION_STATE_FAILED", sp); 342 rw_exit(&sp->session_lock); 343 return (IBT_CM_REJECT); 344 } 345 sp->session_state = RDS_SESSION_STATE_INIT; 346 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) 
State " 347 "RDS_SESSION_STATE_INIT", sp); 348 349 /* FALLTHRU */ 350 case RDS_SESSION_STATE_INIT: 351 /* 352 * When re-using an existing session, make sure the 353 * session is still through the same HCA. Otherwise, the 354 * memory registrations have to moved to the new HCA. 355 */ 356 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 357 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 358 RDS_DPRINTF2("rds_handle_cm_req", 359 "Existing Session but different gid " 360 "existing: 0x%llx, new: 0x%llx, " 361 "sending an MRA", 362 sp->session_lgid.gid_guid, lgid.gid_guid); 363 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 364 evp->cm_session_id, 10000000 /* 10 sec */, 365 NULL, 0); 366 ret = rds_session_reinit(sp, lgid); 367 if (ret != 0) { 368 rds_session_fini(sp); 369 sp->session_state = 370 RDS_SESSION_STATE_FAILED; 371 sp->session_failover = 0; 372 RDS_DPRINTF3("rds_failover_session", 373 "SP(%p) State " 374 "RDS_SESSION_STATE_FAILED", sp); 375 rw_exit(&sp->session_lock); 376 return (IBT_CM_REJECT); 377 } 378 } 379 ep = &sp->session_dataep; 380 } else { 381 ep = &sp->session_ctrlep; 382 } 383 384 break; 385 default: 386 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 387 "state: %d", sp, sp->session_state); 388 rw_exit(&sp->session_lock); 389 return (IBT_CM_REJECT); 390 } 391 392 sp->session_failover = 0; /* reset any previous value */ 393 if (cmp.cmp_failover) { 394 RDS_DPRINTF2("rds_handle_cm_req", 395 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 396 sp->session_failover = 1; 397 } 398 399 mutex_enter(&ep->ep_lock); 400 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 401 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 402 sp->session_type = RDS_SESSION_PASSIVE; 403 rw_exit(&sp->session_lock); 404 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 405 rw_exit(&sp->session_lock); 406 /* 407 * Peer to peer connection. There is an active 408 * connection pending on this ep. 
The one with 409 * greater port guid becomes active and the 410 * other becomes passive. 411 */ 412 RDS_DPRINTF2("rds_handle_cm_req", 413 "EP(%p) Peer-peer connection handling", ep); 414 if (lgid.gid_guid > rgid.gid_guid) { 415 /* this node is active so reject this request */ 416 mutex_exit(&ep->ep_lock); 417 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 418 "Rejecting passive in favor of active", sp, ep); 419 return (IBT_CM_REJECT); 420 } else { 421 /* 422 * This session is not the active end, change it 423 * to passive end. 424 */ 425 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 426 427 rw_enter(&sp->session_lock, RW_WRITER); 428 sp->session_type = RDS_SESSION_PASSIVE; 429 sp->session_lgid = lgid; 430 sp->session_rgid = rgid; 431 rw_exit(&sp->session_lock); 432 } 433 } else { 434 rw_exit(&sp->session_lock); 435 } 436 437 ep->ep_lbufid = cmp.cmp_last_bufid; 438 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 439 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 440 cmp.cmp_last_bufid = ep->ep_rbufid; 441 cmp.cmp_ack_addr = ep->ep_ack_addr; 442 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 443 mutex_exit(&ep->ep_lock); 444 445 /* continue with accepting the connection request for this channel */ 446 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 447 if (chanhdl == NULL) { 448 mutex_enter(&ep->ep_lock); 449 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 450 mutex_exit(&ep->ep_lock); 451 return (IBT_CM_REJECT); 452 } 453 454 /* pre-post recv buffers in the RQ */ 455 rds_post_recv_buf((void *)chanhdl); 456 457 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 458 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 459 rargsp->cm_ret.rep.cm_channel = chanhdl; 460 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 461 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 462 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 463 464 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 465 sp, ep, chanhdl); 466 467 return (IBT_CM_ACCEPT); 468 } 
469 470 /* 471 * Handle an incoming CM REP 472 * Pre-post recv buffers for the QP 473 */ 474 /* ARGSUSED */ 475 static ibt_cm_status_t 476 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 477 void *rcmp, ibt_priv_data_len_t rcmp_len) 478 { 479 rds_ep_t *ep; 480 rds_cm_private_data_t cmp; 481 482 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 483 484 /* pre-post recv buffers in the RQ */ 485 rds_post_recv_buf((void *)evp->cm_channel); 486 487 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 488 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 489 sizeof (rds_cm_private_data_t)); 490 ep->ep_lbufid = cmp.cmp_last_bufid; 491 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 492 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 493 494 rargsp->cm_ret_len = 0; 495 496 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 497 498 return (IBT_CM_ACCEPT); 499 } 500 501 /* 502 * Handle CONN EST 503 */ 504 static ibt_cm_status_t 505 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 506 { 507 rds_session_t *sp; 508 rds_ep_t *ep; 509 510 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 511 512 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 513 ep->ep_state); 514 515 mutex_enter(&ep->ep_lock); 516 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 517 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 518 ep->ep_state = RDS_EP_STATE_CONNECTED; 519 ep->ep_chanhdl = evp->cm_channel; 520 sp = ep->ep_sp; 521 mutex_exit(&ep->ep_lock); 522 523 (void) rds_session_active(sp); 524 525 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 526 return (IBT_CM_ACCEPT); 527 } 528 529 /* 530 * Handle CONN CLOSED 531 */ 532 static ibt_cm_status_t 533 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 534 { 535 rds_ep_t *ep; 536 rds_session_t *sp; 537 538 /* Catch DREQs but ignore DREPs */ 539 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 540 RDS_DPRINTF2("rds_handle_cm_conn_closed", 541 "Ignoring Event: %d 
received", evp->cm_event.closed); 542 return (IBT_CM_ACCEPT); 543 } 544 545 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 546 sp = ep->ep_sp; 547 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter", 548 ep, evp->cm_channel); 549 550 mutex_enter(&ep->ep_lock); 551 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 552 /* Ignore this DREQ */ 553 RDS_DPRINTF2("rds_handle_cm_conn_closed", 554 "EP(%p) not connected, state: %d", ep, ep->ep_state); 555 mutex_exit(&ep->ep_lock); 556 return (IBT_CM_ACCEPT); 557 } 558 ep->ep_state = RDS_EP_STATE_CLOSING; 559 mutex_exit(&ep->ep_lock); 560 561 rw_enter(&sp->session_lock, RW_WRITER); 562 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 563 sp->session_state); 564 565 switch (sp->session_state) { 566 case RDS_SESSION_STATE_CONNECTED: 567 case RDS_SESSION_STATE_HCA_CLOSING: 568 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 569 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 570 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 571 break; 572 573 case RDS_SESSION_STATE_PASSIVE_CLOSING: 574 sp->session_state = RDS_SESSION_STATE_CLOSED; 575 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 576 "RDS_SESSION_STATE_CLOSED", sp); 577 rds_passive_session_fini(sp); 578 sp->session_state = RDS_SESSION_STATE_FINI; 579 RDS_DPRINTF3("rds_handle_cm_conn_closed", 580 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 581 break; 582 583 case RDS_SESSION_STATE_ACTIVE_CLOSING: 584 case RDS_SESSION_STATE_ERROR: 585 case RDS_SESSION_STATE_CLOSED: 586 break; 587 588 case RDS_SESSION_STATE_INIT: 589 sp->session_state = RDS_SESSION_STATE_ERROR; 590 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 591 "RDS_SESSION_STATE_ERROR", sp); 592 rds_passive_session_fini(sp); 593 sp->session_state = RDS_SESSION_STATE_FAILED; 594 RDS_DPRINTF3("rds_handle_cm_conn_closed", 595 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 596 break; 597 598 default: 599 RDS_DPRINTF2("rds_handle_cm_conn_closed", 600 "SP(%p) - 
Unexpected state: %d", sp, sp->session_state); 601 rds_passive_session_fini(sp); 602 sp->session_state = RDS_SESSION_STATE_FAILED; 603 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 604 "RDS_SESSION_STATE_FAILED", sp); 605 } 606 rw_exit(&sp->session_lock); 607 608 mutex_enter(&ep->ep_lock); 609 ep->ep_state = RDS_EP_STATE_CLOSED; 610 mutex_exit(&ep->ep_lock); 611 612 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 613 return (IBT_CM_ACCEPT); 614 } 615 616 /* 617 * Handle EVENT FAILURE 618 */ 619 static ibt_cm_status_t 620 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 621 { 622 rds_ep_t *ep; 623 rds_session_t *sp; 624 int ret; 625 626 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 627 "Code: %d msg: %d reason: %d", evp->cm_channel, 628 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 629 evp->cm_event.failed.cf_reason); 630 631 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 632 RDS_DPRINTF2(LABEL, 633 "Received REJ with reason IBT_CM_INVALID_SID: " 634 "RDS may not be loaded on the remote system"); 635 } 636 637 if (evp->cm_channel == NULL) { 638 return (IBT_CM_ACCEPT); 639 } 640 641 if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) && 642 (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) { 643 /* 644 * This end is active, just ignore, ibt_open_rc_channel() 645 * caller will take care of cleanup. 
646 */ 647 RDS_DPRINTF2("rds_handle_cm_event_failure", 648 "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel); 649 return (IBT_CM_ACCEPT); 650 } 651 652 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 653 sp = ep->ep_sp; 654 655 rw_enter(&sp->session_lock, RW_WRITER); 656 if (sp->session_type == RDS_SESSION_PASSIVE) { 657 RDS_DPRINTF2("rds_handle_cm_event_failure", 658 "SP(%p) - state: %d", sp, sp->session_state); 659 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 660 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 661 sp->session_state = RDS_SESSION_STATE_ERROR; 662 RDS_DPRINTF3("rds_handle_cm_event_failure", 663 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 664 665 /* 666 * Store the cm_channel for freeing later 667 * Active side frees it on ibt_open_rc_channel 668 * failure 669 */ 670 if (ep->ep_chanhdl == NULL) { 671 ep->ep_chanhdl = evp->cm_channel; 672 } 673 rw_exit(&sp->session_lock); 674 675 /* 676 * rds_passive_session_fini should not be called 677 * directly in the CM handler. It will cause a deadlock. 678 */ 679 ret = ddi_taskq_dispatch(rds_taskq, 680 rds_cleanup_passive_session, (void *)sp, 681 DDI_NOSLEEP); 682 if (ret != DDI_SUCCESS) { 683 RDS_DPRINTF2("rds_handle_cm_event_failure", 684 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 685 } 686 return (IBT_CM_ACCEPT); 687 } 688 } 689 rw_exit(&sp->session_lock); 690 691 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 692 return (IBT_CM_ACCEPT); 693 } 694 695 /* 696 * CM Handler 697 * 698 * Called by IBCM 699 * The cm_private type differs for active and passive events. 
700 */ 701 ibt_cm_status_t 702 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 703 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 704 ibt_priv_data_len_t ret_len_max) 705 { 706 ibt_cm_status_t ret = IBT_CM_ACCEPT; 707 708 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 709 710 switch (eventp->cm_type) { 711 case IBT_CM_EVENT_REQ_RCV: 712 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 713 ret_args, ret_priv_data, ret_len_max); 714 break; 715 case IBT_CM_EVENT_REP_RCV: 716 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 717 ret_len_max); 718 break; 719 case IBT_CM_EVENT_MRA_RCV: 720 /* Not supported */ 721 break; 722 case IBT_CM_EVENT_CONN_EST: 723 ret = rds_handle_cm_conn_est(eventp); 724 break; 725 case IBT_CM_EVENT_CONN_CLOSED: 726 ret = rds_handle_cm_conn_closed(eventp); 727 break; 728 case IBT_CM_EVENT_FAILURE: 729 ret = rds_handle_cm_event_failure(eventp); 730 break; 731 case IBT_CM_EVENT_LAP_RCV: 732 /* Not supported */ 733 RDS_DPRINTF2(LABEL, "LAP message received"); 734 break; 735 case IBT_CM_EVENT_APR_RCV: 736 /* Not supported */ 737 RDS_DPRINTF2(LABEL, "APR message received"); 738 break; 739 default: 740 break; 741 } 742 743 RDS_DPRINTF2("rds_cm_handler", "Return"); 744 745 return (ret); 746 } 747 748 /* This is based on OFED Linux RDS */ 749 #define RDS_PORT_NUM 6556 750 751 /* 752 * Register the wellknown service with service id: RDS_SERVICE_ID 753 * Incoming connection requests should arrive on this service id. 
754 */ 755 ibt_srv_hdl_t 756 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 757 { 758 ibt_srv_hdl_t srvhdl; 759 ibt_srv_desc_t srvdesc; 760 int ret; 761 762 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 763 764 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 765 srvdesc.sd_handler = rds_cm_handler; 766 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 767 768 /* 769 * This is the new service id as per: 770 * Annex A11: RDMA IP CM Service 771 */ 772 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 773 RDS_PORT_NUM); 774 ret = ibt_register_service(rds_ibhdl, &srvdesc, 775 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 776 if (ret != IBT_SUCCESS) { 777 RDS_DPRINTF2(LABEL, 778 "RDS Service (0x%llx) Registration Failed: %d", 779 rdsib_statep->rds_service_id, ret); 780 return (NULL); 781 } 782 783 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 784 return (srvhdl); 785 } 786 787 /* Bind the RDS service on all ports */ 788 int 789 rds_bind_service(rds_state_t *statep) 790 { 791 rds_hca_t *hcap; 792 ib_gid_t gid; 793 uint_t jx, nbinds = 0, nports = 0; 794 int ret; 795 796 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 797 798 rw_enter(&statep->rds_hca_lock, RW_READER); 799 800 hcap = statep->rds_hcalistp; 801 while (hcap != NULL) { 802 803 /* skip the HCAs that are not fully online */ 804 if ((hcap->hca_state != RDS_HCA_STATE_OPEN) && 805 (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED)) { 806 RDS_DPRINTF2("rds_bind_service", 807 "Skipping HCA: 0x%llx, state: %d", 808 hcap->hca_guid, hcap->hca_state); 809 hcap = hcap->hca_nextp; 810 continue; 811 } 812 813 /* currently, we have space for only 4 bindhdls */ 814 ASSERT(hcap->hca_nports < 4); 815 for (jx = 0; jx < hcap->hca_nports; jx++) { 816 nports++; 817 if (hcap->hca_pinfop[jx].p_linkstate != 818 IBT_PORT_ACTIVE) { 819 /* 820 * service bind will be called in the async 821 * handler when the port comes up. Clear any 822 * stale bind handle. 
823 */ 824 hcap->hca_bindhdl[jx] = NULL; 825 continue; 826 } 827 828 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 829 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 830 "gid: %llx:%llx", hcap->hca_guid, 831 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 832 gid.gid_guid); 833 834 /* pass statep as cm_private */ 835 ret = ibt_bind_service(statep->rds_srvhdl, gid, 836 NULL, statep, &hcap->hca_bindhdl[jx]); 837 if (ret != IBT_SUCCESS) { 838 RDS_DPRINTF2(LABEL, "Bind service for " 839 "HCA: 0x%llx Port: %d gid %llx:%llx " 840 "failed: %d", hcap->hca_guid, 841 hcap->hca_pinfop[jx].p_port_num, 842 gid.gid_prefix, gid.gid_guid, ret); 843 continue; 844 } 845 846 nbinds++; 847 } 848 hcap = hcap->hca_nextp; 849 } 850 851 rw_exit(&statep->rds_hca_lock); 852 853 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 854 nbinds, nports); 855 856 #if 0 857 if (nbinds == 0) { 858 return (-1); 859 } 860 #endif 861 862 RDS_DPRINTF2("rds_bind_service", "Return"); 863 864 return (0); 865 } 866 867 /* Open an RC connection */ 868 int 869 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 870 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 871 { 872 rds_session_t *sp; 873 ibt_chan_open_args_t ocargs; 874 ibt_rc_returns_t ocrets; 875 rds_cm_private_data_t cmp; 876 uint8_t hca_port; 877 ibt_channel_hdl_t hdl; 878 ibt_status_t ret = 0; 879 ibt_ip_cm_info_t ipcm_info; 880 881 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 882 883 sp = ep->ep_sp; 884 885 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 886 ipcm_info.src_addr.family = AF_INET; 887 ipcm_info.SRCIP = sp->session_myip; 888 ipcm_info.dst_addr.family = AF_INET; 889 ipcm_info.DSTIP = sp->session_remip; 890 ipcm_info.src_port = RDS_PORT_NUM; 891 ret = ibt_format_ip_private_data(&ipcm_info, 892 sizeof (rds_cm_private_data_t), &cmp); 893 if (ret != IBT_SUCCESS) { 894 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 895 "failed: %d", sp, ep, ret); 896 return (-1); 897 } 898 899 
hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 900 901 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 902 if (hdl == NULL) { 903 return (-1); 904 } 905 906 cmp.cmp_version = RDS_VERSION; 907 cmp.cmp_arch = RDS_THIS_ARCH; 908 cmp.cmp_eptype = ep->ep_type; 909 cmp.cmp_failover = sp->session_failover; 910 cmp.cmp_last_bufid = ep->ep_rbufid; 911 cmp.cmp_user_buffer_size = UserBufferSize; 912 cmp.cmp_ack_addr = ep->ep_ack_addr; 913 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 914 915 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 916 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 917 ocargs.oc_path = pinfo; 918 ocargs.oc_cm_handler = rds_cm_handler; 919 ocargs.oc_cm_clnt_private = NULL; 920 ocargs.oc_rdma_ra_out = 4; 921 ocargs.oc_rdma_ra_in = 4; 922 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 923 ocargs.oc_priv_data = &cmp; 924 ocargs.oc_path_retry_cnt = IBPathRetryCount; 925 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 926 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 927 mode, &ocargs, &ocrets); 928 if (ret != IBT_SUCCESS) { 929 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 930 "failed: %d", sp, ep, ret); 931 (void) ibt_flush_channel(hdl); 932 (void) ibt_free_channel(hdl); 933 934 mutex_enter(&ep->ep_lock); 935 /* don't cleanup if this failure is due to peer-peer race */ 936 if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 937 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 938 ep->ep_state = RDS_EP_STATE_ERROR; 939 rds_ep_free_rc_channel(ep); 940 } 941 mutex_exit(&ep->ep_lock); 942 943 return (-1); 944 } 945 946 *chanhdl = hdl; 947 948 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 949 *chanhdl); 950 951 return (0); 952 } 953 954 int 955 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 956 { 957 int ret; 958 959 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 960 chanhdl, mode); 961 962 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 963 964 
RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 965 966 return (ret); 967 } 968