1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #include <sys/ib/clients/rds/rdsib_cm.h> 76 #include <sys/ib/clients/rds/rdsib_ib.h> 77 #include <sys/ib/clients/rds/rdsib_buf.h> 78 #include <sys/ib/clients/rds/rdsib_ep.h> 79 80 /* 81 * This file contains CM related work: 82 * 83 * Service registration/deregistration 84 * Path lookup 85 * CM connection callbacks 86 * CM active and passive connection establishment 87 * Connection failover 88 */ 89 90 #define SRCIP src_addr.un.ip4addr 91 #define DSTIP dst_addr.un.ip4addr 92 93 /* 94 * Handle an incoming CM REQ 95 */ 96 /* ARGSUSED */ 97 static ibt_cm_status_t 98 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 99 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 100 { 101 ibt_cm_req_rcv_t *reqp; 102 ib_gid_t lgid, rgid; 103 rds_cm_private_data_t cmp; 104 rds_session_t *sp; 105 rds_ep_t *ep; 106 ibt_channel_hdl_t chanhdl; 107 ibt_ip_cm_info_t ipcm_info; 108 int ret; 109 110 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 111 112 reqp = &evp->cm_event.req; 113 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 114 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 115 116 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 117 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 118 119 /* validate service id */ 120 if (reqp->req_service_id == RDS_SERVICE_ID) { 121 RDS_DPRINTF2(LABEL, "Version Mismatch: Remote system " 122 "(GUID: 0x%llx) is running an older version of RDS", 123 rgid.gid_guid); 124 return (IBT_CM_REJECT); 125 } 126 127 /* 128 * CM private data brings IP information 129 * Private data received is a stream of bytes and may not be properly 130 * aligned. So, bcopy the data onto the stack before accessing it. 131 */ 132 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 133 sizeof (rds_cm_private_data_t)); 134 135 /* extract the CM IP info */ 136 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data, 137 &ipcm_info); 138 if (ret != IBT_SUCCESS) { 139 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d", 140 ret); 141 return (IBT_CM_REJECT); 142 } 143 144 RDS_DPRINTF2("rds_handle_cm_req", 145 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 146 ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype); 147 148 if (cmp.cmp_version != RDS_VERSION) { 149 RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d " 150 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 151 return (IBT_CM_REJECT); 152 } 153 154 /* RDS supports V4 addresses only */ 155 if ((ipcm_info.src_addr.family != AF_INET) || 156 (ipcm_info.dst_addr.family != AF_INET)) { 157 RDS_DPRINTF2(LABEL, "Unsupported Address Family: " 158 "src: %d dst: %d", ipcm_info.src_addr.family, 159 ipcm_info.dst_addr.family); 160 return (IBT_CM_REJECT); 161 } 162 163 if (cmp.cmp_arch != RDS_THIS_ARCH) { 164 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 165 cmp.cmp_arch, RDS_THIS_ARCH); 166 return (IBT_CM_REJECT); 167 } 168 169 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 170 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 171 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 172 return (IBT_CM_REJECT); 173 } 174 175 /* user_buffer_size should be same on all nodes */ 176 if (cmp.cmp_user_buffer_size != UserBufferSize) { 177 RDS_DPRINTF2(LABEL, 178 "UserBufferSize Mismatch, this node: %d remote node: %d", 179 UserBufferSize, cmp.cmp_user_buffer_size); 180 return (IBT_CM_REJECT); 181 } 182 183 /* 184 * RDS needs more time to process a failover REQ so send an MRA. 185 * Otherwise, the remote may retry the REQ and fail the connection. 186 */ 187 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 188 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 189 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 190 10000000 /* 10 sec */, NULL, 0); 191 } 192 193 /* Is there a session to the destination node? */ 194 rw_enter(&statep->rds_sessionlock, RW_READER); 195 sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid); 196 rw_exit(&statep->rds_sessionlock); 197 198 if (sp == NULL) { 199 /* 200 * currently there is no session to the destination 201 * remote ip in the private data is the local ip and vice 202 * versa 203 */ 204 sp = rds_session_create(statep, ipcm_info.DSTIP, 205 ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE); 206 if (sp == NULL) { 207 /* Check the list anyway. */ 208 rw_enter(&statep->rds_sessionlock, RW_READER); 209 sp = rds_session_lkup(statep, ipcm_info.SRCIP, 210 rgid.gid_guid); 211 rw_exit(&statep->rds_sessionlock); 212 if (sp == NULL) { 213 /* 214 * The only way this can fail is due to lack 215 * of kernel resources 216 */ 217 return (IBT_CM_REJECT); 218 } 219 } 220 } 221 222 rw_enter(&sp->session_lock, RW_WRITER); 223 224 /* catch peer-to-peer case as soon as possible */ 225 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 226 (sp->session_state == RDS_SESSION_STATE_INIT)) { 227 /* Check possible peer-to-peer case here */ 228 if (sp->session_type != RDS_SESSION_PASSIVE) { 229 RDS_DPRINTF2("rds_handle_cm_req", 230 "SP(%p) Peer-peer connection handling", sp); 231 if (lgid.gid_guid > rgid.gid_guid) { 232 /* this node is active so reject this request */ 233 rw_exit(&sp->session_lock); 234 return (IBT_CM_REJECT); 235 } else { 236 /* this node is passive, change the session */ 237 sp->session_type = RDS_SESSION_PASSIVE; 238 sp->session_lgid = lgid; 239 sp->session_rgid = rgid; 240 } 241 } 242 } 243 244 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 245 246 switch (sp->session_state) { 247 case RDS_SESSION_STATE_CONNECTED: 248 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 249 sp->session_state = RDS_SESSION_STATE_ERROR; 250 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 251 "RDS_SESSION_STATE_ERROR", sp); 252 253 /* FALLTHRU */ 254 case RDS_SESSION_STATE_ERROR: 255 case RDS_SESSION_STATE_PASSIVE_CLOSING: 256 sp->session_type = RDS_SESSION_PASSIVE; 257 rw_exit(&sp->session_lock); 258 259 /* Handling this will take some time, so send an MRA */ 260 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 261 10000000 /* 10 sec */, NULL, 0); 262 263 /* 264 * Any pending completions don't get flushed until the channel 265 * is closed. So, passing 0 here will not wait for pending 266 * completions in rds_session_close before closing the channel 267 */ 268 rds_session_close(sp, IBT_NOCALLBACKS, 0); 269 270 /* move the session to init state */ 271 rw_enter(&sp->session_lock, RW_WRITER); 272 ret = rds_session_reinit(sp, lgid); 273 sp->session_myip = ipcm_info.DSTIP; 274 sp->session_lgid = lgid; 275 sp->session_rgid = rgid; 276 if (ret != 0) { 277 rds_session_fini(sp); 278 sp->session_state = RDS_SESSION_STATE_FAILED; 279 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 280 "RDS_SESSION_STATE_FAILED", sp); 281 rw_exit(&sp->session_lock); 282 return (IBT_CM_REJECT); 283 } else { 284 sp->session_state = RDS_SESSION_STATE_INIT; 285 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 286 "RDS_SESSION_STATE_INIT", sp); 287 } 288 289 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 290 ep = &sp->session_ctrlep; 291 } else { 292 ep = &sp->session_dataep; 293 } 294 break; 295 case RDS_SESSION_STATE_CREATED: 296 case RDS_SESSION_STATE_FAILED: 297 case RDS_SESSION_STATE_FINI: 298 /* 299 * Initialize both channels, we accept this connection 300 * only if both channels are initialized 301 */ 302 sp->session_type = RDS_SESSION_PASSIVE; 303 sp->session_lgid = lgid; 304 sp->session_rgid = rgid; 305 sp->session_state = RDS_SESSION_STATE_CREATED; 306 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 307 "RDS_SESSION_STATE_CREATED", sp); 308 ret = rds_session_init(sp); 309 if (ret != 0) { 310 /* Seems like there are not enough resources */ 311 sp->session_state = RDS_SESSION_STATE_FAILED; 312 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 313 "RDS_SESSION_STATE_FAILED", sp); 314 rw_exit(&sp->session_lock); 315 return (IBT_CM_REJECT); 316 } 317 sp->session_state = RDS_SESSION_STATE_INIT; 318 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 319 "RDS_SESSION_STATE_INIT", sp); 320 321 /* FALLTHRU */ 322 case RDS_SESSION_STATE_INIT: 323 /* 324 * When re-using an existing session, make sure the 325 * session is still through the same HCA. Otherwise, the 326 * memory registrations have to moved to the new HCA. 327 */ 328 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 329 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 330 RDS_DPRINTF2("rds_handle_cm_req", 331 "Existing Session but different gid " 332 "existing: 0x%llx, new: 0x%llx, " 333 "sending an MRA", 334 sp->session_lgid.gid_guid, lgid.gid_guid); 335 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 336 evp->cm_session_id, 10000000 /* 10 sec */, 337 NULL, 0); 338 ret = rds_session_reinit(sp, lgid); 339 if (ret != 0) { 340 rds_session_fini(sp); 341 sp->session_state = 342 RDS_SESSION_STATE_FAILED; 343 sp->session_failover = 0; 344 RDS_DPRINTF3("rds_failover_session", 345 "SP(%p) State " 346 "RDS_SESSION_STATE_FAILED", sp); 347 rw_exit(&sp->session_lock); 348 return (IBT_CM_REJECT); 349 } 350 } 351 ep = &sp->session_dataep; 352 } else { 353 ep = &sp->session_ctrlep; 354 } 355 356 break; 357 default: 358 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 359 "state: %d", sp, sp->session_state); 360 rw_exit(&sp->session_lock); 361 return (IBT_CM_REJECT); 362 } 363 364 sp->session_failover = 0; /* reset any previous value */ 365 if (cmp.cmp_failover) { 366 RDS_DPRINTF2("rds_handle_cm_req", 367 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 368 sp->session_failover = 1; 369 } 370 371 mutex_enter(&ep->ep_lock); 372 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 373 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 374 sp->session_type = RDS_SESSION_PASSIVE; 375 rw_exit(&sp->session_lock); 376 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 377 rw_exit(&sp->session_lock); 378 /* 379 * Peer to peer connection. There is an active 380 * connection pending on this ep. The one with 381 * greater port guid becomes active and the 382 * other becomes passive. 383 */ 384 RDS_DPRINTF2("rds_handle_cm_req", 385 "EP(%p) Peer-peer connection handling", ep); 386 if (lgid.gid_guid > rgid.gid_guid) { 387 /* this node is active so reject this request */ 388 mutex_exit(&ep->ep_lock); 389 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 390 "Rejecting passive in favor of active", sp, ep); 391 return (IBT_CM_REJECT); 392 } else { 393 /* 394 * This session is not the active end, change it 395 * to passive end. 396 */ 397 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 398 399 rw_enter(&sp->session_lock, RW_WRITER); 400 sp->session_type = RDS_SESSION_PASSIVE; 401 sp->session_lgid = lgid; 402 sp->session_rgid = rgid; 403 rw_exit(&sp->session_lock); 404 } 405 } else { 406 rw_exit(&sp->session_lock); 407 } 408 409 ep->ep_lbufid = cmp.cmp_last_bufid; 410 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 411 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 412 cmp.cmp_last_bufid = ep->ep_rbufid; 413 cmp.cmp_ack_addr = ep->ep_ack_addr; 414 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 415 mutex_exit(&ep->ep_lock); 416 417 /* continue with accepting the connection request for this channel */ 418 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 419 if (chanhdl == NULL) { 420 mutex_enter(&ep->ep_lock); 421 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 422 mutex_exit(&ep->ep_lock); 423 return (IBT_CM_REJECT); 424 } 425 426 /* pre-post recv buffers in the RQ */ 427 rds_post_recv_buf((void *)chanhdl); 428 429 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 430 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 431 rargsp->cm_ret.rep.cm_channel = chanhdl; 432 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 433 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 434 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 435 436 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 437 sp, ep, chanhdl); 438 439 return (IBT_CM_ACCEPT); 440 } 441 442 /* 443 * Handle an incoming CM REP 444 * Pre-post recv buffers for the QP 445 */ 446 /* ARGSUSED */ 447 static ibt_cm_status_t 448 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 449 void *rcmp, ibt_priv_data_len_t rcmp_len) 450 { 451 rds_ep_t *ep; 452 rds_cm_private_data_t cmp; 453 454 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 455 456 /* pre-post recv buffers in the RQ */ 457 rds_post_recv_buf((void *)evp->cm_channel); 458 459 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 460 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 461 sizeof (rds_cm_private_data_t)); 462 ep->ep_lbufid = cmp.cmp_last_bufid; 463 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 464 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 465 466 rargsp->cm_ret_len = 0; 467 468 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 469 470 return (IBT_CM_ACCEPT); 471 } 472 473 /* 474 * Handle CONN EST 475 */ 476 static ibt_cm_status_t 477 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 478 { 479 rds_session_t *sp; 480 rds_ep_t *ep; 481 482 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 483 484 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 485 ep->ep_state); 486 487 mutex_enter(&ep->ep_lock); 488 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 489 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 490 ep->ep_state = RDS_EP_STATE_CONNECTED; 491 ep->ep_chanhdl = evp->cm_channel; 492 sp = ep->ep_sp; 493 mutex_exit(&ep->ep_lock); 494 495 (void) rds_session_active(sp); 496 497 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 498 return (IBT_CM_ACCEPT); 499 } 500 501 /* 502 * Handle CONN CLOSED 503 */ 504 static ibt_cm_status_t 505 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 506 { 507 rds_ep_t *ep; 508 rds_session_t *sp; 509 510 /* Catch DREQs but ignore DREPs */ 511 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 512 RDS_DPRINTF2("rds_handle_cm_conn_closed", 513 "Ignoring Event: %d received", evp->cm_event.closed); 514 return (IBT_CM_ACCEPT); 515 } 516 517 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 518 sp = ep->ep_sp; 519 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter", 520 ep, evp->cm_channel); 521 522 mutex_enter(&ep->ep_lock); 523 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 524 /* Ignore this DREQ */ 525 RDS_DPRINTF2("rds_handle_cm_conn_closed", 526 "EP(%p) not connected, state: %d", ep, ep->ep_state); 527 mutex_exit(&ep->ep_lock); 528 return (IBT_CM_ACCEPT); 529 } 530 ep->ep_state = RDS_EP_STATE_CLOSING; 531 mutex_exit(&ep->ep_lock); 532 533 rw_enter(&sp->session_lock, RW_WRITER); 534 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 535 sp->session_state); 536 537 switch (sp->session_state) { 538 case RDS_SESSION_STATE_CONNECTED: 539 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 540 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 541 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 542 break; 543 544 case RDS_SESSION_STATE_PASSIVE_CLOSING: 545 sp->session_state = RDS_SESSION_STATE_CLOSED; 546 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 547 "RDS_SESSION_STATE_CLOSED", sp); 548 rds_passive_session_fini(sp); 549 sp->session_state = RDS_SESSION_STATE_FINI; 550 RDS_DPRINTF3("rds_handle_cm_conn_closed", 551 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 552 break; 553 554 case RDS_SESSION_STATE_ACTIVE_CLOSING: 555 case RDS_SESSION_STATE_ERROR: 556 case RDS_SESSION_STATE_CLOSED: 557 break; 558 559 case RDS_SESSION_STATE_INIT: 560 sp->session_state = RDS_SESSION_STATE_ERROR; 561 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 562 "RDS_SESSION_STATE_ERROR", sp); 563 rds_passive_session_fini(sp); 564 sp->session_state = RDS_SESSION_STATE_FAILED; 565 RDS_DPRINTF3("rds_handle_cm_conn_closed", 566 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 567 break; 568 569 default: 570 RDS_DPRINTF2("rds_handle_cm_conn_closed", 571 "SP(%p) - Unexpected state: %d", sp, sp->session_state); 572 rds_passive_session_fini(sp); 573 sp->session_state = RDS_SESSION_STATE_FAILED; 574 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 575 "RDS_SESSION_STATE_FAILED", sp); 576 } 577 rw_exit(&sp->session_lock); 578 579 mutex_enter(&ep->ep_lock); 580 ep->ep_state = RDS_EP_STATE_CLOSED; 581 mutex_exit(&ep->ep_lock); 582 583 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 584 return (IBT_CM_ACCEPT); 585 } 586 587 /* 588 * Handle EVENT FAILURE 589 */ 590 static ibt_cm_status_t 591 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 592 { 593 rds_ep_t *ep; 594 rds_session_t *sp; 595 int ret; 596 597 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 598 "Code: %d msg: %d reason: %d", evp->cm_channel, 599 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 600 evp->cm_event.failed.cf_reason); 601 602 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 603 RDS_DPRINTF2(LABEL, 604 "Received REJ with reason IBT_CM_INVALID_SID: " 605 "RDS may not be loaded on the remote system"); 606 } 607 608 if (evp->cm_channel == NULL) { 609 return (IBT_CM_ACCEPT); 610 } 611 612 if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) && 613 (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) { 614 /* 615 * This end is active, just ignore, ibt_open_rc_channel() 616 * caller will take care of cleanup. 617 */ 618 RDS_DPRINTF2("rds_handle_cm_event_failure", 619 "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel); 620 return (IBT_CM_ACCEPT); 621 } 622 623 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 624 sp = ep->ep_sp; 625 626 rw_enter(&sp->session_lock, RW_WRITER); 627 if (sp->session_type == RDS_SESSION_PASSIVE) { 628 RDS_DPRINTF2("rds_handle_cm_event_failure", 629 "SP(%p) - state: %d", sp, sp->session_state); 630 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 631 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 632 sp->session_state = RDS_SESSION_STATE_ERROR; 633 RDS_DPRINTF3("rds_handle_cm_event_failure", 634 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 635 636 /* 637 * Store the cm_channel for freeing later 638 * Active side frees it on ibt_open_rc_channel 639 * failure 640 */ 641 if (ep->ep_chanhdl == NULL) { 642 ep->ep_chanhdl = evp->cm_channel; 643 } 644 rw_exit(&sp->session_lock); 645 646 /* 647 * rds_passive_session_fini should not be called 648 * directly in the CM handler. It will cause a deadlock. 649 */ 650 ret = ddi_taskq_dispatch(rds_taskq, 651 rds_cleanup_passive_session, (void *)sp, 652 DDI_NOSLEEP); 653 if (ret != DDI_SUCCESS) { 654 RDS_DPRINTF2("rds_handle_cm_event_failure", 655 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 656 } 657 return (IBT_CM_ACCEPT); 658 } 659 } 660 rw_exit(&sp->session_lock); 661 662 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 663 return (IBT_CM_ACCEPT); 664 } 665 666 /* 667 * CM Handler 668 * 669 * Called by IBCM 670 * The cm_private type differs for active and passive events. 671 */ 672 ibt_cm_status_t 673 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 674 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 675 ibt_priv_data_len_t ret_len_max) 676 { 677 ibt_cm_status_t ret = IBT_CM_ACCEPT; 678 679 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 680 681 switch (eventp->cm_type) { 682 case IBT_CM_EVENT_REQ_RCV: 683 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 684 ret_args, ret_priv_data, ret_len_max); 685 break; 686 case IBT_CM_EVENT_REP_RCV: 687 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 688 ret_len_max); 689 break; 690 case IBT_CM_EVENT_MRA_RCV: 691 /* Not supported */ 692 break; 693 case IBT_CM_EVENT_CONN_EST: 694 ret = rds_handle_cm_conn_est(eventp); 695 break; 696 case IBT_CM_EVENT_CONN_CLOSED: 697 ret = rds_handle_cm_conn_closed(eventp); 698 break; 699 case IBT_CM_EVENT_FAILURE: 700 ret = rds_handle_cm_event_failure(eventp); 701 break; 702 case IBT_CM_EVENT_LAP_RCV: 703 /* Not supported */ 704 RDS_DPRINTF2(LABEL, "LAP message received"); 705 break; 706 case IBT_CM_EVENT_APR_RCV: 707 /* Not supported */ 708 RDS_DPRINTF2(LABEL, "APR message received"); 709 break; 710 default: 711 break; 712 } 713 714 RDS_DPRINTF2("rds_cm_handler", "Return"); 715 716 return (ret); 717 } 718 719 /* This is based on OFED Linux RDS */ 720 #define RDS_PORT_NUM 6556 721 722 /* 723 * Register the wellknown service with service id: RDS_SERVICE_ID 724 * Incoming connection requests should arrive on this service id. 725 */ 726 ibt_srv_hdl_t 727 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 728 { 729 ibt_srv_hdl_t srvhdl; 730 ibt_srv_desc_t srvdesc; 731 int ret; 732 733 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 734 735 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 736 srvdesc.sd_handler = rds_cm_handler; 737 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 738 739 /* 740 * Register the old service id for backward compatibility 741 * REQs received on this service id would be rejected 742 */ 743 ret = ibt_register_service(rds_ibhdl, &srvdesc, RDS_SERVICE_ID, 744 1, &rdsib_statep->rds_old_srvhdl, NULL); 745 if (ret != IBT_SUCCESS) { 746 RDS_DPRINTF2(LABEL, 747 "RDS Service (0x%llx) Registration Failed: %d", 748 RDS_SERVICE_ID, ret); 749 return (NULL); 750 } 751 752 /* 753 * This is the new service id as per: 754 * Annex A11: RDMA IP CM Service 755 */ 756 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 757 RDS_PORT_NUM); 758 ret = ibt_register_service(rds_ibhdl, &srvdesc, 759 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 760 if (ret != IBT_SUCCESS) { 761 RDS_DPRINTF2(LABEL, 762 "RDS Service (0x%llx) Registration Failed: %d", 763 rdsib_statep->rds_service_id, ret); 764 return (NULL); 765 } 766 767 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 768 return (srvhdl); 769 } 770 771 /* Bind the RDS service on all ports */ 772 int 773 rds_bind_service(rds_state_t *statep) 774 { 775 rds_hca_t *hcap; 776 ib_gid_t gid; 777 uint_t jx, nbinds = 0, nports = 0; 778 int ret; 779 780 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 781 782 hcap = statep->rds_hcalistp; 783 while (hcap != NULL) { 784 for (jx = 0; jx < hcap->hca_nports; jx++) { 785 nports++; 786 if (hcap->hca_pinfop[jx].p_linkstate != 787 IBT_PORT_ACTIVE) { 788 /* 789 * service bind will be called in the async 790 * handler when the port comes up 791 */ 792 continue; 793 } 794 795 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 796 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 797 "gid: %llx:%llx", hcap->hca_guid, 798 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 799 gid.gid_guid); 800 801 /* pass statep as cm_private */ 802 ret = ibt_bind_service(statep->rds_srvhdl, gid, 803 NULL, statep, NULL); 804 if (ret != IBT_SUCCESS) { 805 RDS_DPRINTF2(LABEL, "Bind service for " 806 "HCA: 0x%llx Port: %d gid %llx:%llx " 807 "failed: %d", hcap->hca_guid, 808 hcap->hca_pinfop[jx].p_port_num, 809 gid.gid_prefix, gid.gid_guid, ret); 810 continue; 811 } 812 813 nbinds++; 814 815 /* bind the old service, ignore if it fails */ 816 ret = ibt_bind_service(statep->rds_old_srvhdl, gid, 817 NULL, statep, NULL); 818 if (ret != IBT_SUCCESS) { 819 RDS_DPRINTF2(LABEL, "Bind service for " 820 "HCA: 0x%llx Port: %d gid %llx:%llx " 821 "failed: %d", hcap->hca_guid, 822 hcap->hca_pinfop[jx].p_port_num, 823 gid.gid_prefix, gid.gid_guid, ret); 824 } 825 } 826 hcap = hcap->hca_nextp; 827 } 828 829 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 830 nbinds, nports); 831 832 #if 0 833 if (nbinds == 0) { 834 return (-1); 835 } 836 #endif 837 838 RDS_DPRINTF2("rds_bind_service", "Return"); 839 840 return (0); 841 } 842 843 /* Open an RC connection */ 844 int 845 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 846 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 847 { 848 rds_session_t *sp; 849 ibt_chan_open_args_t ocargs; 850 ibt_rc_returns_t ocrets; 851 rds_cm_private_data_t cmp; 852 uint8_t hca_port; 853 ibt_channel_hdl_t hdl; 854 ibt_status_t ret = 0; 855 ibt_ip_cm_info_t ipcm_info; 856 857 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 858 859 sp = ep->ep_sp; 860 861 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 862 ipcm_info.src_addr.family = AF_INET; 863 ipcm_info.SRCIP = sp->session_myip; 864 ipcm_info.dst_addr.family = AF_INET; 865 ipcm_info.DSTIP = sp->session_remip; 866 ipcm_info.src_port = RDS_PORT_NUM; 867 ret = ibt_format_ip_private_data(&ipcm_info, 868 sizeof (rds_cm_private_data_t), &cmp); 869 if (ret != IBT_SUCCESS) { 870 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 871 "failed: %d", sp, ep, ret); 872 return (-1); 873 } 874 875 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 876 877 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 878 if (hdl == NULL) { 879 return (-1); 880 } 881 882 cmp.cmp_version = RDS_VERSION; 883 cmp.cmp_arch = RDS_THIS_ARCH; 884 cmp.cmp_eptype = ep->ep_type; 885 cmp.cmp_failover = sp->session_failover; 886 cmp.cmp_last_bufid = ep->ep_rbufid; 887 cmp.cmp_user_buffer_size = UserBufferSize; 888 cmp.cmp_ack_addr = ep->ep_ack_addr; 889 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 890 891 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 892 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 893 ocargs.oc_path = pinfo; 894 ocargs.oc_cm_handler = rds_cm_handler; 895 ocargs.oc_cm_clnt_private = NULL; 896 ocargs.oc_rdma_ra_out = 4; 897 ocargs.oc_rdma_ra_in = 4; 898 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 899 ocargs.oc_priv_data = &cmp; 900 ocargs.oc_path_retry_cnt = IBPathRetryCount; 901 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 902 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 903 mode, &ocargs, &ocrets); 904 if (ret != IBT_SUCCESS) { 905 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 906 "failed: %d", sp, ep, ret); 907 (void) ibt_flush_channel(hdl); 908 (void) ibt_free_channel(hdl); 909 910 mutex_enter(&ep->ep_lock); 911 /* don't cleanup if this failure is due to peer-peer race */ 912 if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 913 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 914 ep->ep_state = RDS_EP_STATE_ERROR; 915 rds_ep_free_rc_channel(ep); 916 } 917 mutex_exit(&ep->ep_lock); 918 919 return (-1); 920 } 921 922 *chanhdl = hdl; 923 924 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 925 *chanhdl); 926 927 return (0); 928 } 929 930 int 931 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 932 { 933 int ret; 934 935 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 936 chanhdl, mode); 937 938 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 939 940 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 941 942 return (ret); 943 } 944