/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * An implementation of the IPoIB-CM standard based on PSARC 2009/593.
 */
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/mac_provider.h>

#include <sys/pattr.h>		/* for HCK_FULLCKSUM */
#include <sys/atomic.h>		/* for atomic_add*() */
#include <sys/ethernet.h>	/* for ETHERTYPE_IP */
#include <netinet/in.h>		/* for netinet/ip.h below */
#include <netinet/ip.h>		/* for struct ip */
#include <inet/common.h>	/* for inet/ip.h below */
#include <inet/ip.h>		/* for ipha_t */
#include <inet/ip_if.h>		/* for ETHERTYPE_IPV6 */
#include <inet/ip6.h>		/* for ip6_t */
#include <netinet/icmp6.h>	/* for icmp6_t */

#include <sys/ib/clients/ibd/ibd.h>

extern ibd_global_state_t ibd_gstate;
extern int ibd_rc_conn_timeout;
uint_t ibd_rc_tx_softintr = 1;
/*
 * If the number of WRs in the receive queue of an RC connection drops
 * below IBD_RC_RX_WR_THRESHOLD, more receive WRs are posted into it.
 */
#define	IBD_RC_RX_WR_THRESHOLD		0x20

/*
 * If the number of free SWQEs (or large Tx buffers) is greater than or
 * equal to IBD_RC_TX_FREE_THRESH, mac_tx_update() is called to notify
 * GLD that it may continue transmitting packets.
 */
#define	IBD_RC_TX_FREE_THRESH		8

#define	IBD_RC_QPN_TO_SID(qpn) \
	((uint64_t)(IBD_RC_SERVICE_ID | ((qpn) & 0xffffff)))

/* For interop with legacy OFED */
#define	IBD_RC_QPN_TO_SID_OFED_INTEROP(qpn) \
	((uint64_t)(IBD_RC_SERVICE_ID_OFED_INTEROP | ((qpn) & 0xffffff)))

/* Internet Header + 64 bits of Data Datagram.
Refer to RFC 792 */ 79 #define IBD_RC_IP_ICMP_RETURN_DATA_BYTES 64 80 81 82 /* Functions for Reliable Connected Mode */ 83 /* Connection Setup/Close Functions */ 84 static ibt_cm_status_t ibd_rc_dispatch_pass_mad(void *, 85 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 86 static ibt_cm_status_t ibd_rc_dispatch_actv_mad(void *, 87 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 88 static void ibd_rc_act_close(ibd_rc_chan_t *, boolean_t); 89 90 static inline void ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *, 91 ibd_rc_chan_t *); 92 static inline ibd_rc_chan_t *ibd_rc_rm_header_chan_list( 93 ibd_rc_chan_list_t *); 94 static inline ibd_rc_chan_t *ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *, 95 ibd_rc_chan_t *); 96 97 /* CQ handlers */ 98 static void ibd_rc_rcq_handler(ibt_cq_hdl_t, void *); 99 static void ibd_rc_scq_handler(ibt_cq_hdl_t, void *); 100 static void ibd_rc_poll_rcq(ibd_rc_chan_t *, ibt_cq_hdl_t); 101 102 /* Receive Functions */ 103 static int ibd_rc_post_srq(ibd_state_t *, ibd_rwqe_t *); 104 static void ibd_rc_srq_freemsg_cb(char *); 105 static void ibd_rc_srq_free_rwqe(ibd_state_t *, ibd_rwqe_t *); 106 107 static int ibd_rc_post_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *); 108 static void ibd_rc_freemsg_cb(char *); 109 static void ibd_rc_process_rx(ibd_rc_chan_t *, ibd_rwqe_t *, ibt_wc_t *); 110 static void ibd_rc_free_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *); 111 static void ibd_rc_fini_rxlist(ibd_rc_chan_t *); 112 113 114 /* Send Functions */ 115 static void ibd_rc_release_swqe(ibd_rc_chan_t *, ibd_swqe_t *); 116 static int ibd_rc_init_txlist(ibd_rc_chan_t *); 117 static void ibd_rc_fini_txlist(ibd_rc_chan_t *); 118 static uint_t ibd_rc_tx_recycle(caddr_t); 119 120 121 void 122 ibd_async_rc_close_act_chan(ibd_state_t *state, ibd_req_t *req) 123 { 124 ibd_rc_chan_t *rc_chan = req->rq_ptr; 125 ibd_ace_t *ace; 126 127 while (rc_chan != NULL) { 128 ace = rc_chan->ace; 129 ASSERT(ace != NULL); 130 /* Close old RC channel */ 131 ibd_rc_act_close(rc_chan, B_TRUE); 132 mutex_enter(&state->id_ac_mutex); 133 ASSERT(ace->ac_ref != 0); 134 atomic_dec_32(&ace->ac_ref); 135 ace->ac_chan = NULL; 136 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) { 137 IBD_ACACHE_INSERT_FREE(state, ace); 138 ace->ac_ref = 0; 139 } else { 140 ace->ac_ref |= CYCLEVAL; 141 state->rc_delay_ace_recycle++; 142 } 143 mutex_exit(&state->id_ac_mutex); 144 rc_chan = ibd_rc_rm_header_chan_list( 145 &state->rc_obs_act_chan_list); 146 } 147 } 148 149 void 150 ibd_async_rc_recycle_ace(ibd_state_t *state, ibd_req_t *req) 151 { 152 ibd_ace_t *ace = req->rq_ptr; 153 ibd_rc_chan_t *rc_chan; 154 155 ASSERT(ace != NULL); 156 rc_chan = ace->ac_chan; 157 ASSERT(rc_chan != NULL); 158 /* Close old RC channel */ 159 ibd_rc_act_close(rc_chan, B_TRUE); 160 mutex_enter(&state->id_ac_mutex); 161 ASSERT(ace->ac_ref != 0); 162 atomic_dec_32(&ace->ac_ref); 163 ace->ac_chan = NULL; 164 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) { 165 IBD_ACACHE_INSERT_FREE(state, ace); 166 ace->ac_ref = 0; 167 } else { 168 ace->ac_ref |= CYCLEVAL; 169 state->rc_delay_ace_recycle++; 170 } 171 mutex_exit(&state->id_ac_mutex); 172 mutex_enter(&state->rc_ace_recycle_lock); 173 state->rc_ace_recycle = NULL; 174 mutex_exit(&state->rc_ace_recycle_lock); 175 } 176 177 /* Simple ICMP IP Header Template */ 178 static const ipha_t icmp_ipha = { 179 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP 180 }; 181 182 /* Packet is too big. 
Send ICMP packet to GLD to request a smaller MTU */ 183 void 184 ibd_async_rc_process_too_big(ibd_state_t *state, ibd_req_t *req) 185 { 186 mblk_t *mp = req->rq_ptr; 187 ibd_ace_t *ace = req->rq_ptr2; 188 uint16_t mtu = state->id_mtu - IPOIB_HDRSIZE; 189 uint_t len_needed; 190 size_t msg_len; 191 mblk_t *pmtu_mp; 192 ushort_t sap; 193 ib_header_info_t *ibha; /* ib header for pmtu_pkt */ 194 /* 195 * ipha: IP header for pmtu_pkt 196 * old_ipha: IP header for old packet 197 */ 198 ipha_t *ipha, *old_ipha; 199 icmph_t *icmph; 200 201 sap = ntohs(((ipoib_hdr_t *)mp->b_rptr)->ipoib_type); 202 203 if (!pullupmsg(mp, -1)) { 204 DPRINT(40, "ibd_async_rc_process_too_big: pullupmsg fail"); 205 goto too_big_fail; 206 } 207 /* move to IP header. */ 208 mp->b_rptr += IPOIB_HDRSIZE; 209 old_ipha = (ipha_t *)mp->b_rptr; 210 211 len_needed = IPH_HDR_LENGTH(old_ipha); 212 if (old_ipha->ipha_protocol == IPPROTO_ENCAP) { 213 len_needed += IPH_HDR_LENGTH(((uchar_t *)old_ipha + 214 len_needed)); 215 } else if (old_ipha->ipha_protocol == IPPROTO_IPV6) { 216 ip6_t *ip6h = (ip6_t *)((uchar_t *)old_ipha 217 + len_needed); 218 len_needed += ip_hdr_length_v6(mp, ip6h); 219 } 220 len_needed += IBD_RC_IP_ICMP_RETURN_DATA_BYTES; 221 msg_len = msgdsize(mp); 222 if (msg_len > len_needed) { 223 (void) adjmsg(mp, len_needed - msg_len); 224 msg_len = len_needed; 225 } 226 227 if ((pmtu_mp = allocb(sizeof (ib_header_info_t) + sizeof (ipha_t) 228 + sizeof (icmph_t), BPRI_MED)) == NULL) { 229 DPRINT(40, "ibd_async_rc_process_too_big: allocb fail"); 230 goto too_big_fail; 231 } 232 pmtu_mp->b_cont = mp; 233 pmtu_mp->b_wptr = pmtu_mp->b_rptr + sizeof (ib_header_info_t) 234 + sizeof (ipha_t) + sizeof (icmph_t); 235 236 ibha = (ib_header_info_t *)pmtu_mp->b_rptr; 237 238 /* Fill IB header */ 239 bcopy(&state->id_macaddr, &ibha->ib_dst, IPOIB_ADDRL); 240 /* 241 * If the GRH is not valid, indicate to GLDv3 by setting 242 * the VerTcFlow field to 0. 
243 */ 244 ibha->ib_grh.ipoib_vertcflow = 0; 245 ibha->ipib_rhdr.ipoib_type = htons(sap); 246 ibha->ipib_rhdr.ipoib_mbz = 0; 247 248 /* Fill IP header */ 249 ipha = (ipha_t *)&ibha[1]; 250 *ipha = icmp_ipha; 251 ipha->ipha_src = old_ipha->ipha_dst; 252 ipha->ipha_dst = old_ipha->ipha_src; 253 ipha->ipha_ttl = old_ipha->ipha_ttl; 254 msg_len += sizeof (icmp_ipha) + sizeof (icmph_t); 255 if (msg_len > IP_MAXPACKET) { 256 ibd_print_warn(state, "ibd_rc_process_too_big_pkt: msg_len(%d) " 257 "> IP_MAXPACKET", (uint32_t)msg_len); 258 (void) adjmsg(mp, IP_MAXPACKET - msg_len); 259 msg_len = IP_MAXPACKET; 260 } 261 ipha->ipha_length = htons((uint16_t)msg_len); 262 ipha->ipha_hdr_checksum = 0; 263 ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha); 264 265 /* Fill ICMP body */ 266 icmph = (icmph_t *)&ipha[1]; 267 bzero(icmph, sizeof (icmph_t)); 268 icmph->icmph_type = ICMP_DEST_UNREACHABLE; 269 icmph->icmph_code = ICMP_FRAGMENTATION_NEEDED; 270 icmph->icmph_du_mtu = htons(mtu); 271 icmph->icmph_checksum = 0; 272 icmph->icmph_checksum = IP_CSUM(pmtu_mp, 273 (int32_t)sizeof (ib_header_info_t) + (int32_t)sizeof (ipha_t), 0); 274 275 (void) hcksum_assoc(pmtu_mp, NULL, NULL, 0, 0, 0, 0, 276 HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); 277 278 DPRINT(30, "ibd_async_rc_process_too_big: sap=0x%x, ip_src=0x%x, " 279 "ip_dst=0x%x, ttl=%d, len_needed=%d, msg_len=%d", 280 sap, ipha->ipha_src, ipha->ipha_dst, ipha->ipha_ttl, 281 len_needed, (uint32_t)msg_len); 282 283 mac_rx(state->id_mh, state->id_rh, pmtu_mp); 284 285 mutex_enter(&ace->tx_too_big_mutex); 286 ace->tx_too_big_ongoing = B_FALSE; 287 mutex_exit(&ace->tx_too_big_mutex); 288 return; 289 290 too_big_fail: 291 /* Drop packet */ 292 freemsg(mp); 293 mutex_enter(&ace->tx_too_big_mutex); 294 ace->tx_too_big_ongoing = B_FALSE; 295 mutex_exit(&ace->tx_too_big_mutex); 296 } 297 298 /* 299 * Check all active/passive channels. If any ative/passive 300 * channel has not been used for a long time, close it. 301 */ 302 void 303 ibd_rc_conn_timeout_call(void *carg) 304 { 305 ibd_state_t *state = carg; 306 ibd_ace_t *ace, *pre_ace; 307 ibd_rc_chan_t *chan, *pre_chan, *next_chan; 308 ibd_req_t *req; 309 310 /* Check all active channels. If chan->is_used == B_FALSE, close it */ 311 mutex_enter(&state->id_ac_mutex); 312 ace = list_head(&state->id_ah_active); 313 while ((pre_ace = ace) != NULL) { 314 ace = list_next(&state->id_ah_active, ace); 315 if (pre_ace->ac_chan != NULL) { 316 chan = pre_ace->ac_chan; 317 ASSERT(state->id_enable_rc == B_TRUE); 318 if (chan->chan_state == IBD_RC_STATE_ACT_ESTAB) { 319 if (chan->is_used == B_FALSE) { 320 state->rc_timeout_act++; 321 INC_REF(pre_ace, 1); 322 IBD_ACACHE_PULLOUT_ACTIVE(state, 323 pre_ace); 324 chan->chan_state = 325 IBD_RC_STATE_ACT_CLOSING; 326 ibd_rc_signal_act_close(state, pre_ace); 327 } else { 328 chan->is_used = B_FALSE; 329 } 330 } 331 } 332 } 333 mutex_exit(&state->id_ac_mutex); 334 335 /* Check all passive channels. 
If chan->is_used == B_FALSE, close it */ 336 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex); 337 next_chan = state->rc_pass_chan_list.chan_list; 338 pre_chan = NULL; 339 while ((chan = next_chan) != NULL) { 340 next_chan = chan->next; 341 if (chan->is_used == B_FALSE) { 342 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP); 343 if (req != NULL) { 344 /* remove it */ 345 state->rc_timeout_pas++; 346 req->rq_ptr = chan; 347 ibd_queue_work_slot(state, req, 348 IBD_ASYNC_RC_CLOSE_PAS_CHAN); 349 } else { 350 ibd_print_warn(state, "ibd_rc_conn_timeout: " 351 "alloc ibd_req_t fail"); 352 if (pre_chan == NULL) { 353 state->rc_pass_chan_list.chan_list = 354 chan; 355 } else { 356 pre_chan->next = chan; 357 } 358 pre_chan = chan; 359 } 360 } else { 361 if (pre_chan == NULL) { 362 state->rc_pass_chan_list.chan_list = chan; 363 } else { 364 pre_chan->next = chan; 365 } 366 pre_chan = chan; 367 chan->is_used = B_FALSE; 368 } 369 } 370 if (pre_chan != NULL) { 371 pre_chan->next = NULL; 372 } else { 373 state->rc_pass_chan_list.chan_list = NULL; 374 } 375 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 376 377 mutex_enter(&state->rc_timeout_lock); 378 if (state->rc_timeout_start == B_TRUE) { 379 state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state, 380 SEC_TO_TICK(ibd_rc_conn_timeout)); 381 } 382 mutex_exit(&state->rc_timeout_lock); 383 } 384 385 #ifdef DEBUG 386 /* 387 * ibd_rc_update_stats - update driver private kstat counters 388 * 389 * This routine will dump the internal statistics counters for ibd's 390 * Reliable Connected Mode. The current stats dump values will 391 * be sent to the kernel status area. 392 */ 393 static int 394 ibd_rc_update_stats(kstat_t *ksp, int rw) 395 { 396 ibd_state_t *state; 397 ibd_rc_stat_t *ibd_rc_ksp; 398 399 if (rw == KSTAT_WRITE) 400 return (EACCES); 401 402 state = (ibd_state_t *)ksp->ks_private; 403 ASSERT(state != NULL); 404 ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data; 405 406 ibd_rc_ksp->rc_rcv_trans_byte.value.ul = state->rc_rcv_trans_byte; 407 ibd_rc_ksp->rc_rcv_trans_pkt.value.ul = state->rc_rcv_trans_pkt; 408 ibd_rc_ksp->rc_rcv_copy_byte.value.ul = state->rc_rcv_copy_byte; 409 ibd_rc_ksp->rc_rcv_copy_pkt.value.ul = state->rc_rcv_copy_pkt; 410 ibd_rc_ksp->rc_rcv_alloc_fail.value.ul = state->rc_rcv_alloc_fail; 411 412 ibd_rc_ksp->rc_rcq_err.value.ul = state->rc_rcq_err; 413 414 ibd_rc_ksp->rc_rwqe_short.value.ul = state->rc_rwqe_short; 415 416 ibd_rc_ksp->rc_xmt_bytes.value.ul = state->rc_xmt_bytes; 417 ibd_rc_ksp->rc_xmt_small_pkt.value.ul = state->rc_xmt_small_pkt; 418 ibd_rc_ksp->rc_xmt_fragmented_pkt.value.ul = 419 state->rc_xmt_fragmented_pkt; 420 ibd_rc_ksp->rc_xmt_map_fail_pkt.value.ul = state->rc_xmt_map_fail_pkt; 421 ibd_rc_ksp->rc_xmt_map_succ_pkt.value.ul = state->rc_xmt_map_succ_pkt; 422 ibd_rc_ksp->rc_ace_not_found.value.ul = state->rc_ace_not_found; 423 424 ibd_rc_ksp->rc_scq_no_swqe.value.ul = state->rc_scq_no_swqe; 425 ibd_rc_ksp->rc_scq_no_largebuf.value.ul = state->rc_scq_no_largebuf; 426 ibd_rc_ksp->rc_swqe_short.value.ul = state->rc_swqe_short; 427 ibd_rc_ksp->rc_swqe_mac_update.value.ul = state->rc_swqe_mac_update; 428 ibd_rc_ksp->rc_xmt_buf_short.value.ul = state->rc_xmt_buf_short; 429 ibd_rc_ksp->rc_xmt_buf_mac_update.value.ul = 430 state->rc_xmt_buf_mac_update; 431 432 ibd_rc_ksp->rc_conn_succ.value.ul = state->rc_conn_succ; 433 ibd_rc_ksp->rc_conn_fail.value.ul = state->rc_conn_fail; 434 ibd_rc_ksp->rc_null_conn.value.ul = state->rc_null_conn; 435 ibd_rc_ksp->rc_no_estab_conn.value.ul = state->rc_no_estab_conn; 
436 437 ibd_rc_ksp->rc_act_close.value.ul = state->rc_act_close; 438 ibd_rc_ksp->rc_pas_close.value.ul = state->rc_pas_close; 439 ibd_rc_ksp->rc_delay_ace_recycle.value.ul = state->rc_delay_ace_recycle; 440 ibd_rc_ksp->rc_act_close_simultaneous.value.ul = 441 state->rc_act_close_simultaneous; 442 ibd_rc_ksp->rc_reset_cnt.value.ul = state->rc_reset_cnt; 443 ibd_rc_ksp->rc_timeout_act.value.ul = state->rc_timeout_act; 444 ibd_rc_ksp->rc_timeout_pas.value.ul = state->rc_timeout_pas; 445 446 return (0); 447 } 448 449 450 /* 451 * ibd_rc_init_stats - initialize kstat data structures 452 * 453 * This routine will create and initialize the driver private 454 * statistics counters. 455 */ 456 int 457 ibd_rc_init_stats(ibd_state_t *state) 458 { 459 kstat_t *ksp; 460 ibd_rc_stat_t *ibd_rc_ksp; 461 char stat_name[KSTAT_STRLEN]; 462 int inst; 463 464 /* 465 * Create and init kstat 466 */ 467 inst = ddi_get_instance(state->id_dip); 468 (void) snprintf(stat_name, KSTAT_STRLEN, "statistics%d_%x_%u", inst, 469 state->id_pkey, state->id_plinkid); 470 ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED, 471 sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0); 472 473 if (ksp == NULL) { 474 ibd_print_warn(state, "ibd_rc_init_stats: Could not create " 475 "kernel statistics"); 476 return (DDI_FAILURE); 477 } 478 479 state->rc_ksp = ksp; /* Fill in the ksp of ibd over RC mode */ 480 481 ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data; 482 483 /* 484 * Initialize all the statistics 485 */ 486 kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_byte, "RC: Rx Bytes, " 487 "transfer mode", KSTAT_DATA_ULONG); 488 kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_pkt, "RC: Rx Pkts, " 489 "transfer mode", KSTAT_DATA_ULONG); 490 kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_byte, "RC: Rx Bytes, " 491 "copy mode", KSTAT_DATA_ULONG); 492 kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_pkt, "RC: Rx Pkts, " 493 "copy mode", KSTAT_DATA_ULONG); 494 kstat_named_init(&ibd_rc_ksp->rc_rcv_alloc_fail, "RC: Rx alloc fail", 495 KSTAT_DATA_ULONG); 496 497 kstat_named_init(&ibd_rc_ksp->rc_rcq_err, "RC: fail in Recv CQ handler", 498 KSTAT_DATA_ULONG); 499 500 kstat_named_init(&ibd_rc_ksp->rc_rwqe_short, "RC: Short rwqe", 501 KSTAT_DATA_ULONG); 502 503 kstat_named_init(&ibd_rc_ksp->rc_xmt_bytes, "RC: Sent Bytes", 504 KSTAT_DATA_ULONG); 505 kstat_named_init(&ibd_rc_ksp->rc_xmt_small_pkt, 506 "RC: Tx pkt small size", KSTAT_DATA_ULONG); 507 kstat_named_init(&ibd_rc_ksp->rc_xmt_fragmented_pkt, 508 "RC: Tx pkt fragmentary", KSTAT_DATA_ULONG); 509 kstat_named_init(&ibd_rc_ksp->rc_xmt_map_fail_pkt, 510 "RC: Tx pkt fail ibt_map_mem_iov()", KSTAT_DATA_ULONG); 511 kstat_named_init(&ibd_rc_ksp->rc_xmt_map_succ_pkt, 512 "RC: Tx pkt succ ibt_map_mem_iov()", KSTAT_DATA_ULONG); 513 kstat_named_init(&ibd_rc_ksp->rc_ace_not_found, "RC: ace not found", 514 KSTAT_DATA_ULONG); 515 516 kstat_named_init(&ibd_rc_ksp->rc_scq_no_swqe, "RC: No swqe after " 517 "recycle", KSTAT_DATA_ULONG); 518 kstat_named_init(&ibd_rc_ksp->rc_scq_no_largebuf, "RC: No large tx buf " 519 "after recycle", KSTAT_DATA_ULONG); 520 kstat_named_init(&ibd_rc_ksp->rc_swqe_short, "RC: No swqe in ibd_send", 521 KSTAT_DATA_ULONG); 522 kstat_named_init(&ibd_rc_ksp->rc_swqe_mac_update, "RC: mac_tx_update " 523 "#, swqe available", KSTAT_DATA_ULONG); 524 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_short, "RC: No buf in " 525 "ibd_send", KSTAT_DATA_ULONG); 526 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_mac_update, "RC: " 527 "mac_tx_update #, buf available", KSTAT_DATA_ULONG); 528 529 
	kstat_named_init(&ibd_rc_ksp->rc_conn_succ, "RC: succ connected",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_conn_fail, "RC: fail connect",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_null_conn, "RC: null conn for unicast "
	    "pkt", KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_no_estab_conn, "RC: not in act estab "
	    "state", KSTAT_DATA_ULONG);

	kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: call ibd_rc_act_close",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: call ibd_rc_pas_close",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_delay_ace_recycle, "RC: delay ace "
	    "recycle", KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_act_close_simultaneous, "RC: "
	    "simultaneous ibd_rc_act_close", KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_reset_cnt, "RC: Reset RC channel",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_timeout_act, "RC: timeout act side",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&ibd_rc_ksp->rc_timeout_pas, "RC: timeout pas side",
	    KSTAT_DATA_ULONG);

	/*
	 * Function to provide kernel stat update on demand
	 */
	ksp->ks_update = ibd_rc_update_stats;

	/*
	 * Pointer into provider's raw statistics
	 */
	ksp->ks_private = (void *)state;

	/*
	 * Add kstat to the system's kstat chain
	 */
	kstat_install(ksp);

	return (DDI_SUCCESS);
}
#endif

static ibt_status_t
ibd_rc_alloc_chan(ibd_rc_chan_t **ret_chan, ibd_state_t *state,
    boolean_t is_tx_chan)
{
	ibt_status_t result;
	ibd_rc_chan_t *chan;
	ibt_rc_chan_alloc_args_t alloc_args;
	ibt_chan_alloc_flags_t alloc_flags;
	ibt_chan_sizes_t sizes;
	ibt_cq_attr_t cq_atts;
	int rv;

	chan = kmem_zalloc(sizeof (ibd_rc_chan_t), KM_SLEEP);

	chan->state = state;
	mutex_init(&chan->rx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->tx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->tx_rel_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->tx_post_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&chan->tx_poll_lock, NULL, MUTEX_DRIVER, NULL);

	/* Allocate IB structures for a new RC channel.
*/ 595 if (is_tx_chan) { 596 chan->scq_size = state->id_rc_num_swqe; 597 chan->rcq_size = IBD_RC_MIN_CQ_SIZE; 598 } else { 599 chan->scq_size = IBD_RC_MIN_CQ_SIZE; 600 chan->rcq_size = state->id_rc_num_rwqe; 601 } 602 cq_atts.cq_size = chan->scq_size; 603 cq_atts.cq_sched = NULL; 604 cq_atts.cq_flags = IBT_CQ_NO_FLAGS; 605 result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->scq_hdl, 606 &chan->scq_size); 607 if (result != IBT_SUCCESS) { 608 DPRINT(40, "ibd_rc_alloc_chan: error <%d>" 609 "create scq completion queue (size <%d>)", 610 result, chan->scq_size); 611 goto alloc_scq_err; 612 } /* if failure to alloc cq */ 613 614 if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count, 615 state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) { 616 DPRINT(30, "ibd_rc_alloc_chan: Send CQ " 617 "interrupt moderation failed"); 618 } 619 620 ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan); 621 ibt_set_cq_handler(chan->scq_hdl, ibd_rc_scq_handler, 622 (void *) (uintptr_t)chan); 623 624 cq_atts.cq_size = chan->rcq_size; 625 cq_atts.cq_sched = NULL; 626 cq_atts.cq_flags = IBT_CQ_NO_FLAGS; 627 result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->rcq_hdl, 628 &chan->rcq_size); 629 if (result != IBT_SUCCESS) { 630 ibd_print_warn(state, "ibd_rc_alloc_chan: error <%d> creating " 631 "rx completion queue (size <%d>)", result, chan->rcq_size); 632 goto alloc_rcq_err; 633 } /* if failure to alloc cq */ 634 635 if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count, 636 state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) { 637 DPRINT(30, "ibd_rc_alloc_chan: Receive CQ " 638 "interrupt moderation failed"); 639 } 640 641 ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan); 642 ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler, 643 (void *)(uintptr_t)chan); 644 645 if (is_tx_chan) { 646 chan->is_tx_chan = B_TRUE; 647 if (ibd_rc_init_txlist(chan) != DDI_SUCCESS) { 648 ibd_print_warn(state, "ibd_rc_alloc_chan: " 649 "ibd_rc_init_txlist failed"); 650 goto init_txlist_err; 651 } 652 if (ibd_rc_tx_softintr == 1) { 653 if ((rv = ddi_add_softintr(state->id_dip, 654 DDI_SOFTINT_LOW, &chan->scq_softintr, NULL, NULL, 655 ibd_rc_tx_recycle, (caddr_t)chan)) != 656 DDI_SUCCESS) { 657 DPRINT(10, "ibd_rc_alloc_chan: failed in " 658 "ddi_add_softintr(scq_softintr), ret=%d", 659 rv); 660 goto alloc_softintr_err; 661 } 662 } 663 } else { 664 chan->is_tx_chan = B_FALSE; 665 } 666 667 /* 668 * enable completions 669 */ 670 result = ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION); 671 if (result != IBT_SUCCESS) { 672 ibd_print_warn(state, "ibd_rc_alloc_chan: ibt_enable_cq_notify" 673 "(scq) failed: status %d\n", result); 674 goto alloc_scq_enable_err; 675 } 676 677 /* We will enable chan->rcq_hdl later. */ 678 679 /* alloc a RC channel */ 680 bzero(&alloc_args, sizeof (ibt_rc_chan_alloc_args_t)); 681 bzero(&sizes, sizeof (ibt_chan_sizes_t)); 682 683 alloc_args.rc_flags = IBT_WR_SIGNALED; 684 alloc_args.rc_control = IBT_CEP_NO_FLAGS; 685 686 alloc_args.rc_scq = chan->scq_hdl; 687 alloc_args.rc_rcq = chan->rcq_hdl; 688 alloc_args.rc_pd = state->id_pd_hdl; 689 690 alloc_args.rc_hca_port_num = state->id_port; 691 alloc_args.rc_clone_chan = NULL; 692 693 /* scatter/gather */ 694 alloc_args.rc_sizes.cs_sq_sgl = state->rc_tx_max_sqseg; 695 696 /* 697 * For the number of SGL elements in receive side, I think it 698 * should be 1. Because ibd driver allocates a whole block memory 699 * for each ibt_post_recv(). 
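	 * Each receive buffer is one contiguous slice (rc_mtu + IPOIB_GRH_SIZE
	 * bytes) of a single registered region, so a single scatter/gather
	 * entry per receive WR is enough.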
700 */ 701 alloc_args.rc_sizes.cs_rq_sgl = 1; 702 703 /* The send queue size and the receive queue size */ 704 alloc_args.rc_sizes.cs_sq = chan->scq_size; 705 alloc_args.rc_sizes.cs_rq = chan->rcq_size; 706 707 if (state->id_hca_res_lkey_capab) { 708 alloc_args.rc_flags = IBT_FAST_REG_RES_LKEY; 709 } else { 710 DPRINT(40, "ibd_rc_alloc_chan: not support reserved lkey"); 711 } 712 713 if (state->rc_enable_srq) { 714 alloc_flags = IBT_ACHAN_USES_SRQ; 715 alloc_args.rc_srq = state->rc_srq_hdl; 716 } else { 717 alloc_flags = IBT_ACHAN_NO_FLAGS; 718 } 719 720 result = ibt_alloc_rc_channel(state->id_hca_hdl, 721 alloc_flags, &alloc_args, &chan->chan_hdl, &sizes); 722 if (result != IBT_SUCCESS) { 723 ibd_print_warn(state, "ibd_rc_alloc_chan: ibd_rc_open_channel" 724 " fail:<%d>", result); 725 goto alloc_scq_enable_err; 726 } 727 728 if (is_tx_chan) 729 atomic_inc_32(&state->rc_num_tx_chan); 730 else 731 atomic_inc_32(&state->rc_num_rx_chan); 732 733 /* For the connection reaper routine ibd_rc_conn_timeout_call() */ 734 chan->is_used = B_TRUE; 735 736 *ret_chan = chan; 737 return (IBT_SUCCESS); 738 739 alloc_scq_enable_err: 740 if (is_tx_chan) { 741 if (ibd_rc_tx_softintr == 1) { 742 ddi_remove_softintr(chan->scq_softintr); 743 } 744 } 745 alloc_softintr_err: 746 if (is_tx_chan) { 747 ibd_rc_fini_txlist(chan); 748 } 749 init_txlist_err: 750 (void) ibt_free_cq(chan->rcq_hdl); 751 alloc_rcq_err: 752 (void) ibt_free_cq(chan->scq_hdl); 753 alloc_scq_err: 754 mutex_destroy(&chan->tx_poll_lock); 755 mutex_destroy(&chan->tx_post_lock); 756 mutex_destroy(&chan->tx_rel_list.dl_mutex); 757 mutex_destroy(&chan->tx_wqe_list.dl_mutex); 758 mutex_destroy(&chan->rx_free_list.dl_mutex); 759 mutex_destroy(&chan->rx_wqe_list.dl_mutex); 760 kmem_free(chan, sizeof (ibd_rc_chan_t)); 761 return (result); 762 } 763 764 static void 765 ibd_rc_free_chan(ibd_rc_chan_t *chan) 766 { 767 ibt_status_t ret; 768 769 /* DPRINT(30, "ibd_rc_free_chan: chan=%p", chan); */ 770 771 if (chan->chan_hdl != NULL) { 772 ret = ibt_free_channel(chan->chan_hdl); 773 if (ret != IBT_SUCCESS) { 774 DPRINT(40, "ib_rc_free_chan: ibt_free_channel failed, " 775 "chan=%p, returned: %d", chan, ret); 776 return; 777 } 778 chan->chan_hdl = NULL; 779 } 780 781 if (chan->rcq_hdl != NULL) { 782 ret = ibt_free_cq(chan->rcq_hdl); 783 if (ret != IBT_SUCCESS) { 784 DPRINT(40, "ib_rc_free_chan: ibt_free_cq(rcq) failed, " 785 "chan=%p, returned: %d", chan, ret); 786 return; 787 } 788 chan->rcq_hdl = NULL; 789 } 790 791 if (chan->scq_hdl != NULL) { 792 ret = ibt_free_cq(chan->scq_hdl); 793 if (ret != IBT_SUCCESS) { 794 DPRINT(40, "ib_rc_free_chan: ibt_free_cq(scq) failed, " 795 "chan=%p, returned: %d", chan, ret); 796 return; 797 } 798 chan->scq_hdl = NULL; 799 } 800 801 /* Free buffers */ 802 if (chan->is_tx_chan) { 803 ibd_rc_fini_txlist(chan); 804 if (ibd_rc_tx_softintr == 1) { 805 ddi_remove_softintr(chan->scq_softintr); 806 } 807 atomic_dec_32(&chan->state->rc_num_tx_chan); 808 } else { 809 if (!chan->state->rc_enable_srq) { 810 ibd_rc_fini_rxlist(chan); 811 } 812 atomic_dec_32(&chan->state->rc_num_rx_chan); 813 } 814 815 mutex_destroy(&chan->tx_poll_lock); 816 mutex_destroy(&chan->tx_post_lock); 817 mutex_destroy(&chan->tx_rel_list.dl_mutex); 818 mutex_destroy(&chan->tx_wqe_list.dl_mutex); 819 mutex_destroy(&chan->rx_free_list.dl_mutex); 820 mutex_destroy(&chan->rx_wqe_list.dl_mutex); 821 822 /* 823 * If it is a passive channel, must make sure it has been removed 824 * from chan->state->rc_pass_chan_list 825 */ 826 kmem_free(chan, sizeof (ibd_rc_chan_t)); 
827 } 828 829 /* Add a RC channel */ 830 static inline void 831 ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan) 832 { 833 mutex_enter(&list->chan_list_mutex); 834 if (list->chan_list == NULL) { 835 list->chan_list = chan; 836 chan->next = NULL; 837 } else { 838 chan->next = list->chan_list; 839 list->chan_list = chan; 840 } 841 mutex_exit(&list->chan_list_mutex); 842 } 843 844 static boolean_t 845 ibd_rc_re_add_to_pas_chan_list(ibd_rc_chan_t *chan) 846 { 847 ibd_state_t *state = chan->state; 848 849 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex); 850 if ((state->id_mac_state & IBD_DRV_STARTED) == 0) { 851 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 852 return (B_FALSE); 853 } else { 854 if (state->rc_pass_chan_list.chan_list == NULL) { 855 state->rc_pass_chan_list.chan_list = chan; 856 chan->next = NULL; 857 } else { 858 chan->next = state->rc_pass_chan_list.chan_list; 859 state->rc_pass_chan_list.chan_list = chan; 860 } 861 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 862 return (B_TRUE); 863 } 864 } 865 866 /* Remove a RC channel */ 867 static inline ibd_rc_chan_t * 868 ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan) 869 { 870 ibd_rc_chan_t *pre_chan; 871 872 mutex_enter(&list->chan_list_mutex); 873 if (list->chan_list == chan) { 874 DPRINT(30, "ibd_rc_rm_from_chan_list(first): found chan(%p)" 875 " in chan_list", chan); 876 list->chan_list = chan->next; 877 } else { 878 pre_chan = list->chan_list; 879 while (pre_chan != NULL) { 880 if (pre_chan->next == chan) { 881 DPRINT(30, "ibd_rc_rm_from_chan_list" 882 "(middle): found chan(%p)", chan); 883 pre_chan->next = chan->next; 884 break; 885 } 886 pre_chan = pre_chan->next; 887 } 888 if (pre_chan == NULL) 889 chan = NULL; 890 } 891 mutex_exit(&list->chan_list_mutex); 892 return (chan); 893 } 894 895 static inline ibd_rc_chan_t * 896 ibd_rc_rm_header_chan_list(ibd_rc_chan_list_t *list) 897 { 898 ibd_rc_chan_t *rc_chan; 899 900 mutex_enter(&list->chan_list_mutex); 901 rc_chan = list->chan_list; 902 if (rc_chan != NULL) { 903 list->chan_list = rc_chan->next; 904 } 905 mutex_exit(&list->chan_list_mutex); 906 return (rc_chan); 907 } 908 909 static int 910 ibd_rc_alloc_srq_copybufs(ibd_state_t *state) 911 { 912 ibt_mr_attr_t mem_attr; 913 uint_t rc_rx_bufs_sz; 914 915 /* 916 * Allocate one big chunk for all regular rx copy bufs 917 */ 918 rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * state->rc_srq_size; 919 920 state->rc_srq_rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP); 921 922 state->rc_srq_rwqes = kmem_zalloc(state->rc_srq_size * 923 sizeof (ibd_rwqe_t), KM_SLEEP); 924 925 /* 926 * Do one memory registration on the entire rxbuf area 927 */ 928 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_srq_rx_bufs; 929 mem_attr.mr_len = rc_rx_bufs_sz; 930 mem_attr.mr_as = NULL; 931 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 932 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 933 &state->rc_srq_rx_mr_hdl, &state->rc_srq_rx_mr_desc) 934 != IBT_SUCCESS) { 935 DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr() " 936 "failed"); 937 kmem_free(state->rc_srq_rwqes, 938 state->rc_srq_size * sizeof (ibd_rwqe_t)); 939 kmem_free(state->rc_srq_rx_bufs, rc_rx_bufs_sz); 940 state->rc_srq_rx_bufs = NULL; 941 state->rc_srq_rwqes = NULL; 942 return (DDI_FAILURE); 943 } 944 945 return (DDI_SUCCESS); 946 } 947 948 static void 949 ibd_rc_free_srq_copybufs(ibd_state_t *state) 950 { 951 uint_t rc_rx_buf_sz; 952 953 /* 954 * Don't change the value of 
state->rc_mtu at the period from call 955 * ibd_rc_alloc_srq_copybufs() to call ibd_rc_free_srq_copybufs(). 956 */ 957 rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE; 958 959 /* 960 * Unregister rxbuf mr 961 */ 962 if (ibt_deregister_mr(state->id_hca_hdl, 963 state->rc_srq_rx_mr_hdl) != IBT_SUCCESS) { 964 DPRINT(40, "ibd_rc_free_srq_copybufs: ibt_deregister_mr()" 965 " failed"); 966 } 967 state->rc_srq_rx_mr_hdl = NULL; 968 969 /* 970 * Free rxbuf memory 971 */ 972 kmem_free(state->rc_srq_rwqes, 973 state->rc_srq_size * sizeof (ibd_rwqe_t)); 974 kmem_free(state->rc_srq_rx_bufs, state->rc_srq_size * rc_rx_buf_sz); 975 state->rc_srq_rwqes = NULL; 976 state->rc_srq_rx_bufs = NULL; 977 } 978 979 /* 980 * Allocate and post a certain number of SRQ receive buffers and WRs. 981 */ 982 int 983 ibd_rc_init_srq_list(ibd_state_t *state) 984 { 985 ibd_rwqe_t *rwqe; 986 ibt_lkey_t lkey; 987 int i; 988 uint_t len; 989 uint8_t *bufaddr; 990 ibt_srq_sizes_t srq_sizes; 991 ibt_srq_sizes_t srq_real_sizes; 992 ibt_status_t ret; 993 994 srq_sizes.srq_sgl_sz = 1; 995 srq_sizes.srq_wr_sz = state->id_rc_num_srq; 996 ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS, 997 state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes); 998 if (ret != IBT_SUCCESS) { 999 /* 1000 * The following code is for CR 6932460 (can't configure ibd 1001 * interface on 32 bits x86 systems). 32 bits x86 system has 1002 * less memory resource than 64 bits x86 system. If current 1003 * resource request can't be satisfied, we request less 1004 * resource here. 1005 */ 1006 len = state->id_rc_num_srq; 1007 while ((ret == IBT_HCA_WR_EXCEEDED) && 1008 (len >= 2 * IBD_RC_MIN_CQ_SIZE)) { 1009 len = len/2; 1010 srq_sizes.srq_sgl_sz = 1; 1011 srq_sizes.srq_wr_sz = len; 1012 ret = ibt_alloc_srq(state->id_hca_hdl, 1013 IBT_SRQ_NO_FLAGS, state->id_pd_hdl, &srq_sizes, 1014 &state->rc_srq_hdl, &srq_real_sizes); 1015 } 1016 if (ret != IBT_SUCCESS) { 1017 DPRINT(10, "ibd_rc_init_srq_list: ibt_alloc_srq failed." 
1018 "req_sgl_sz=%d, req_wr_sz=0x%x, final_req_wr_sz=" 1019 "0x%x, ret=%d", srq_sizes.srq_sgl_sz, 1020 srq_sizes.srq_wr_sz, len, ret); 1021 return (DDI_FAILURE); 1022 } 1023 state->id_rc_num_srq = len; 1024 state->id_rc_num_rwqe = state->id_rc_num_srq + 1; 1025 } 1026 1027 state->rc_srq_size = srq_real_sizes.srq_wr_sz; 1028 if (ibd_rc_alloc_srq_copybufs(state) != DDI_SUCCESS) { 1029 ret = ibt_free_srq(state->rc_srq_hdl); 1030 if (ret != IBT_SUCCESS) { 1031 ibd_print_warn(state, "ibd_rc_init_srq_list: " 1032 "ibt_free_srq fail, ret=%d", ret); 1033 } 1034 return (DDI_FAILURE); 1035 } 1036 1037 /* 1038 * Allocate and setup the rwqe list 1039 */ 1040 lkey = state->rc_srq_rx_mr_desc.md_lkey; 1041 rwqe = state->rc_srq_rwqes; 1042 bufaddr = state->rc_srq_rx_bufs; 1043 len = state->rc_mtu + IPOIB_GRH_SIZE; 1044 state->rc_srq_rwqe_list.dl_cnt = 0; 1045 state->rc_srq_rwqe_list.dl_bufs_outstanding = 0; 1046 for (i = 0; i < state->rc_srq_size; i++, rwqe++, bufaddr += len) { 1047 rwqe->w_state = state; 1048 rwqe->w_freeing_wqe = B_FALSE; 1049 rwqe->w_freemsg_cb.free_func = ibd_rc_srq_freemsg_cb; 1050 rwqe->w_freemsg_cb.free_arg = (char *)rwqe; 1051 rwqe->rwqe_copybuf.ic_bufaddr = bufaddr; 1052 1053 if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0, 1054 &rwqe->w_freemsg_cb)) == NULL) { 1055 DPRINT(40, "ibd_rc_init_srq_list : desballoc() failed"); 1056 rwqe->rwqe_copybuf.ic_bufaddr = NULL; 1057 if (atomic_dec_32_nv(&state->id_running) != 0) { 1058 cmn_err(CE_WARN, "ibd_rc_init_srq_list: " 1059 "id_running was not 1\n"); 1060 } 1061 ibd_rc_fini_srq_list(state); 1062 atomic_inc_32(&state->id_running); 1063 return (DDI_FAILURE); 1064 } 1065 1066 rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey; 1067 /* Leave IPOIB_GRH_SIZE space */ 1068 rwqe->rwqe_copybuf.ic_sgl.ds_va = 1069 (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE); 1070 rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu; 1071 rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe; 1072 rwqe->w_rwr.wr_nds = 1; 1073 rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl; 1074 (void) ibd_rc_post_srq(state, rwqe); 1075 } 1076 1077 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1078 state->rc_srq_free_list.dl_head = NULL; 1079 state->rc_srq_free_list.dl_cnt = 0; 1080 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1081 1082 return (DDI_SUCCESS); 1083 } 1084 1085 /* 1086 * Free the statically allocated Rx buffer list for SRQ. 
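 * This runs only after id_running has dropped to zero (asserted below),
 * so the freemsg callbacks free their rwqes instead of reposting them
 * while the SRQ and its copy buffers are torn down.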
1087 */ 1088 void 1089 ibd_rc_fini_srq_list(ibd_state_t *state) 1090 { 1091 ibd_rwqe_t *rwqe; 1092 int i; 1093 ibt_status_t ret; 1094 1095 ASSERT(state->id_running == 0); 1096 ret = ibt_free_srq(state->rc_srq_hdl); 1097 if (ret != IBT_SUCCESS) { 1098 ibd_print_warn(state, "ibd_rc_fini_srq_list: " 1099 "ibt_free_srq fail, ret=%d", ret); 1100 } 1101 1102 mutex_enter(&state->rc_srq_rwqe_list.dl_mutex); 1103 rwqe = state->rc_srq_rwqes; 1104 for (i = 0; i < state->rc_srq_size; i++, rwqe++) { 1105 if (rwqe->rwqe_im_mblk != NULL) { 1106 rwqe->w_freeing_wqe = B_TRUE; 1107 freemsg(rwqe->rwqe_im_mblk); 1108 } 1109 } 1110 mutex_exit(&state->rc_srq_rwqe_list.dl_mutex); 1111 1112 ibd_rc_free_srq_copybufs(state); 1113 } 1114 1115 /* Repost the elements in state->ib_rc_free_list */ 1116 int 1117 ibd_rc_repost_srq_free_list(ibd_state_t *state) 1118 { 1119 ibd_rwqe_t *rwqe; 1120 ibd_wqe_t *list; 1121 uint_t len; 1122 1123 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1124 if (state->rc_srq_free_list.dl_head != NULL) { 1125 /* repost them */ 1126 len = state->rc_mtu + IPOIB_GRH_SIZE; 1127 list = state->rc_srq_free_list.dl_head; 1128 state->rc_srq_free_list.dl_head = NULL; 1129 state->rc_srq_free_list.dl_cnt = 0; 1130 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1131 while (list != NULL) { 1132 rwqe = WQE_TO_RWQE(list); 1133 if ((rwqe->rwqe_im_mblk == NULL) && 1134 ((rwqe->rwqe_im_mblk = desballoc( 1135 rwqe->rwqe_copybuf.ic_bufaddr, len, 0, 1136 &rwqe->w_freemsg_cb)) == NULL)) { 1137 DPRINT(40, "ibd_rc_repost_srq_free_list: " 1138 "failed in desballoc()"); 1139 do { 1140 ibd_rc_srq_free_rwqe(state, rwqe); 1141 list = list->w_next; 1142 rwqe = WQE_TO_RWQE(list); 1143 } while (list != NULL); 1144 return (DDI_FAILURE); 1145 } 1146 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1147 ibd_rc_srq_free_rwqe(state, rwqe); 1148 } 1149 list = list->w_next; 1150 } 1151 return (DDI_SUCCESS); 1152 } 1153 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1154 return (DDI_SUCCESS); 1155 } 1156 1157 /* 1158 * Free an allocated recv wqe. 1159 */ 1160 static void 1161 ibd_rc_srq_free_rwqe(ibd_state_t *state, ibd_rwqe_t *rwqe) 1162 { 1163 /* 1164 * desballoc() failed (no memory) or the posting of rwqe failed. 1165 * 1166 * This rwqe is placed on a free list so that it 1167 * can be reinstated in future. 1168 * 1169 * NOTE: no code currently exists to reinstate 1170 * these "lost" rwqes. 1171 */ 1172 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1173 state->rc_srq_free_list.dl_cnt++; 1174 rwqe->rwqe_next = state->rc_srq_free_list.dl_head; 1175 state->rc_srq_free_list.dl_head = RWQE_TO_WQE(rwqe); 1176 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1177 } 1178 1179 static void 1180 ibd_rc_srq_freemsg_cb(char *arg) 1181 { 1182 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg; 1183 ibd_state_t *state = rwqe->w_state; 1184 1185 ASSERT(state->rc_enable_srq); 1186 1187 /* 1188 * If the driver is stopped, just free the rwqe. 
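	 * (id_running is read atomically below; once it is zero the SRQ is
	 * being torn down, so the buffer must not be reposted.)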
1189 */ 1190 if (atomic_add_32_nv(&state->id_running, 0) == 0) { 1191 if (!rwqe->w_freeing_wqe) { 1192 atomic_dec_32( 1193 &state->rc_srq_rwqe_list.dl_bufs_outstanding); 1194 DPRINT(6, "ibd_rc_srq_freemsg_cb: wqe being freed"); 1195 rwqe->rwqe_im_mblk = NULL; 1196 ibd_rc_srq_free_rwqe(state, rwqe); 1197 } 1198 return; 1199 } 1200 1201 atomic_dec_32(&state->rc_srq_rwqe_list.dl_bufs_outstanding); 1202 1203 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size); 1204 ASSERT(!rwqe->w_freeing_wqe); 1205 1206 /* 1207 * Upper layer has released held mblk, so we have 1208 * no more use for keeping the old pointer in 1209 * our rwqe. 1210 */ 1211 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr, 1212 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb); 1213 if (rwqe->rwqe_im_mblk == NULL) { 1214 DPRINT(40, "ibd_rc_srq_freemsg_cb: desballoc failed"); 1215 ibd_rc_srq_free_rwqe(state, rwqe); 1216 return; 1217 } 1218 1219 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1220 ibd_print_warn(state, "ibd_rc_srq_freemsg_cb: ibd_rc_post_srq" 1221 " failed"); 1222 ibd_rc_srq_free_rwqe(state, rwqe); 1223 return; 1224 } 1225 } 1226 1227 /* 1228 * Post a rwqe to the hardware and add it to the Rx list. 1229 */ 1230 static int 1231 ibd_rc_post_srq(ibd_state_t *state, ibd_rwqe_t *rwqe) 1232 { 1233 /* 1234 * Here we should add dl_cnt before post recv, because 1235 * we would have to make sure dl_cnt is updated before 1236 * the corresponding ibd_rc_process_rx() is called. 1237 */ 1238 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size); 1239 atomic_inc_32(&state->rc_srq_rwqe_list.dl_cnt); 1240 if (ibt_post_srq(state->rc_srq_hdl, &rwqe->w_rwr, 1, NULL) != 1241 IBT_SUCCESS) { 1242 atomic_dec_32(&state->rc_srq_rwqe_list.dl_cnt); 1243 DPRINT(40, "ibd_rc_post_srq : ibt_post_srq() failed"); 1244 return (DDI_FAILURE); 1245 } 1246 1247 return (DDI_SUCCESS); 1248 } 1249 1250 /* 1251 * Post a rwqe to the hardware and add it to the Rx list. 1252 */ 1253 static int 1254 ibd_rc_post_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe) 1255 { 1256 /* 1257 * Here we should add dl_cnt before post recv, because we would 1258 * have to make sure dl_cnt has already updated before 1259 * corresponding ibd_rc_process_rx() is called. 
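	 * If the WR were posted first, a fast completion could decrement
	 * dl_cnt in ibd_rc_process_rx() before this increment, leaving the
	 * count transiently wrong.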
1260 */ 1261 atomic_inc_32(&chan->rx_wqe_list.dl_cnt); 1262 if (ibt_post_recv(chan->chan_hdl, &rwqe->w_rwr, 1, NULL) != 1263 IBT_SUCCESS) { 1264 atomic_dec_32(&chan->rx_wqe_list.dl_cnt); 1265 DPRINT(40, "ibd_rc_post_rwqe : failed in ibt_post_recv()"); 1266 return (DDI_FAILURE); 1267 } 1268 return (DDI_SUCCESS); 1269 } 1270 1271 static int 1272 ibd_rc_alloc_rx_copybufs(ibd_rc_chan_t *chan) 1273 { 1274 ibd_state_t *state = chan->state; 1275 ibt_mr_attr_t mem_attr; 1276 uint_t rc_rx_bufs_sz; 1277 1278 /* 1279 * Allocate one big chunk for all regular rx copy bufs 1280 */ 1281 rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * chan->rcq_size; 1282 1283 chan->rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP); 1284 1285 chan->rx_rwqes = kmem_zalloc(chan->rcq_size * 1286 sizeof (ibd_rwqe_t), KM_SLEEP); 1287 1288 /* 1289 * Do one memory registration on the entire rxbuf area 1290 */ 1291 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->rx_bufs; 1292 mem_attr.mr_len = rc_rx_bufs_sz; 1293 mem_attr.mr_as = NULL; 1294 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 1295 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1296 &chan->rx_mr_hdl, &chan->rx_mr_desc) != IBT_SUCCESS) { 1297 DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr failed"); 1298 kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t)); 1299 kmem_free(chan->rx_bufs, rc_rx_bufs_sz); 1300 chan->rx_bufs = NULL; 1301 chan->rx_rwqes = NULL; 1302 return (DDI_FAILURE); 1303 } 1304 1305 return (DDI_SUCCESS); 1306 } 1307 1308 static void 1309 ibd_rc_free_rx_copybufs(ibd_rc_chan_t *chan) 1310 { 1311 ibd_state_t *state = chan->state; 1312 uint_t rc_rx_buf_sz; 1313 1314 ASSERT(!state->rc_enable_srq); 1315 ASSERT(chan->rx_rwqes != NULL); 1316 ASSERT(chan->rx_bufs != NULL); 1317 1318 /* 1319 * Don't change the value of state->rc_mtu at the period from call 1320 * ibd_rc_alloc_rx_copybufs() to call ibd_rc_free_rx_copybufs(). 1321 */ 1322 rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE; 1323 1324 /* 1325 * Unregister rxbuf mr 1326 */ 1327 if (ibt_deregister_mr(state->id_hca_hdl, 1328 chan->rx_mr_hdl) != IBT_SUCCESS) { 1329 DPRINT(40, "ibd_rc_free_rx_copybufs: ibt_deregister_mr failed"); 1330 } 1331 chan->rx_mr_hdl = NULL; 1332 1333 /* 1334 * Free rxbuf memory 1335 */ 1336 kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t)); 1337 chan->rx_rwqes = NULL; 1338 1339 kmem_free(chan->rx_bufs, chan->rcq_size * rc_rx_buf_sz); 1340 chan->rx_bufs = NULL; 1341 } 1342 1343 /* 1344 * Post a certain number of receive buffers and WRs on a RC channel. 
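 * This per-channel Rx list is used only when the SRQ is disabled; each
 * channel's receive queue is stocked with rcq_size buffers carved from
 * one registered chunk.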
1345 */ 1346 static int 1347 ibd_rc_init_rxlist(ibd_rc_chan_t *chan) 1348 { 1349 ibd_state_t *state = chan->state; 1350 ibd_rwqe_t *rwqe; 1351 ibt_lkey_t lkey; 1352 int i; 1353 uint_t len; 1354 uint8_t *bufaddr; 1355 1356 ASSERT(!state->rc_enable_srq); 1357 if (ibd_rc_alloc_rx_copybufs(chan) != DDI_SUCCESS) 1358 return (DDI_FAILURE); 1359 1360 /* 1361 * Allocate and setup the rwqe list 1362 */ 1363 lkey = chan->rx_mr_desc.md_lkey; 1364 rwqe = chan->rx_rwqes; 1365 bufaddr = chan->rx_bufs; 1366 len = state->rc_mtu + IPOIB_GRH_SIZE; 1367 for (i = 0; i < chan->rcq_size; i++, rwqe++, bufaddr += len) { 1368 rwqe->w_state = state; 1369 rwqe->w_chan = chan; 1370 rwqe->w_freeing_wqe = B_FALSE; 1371 rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb; 1372 rwqe->w_freemsg_cb.free_arg = (char *)rwqe; 1373 rwqe->rwqe_copybuf.ic_bufaddr = bufaddr; 1374 1375 if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0, 1376 &rwqe->w_freemsg_cb)) == NULL) { 1377 DPRINT(40, "ibd_rc_init_srq_list: desballoc() failed"); 1378 rwqe->rwqe_copybuf.ic_bufaddr = NULL; 1379 ibd_rc_fini_rxlist(chan); 1380 return (DDI_FAILURE); 1381 } 1382 1383 rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey; 1384 rwqe->rwqe_copybuf.ic_sgl.ds_va = 1385 (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE); 1386 rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu; 1387 rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe; 1388 rwqe->w_rwr.wr_nds = 1; 1389 rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl; 1390 (void) ibd_rc_post_rwqe(chan, rwqe); 1391 } 1392 1393 return (DDI_SUCCESS); 1394 } 1395 1396 /* 1397 * Free the statically allocated Rx buffer list for SRQ. 1398 */ 1399 static void 1400 ibd_rc_fini_rxlist(ibd_rc_chan_t *chan) 1401 { 1402 ibd_rwqe_t *rwqe; 1403 int i; 1404 1405 if (chan->rx_bufs == NULL) { 1406 DPRINT(40, "ibd_rc_fini_rxlist: empty chan->rx_bufs, quit"); 1407 return; 1408 } 1409 1410 /* bufs_outstanding must be 0 */ 1411 ASSERT((chan->rx_wqe_list.dl_head == NULL) || 1412 (chan->rx_wqe_list.dl_bufs_outstanding == 0)); 1413 1414 mutex_enter(&chan->rx_wqe_list.dl_mutex); 1415 rwqe = chan->rx_rwqes; 1416 for (i = 0; i < chan->rcq_size; i++, rwqe++) { 1417 if (rwqe->rwqe_im_mblk != NULL) { 1418 rwqe->w_freeing_wqe = B_TRUE; 1419 freemsg(rwqe->rwqe_im_mblk); 1420 } 1421 } 1422 mutex_exit(&chan->rx_wqe_list.dl_mutex); 1423 1424 ibd_rc_free_rx_copybufs(chan); 1425 } 1426 1427 /* 1428 * Free an allocated recv wqe. 1429 */ 1430 static void 1431 ibd_rc_free_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe) 1432 { 1433 /* 1434 * desballoc() failed (no memory) or the posting of rwqe failed. 1435 * 1436 * This rwqe is placed on a free list so that it 1437 * can be reinstated in future. 1438 * 1439 * NOTE: no code currently exists to reinstate 1440 * these "lost" rwqes. 1441 */ 1442 mutex_enter(&chan->rx_free_list.dl_mutex); 1443 chan->rx_free_list.dl_cnt++; 1444 rwqe->rwqe_next = chan->rx_free_list.dl_head; 1445 chan->rx_free_list.dl_head = RWQE_TO_WQE(rwqe); 1446 mutex_exit(&chan->rx_free_list.dl_mutex); 1447 } 1448 1449 /* 1450 * Processing to be done after receipt of a packet; hand off to GLD 1451 * in the format expected by GLD. 1452 */ 1453 static void 1454 ibd_rc_process_rx(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe, ibt_wc_t *wc) 1455 { 1456 ibd_state_t *state = chan->state; 1457 ib_header_info_t *phdr; 1458 ipoib_hdr_t *ipibp; 1459 mblk_t *mp; 1460 mblk_t *mpc; 1461 int rxcnt; 1462 ip6_t *ip6h; 1463 int len; 1464 1465 /* 1466 * Track number handed to upper layer, and number still 1467 * available to receive packets. 
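	 * (dl_cnt is decremented here for every completion and incremented
	 * again when the rwqe is reposted; the resulting rxcnt drives the
	 * copy-versus-transfer decision below.)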
	 */
	if (state->rc_enable_srq) {
		rxcnt = atomic_dec_32_nv(&state->rc_srq_rwqe_list.dl_cnt);
	} else {
		rxcnt = atomic_dec_32_nv(&chan->rx_wqe_list.dl_cnt);
	}

	/*
	 * This cannot be an IBA multicast packet.
	 */
	ASSERT(!(wc->wc_flags & IBT_WC_GRH_PRESENT));

	/* For the connection reaper routine ibd_rc_conn_timeout_call() */
	chan->is_used = B_TRUE;

#ifdef DEBUG
	if (rxcnt < state->id_rc_rx_rwqe_thresh) {
		state->rc_rwqe_short++;
	}
#endif

	/*
	 * If enough rwqes remain and the packet is large, hand the receive
	 * buffer up directly (transfer mode); otherwise copy the packet into
	 * a freshly allocated mblk and repost the rwqe immediately.
	 */
	if ((rxcnt >= state->id_rc_rx_rwqe_thresh) &&
	    (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) {
		atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer);
		atomic_inc_64(&state->rc_rcv_trans_pkt);

		/*
		 * Record how many rwqes are held by the upper
		 * network layer
		 */
		if (state->rc_enable_srq) {
			atomic_inc_32(
			    &state->rc_srq_rwqe_list.dl_bufs_outstanding);
		} else {
			atomic_inc_32(&chan->rx_wqe_list.dl_bufs_outstanding);
		}
		mp = rwqe->rwqe_im_mblk;
	} else {
		atomic_add_64(&state->rc_rcv_copy_byte, wc->wc_bytes_xfer);
		atomic_inc_64(&state->rc_rcv_copy_pkt);

		if ((mp = allocb(wc->wc_bytes_xfer + IPOIB_GRH_SIZE,
		    BPRI_HI)) == NULL) {	/* no memory */
			DPRINT(40, "ibd_rc_process_rx: allocb() failed");
			state->rc_rcv_alloc_fail++;
			if (state->rc_enable_srq) {
				if (ibd_rc_post_srq(state, rwqe) ==
				    DDI_FAILURE) {
					ibd_rc_srq_free_rwqe(state, rwqe);
				}
			} else {
				if (ibd_rc_post_rwqe(chan, rwqe) ==
				    DDI_FAILURE) {
					ibd_rc_free_rwqe(chan, rwqe);
				}
			}
			return;
		}

		bcopy(rwqe->rwqe_im_mblk->b_rptr + IPOIB_GRH_SIZE,
		    mp->b_wptr + IPOIB_GRH_SIZE, wc->wc_bytes_xfer);

		if (state->rc_enable_srq) {
			if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
				ibd_rc_srq_free_rwqe(state, rwqe);
			}
		} else {
			if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) {
				ibd_rc_free_rwqe(chan, rwqe);
			}
		}
	}

	ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + IPOIB_GRH_SIZE);
	if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) {
		ip6h = (ip6_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t));
		len = ntohs(ip6h->ip6_plen);
		if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
			/* LINTED: E_CONSTANT_CONDITION */
			IBD_PAD_NSNA(ip6h, len, IBD_RECV);
		}
	}

	phdr = (ib_header_info_t *)mp->b_rptr;
	phdr->ib_grh.ipoib_vertcflow = 0;
	ovbcopy(&state->id_macaddr, &phdr->ib_dst,
	    sizeof (ipoib_mac_t));
	mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer + IPOIB_GRH_SIZE;

	/*
	 * Can RC mode in IB guarantee its checksum correctness?
	 *
	 *	(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0,
	 *	    HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);
	 */

	/*
	 * Make sure this is NULL or we're in trouble.
1569 */ 1570 if (mp->b_next != NULL) { 1571 ibd_print_warn(state, 1572 "ibd_rc_process_rx: got duplicate mp from rcq?"); 1573 mp->b_next = NULL; 1574 } 1575 1576 /* 1577 * Add this mp to the list of processed mp's to send to 1578 * the nw layer 1579 */ 1580 if (state->rc_enable_srq) { 1581 mutex_enter(&state->rc_rx_lock); 1582 if (state->rc_rx_mp) { 1583 ASSERT(state->rc_rx_mp_tail != NULL); 1584 state->rc_rx_mp_tail->b_next = mp; 1585 } else { 1586 ASSERT(state->rc_rx_mp_tail == NULL); 1587 state->rc_rx_mp = mp; 1588 } 1589 1590 state->rc_rx_mp_tail = mp; 1591 state->rc_rx_mp_len++; 1592 1593 if (state->rc_rx_mp_len >= IBD_MAX_RX_MP_LEN) { 1594 mpc = state->rc_rx_mp; 1595 1596 state->rc_rx_mp = NULL; 1597 state->rc_rx_mp_tail = NULL; 1598 state->rc_rx_mp_len = 0; 1599 mutex_exit(&state->rc_rx_lock); 1600 mac_rx(state->id_mh, NULL, mpc); 1601 } else { 1602 mutex_exit(&state->rc_rx_lock); 1603 } 1604 } else { 1605 mutex_enter(&chan->rx_lock); 1606 if (chan->rx_mp) { 1607 ASSERT(chan->rx_mp_tail != NULL); 1608 chan->rx_mp_tail->b_next = mp; 1609 } else { 1610 ASSERT(chan->rx_mp_tail == NULL); 1611 chan->rx_mp = mp; 1612 } 1613 1614 chan->rx_mp_tail = mp; 1615 chan->rx_mp_len++; 1616 1617 if (chan->rx_mp_len >= IBD_MAX_RX_MP_LEN) { 1618 mpc = chan->rx_mp; 1619 1620 chan->rx_mp = NULL; 1621 chan->rx_mp_tail = NULL; 1622 chan->rx_mp_len = 0; 1623 mutex_exit(&chan->rx_lock); 1624 mac_rx(state->id_mh, NULL, mpc); 1625 } else { 1626 mutex_exit(&chan->rx_lock); 1627 } 1628 } 1629 } 1630 1631 /* 1632 * Callback code invoked from STREAMs when the recv data buffer is free 1633 * for recycling. 1634 */ 1635 static void 1636 ibd_rc_freemsg_cb(char *arg) 1637 { 1638 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg; 1639 ibd_rc_chan_t *chan = rwqe->w_chan; 1640 ibd_state_t *state = rwqe->w_state; 1641 1642 /* 1643 * If the wqe is being destructed, do not attempt recycling. 1644 */ 1645 if (rwqe->w_freeing_wqe == B_TRUE) { 1646 return; 1647 } 1648 1649 ASSERT(!state->rc_enable_srq); 1650 ASSERT(chan->rx_wqe_list.dl_cnt < chan->rcq_size); 1651 1652 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr, 1653 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb); 1654 if (rwqe->rwqe_im_mblk == NULL) { 1655 DPRINT(40, "ibd_rc_freemsg_cb: desballoc() failed"); 1656 ibd_rc_free_rwqe(chan, rwqe); 1657 return; 1658 } 1659 1660 /* 1661 * Post back to h/w. We could actually have more than 1662 * id_num_rwqe WQEs on the list if there were multiple 1663 * ibd_freemsg_cb() calls outstanding (since the lock is 1664 * not held the entire time). This will start getting 1665 * corrected over subsequent ibd_freemsg_cb() calls. 1666 */ 1667 if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) { 1668 ibd_rc_free_rwqe(chan, rwqe); 1669 return; 1670 } 1671 atomic_dec_32(&chan->rx_wqe_list.dl_bufs_outstanding); 1672 } 1673 1674 /* 1675 * Common code for interrupt handling as well as for polling 1676 * for all completed wqe's while detaching. 1677 */ 1678 static void 1679 ibd_rc_poll_rcq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl) 1680 { 1681 ibd_wqe_t *wqe; 1682 ibt_wc_t *wc, *wcs; 1683 uint_t numwcs, real_numwcs; 1684 int i; 1685 1686 wcs = chan->rx_wc; 1687 numwcs = IBD_RC_MAX_CQ_WC; 1688 1689 while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) { 1690 for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) { 1691 wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id; 1692 if (wc->wc_status != IBT_WC_SUCCESS) { 1693 chan->state->rc_rcq_err++; 1694 /* 1695 * Channel being torn down. 
1696 */ 1697 DPRINT(40, "ibd_rc_poll_rcq: wc_status(%d) != " 1698 "SUCC, chan=%p", wc->wc_status, chan); 1699 if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) { 1700 /* 1701 * Do not invoke Rx handler because 1702 * it might add buffers to the Rx pool 1703 * when we are trying to deinitialize. 1704 */ 1705 continue; 1706 } 1707 } 1708 ibd_rc_process_rx(chan, WQE_TO_RWQE(wqe), wc); 1709 } 1710 } 1711 } 1712 1713 /* Receive CQ handler */ 1714 /* ARGSUSED */ 1715 static void 1716 ibd_rc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 1717 { 1718 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg; 1719 ibd_state_t *state = chan->state; 1720 1721 atomic_inc_32(&chan->rcq_invoking); 1722 ASSERT(chan->chan_state == IBD_RC_STATE_PAS_ESTAB); 1723 1724 /* 1725 * Poll for completed entries; the CQ will not interrupt any 1726 * more for incoming (or transmitted) packets. 1727 */ 1728 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 1729 1730 /* 1731 * Now enable CQ notifications; all packets that arrive now 1732 * (or complete transmission) will cause new interrupts. 1733 */ 1734 if (ibt_enable_cq_notify(chan->rcq_hdl, IBT_NEXT_COMPLETION) != 1735 IBT_SUCCESS) { 1736 /* 1737 * We do not expect a failure here. 1738 */ 1739 DPRINT(40, "ibd_rc_rcq_handler: ibt_enable_cq_notify() failed"); 1740 } 1741 1742 /* 1743 * Repoll to catch all packets that might have arrived after 1744 * we finished the first poll loop and before interrupts got 1745 * armed. 1746 */ 1747 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 1748 1749 if (state->rc_enable_srq) { 1750 mutex_enter(&state->rc_rx_lock); 1751 1752 if (state->rc_rx_mp != NULL) { 1753 mblk_t *mpc; 1754 mpc = state->rc_rx_mp; 1755 1756 state->rc_rx_mp = NULL; 1757 state->rc_rx_mp_tail = NULL; 1758 state->rc_rx_mp_len = 0; 1759 1760 mutex_exit(&state->rc_rx_lock); 1761 mac_rx(state->id_mh, NULL, mpc); 1762 } else { 1763 mutex_exit(&state->rc_rx_lock); 1764 } 1765 } else { 1766 mutex_enter(&chan->rx_lock); 1767 1768 if (chan->rx_mp != NULL) { 1769 mblk_t *mpc; 1770 mpc = chan->rx_mp; 1771 1772 chan->rx_mp = NULL; 1773 chan->rx_mp_tail = NULL; 1774 chan->rx_mp_len = 0; 1775 1776 mutex_exit(&chan->rx_lock); 1777 mac_rx(state->id_mh, NULL, mpc); 1778 } else { 1779 mutex_exit(&chan->rx_lock); 1780 } 1781 } 1782 atomic_dec_32(&chan->rcq_invoking); 1783 } 1784 1785 /* 1786 * Allocate the statically allocated Tx buffer list. 
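 * One rc_mtu-sized buffer is set aside per SWQE, all carved from a
 * single registered chunk and strung onto a free list protected by
 * rc_tx_large_bufs_lock.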
1787 */ 1788 int 1789 ibd_rc_init_tx_largebuf_list(ibd_state_t *state) 1790 { 1791 ibd_rc_tx_largebuf_t *lbufp; 1792 ibd_rc_tx_largebuf_t *tail; 1793 uint8_t *memp; 1794 ibt_mr_attr_t mem_attr; 1795 uint32_t num_swqe; 1796 size_t mem_size; 1797 int i; 1798 1799 num_swqe = state->id_rc_num_swqe - 1; 1800 1801 /* 1802 * Allocate one big chunk for all Tx large copy bufs 1803 */ 1804 /* Don't transfer IPOIB_GRH_SIZE bytes (40 bytes) */ 1805 mem_size = num_swqe * state->rc_mtu; 1806 state->rc_tx_mr_bufs = kmem_zalloc(mem_size, KM_SLEEP); 1807 1808 mem_attr.mr_len = mem_size; 1809 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_tx_mr_bufs; 1810 mem_attr.mr_as = NULL; 1811 mem_attr.mr_flags = IBT_MR_SLEEP; 1812 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1813 &state->rc_tx_mr_hdl, &state->rc_tx_mr_desc) != IBT_SUCCESS) { 1814 DPRINT(40, "ibd_rc_init_tx_largebuf_list: ibt_register_mr " 1815 "failed"); 1816 kmem_free(state->rc_tx_mr_bufs, mem_size); 1817 state->rc_tx_mr_bufs = NULL; 1818 return (DDI_FAILURE); 1819 } 1820 1821 state->rc_tx_largebuf_desc_base = kmem_zalloc(num_swqe * 1822 sizeof (ibd_rc_tx_largebuf_t), KM_SLEEP); 1823 1824 /* 1825 * Set up the buf chain 1826 */ 1827 memp = state->rc_tx_mr_bufs; 1828 mutex_enter(&state->rc_tx_large_bufs_lock); 1829 lbufp = state->rc_tx_largebuf_desc_base; 1830 for (i = 0; i < num_swqe; i++) { 1831 lbufp->lb_buf = memp; 1832 lbufp->lb_next = lbufp + 1; 1833 1834 tail = lbufp; 1835 1836 memp += state->rc_mtu; 1837 lbufp++; 1838 } 1839 tail->lb_next = NULL; 1840 1841 /* 1842 * Set up the buffer information in ibd state 1843 */ 1844 state->rc_tx_largebuf_free_head = state->rc_tx_largebuf_desc_base; 1845 state->rc_tx_largebuf_nfree = num_swqe; 1846 mutex_exit(&state->rc_tx_large_bufs_lock); 1847 return (DDI_SUCCESS); 1848 } 1849 1850 void 1851 ibd_rc_fini_tx_largebuf_list(ibd_state_t *state) 1852 { 1853 uint32_t num_swqe; 1854 1855 num_swqe = state->id_rc_num_swqe - 1; 1856 1857 if (ibt_deregister_mr(state->id_hca_hdl, 1858 state->rc_tx_mr_hdl) != IBT_SUCCESS) { 1859 DPRINT(40, "ibd_rc_fini_tx_largebuf_list: ibt_deregister_mr() " 1860 "failed"); 1861 } 1862 state->rc_tx_mr_hdl = NULL; 1863 1864 kmem_free(state->rc_tx_mr_bufs, num_swqe * state->rc_mtu); 1865 state->rc_tx_mr_bufs = NULL; 1866 1867 kmem_free(state->rc_tx_largebuf_desc_base, 1868 num_swqe * sizeof (ibd_rc_tx_largebuf_t)); 1869 state->rc_tx_largebuf_desc_base = NULL; 1870 } 1871 1872 static int 1873 ibd_rc_alloc_tx_copybufs(ibd_rc_chan_t *chan) 1874 { 1875 ibt_mr_attr_t mem_attr; 1876 ibd_state_t *state; 1877 1878 state = chan->state; 1879 ASSERT(state != NULL); 1880 1881 /* 1882 * Allocate one big chunk for all regular tx copy bufs 1883 */ 1884 mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh; 1885 1886 chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP); 1887 1888 /* 1889 * Do one memory registration on the entire txbuf area 1890 */ 1891 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->tx_mr_bufs; 1892 mem_attr.mr_as = NULL; 1893 mem_attr.mr_flags = IBT_MR_SLEEP; 1894 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1895 &chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) { 1896 DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed"); 1897 ASSERT(mem_attr.mr_len == 1898 chan->scq_size * state->id_rc_tx_copy_thresh); 1899 kmem_free(chan->tx_mr_bufs, mem_attr.mr_len); 1900 chan->tx_mr_bufs = NULL; 1901 return (DDI_FAILURE); 1902 } 1903 1904 return (DDI_SUCCESS); 1905 } 1906 1907 /* 1908 * Allocate the statically allocated 
Tx buffer list. 1909 */ 1910 static int 1911 ibd_rc_init_txlist(ibd_rc_chan_t *chan) 1912 { 1913 ibd_swqe_t *swqe; 1914 int i; 1915 ibt_lkey_t lkey; 1916 ibd_state_t *state = chan->state; 1917 1918 if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS) 1919 return (DDI_FAILURE); 1920 1921 /* 1922 * Allocate and setup the swqe list 1923 */ 1924 lkey = chan->tx_mr_desc.md_lkey; 1925 chan->tx_wqes = kmem_zalloc(chan->scq_size * 1926 sizeof (ibd_swqe_t), KM_SLEEP); 1927 swqe = chan->tx_wqes; 1928 for (i = 0; i < chan->scq_size; i++, swqe++) { 1929 swqe->swqe_next = NULL; 1930 swqe->swqe_im_mblk = NULL; 1931 1932 swqe->swqe_copybuf.ic_sgl.ds_key = lkey; 1933 swqe->swqe_copybuf.ic_sgl.ds_len = 0; /* set in send */ 1934 1935 swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe; 1936 swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL; 1937 swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t) 1938 (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh); 1939 swqe->w_swr.wr_trans = IBT_RC_SRV; 1940 1941 /* Add to list */ 1942 mutex_enter(&chan->tx_wqe_list.dl_mutex); 1943 chan->tx_wqe_list.dl_cnt++; 1944 swqe->swqe_next = chan->tx_wqe_list.dl_head; 1945 chan->tx_wqe_list.dl_head = SWQE_TO_WQE(swqe); 1946 mutex_exit(&chan->tx_wqe_list.dl_mutex); 1947 } 1948 1949 return (DDI_SUCCESS); 1950 } 1951 1952 /* 1953 * Free the statically allocated Tx buffer list. 1954 */ 1955 static void 1956 ibd_rc_fini_txlist(ibd_rc_chan_t *chan) 1957 { 1958 ibd_state_t *state = chan->state; 1959 if (chan->tx_mr_hdl != NULL) { 1960 if (ibt_deregister_mr(chan->state->id_hca_hdl, 1961 chan->tx_mr_hdl) != IBT_SUCCESS) { 1962 DPRINT(40, "ibd_rc_fini_txlist: ibt_deregister_mr " 1963 "failed"); 1964 } 1965 chan->tx_mr_hdl = NULL; 1966 } 1967 1968 if (chan->tx_mr_bufs != NULL) { 1969 kmem_free(chan->tx_mr_bufs, chan->scq_size * 1970 state->id_rc_tx_copy_thresh); 1971 chan->tx_mr_bufs = NULL; 1972 } 1973 1974 if (chan->tx_wqes != NULL) { 1975 kmem_free(chan->tx_wqes, chan->scq_size * 1976 sizeof (ibd_swqe_t)); 1977 chan->tx_wqes = NULL; 1978 } 1979 } 1980 1981 /* 1982 * Acquire send wqe from free list. 1983 * Returns error number and send wqe pointer. 1984 */ 1985 ibd_swqe_t * 1986 ibd_rc_acquire_swqes(ibd_rc_chan_t *chan) 1987 { 1988 ibd_swqe_t *wqe; 1989 1990 mutex_enter(&chan->tx_rel_list.dl_mutex); 1991 if (chan->tx_rel_list.dl_head != NULL) { 1992 /* transfer id_tx_rel_list to id_tx_list */ 1993 chan->tx_wqe_list.dl_head = 1994 chan->tx_rel_list.dl_head; 1995 chan->tx_wqe_list.dl_cnt = 1996 chan->tx_rel_list.dl_cnt; 1997 chan->tx_wqe_list.dl_pending_sends = B_FALSE; 1998 1999 /* clear id_tx_rel_list */ 2000 chan->tx_rel_list.dl_head = NULL; 2001 chan->tx_rel_list.dl_cnt = 0; 2002 mutex_exit(&chan->tx_rel_list.dl_mutex); 2003 2004 wqe = WQE_TO_SWQE(chan->tx_wqe_list.dl_head); 2005 chan->tx_wqe_list.dl_cnt -= 1; 2006 chan->tx_wqe_list.dl_head = wqe->swqe_next; 2007 } else { /* no free swqe */ 2008 mutex_exit(&chan->tx_rel_list.dl_mutex); 2009 chan->tx_wqe_list.dl_pending_sends = B_TRUE; 2010 wqe = NULL; 2011 } 2012 return (wqe); 2013 } 2014 2015 /* 2016 * Release send wqe back into free list. 2017 */ 2018 static void 2019 ibd_rc_release_swqe(ibd_rc_chan_t *chan, ibd_swqe_t *swqe) 2020 { 2021 /* 2022 * Add back on Tx list for reuse. 
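 *
 * Completions always push freed swqes onto tx_rel_list (under its own
 * dl_mutex); the send path consumes tx_wqe_list and, when that runs dry,
 * ibd_rc_acquire_swqes() above moves the whole tx_rel_list over in one
 * step.  Keeping two lists means the send and completion paths normally
 * contend on different mutexes.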
2023 */ 2024 swqe->swqe_next = NULL; 2025 mutex_enter(&chan->tx_rel_list.dl_mutex); 2026 chan->tx_rel_list.dl_pending_sends = B_FALSE; 2027 swqe->swqe_next = chan->tx_rel_list.dl_head; 2028 chan->tx_rel_list.dl_head = SWQE_TO_WQE(swqe); 2029 chan->tx_rel_list.dl_cnt++; 2030 mutex_exit(&chan->tx_rel_list.dl_mutex); 2031 } 2032 2033 void 2034 ibd_rc_post_send(ibd_rc_chan_t *chan, ibd_swqe_t *node) 2035 { 2036 uint_t i; 2037 uint_t num_posted; 2038 uint_t n_wrs; 2039 ibt_status_t ibt_status; 2040 ibt_send_wr_t wrs[IBD_MAX_TX_POST_MULTIPLE]; 2041 ibd_swqe_t *tx_head, *elem; 2042 ibd_swqe_t *nodes[IBD_MAX_TX_POST_MULTIPLE]; 2043 2044 /* post the one request, then check for more */ 2045 ibt_status = ibt_post_send(chan->chan_hdl, 2046 &node->w_swr, 1, NULL); 2047 if (ibt_status != IBT_SUCCESS) { 2048 ibd_print_warn(chan->state, "ibd_post_send: " 2049 "posting one wr failed: ret=%d", ibt_status); 2050 ibd_rc_tx_cleanup(node); 2051 } 2052 2053 tx_head = NULL; 2054 for (;;) { 2055 if (tx_head == NULL) { 2056 mutex_enter(&chan->tx_post_lock); 2057 tx_head = chan->tx_head; 2058 if (tx_head == NULL) { 2059 chan->tx_busy = 0; 2060 mutex_exit(&chan->tx_post_lock); 2061 return; 2062 } 2063 chan->tx_head = NULL; 2064 mutex_exit(&chan->tx_post_lock); 2065 } 2066 2067 /* 2068 * Collect pending requests, IBD_MAX_TX_POST_MULTIPLE wrs 2069 * at a time if possible, and keep posting them. 2070 */ 2071 for (n_wrs = 0, elem = tx_head; 2072 (elem) && (n_wrs < IBD_MAX_TX_POST_MULTIPLE); 2073 elem = WQE_TO_SWQE(elem->swqe_next), n_wrs++) { 2074 nodes[n_wrs] = elem; 2075 wrs[n_wrs] = elem->w_swr; 2076 } 2077 tx_head = elem; 2078 2079 ASSERT(n_wrs != 0); 2080 2081 /* 2082 * If posting fails for some reason, we'll never receive 2083 * completion intimation, so we'll need to cleanup. But 2084 * we need to make sure we don't clean up nodes whose 2085 * wrs have been successfully posted. We assume that the 2086 * hca driver returns on the first failure to post and 2087 * therefore the first 'num_posted' entries don't need 2088 * cleanup here. 2089 */ 2090 num_posted = 0; 2091 ibt_status = ibt_post_send(chan->chan_hdl, 2092 wrs, n_wrs, &num_posted); 2093 if (ibt_status != IBT_SUCCESS) { 2094 ibd_print_warn(chan->state, "ibd_post_send: " 2095 "posting multiple wrs failed: " 2096 "requested=%d, done=%d, ret=%d", 2097 n_wrs, num_posted, ibt_status); 2098 2099 for (i = num_posted; i < n_wrs; i++) 2100 ibd_rc_tx_cleanup(nodes[i]); 2101 } 2102 } 2103 } 2104 2105 /* 2106 * Common code that deals with clean ups after a successful or 2107 * erroneous transmission attempt. 2108 */ 2109 void 2110 ibd_rc_tx_cleanup(ibd_swqe_t *swqe) 2111 { 2112 ibd_ace_t *ace = swqe->w_ahandle; 2113 ibd_state_t *state; 2114 2115 ASSERT(ace != NULL); 2116 ASSERT(ace->ac_chan != NULL); 2117 2118 state = ace->ac_chan->state; 2119 2120 /* 2121 * If this was a dynamic registration in ibd_send(), 2122 * deregister now. 
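 * (swqe_im_mblk is only set for IBD_WQE_MAPPED sends; copy-path sends
 * either borrowed a pre-registered large Tx buffer, which is returned to
 * its free list below, or used the channel copy buffer set up in
 * ibd_rc_init_txlist(), which needs no per-send teardown.)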
2123 */ 2124 if (swqe->swqe_im_mblk != NULL) { 2125 ASSERT(swqe->w_buftype == IBD_WQE_MAPPED); 2126 if (swqe->w_buftype == IBD_WQE_MAPPED) { 2127 ibd_unmap_mem(state, swqe); 2128 } 2129 freemsg(swqe->swqe_im_mblk); 2130 swqe->swqe_im_mblk = NULL; 2131 } else { 2132 ASSERT(swqe->w_buftype != IBD_WQE_MAPPED); 2133 } 2134 2135 if (swqe->w_buftype == IBD_WQE_RC_COPYBUF) { 2136 ibd_rc_tx_largebuf_t *lbufp; 2137 2138 lbufp = swqe->w_rc_tx_largebuf; 2139 ASSERT(lbufp != NULL); 2140 2141 mutex_enter(&state->rc_tx_large_bufs_lock); 2142 lbufp->lb_next = state->rc_tx_largebuf_free_head; 2143 state->rc_tx_largebuf_free_head = lbufp; 2144 state->rc_tx_largebuf_nfree++; 2145 mutex_exit(&state->rc_tx_large_bufs_lock); 2146 swqe->w_rc_tx_largebuf = NULL; 2147 } 2148 2149 2150 /* 2151 * Release the send wqe for reuse. 2152 */ 2153 ibd_rc_release_swqe(ace->ac_chan, swqe); 2154 2155 /* 2156 * Drop the reference count on the AH; it can be reused 2157 * now for a different destination if there are no more 2158 * posted sends that will use it. This can be eliminated 2159 * if we can always associate each Tx buffer with an AH. 2160 * The ace can be null if we are cleaning up from the 2161 * ibd_send() error path. 2162 */ 2163 ibd_dec_ref_ace(state, ace); 2164 } 2165 2166 void 2167 ibd_rc_drain_scq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl) 2168 { 2169 ibd_state_t *state = chan->state; 2170 ibd_wqe_t *wqe; 2171 ibt_wc_t *wc, *wcs; 2172 ibd_ace_t *ace; 2173 uint_t numwcs, real_numwcs; 2174 int i; 2175 boolean_t encount_error; 2176 2177 wcs = chan->tx_wc; 2178 numwcs = IBD_RC_MAX_CQ_WC; 2179 encount_error = B_FALSE; 2180 2181 while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) { 2182 for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) { 2183 wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id; 2184 if (wc->wc_status != IBT_WC_SUCCESS) { 2185 if (encount_error == B_FALSE) { 2186 /* 2187 * This RC channel is in an error 2188 * state; remove it. 2189 */ 2190 encount_error = B_TRUE; 2191 mutex_enter(&state->id_ac_mutex); 2192 if ((chan->chan_state == 2193 IBD_RC_STATE_ACT_ESTAB) && 2194 (chan->state->id_link_state == 2195 LINK_STATE_UP) && 2196 ((ace = ibd_acache_find(state, 2197 &chan->ace->ac_mac, B_FALSE, 0)) 2198 != NULL) && (ace == chan->ace)) { 2199 ASSERT(ace->ac_mce == NULL); 2200 INC_REF(ace, 1); 2201 IBD_ACACHE_PULLOUT_ACTIVE( 2202 state, ace); 2203 chan->chan_state = 2204 IBD_RC_STATE_ACT_CLOSING; 2205 mutex_exit(&state->id_ac_mutex); 2206 state->rc_reset_cnt++; 2207 DPRINT(30, "ibd_rc_drain_scq: " 2208 "wc_status(%d) != SUCC, " 2209 "chan=%p, ace=%p, " 2210 "link_state=%d, " 2211 "reset RC channel", 2212 wc->wc_status, chan, 2213 chan->ace, chan->state-> 2214 id_link_state); 2215 ibd_rc_signal_act_close( 2216 state, ace); 2217 } else { 2218 mutex_exit(&state->id_ac_mutex); 2219 state-> 2220 rc_act_close_simultaneous++; 2221 DPRINT(40, "ibd_rc_drain_scq: " 2222 "wc_status(%d) != SUCC, " 2223 "chan=%p, chan_state=%d," 2224 "ace=%p, link_state=%d."
2225 "other thread is closing " 2226 "it", wc->wc_status, chan, 2227 chan->chan_state, chan->ace, 2228 chan->state->id_link_state); 2229 } 2230 } 2231 } 2232 ibd_rc_tx_cleanup(WQE_TO_SWQE(wqe)); 2233 } 2234 2235 mutex_enter(&state->id_sched_lock); 2236 if (state->id_sched_needed == 0) { 2237 mutex_exit(&state->id_sched_lock); 2238 } else if (state->id_sched_needed & IBD_RSRC_RC_SWQE) { 2239 mutex_enter(&chan->tx_wqe_list.dl_mutex); 2240 mutex_enter(&chan->tx_rel_list.dl_mutex); 2241 if ((chan->tx_rel_list.dl_cnt + 2242 chan->tx_wqe_list.dl_cnt) > IBD_RC_TX_FREE_THRESH) { 2243 state->id_sched_needed &= ~IBD_RSRC_RC_SWQE; 2244 mutex_exit(&chan->tx_rel_list.dl_mutex); 2245 mutex_exit(&chan->tx_wqe_list.dl_mutex); 2246 mutex_exit(&state->id_sched_lock); 2247 state->rc_swqe_mac_update++; 2248 mac_tx_update(state->id_mh); 2249 } else { 2250 state->rc_scq_no_swqe++; 2251 mutex_exit(&chan->tx_rel_list.dl_mutex); 2252 mutex_exit(&chan->tx_wqe_list.dl_mutex); 2253 mutex_exit(&state->id_sched_lock); 2254 } 2255 } else if (state->id_sched_needed & IBD_RSRC_RC_TX_LARGEBUF) { 2256 mutex_enter(&state->rc_tx_large_bufs_lock); 2257 if (state->rc_tx_largebuf_nfree > 2258 IBD_RC_TX_FREE_THRESH) { 2259 ASSERT(state->rc_tx_largebuf_free_head != NULL); 2260 state->id_sched_needed &= 2261 ~IBD_RSRC_RC_TX_LARGEBUF; 2262 mutex_exit(&state->rc_tx_large_bufs_lock); 2263 mutex_exit(&state->id_sched_lock); 2264 state->rc_xmt_buf_mac_update++; 2265 mac_tx_update(state->id_mh); 2266 } else { 2267 state->rc_scq_no_largebuf++; 2268 mutex_exit(&state->rc_tx_large_bufs_lock); 2269 mutex_exit(&state->id_sched_lock); 2270 } 2271 } else if (state->id_sched_needed & IBD_RSRC_SWQE) { 2272 mutex_enter(&state->id_tx_list.dl_mutex); 2273 mutex_enter(&state->id_tx_rel_list.dl_mutex); 2274 if ((state->id_tx_list.dl_cnt + 2275 state->id_tx_rel_list.dl_cnt) 2276 > IBD_FREE_SWQES_THRESH) { 2277 state->id_sched_needed &= ~IBD_RSRC_SWQE; 2278 state->id_sched_cnt++; 2279 mutex_exit(&state->id_tx_rel_list.dl_mutex); 2280 mutex_exit(&state->id_tx_list.dl_mutex); 2281 mutex_exit(&state->id_sched_lock); 2282 mac_tx_update(state->id_mh); 2283 } else { 2284 mutex_exit(&state->id_tx_rel_list.dl_mutex); 2285 mutex_exit(&state->id_tx_list.dl_mutex); 2286 mutex_exit(&state->id_sched_lock); 2287 } 2288 } else { 2289 mutex_exit(&state->id_sched_lock); 2290 } 2291 } 2292 } 2293 2294 /* Send CQ handler, call ibd_rx_tx_cleanup to recycle Tx buffers */ 2295 /* ARGSUSED */ 2296 static void 2297 ibd_rc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 2298 { 2299 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg; 2300 2301 if (ibd_rc_tx_softintr == 1) { 2302 mutex_enter(&chan->tx_poll_lock); 2303 if (chan->tx_poll_busy & IBD_CQ_POLLING) { 2304 chan->tx_poll_busy |= IBD_REDO_CQ_POLLING; 2305 mutex_exit(&chan->tx_poll_lock); 2306 return; 2307 } else { 2308 mutex_exit(&chan->tx_poll_lock); 2309 ddi_trigger_softintr(chan->scq_softintr); 2310 } 2311 } else 2312 (void) ibd_rc_tx_recycle(arg); 2313 } 2314 2315 static uint_t 2316 ibd_rc_tx_recycle(caddr_t arg) 2317 { 2318 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg; 2319 ibd_state_t *state = chan->state; 2320 int flag, redo_flag; 2321 int redo = 1; 2322 2323 flag = IBD_CQ_POLLING; 2324 redo_flag = IBD_REDO_CQ_POLLING; 2325 2326 mutex_enter(&chan->tx_poll_lock); 2327 if (chan->tx_poll_busy & flag) { 2328 ibd_print_warn(state, "ibd_rc_tx_recycle: multiple polling " 2329 "threads"); 2330 chan->tx_poll_busy |= redo_flag; 2331 mutex_exit(&chan->tx_poll_lock); 2332 return (DDI_INTR_CLAIMED); 2333 } 2334 chan->tx_poll_busy |= flag; 
2335 mutex_exit(&chan->tx_poll_lock); 2336 2337 /* 2338 * Poll for completed entries; the CQ will not interrupt any 2339 * more for completed packets. 2340 */ 2341 ibd_rc_drain_scq(chan, chan->scq_hdl); 2342 2343 /* 2344 * Now enable CQ notifications; all completions originating now 2345 * will cause new interrupts. 2346 */ 2347 do { 2348 if (ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION) != 2349 IBT_SUCCESS) { 2350 /* 2351 * We do not expect a failure here. 2352 */ 2353 DPRINT(40, "ibd_rc_scq_handler: ibt_enable_cq_notify()" 2354 " failed"); 2355 } 2356 2357 ibd_rc_drain_scq(chan, chan->scq_hdl); 2358 2359 mutex_enter(&chan->tx_poll_lock); 2360 if (chan->tx_poll_busy & redo_flag) 2361 chan->tx_poll_busy &= ~redo_flag; 2362 else { 2363 chan->tx_poll_busy &= ~flag; 2364 redo = 0; 2365 } 2366 mutex_exit(&chan->tx_poll_lock); 2367 2368 } while (redo); 2369 2370 return (DDI_INTR_CLAIMED); 2371 } 2372 2373 static ibt_status_t 2374 ibd_register_service(ibt_srv_desc_t *srv, ib_svc_id_t sid, 2375 int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid) 2376 { 2377 ibd_service_t *p; 2378 ibt_status_t status; 2379 2380 mutex_enter(&ibd_gstate.ig_mutex); 2381 for (p = ibd_gstate.ig_service_list; p != NULL; p = p->is_link) { 2382 if (p->is_sid == sid) { 2383 p->is_ref_cnt++; 2384 *srv_hdl = p->is_srv_hdl; 2385 *ret_sid = sid; 2386 mutex_exit(&ibd_gstate.ig_mutex); 2387 return (IBT_SUCCESS); 2388 } 2389 } 2390 status = ibt_register_service(ibd_gstate.ig_ibt_hdl, srv, sid, 2391 num_sids, srv_hdl, ret_sid); 2392 if (status == IBT_SUCCESS) { 2393 p = kmem_alloc(sizeof (*p), KM_SLEEP); 2394 p->is_srv_hdl = *srv_hdl; 2395 p->is_sid = sid; 2396 p->is_ref_cnt = 1; 2397 p->is_link = ibd_gstate.ig_service_list; 2398 ibd_gstate.ig_service_list = p; 2399 } 2400 mutex_exit(&ibd_gstate.ig_mutex); 2401 return (status); 2402 } 2403 2404 static ibt_status_t 2405 ibd_deregister_service(ibt_srv_hdl_t srv_hdl) 2406 { 2407 ibd_service_t *p, **pp; 2408 ibt_status_t status; 2409 2410 mutex_enter(&ibd_gstate.ig_mutex); 2411 for (pp = &ibd_gstate.ig_service_list; *pp != NULL; 2412 pp = &((*pp)->is_link)) { 2413 p = *pp; 2414 if (p->is_srv_hdl == srv_hdl) { /* Found it */ 2415 if (--p->is_ref_cnt == 0) { 2416 status = ibt_deregister_service( 2417 ibd_gstate.ig_ibt_hdl, srv_hdl); 2418 *pp = p->is_link; /* link prev to next */ 2419 kmem_free(p, sizeof (*p)); 2420 } else { 2421 status = IBT_SUCCESS; 2422 } 2423 mutex_exit(&ibd_gstate.ig_mutex); 2424 return (status); 2425 } 2426 } 2427 /* Should not ever get here */ 2428 mutex_exit(&ibd_gstate.ig_mutex); 2429 return (IBT_FAILURE); 2430 } 2431 2432 /* Listen with corresponding service ID */ 2433 ibt_status_t 2434 ibd_rc_listen(ibd_state_t *state) 2435 { 2436 ibt_srv_desc_t srvdesc; 2437 ib_svc_id_t ret_sid; 2438 ibt_status_t status; 2439 ib_gid_t gid; 2440 2441 if (state->rc_listen_hdl != NULL) { 2442 DPRINT(40, "ibd_rc_listen: rc_listen_hdl should be NULL"); 2443 return (IBT_FAILURE); 2444 } 2445 2446 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 2447 srvdesc.sd_handler = ibd_rc_dispatch_pass_mad; 2448 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 2449 2450 /* 2451 * Register the service with service id 2452 * Incoming connection requests should arrive on this service id. 
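 * The service ID is derived from our IPoIB UD QP number, so a peer that
 * has resolved our QPN can compute the same SID before it connects, e.g.
 * (using the macro defined near the top of this file):
 *
 *	ib_svc_id_t sid = IBD_RC_QPN_TO_SID(state->id_qpnum);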
2453 */ 2454 status = ibd_register_service(&srvdesc, 2455 IBD_RC_QPN_TO_SID(state->id_qpnum), 2456 1, &state->rc_listen_hdl, &ret_sid); 2457 if (status != IBT_SUCCESS) { 2458 DPRINT(40, "ibd_rc_listen: Service Registration Failed, " 2459 "ret=%d", status); 2460 return (status); 2461 } 2462 2463 gid = state->id_sgid; 2464 2465 /* pass state as cm_private */ 2466 status = ibt_bind_service(state->rc_listen_hdl, 2467 gid, NULL, state, &state->rc_listen_bind); 2468 if (status != IBT_SUCCESS) { 2469 DPRINT(40, "ibd_rc_listen:" 2470 " fail to bind port: <%d>", status); 2471 (void) ibd_deregister_service(state->rc_listen_hdl); 2472 return (status); 2473 } 2474 2475 /* 2476 * Legacy OFED had used a wrong service ID (one additional zero digit) 2477 * for many years. To interop with legacy OFED, we support this wrong 2478 * service ID here. 2479 */ 2480 ASSERT(state->rc_listen_hdl_OFED_interop == NULL); 2481 2482 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 2483 srvdesc.sd_handler = ibd_rc_dispatch_pass_mad; 2484 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 2485 2486 /* 2487 * Register the service with service id 2488 * Incoming connection requests should arrive on this service id. 2489 */ 2490 status = ibd_register_service(&srvdesc, 2491 IBD_RC_QPN_TO_SID_OFED_INTEROP(state->id_qpnum), 2492 1, &state->rc_listen_hdl_OFED_interop, &ret_sid); 2493 if (status != IBT_SUCCESS) { 2494 DPRINT(40, 2495 "ibd_rc_listen: Service Registration for Legacy OFED " 2496 "Failed %d", status); 2497 (void) ibt_unbind_service(state->rc_listen_hdl, 2498 state->rc_listen_bind); 2499 (void) ibd_deregister_service(state->rc_listen_hdl); 2500 return (status); 2501 } 2502 2503 gid = state->id_sgid; 2504 2505 /* pass state as cm_private */ 2506 status = ibt_bind_service(state->rc_listen_hdl_OFED_interop, 2507 gid, NULL, state, &state->rc_listen_bind_OFED_interop); 2508 if (status != IBT_SUCCESS) { 2509 DPRINT(40, "ibd_rc_listen: fail to bind port: <%d> for " 2510 "Legacy OFED listener", status); 2511 (void) ibd_deregister_service( 2512 state->rc_listen_hdl_OFED_interop); 2513 (void) ibt_unbind_service(state->rc_listen_hdl, 2514 state->rc_listen_bind); 2515 (void) ibd_deregister_service(state->rc_listen_hdl); 2516 return (status); 2517 } 2518 2519 return (IBT_SUCCESS); 2520 } 2521 2522 void 2523 ibd_rc_stop_listen(ibd_state_t *state) 2524 { 2525 int ret; 2526 2527 /* Disable incoming connection requests */ 2528 if (state->rc_listen_hdl != NULL) { 2529 ret = ibt_unbind_all_services(state->rc_listen_hdl); 2530 if (ret != 0) { 2531 DPRINT(40, "ibd_rc_stop_listen:" 2532 "ibt_unbind_all_services() failed, ret=%d", ret); 2533 } 2534 ret = ibd_deregister_service(state->rc_listen_hdl); 2535 if (ret != 0) { 2536 DPRINT(40, "ibd_rc_stop_listen:" 2537 "ibd_deregister_service() failed, ret=%d", ret); 2538 } else { 2539 state->rc_listen_hdl = NULL; 2540 } 2541 } 2542 2543 /* Disable incoming connection requests */ 2544 if (state->rc_listen_hdl_OFED_interop != NULL) { 2545 ret = ibt_unbind_all_services( 2546 state->rc_listen_hdl_OFED_interop); 2547 if (ret != 0) { 2548 DPRINT(40, "ibd_rc_stop_listen:" 2549 "ibt_unbind_all_services() failed: %d", ret); 2550 } 2551 ret = ibd_deregister_service(state->rc_listen_hdl_OFED_interop); 2552 if (ret != 0) { 2553 DPRINT(40, "ibd_rc_stop_listen:" 2554 "ibd_deregister_service() failed: %d", ret); 2555 } else { 2556 state->rc_listen_hdl_OFED_interop = NULL; 2557 } 2558 } 2559 } 2560 2561 void 2562 ibd_rc_close_all_chan(ibd_state_t *state) 2563 { 2564 ibd_rc_chan_t *rc_chan; 2565 ibd_ace_t *ace, *pre_ace; 2566 uint_t 
attempts; 2567 2568 /* Disable all Rx routines */ 2569 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex); 2570 rc_chan = state->rc_pass_chan_list.chan_list; 2571 while (rc_chan != NULL) { 2572 ibt_set_cq_handler(rc_chan->rcq_hdl, 0, 0); 2573 rc_chan = rc_chan->next; 2574 } 2575 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 2576 2577 if (state->rc_enable_srq) { 2578 attempts = 10; 2579 while (state->rc_srq_rwqe_list.dl_bufs_outstanding > 0) { 2580 DPRINT(30, "ibd_rc_close_all_chan: outstanding > 0"); 2581 delay(drv_usectohz(100000)); 2582 if (--attempts == 0) { 2583 /* 2584 * There are still pending bufs with the 2585 * network layer and we have no choice but 2586 * to wait for them to be returned. We have 2587 * already waited long enough here, so stop 2588 * waiting and go ahead with closing the 2589 * passive channels below. 2590 */ 2591 break; 2592 } 2593 } 2594 } 2595 2596 /* Close all passive RC channels */ 2597 rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list); 2598 while (rc_chan != NULL) { 2599 (void) ibd_rc_pas_close(rc_chan, B_TRUE, B_FALSE); 2600 rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list); 2601 } 2602 2603 /* Close all active RC channels */ 2604 mutex_enter(&state->id_ac_mutex); 2605 state->id_ac_hot_ace = NULL; 2606 ace = list_head(&state->id_ah_active); 2607 while ((pre_ace = ace) != NULL) { 2608 ace = list_next(&state->id_ah_active, ace); 2609 if (pre_ace->ac_chan != NULL) { 2610 INC_REF(pre_ace, 1); 2611 IBD_ACACHE_PULLOUT_ACTIVE(state, pre_ace); 2612 pre_ace->ac_chan->chan_state = IBD_RC_STATE_ACT_CLOSING; 2613 ibd_rc_add_to_chan_list(&state->rc_obs_act_chan_list, 2614 pre_ace->ac_chan); 2615 } 2616 } 2617 mutex_exit(&state->id_ac_mutex); 2618 2619 rc_chan = ibd_rc_rm_header_chan_list(&state->rc_obs_act_chan_list); 2620 while (rc_chan != NULL) { 2621 ace = rc_chan->ace; 2622 ibd_rc_act_close(rc_chan, B_TRUE); 2623 if (ace != NULL) { 2624 mutex_enter(&state->id_ac_mutex); 2625 ASSERT(ace->ac_ref != 0); 2626 atomic_dec_32(&ace->ac_ref); 2627 ace->ac_chan = NULL; 2628 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) { 2629 IBD_ACACHE_INSERT_FREE(state, ace); 2630 ace->ac_ref = 0; 2631 } else { 2632 ace->ac_ref |= CYCLEVAL; 2633 state->rc_delay_ace_recycle++; 2634 } 2635 mutex_exit(&state->id_ac_mutex); 2636 } 2637 rc_chan = ibd_rc_rm_header_chan_list( 2638 &state->rc_obs_act_chan_list); 2639 } 2640 2641 attempts = 400; 2642 while (((state->rc_num_tx_chan != 0) || 2643 (state->rc_num_rx_chan != 0)) && (attempts > 0)) { 2644 /* Another thread is closing the CM channel; wait for it */ 2645 delay(drv_usectohz(100000)); 2646 attempts--; 2647 } 2648 } 2649 2650 void 2651 ibd_rc_try_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path) 2652 { 2653 ibt_status_t status; 2654 2655 if ((state->id_mac_state & IBD_DRV_STARTED) == 0) 2656 return; 2657 2658 status = ibd_rc_connect(state, ace, path, 2659 IBD_RC_SERVICE_ID_OFED_INTEROP); 2660 2661 if (status != IBT_SUCCESS) { 2662 /* wait for the peer to remove the stale channel */ 2663 delay(drv_usectohz(10000)); 2664 if ((state->id_mac_state & IBD_DRV_STARTED) == 0) 2665 return; 2666 status = ibd_rc_connect(state, ace, path, 2667 IBD_RC_SERVICE_ID_OFED_INTEROP); 2668 } 2669 2670 if (status != IBT_SUCCESS) { 2671 /* wait for the peer to remove the stale channel */ 2672 delay(drv_usectohz(10000)); 2673 if ((state->id_mac_state & IBD_DRV_STARTED) == 0) 2674 return; 2675 (void) ibd_rc_connect(state, ace, path, 2676 IBD_RC_SERVICE_ID); 2677 } 2678 } 2679 2680 /* 2681 * Allocates a channel and sets ace->ac_chan to it.
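 * Sends a "hello" message (our UD QPN and RC MTU, in network byte order)
 * in the CM REQ private data and receives the peer's hello back in the
 * REP private data, roughly as in this sketch of the request side
 * (mirroring the code below):
 *
 *	hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
 *	hello_req_msg.rx_mtu = htonl(state->rc_mtu);
 *	open_args.oc_priv_data = (void *)(&hello_req_msg);
 *	open_args.oc_priv_data_len = sizeof (ibd_rc_msg_hello_t);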
2682 * Opens the channel. 2683 */ 2684 ibt_status_t 2685 ibd_rc_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path, 2686 uint64_t ietf_cm_service_id) 2687 { 2688 ibt_status_t status = 0; 2689 ibt_rc_returns_t open_returns; 2690 ibt_chan_open_args_t open_args; 2691 ibd_rc_msg_hello_t hello_req_msg; 2692 ibd_rc_msg_hello_t *hello_ack_msg; 2693 ibd_rc_chan_t *chan; 2694 ibt_ud_dest_query_attr_t dest_attrs; 2695 2696 ASSERT(ace != NULL); 2697 ASSERT(ace->ac_mce == NULL); 2698 ASSERT(ace->ac_chan == NULL); 2699 2700 if ((status = ibd_rc_alloc_chan(&chan, state, B_TRUE)) != IBT_SUCCESS) { 2701 DPRINT(10, "ibd_rc_connect: ibd_rc_alloc_chan() failed"); 2702 return (status); 2703 } 2704 2705 ace->ac_chan = chan; 2706 chan->state = state; 2707 chan->ace = ace; 2708 2709 ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)ace); 2710 2711 hello_ack_msg = kmem_zalloc(sizeof (ibd_rc_msg_hello_t), KM_SLEEP); 2712 2713 /* 2714 * open the channels 2715 */ 2716 bzero(&open_args, sizeof (ibt_chan_open_args_t)); 2717 bzero(&open_returns, sizeof (ibt_rc_returns_t)); 2718 2719 open_args.oc_cm_handler = ibd_rc_dispatch_actv_mad; 2720 open_args.oc_cm_clnt_private = (void *)(uintptr_t)ace; 2721 2722 /* 2723 * update path record with the SID 2724 */ 2725 if ((status = ibt_query_ud_dest(ace->ac_dest, &dest_attrs)) 2726 != IBT_SUCCESS) { 2727 DPRINT(40, "ibd_rc_connect: ibt_query_ud_dest() failed, " 2728 "ret=%d", status); 2729 return (status); 2730 } 2731 2732 path->pi_sid = 2733 ietf_cm_service_id | ((dest_attrs.ud_dst_qpn) & 0xffffff); 2734 2735 2736 /* pre-allocate memory for hello ack message */ 2737 open_returns.rc_priv_data_len = sizeof (ibd_rc_msg_hello_t); 2738 open_returns.rc_priv_data = hello_ack_msg; 2739 2740 open_args.oc_path = path; 2741 2742 open_args.oc_path_rnr_retry_cnt = 1; 2743 open_args.oc_path_retry_cnt = 1; 2744 2745 /* We don't do RDMA */ 2746 open_args.oc_rdma_ra_out = 0; 2747 open_args.oc_rdma_ra_in = 0; 2748 2749 hello_req_msg.reserved_qpn = htonl(state->id_qpnum); 2750 hello_req_msg.rx_mtu = htonl(state->rc_mtu); 2751 open_args.oc_priv_data_len = sizeof (ibd_rc_msg_hello_t); 2752 open_args.oc_priv_data = (void *)(&hello_req_msg); 2753 2754 ASSERT(open_args.oc_priv_data_len <= IBT_REQ_PRIV_DATA_SZ); 2755 ASSERT(open_returns.rc_priv_data_len <= IBT_REP_PRIV_DATA_SZ); 2756 ASSERT(open_args.oc_cm_handler != NULL); 2757 2758 status = ibt_open_rc_channel(chan->chan_hdl, IBT_OCHAN_NO_FLAGS, 2759 IBT_BLOCKING, &open_args, &open_returns); 2760 2761 if (status == IBT_SUCCESS) { 2762 /* Success! */ 2763 DPRINT(2, "ibd_rc_connect: call ibt_open_rc_channel succ!"); 2764 state->rc_conn_succ++; 2765 kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t)); 2766 return (IBT_SUCCESS); 2767 } 2768 2769 /* failure */ 2770 (void) ibt_flush_channel(chan->chan_hdl); 2771 ibd_rc_free_chan(chan); 2772 ace->ac_chan = NULL; 2773 2774 /* check open_returns report error and exit */ 2775 DPRINT(30, "ibd_rc_connect: call ibt_open_rc_chan fail." 
2776 "ret status = %d, reason=%d, ace=%p, mtu=0x%x, qpn=0x%x," 2777 " peer qpn=0x%x", status, (int)open_returns.rc_status, ace, 2778 hello_req_msg.rx_mtu, hello_req_msg.reserved_qpn, 2779 dest_attrs.ud_dst_qpn); 2780 kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t)); 2781 return (status); 2782 } 2783 2784 void 2785 ibd_rc_signal_act_close(ibd_state_t *state, ibd_ace_t *ace) 2786 { 2787 ibd_req_t *req; 2788 2789 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP); 2790 if (req == NULL) { 2791 ibd_print_warn(state, "ibd_rc_signal_act_close: alloc " 2792 "ibd_req_t fail"); 2793 mutex_enter(&state->rc_obs_act_chan_list.chan_list_mutex); 2794 ace->ac_chan->next = state->rc_obs_act_chan_list.chan_list; 2795 state->rc_obs_act_chan_list.chan_list = ace->ac_chan; 2796 mutex_exit(&state->rc_obs_act_chan_list.chan_list_mutex); 2797 } else { 2798 req->rq_ptr = ace->ac_chan; 2799 ibd_queue_work_slot(state, req, IBD_ASYNC_RC_CLOSE_ACT_CHAN); 2800 } 2801 } 2802 2803 void 2804 ibd_rc_signal_ace_recycle(ibd_state_t *state, ibd_ace_t *ace) 2805 { 2806 ibd_req_t *req; 2807 2808 mutex_enter(&state->rc_ace_recycle_lock); 2809 if (state->rc_ace_recycle != NULL) { 2810 mutex_exit(&state->rc_ace_recycle_lock); 2811 return; 2812 } 2813 2814 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP); 2815 if (req == NULL) { 2816 mutex_exit(&state->rc_ace_recycle_lock); 2817 return; 2818 } 2819 2820 state->rc_ace_recycle = ace; 2821 mutex_exit(&state->rc_ace_recycle_lock); 2822 ASSERT(ace->ac_mce == NULL); 2823 INC_REF(ace, 1); 2824 IBD_ACACHE_PULLOUT_ACTIVE(state, ace); 2825 req->rq_ptr = ace; 2826 ibd_queue_work_slot(state, req, IBD_ASYNC_RC_RECYCLE_ACE); 2827 } 2828 2829 /* 2830 * Close an active channel 2831 * 2832 * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel() 2833 */ 2834 static void 2835 ibd_rc_act_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan) 2836 { 2837 ibd_state_t *state; 2838 ibd_ace_t *ace; 2839 uint_t times; 2840 ibt_status_t ret; 2841 2842 ASSERT(chan != NULL); 2843 2844 chan->state->rc_act_close++; 2845 switch (chan->chan_state) { 2846 case IBD_RC_STATE_ACT_CLOSING: /* stale, close it */ 2847 case IBD_RC_STATE_ACT_ESTAB: 2848 DPRINT(30, "ibd_rc_act_close-1: close and free chan, " 2849 "act_state=%d, chan=%p", chan->chan_state, chan); 2850 chan->chan_state = IBD_RC_STATE_ACT_CLOSED; 2851 ibt_set_cq_handler(chan->rcq_hdl, 0, 0); 2852 /* 2853 * Wait send queue empty. Its old value is 50 (5 seconds). But 2854 * in my experiment, 5 seconds is not enough time to let IBTL 2855 * return all buffers and ace->ac_ref. I tried 25 seconds, it 2856 * works well. As another evidence, I saw IBTL takes about 17 2857 * seconds every time it cleans a stale RC channel. 
2858 */ 2859 times = 250; 2860 ace = chan->ace; 2861 ASSERT(ace != NULL); 2862 state = chan->state; 2863 ASSERT(state != NULL); 2864 mutex_enter(&state->id_ac_mutex); 2865 mutex_enter(&chan->tx_wqe_list.dl_mutex); 2866 mutex_enter(&chan->tx_rel_list.dl_mutex); 2867 while (((chan->tx_wqe_list.dl_cnt + chan->tx_rel_list.dl_cnt) 2868 != chan->scq_size) || ((ace->ac_ref != 1) && 2869 (ace->ac_ref != (CYCLEVAL+1)))) { 2870 mutex_exit(&chan->tx_rel_list.dl_mutex); 2871 mutex_exit(&chan->tx_wqe_list.dl_mutex); 2872 mutex_exit(&state->id_ac_mutex); 2873 times--; 2874 if (times == 0) { 2875 state->rc_act_close_not_clean++; 2876 DPRINT(40, "ibd_rc_act_close: dl_cnt(tx_wqe_" 2877 "list=%d, tx_rel_list=%d) != chan->" 2878 "scq_size=%d, OR ac_ref(=%d) not clean", 2879 chan->tx_wqe_list.dl_cnt, 2880 chan->tx_rel_list.dl_cnt, 2881 chan->scq_size, ace->ac_ref); 2882 break; 2883 } 2884 mutex_enter(&chan->tx_poll_lock); 2885 if (chan->tx_poll_busy & IBD_CQ_POLLING) { 2886 DPRINT(40, "ibd_rc_act_close: multiple " 2887 "polling threads"); 2888 mutex_exit(&chan->tx_poll_lock); 2889 } else { 2890 chan->tx_poll_busy = IBD_CQ_POLLING; 2891 mutex_exit(&chan->tx_poll_lock); 2892 ibd_rc_drain_scq(chan, chan->scq_hdl); 2893 mutex_enter(&chan->tx_poll_lock); 2894 chan->tx_poll_busy = 0; 2895 mutex_exit(&chan->tx_poll_lock); 2896 } 2897 delay(drv_usectohz(100000)); 2898 mutex_enter(&state->id_ac_mutex); 2899 mutex_enter(&chan->tx_wqe_list.dl_mutex); 2900 mutex_enter(&chan->tx_rel_list.dl_mutex); 2901 } 2902 if (times != 0) { 2903 mutex_exit(&chan->tx_rel_list.dl_mutex); 2904 mutex_exit(&chan->tx_wqe_list.dl_mutex); 2905 mutex_exit(&state->id_ac_mutex); 2906 } 2907 2908 ibt_set_cq_handler(chan->scq_hdl, 0, 0); 2909 if (is_close_rc_chan) { 2910 ret = ibt_close_rc_channel(chan->chan_hdl, 2911 IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 2912 0); 2913 if (ret != IBT_SUCCESS) { 2914 DPRINT(40, "ibd_rc_act_close: ibt_close_rc_" 2915 "channel fail, chan=%p, ret=%d", 2916 chan, ret); 2917 } else { 2918 DPRINT(30, "ibd_rc_act_close: ibt_close_rc_" 2919 "channel succ, chan=%p", chan); 2920 } 2921 } 2922 2923 ibd_rc_free_chan(chan); 2924 break; 2925 case IBD_RC_STATE_ACT_REP_RECV: 2926 chan->chan_state = IBD_RC_STATE_ACT_CLOSED; 2927 (void) ibt_flush_channel(chan->chan_hdl); 2928 ibd_rc_free_chan(chan); 2929 break; 2930 case IBD_RC_STATE_ACT_ERROR: 2931 DPRINT(40, "ibd_rc_act_close: IBD_RC_STATE_ACT_ERROR branch"); 2932 break; 2933 default: 2934 DPRINT(40, "ibd_rc_act_close: default branch, act_state=%d, " 2935 "chan=%p", chan->chan_state, chan); 2936 } 2937 } 2938 2939 /* 2940 * Close a passive channel 2941 * 2942 * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel() 2943 * 2944 * is_timeout_close: if B_TRUE, this function is called by the connection 2945 * reaper (refer to function ibd_rc_conn_timeout_call). When the connection 2946 * reaper calls ibd_rc_pas_close() and finds that dl_bufs_outstanding or 2947 * chan->rcq_invoking is non-zero, it simply puts the channel back on the 2948 * passive channel list and moves on, since this may indicate that the 2949 * channel became active again by the time its cleanup started. 2950 * Doing the cleanup and then reinitiating the channel establishment is 2951 * the costlier path, so it pays to be conservative about when the 2952 * cleanup is done.
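 *
 * When that happens, ibd_rc_pas_close() calls
 * ibd_rc_re_add_to_pas_chan_list() and, if the channel was successfully
 * re-queued, returns DDI_FAILURE so the reaper knows the channel was not
 * torn down on this pass.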
2953 */ 2954 int 2955 ibd_rc_pas_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan, 2956 boolean_t is_timeout_close) 2957 { 2958 uint_t times; 2959 ibt_status_t ret; 2960 2961 ASSERT(chan != NULL); 2962 chan->state->rc_pas_close++; 2963 2964 switch (chan->chan_state) { 2965 case IBD_RC_STATE_PAS_ESTAB: 2966 if (is_timeout_close) { 2967 if ((chan->rcq_invoking != 0) || 2968 ((!chan->state->rc_enable_srq) && 2969 (chan->rx_wqe_list.dl_bufs_outstanding > 0))) { 2970 if (ibd_rc_re_add_to_pas_chan_list(chan)) { 2971 return (DDI_FAILURE); 2972 } 2973 } 2974 } 2975 /* 2976 * First, stop receive interrupts; this stops the 2977 * connection from handing up buffers to higher layers. 2978 * Wait for receive buffers to be returned; give up 2979 * after 5 seconds. 2980 */ 2981 ibt_set_cq_handler(chan->rcq_hdl, 0, 0); 2982 /* Wait 0.01 second to let ibt_set_cq_handler() take effect */ 2983 delay(drv_usectohz(10000)); 2984 if (!chan->state->rc_enable_srq) { 2985 times = 50; 2986 while (chan->rx_wqe_list.dl_bufs_outstanding > 0) { 2987 delay(drv_usectohz(100000)); 2988 if (--times == 0) { 2989 DPRINT(40, "ibd_rc_pas_close : " 2990 "reclaiming failed"); 2991 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 2992 ibt_set_cq_handler(chan->rcq_hdl, 2993 ibd_rc_rcq_handler, 2994 (void *)(uintptr_t)chan); 2995 return (DDI_FAILURE); 2996 } 2997 } 2998 } 2999 times = 50; 3000 while (chan->rcq_invoking != 0) { 3001 delay(drv_usectohz(100000)); 3002 if (--times == 0) { 3003 DPRINT(40, "ibd_rc_pas_close : " 3004 "rcq handler is being invoked"); 3005 chan->state->rc_pas_close_rcq_invoking++; 3006 break; 3007 } 3008 } 3009 ibt_set_cq_handler(chan->scq_hdl, 0, 0); 3010 chan->chan_state = IBD_RC_STATE_PAS_CLOSED; 3011 DPRINT(30, "ibd_rc_pas_close-1: close and free chan, " 3012 "chan_state=%d, chan=%p", chan->chan_state, chan); 3013 if (is_close_rc_chan) { 3014 ret = ibt_close_rc_channel(chan->chan_hdl, 3015 IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL, 3016 0); 3017 if (ret != IBT_SUCCESS) { 3018 DPRINT(40, "ibd_rc_pas_close: ibt_close_rc_" 3019 "channel() fail, chan=%p, ret=%d", chan, 3020 ret); 3021 } else { 3022 DPRINT(30, "ibd_rc_pas_close: ibt_close_rc_" 3023 "channel() succ, chan=%p", chan); 3024 } 3025 } 3026 ibd_rc_free_chan(chan); 3027 break; 3028 case IBD_RC_STATE_PAS_REQ_RECV: 3029 chan->chan_state = IBD_RC_STATE_PAS_CLOSED; 3030 (void) ibt_flush_channel(chan->chan_hdl); 3031 ibd_rc_free_chan(chan); 3032 break; 3033 default: 3034 DPRINT(40, "ibd_rc_pas_close: default, chan_state=%d, chan=%p", 3035 chan->chan_state, chan); 3036 } 3037 return (DDI_SUCCESS); 3038 } 3039 3040 /* 3041 * Passive Side: 3042 * Handle an incoming CM REQ from active side. 3043 * 3044 * If success, this function allocates an ibd_rc_chan_t, then 3045 * assigns it to "*ret_conn". 
3046 */ 3047 static ibt_cm_status_t 3048 ibd_rc_handle_req(void *arg, ibd_rc_chan_t **ret_conn, 3049 ibt_cm_event_t *ibt_cm_event, ibt_cm_return_args_t *ret_args, 3050 void *ret_priv_data) 3051 { 3052 ibd_rc_msg_hello_t *hello_msg; 3053 ibd_state_t *state = (ibd_state_t *)arg; 3054 ibd_rc_chan_t *chan; 3055 3056 if (ibd_rc_alloc_chan(&chan, state, B_FALSE) != IBT_SUCCESS) { 3057 DPRINT(40, "ibd_rc_handle_req: ibd_rc_alloc_chan() failed"); 3058 return (IBT_CM_REJECT); 3059 } 3060 3061 ibd_rc_add_to_chan_list(&state->rc_pass_chan_list, chan); 3062 3063 ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)chan); 3064 3065 if (!state->rc_enable_srq) { 3066 if (ibd_rc_init_rxlist(chan) != DDI_SUCCESS) { 3067 ibd_rc_free_chan(chan); 3068 DPRINT(40, "ibd_rc_handle_req: ibd_rc_init_rxlist() " 3069 "failed"); 3070 return (IBT_CM_REJECT); 3071 } 3072 } 3073 3074 ret_args->cm_ret.rep.cm_channel = chan->chan_hdl; 3075 3076 /* We don't do RDMA */ 3077 ret_args->cm_ret.rep.cm_rdma_ra_out = 0; 3078 ret_args->cm_ret.rep.cm_rdma_ra_in = 0; 3079 3080 ret_args->cm_ret.rep.cm_rnr_retry_cnt = 7; 3081 ret_args->cm_ret_len = sizeof (ibd_rc_msg_hello_t); 3082 3083 hello_msg = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data; 3084 DPRINT(30, "ibd_rc_handle_req(): peer qpn=0x%x, peer mtu=0x%x", 3085 ntohl(hello_msg->reserved_qpn), ntohl(hello_msg->rx_mtu)); 3086 3087 hello_msg = (ibd_rc_msg_hello_t *)ret_priv_data; 3088 hello_msg->reserved_qpn = htonl(state->id_qpnum); 3089 hello_msg->rx_mtu = htonl(state->rc_mtu); 3090 3091 chan->chan_state = IBD_RC_STATE_PAS_REQ_RECV; /* ready to receive */ 3092 *ret_conn = chan; 3093 3094 return (IBT_CM_ACCEPT); 3095 } 3096 3097 /* 3098 * ibd_rc_handle_act_estab -- handler for connection established completion 3099 * for active side. 3100 */ 3101 static ibt_cm_status_t 3102 ibd_rc_handle_act_estab(ibd_ace_t *ace) 3103 { 3104 ibt_status_t result; 3105 3106 switch (ace->ac_chan->chan_state) { 3107 case IBD_RC_STATE_ACT_REP_RECV: 3108 ace->ac_chan->chan_state = IBD_RC_STATE_ACT_ESTAB; 3109 result = ibt_enable_cq_notify(ace->ac_chan->rcq_hdl, 3110 IBT_NEXT_COMPLETION); 3111 if (result != IBT_SUCCESS) { 3112 DPRINT(40, "ibd_rc_handle_act_estab: " 3113 "ibt_enable_cq_notify(rcq) " 3114 "failed: status %d", result); 3115 return (IBT_CM_REJECT); 3116 } 3117 break; 3118 default: 3119 DPRINT(40, "ibd_rc_handle_act_estab: default " 3120 "branch, act_state=%d", ace->ac_chan->chan_state); 3121 return (IBT_CM_REJECT); 3122 } 3123 return (IBT_CM_ACCEPT); 3124 } 3125 3126 /* 3127 * ibd_rc_handle_pas_estab -- handler for connection established completion 3128 * for passive side. 
3129 */ 3130 static ibt_cm_status_t 3131 ibd_rc_handle_pas_estab(ibd_rc_chan_t *chan) 3132 { 3133 ibt_status_t result; 3134 3135 switch (chan->chan_state) { 3136 case IBD_RC_STATE_PAS_REQ_RECV: 3137 chan->chan_state = IBD_RC_STATE_PAS_ESTAB; 3138 3139 result = ibt_enable_cq_notify(chan->rcq_hdl, 3140 IBT_NEXT_COMPLETION); 3141 if (result != IBT_SUCCESS) { 3142 DPRINT(40, "ibd_rc_handle_pas_estab: " 3143 "ibt_enable_cq_notify(rcq) " 3144 "failed: status %d", result); 3145 return (IBT_CM_REJECT); 3146 } 3147 break; 3148 default: 3149 DPRINT(40, "ibd_rc_handle_pas_estab: default " 3150 "branch, chan_state=%d", chan->chan_state); 3151 return (IBT_CM_REJECT); 3152 } 3153 return (IBT_CM_ACCEPT); 3154 } 3155 3156 /* ARGSUSED */ 3157 static ibt_cm_status_t 3158 ibd_rc_dispatch_actv_mad(void *arg, ibt_cm_event_t *ibt_cm_event, 3159 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 3160 ibt_priv_data_len_t ret_len_max) 3161 { 3162 ibt_cm_status_t result = IBT_CM_ACCEPT; 3163 ibd_ace_t *ace = (ibd_ace_t *)(uintptr_t)arg; 3164 ibd_rc_chan_t *rc_chan; 3165 ibd_state_t *state; 3166 ibd_rc_msg_hello_t *hello_ack; 3167 3168 switch (ibt_cm_event->cm_type) { 3169 case IBT_CM_EVENT_REP_RCV: 3170 ASSERT(ace->ac_chan != NULL); 3171 ASSERT(ace->ac_chan->chan_state == IBD_RC_STATE_INIT); 3172 hello_ack = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data; 3173 DPRINT(30, "ibd_rc_handle_rep: hello_ack->mtu=0x%x, " 3174 "hello_ack->qpn=0x%x", ntohl(hello_ack->rx_mtu), 3175 ntohl(hello_ack->reserved_qpn)); 3176 ace->ac_chan->chan_state = IBD_RC_STATE_ACT_REP_RECV; 3177 break; 3178 3179 case IBT_CM_EVENT_CONN_EST: 3180 ASSERT(ace->ac_chan != NULL); 3181 DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_CONN_EST, " 3182 "ace=%p, act_state=%d, chan=%p", 3183 ace, ace->ac_chan->chan_state, ace->ac_chan); 3184 result = ibd_rc_handle_act_estab(ace); 3185 break; 3186 3187 case IBT_CM_EVENT_CONN_CLOSED: 3188 rc_chan = ace->ac_chan; 3189 if (rc_chan == NULL) { 3190 DPRINT(40, "ibd_rc_dispatch_actv_mad: " 3191 "rc_chan==NULL, IBT_CM_EVENT_CONN_CLOSED"); 3192 return (IBT_CM_ACCEPT); 3193 } 3194 state = rc_chan->state; 3195 mutex_enter(&state->id_ac_mutex); 3196 if ((rc_chan->chan_state == IBD_RC_STATE_ACT_ESTAB) && 3197 ((ace = ibd_acache_find(state, &ace->ac_mac, B_FALSE, 0)) 3198 != NULL) && (ace == rc_chan->ace)) { 3199 rc_chan->chan_state = IBD_RC_STATE_ACT_CLOSING; 3200 ASSERT(ace->ac_mce == NULL); 3201 INC_REF(ace, 1); 3202 IBD_ACACHE_PULLOUT_ACTIVE(state, ace); 3203 mutex_exit(&state->id_ac_mutex); 3204 DPRINT(30, "ibd_rc_dispatch_actv_mad: " 3205 "IBT_CM_EVENT_CONN_CLOSED, ace=%p, chan=%p, " 3206 "reason=%d", ace, rc_chan, 3207 ibt_cm_event->cm_event.closed); 3208 } else { 3209 mutex_exit(&state->id_ac_mutex); 3210 state->rc_act_close_simultaneous++; 3211 DPRINT(40, "ibd_rc_dispatch_actv_mad: other thread " 3212 "is closing it, IBT_CM_EVENT_CONN_CLOSED, " 3213 "chan_state=%d", rc_chan->chan_state); 3214 return (IBT_CM_ACCEPT); 3215 } 3216 ibd_rc_act_close(rc_chan, B_FALSE); 3217 mutex_enter(&state->id_ac_mutex); 3218 ace->ac_chan = NULL; 3219 ASSERT(ace->ac_ref != 0); 3220 atomic_dec_32(&ace->ac_ref); 3221 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) { 3222 IBD_ACACHE_INSERT_FREE(state, ace); 3223 ace->ac_ref = 0; 3224 } else { 3225 ace->ac_ref |= CYCLEVAL; 3226 state->rc_delay_ace_recycle++; 3227 } 3228 mutex_exit(&state->id_ac_mutex); 3229 break; 3230 3231 case IBT_CM_EVENT_FAILURE: 3232 DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_FAILURE," 3233 "ace=%p, chan=%p, code: %d, msg: %d, reason=%d", 3234 
ace, ace->ac_chan, 3235 ibt_cm_event->cm_event.failed.cf_code, 3236 ibt_cm_event->cm_event.failed.cf_msg, 3237 ibt_cm_event->cm_event.failed.cf_reason); 3238 /* 3239 * Don't need free resource here. The resource is freed 3240 * at function ibd_rc_connect() 3241 */ 3242 break; 3243 3244 case IBT_CM_EVENT_MRA_RCV: 3245 DPRINT(40, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_MRA_RCV"); 3246 break; 3247 case IBT_CM_EVENT_LAP_RCV: 3248 DPRINT(40, "ibd_rc_dispatch_actv_mad: LAP message received"); 3249 break; 3250 case IBT_CM_EVENT_APR_RCV: 3251 DPRINT(40, "ibd_rc_dispatch_actv_mad: APR message received"); 3252 break; 3253 default: 3254 DPRINT(40, "ibd_rc_dispatch_actv_mad: default branch, " 3255 "ibt_cm_event->cm_type=%d", ibt_cm_event->cm_type); 3256 break; 3257 } 3258 3259 return (result); 3260 } 3261 3262 /* ARGSUSED */ 3263 static ibt_cm_status_t 3264 ibd_rc_dispatch_pass_mad(void *arg, ibt_cm_event_t *ibt_cm_event, 3265 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 3266 ibt_priv_data_len_t ret_len_max) 3267 { 3268 ibt_cm_status_t result = IBT_CM_ACCEPT; 3269 ibd_rc_chan_t *chan; 3270 3271 if (ibt_cm_event->cm_type == IBT_CM_EVENT_REQ_RCV) { 3272 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_REQ_RCV," 3273 "req_pkey=%x", ibt_cm_event->cm_event.req.req_pkey); 3274 /* Receive an incoming CM REQ from active side */ 3275 result = ibd_rc_handle_req(arg, &chan, ibt_cm_event, ret_args, 3276 ret_priv_data); 3277 return (result); 3278 } 3279 3280 if (ibt_cm_event->cm_channel == 0) { 3281 DPRINT(30, "ibd_rc_dispatch_pass_mad: " 3282 "ERROR ibt_cm_event->cm_channel == 0"); 3283 return (IBT_CM_REJECT); 3284 } 3285 3286 chan = 3287 (ibd_rc_chan_t *)ibt_get_chan_private(ibt_cm_event->cm_channel); 3288 if (chan == NULL) { 3289 DPRINT(40, "ibd_rc_dispatch_pass_mad: conn == 0"); 3290 return (IBT_CM_REJECT); 3291 } 3292 3293 switch (ibt_cm_event->cm_type) { 3294 case IBT_CM_EVENT_CONN_EST: 3295 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_EST, " 3296 "chan=%p", chan); 3297 result = ibd_rc_handle_pas_estab(chan); 3298 break; 3299 case IBT_CM_EVENT_CONN_CLOSED: 3300 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_CLOSED," 3301 " chan=%p, reason=%d", chan, ibt_cm_event->cm_event.closed); 3302 chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list, 3303 chan); 3304 if (chan != NULL) 3305 (void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE); 3306 break; 3307 case IBT_CM_EVENT_FAILURE: 3308 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_FAILURE," 3309 " chan=%p, code: %d, msg: %d, reason=%d", chan, 3310 ibt_cm_event->cm_event.failed.cf_code, 3311 ibt_cm_event->cm_event.failed.cf_msg, 3312 ibt_cm_event->cm_event.failed.cf_reason); 3313 chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list, 3314 chan); 3315 if (chan != NULL) 3316 (void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE); 3317 return (IBT_CM_ACCEPT); 3318 case IBT_CM_EVENT_MRA_RCV: 3319 DPRINT(40, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_MRA_RCV"); 3320 break; 3321 case IBT_CM_EVENT_LAP_RCV: 3322 DPRINT(40, "ibd_rc_dispatch_pass_mad: LAP message received"); 3323 break; 3324 case IBT_CM_EVENT_APR_RCV: 3325 DPRINT(40, "ibd_rc_dispatch_pass_mad: APR message received"); 3326 break; 3327 default: 3328 DPRINT(40, "ibd_rc_dispatch_pass_mad: default, type=%d, " 3329 "chan=%p", ibt_cm_event->cm_type, chan); 3330 break; 3331 } 3332 3333 return (result); 3334 } 3335