xref: /illumos-gate/usr/src/uts/common/io/ib/clients/rds/rdsib_ib.c (revision 437220cd296f6d8b6654d6d52508b40b1e2d1ac7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *	- Redistributions of source code must retain the above
39  *	  copyright notice, this list of conditions and the following
40  *	  disclaimer.
41  *
42  *	- Redistributions in binary form must reproduce the above
43  *	  copyright notice, this list of conditions and the following
44  *	  disclaimer in the documentation and/or other materials
45  *	  provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 /*
58  * Sun elects to include this software in Sun product
59  * under the OpenIB BSD license.
60  *
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72  * POSSIBILITY OF SUCH DAMAGE.
73  */
74 
75 #pragma ident	"%Z%%M%	%I%	%E% SMI"
76 
77 #include <sys/types.h>
78 #include <sys/ddi.h>
79 #include <sys/sunddi.h>
80 #include <sys/ib/ibtl/ibti.h>
81 #include <sys/ib/ibtl/ibtl_types.h>
82 #include <sys/ib/clients/rds/rdsib_cm.h>
83 #include <sys/ib/clients/rds/rdsib_ib.h>
84 #include <sys/ib/clients/rds/rdsib_buf.h>
85 #include <sys/ib/clients/rds/rdsib_ep.h>
86 #include <sys/ib/clients/rds/rds_kstat.h>
87 
88 static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
89     ibt_async_code_t code, ibt_async_event_t *event);
90 
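/*
 * IBTF client registration information for RDS: an IBTI_V2 client of
 * class IBT_NETWORK whose asynchronous events are delivered to
 * rds_async_handler().  Passed to ibt_attach() in rdsib_open_ib().
 */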
91 static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
92 	IBTI_V2,
93 	IBT_NETWORK,
94 	rds_async_handler,
95 	NULL,
96 	"RDS"
97 };
98 
99 /* performance tunables */
100 uint_t		rds_no_interrupts = 0;
101 uint_t		rds_poll_percent_full = 25;
102 uint_t		rds_wc_signal = IBT_NEXT_SOLICITED;
103 uint_t		rds_waittime_ms = 100; /* ms */
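/*
 * These globals can be patched at boot time; assuming the module is
 * delivered as "rdsib", an /etc/system entry such as
 *	set rdsib:rds_no_interrupts = 1
 * would leave send CQ completion notification un-armed when channels
 * are created (see rds_ep_alloc_rc_channel() below).
 */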
104 
105 extern dev_info_t *rdsib_dev_info;
106 extern void rds_close_sessions();
107 
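/*
 * Clamp the send/receive buffer tunables so that no work queue or CQ is
 * sized beyond what the HCA supports (hca_max_chan_sz, hca_max_cq_sz),
 * and cap MaxRecvMemory at the HCA's maximum memory region length.
 */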
108 static void
109 rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
110 {
111 	/* The SQ size should not be more than that supported by the HCA */
112 	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
113 	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
114 		RDS_DPRINTF0("RDSIB", "MaxDataSendBuffers + %d is greater "
115 		    "than that supported by the HCA driver "
116 		    "(%d + %d > %d or %d), lowering it to a supported value.",
117 		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
118 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
119 
120 		MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
121 		    hattrp->hca_max_cq_sz) ?
122 		    hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
123 		    hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
124 	}
125 
126 	/* The RQ size should not be more than that supported by the HCA */
127 	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
128 	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
129 		RDS_DPRINTF0("RDSIB", "MaxDataRecvBuffers is greater than that "
130 		    "supported by the HCA driver (%d > %d or %d), lowering it "
131 		    "to a supported value.", MaxDataRecvBuffers,
132 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
133 
134 		MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
135 		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
136 		    hattrp->hca_max_chan_sz;
137 	}
138 
139 	/* The SQ size should not be more than that supported by the HCA */
140 	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
141 	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
142 		RDS_DPRINTF0("RDSIB", "MaxCtrlSendBuffers is greater than that "
143 		    "supported by the HCA driver (%d > %d or %d), lowering it "
144 		    "to a supported value.", MaxCtrlSendBuffers,
145 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
146 
147 		MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
148 		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
149 		    hattrp->hca_max_chan_sz;
150 	}
151 
152 	/* The RQ size should not be more than that supported by the HCA */
153 	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
154 	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
155 		RDS_DPRINTF0("RDSIB", "MaxCtrlRecvBuffers is greater than that "
156 		    "supported by the HCA driver (%d > %d or %d), lowering it "
157 		    "to a supported value.", MaxCtrlRecvBuffers,
158 		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
159 
160 		MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
161 		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
162 		    hattrp->hca_max_chan_sz;
163 	}
164 
165 	/* MaxRecvMemory should not be more than that supported by the HCA */
166 	if ((MaxRecvMemory * 1024) > hattrp->hca_max_memr_len) {
167 		RDS_DPRINTF0("RDSIB", "MaxRecvMemory is greater than that "
168 		    "supported by the HCA driver (%d > %d), lowering it to %d",
169 		    MaxRecvMemory, hattrp->hca_max_memr_len,
170 		    hattrp->hca_max_memr_len);
171 
172 		MaxRecvMemory = hattrp->hca_max_memr_len;
173 	}
174 }
175 
176 /*
177  * Called on open of first RDS socket
178  */
179 int
180 rdsib_open_ib()
181 {
182 	ib_guid_t	*guidp;
183 	rds_hca_t	*hcap, *hcap1;
184 	uint_t		ix, hcaix, nhcas;
185 	int		ret;
186 
187 	RDS_DPRINTF4("rdsib_open_ib", "enter: statep %p", rdsib_statep);
188 
189 	ASSERT(rdsib_statep != NULL);
190 	if (rdsib_statep == NULL) {
191 		RDS_DPRINTF1("rdsib_open_ib", "RDS Statep not initialized");
192 		return (-1);
193 	}
194 
195 	/* How many hcas are there? */
196 	nhcas = ibt_get_hca_list(&guidp);
197 	if (nhcas == 0) {
198 		RDS_DPRINTF2("rdsib_open_ib", "No IB HCAs Available");
199 		return (-1);
200 	}
201 
202 	RDS_DPRINTF3("rdsib_open_ib", "Number of HCAs: %d", nhcas);
203 
204 	/* Register with IBTF */
205 	ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
206 	    &rdsib_statep->rds_ibhdl);
207 	if (ret != IBT_SUCCESS) {
208 		RDS_DPRINTF2(LABEL, "ibt_attach failed: %d", ret);
209 		(void) ibt_free_hca_list(guidp, nhcas);
210 		return (-1);
211 	}
212 
213 	/*
214 	 * Open each HCA and gather its information. HCAs that cannot be
215 	 * opened are ignored; it is OK as long as at least one HCA can be
216 	 * opened.
217 	 * Initialize an HCA only if all of its information is available.
218 	 */
219 	hcap1 = NULL;
220 	for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
221 		RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);
222 
223 		hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);
224 
225 		ret = ibt_open_hca(rdsib_statep->rds_ibhdl, guidp[ix],
226 		    &hcap->hca_hdl);
227 		if (ret != IBT_SUCCESS) {
228 			RDS_DPRINTF2("rdsib_open_ib",
229 			    "ibt_open_hca: 0x%llx failed: %d", guidp[ix], ret);
230 			kmem_free(hcap, sizeof (rds_hca_t));
231 			continue;
232 		}
233 
234 		hcap->hca_guid = guidp[ix];
235 
236 		ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
237 		if (ret != IBT_SUCCESS) {
238 			RDS_DPRINTF2("rdsib_open_ib",
239 			    "Query HCA: 0x%llx failed:  %d", guidp[ix], ret);
240 			ret = ibt_close_hca(hcap->hca_hdl);
241 			ASSERT(ret == IBT_SUCCESS);
242 			kmem_free(hcap, sizeof (rds_hca_t));
243 			continue;
244 		}
245 
246 		ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
247 		    &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
248 		if (ret != IBT_SUCCESS) {
249 			RDS_DPRINTF2("rdsib_open_ib",
250 			    "Query HCA 0x%llx ports failed: %d", guidp[ix],
251 			    ret);
252 			ret = ibt_close_hca(hcap->hca_hdl);
253 			ASSERT(ret == IBT_SUCCESS);
254 			kmem_free(hcap, sizeof (rds_hca_t));
255 			continue;
256 		}
257 
258 		/* Only one PD per HCA is allocated, so do it here */
259 		ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
260 		    &hcap->hca_pdhdl);
261 		if (ret != IBT_SUCCESS) {
262 			RDS_DPRINTF2(LABEL, "ibt_alloc_pd 0x%llx failed: %d",
263 			    guidp[ix], ret);
264 			(void) ibt_free_portinfo(hcap->hca_pinfop,
265 			    hcap->hca_pinfo_sz);
266 			ret = ibt_close_hca(hcap->hca_hdl);
267 			ASSERT(ret == IBT_SUCCESS);
268 			kmem_free(hcap, sizeof (rds_hca_t));
269 			continue;
270 		}
271 
272 		rdsib_validate_chan_sizes(&hcap->hca_attr);
273 
274 		/* this HCA is fully initialized, go to the next one */
275 		hcaix++;
276 		hcap->hca_nextp = hcap1;
277 		hcap1 = hcap;
278 	}
279 
280 	/* free the HCA list, we are done with it */
281 	(void) ibt_free_hca_list(guidp, nhcas);
282 
283 	if (hcaix == 0) {
284 		/* Failed to initialize even one HCA */
285 		RDS_DPRINTF2("rdsib_open_ib", "No HCAs are initialized");
286 		(void) ibt_detach(rdsib_statep->rds_ibhdl);
287 		rdsib_statep->rds_ibhdl = NULL;
288 		return (-1);
289 	}
290 
291 	if (hcaix < nhcas) {
292 		RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
293 		    (nhcas - hcaix), nhcas);
294 	}
295 
296 	rdsib_statep->rds_hcalistp = hcap1;
297 	rdsib_statep->rds_nhcas = hcaix;
298 
299 	/* register the RDS service */
300 	rdsib_statep->rds_srvhdl =
301 	    rds_register_service(rdsib_statep->rds_ibhdl);
302 	if (rdsib_statep->rds_srvhdl == NULL) {
303 		RDS_DPRINTF2("rdsib_open_ib", "Service registration failed");
304 	} else {
305 		/* bind the service on all available ports */
306 		ret = rds_bind_service(rdsib_statep);
307 		if (ret != 0) {
308 			RDS_DPRINTF2("rdsib_open_ib", "Bind service failed");
309 		}
310 	}
311 
312 	RDS_DPRINTF4("rdsib_open_ib", "return: statep %p", rdsib_statep);
313 
314 	return (0);
315 }
316 
317 /*
318  * Called when all ports are closed.
319  */
320 void
321 rdsib_close_ib()
322 {
323 	rds_hca_t	*hcap, *nextp;
324 	int		ret;
325 
326 	RDS_DPRINTF2("rds_close_ib", "enter: statep %p", rdsib_statep);
327 
328 	if (rdsib_statep->rds_srvhdl != NULL) {
329 		(void) ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
330 		(void) ibt_deregister_service(rdsib_statep->rds_ibhdl,
331 		    rdsib_statep->rds_srvhdl);
332 		(void) ibt_release_ip_sid(rdsib_statep->rds_service_id);
333 
334 		(void) ibt_unbind_all_services(rdsib_statep->rds_old_srvhdl);
335 		(void) ibt_deregister_service(rdsib_statep->rds_ibhdl,
336 		    rdsib_statep->rds_old_srvhdl);
337 	}
338 
339 	/* close and destroy all the sessions */
340 	rds_close_sessions(NULL);
341 
342 	/* Release all HCA resources */
343 	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
344 	hcap = rdsib_statep->rds_hcalistp;
345 	rdsib_statep->rds_hcalistp = NULL;
346 	rdsib_statep->rds_nhcas = 0;
347 	rw_exit(&rdsib_statep->rds_hca_lock);
348 
349 	while (hcap != NULL) {
350 		nextp = hcap->hca_nextp;
351 
352 		ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
353 		ASSERT(ret == IBT_SUCCESS);
354 
355 		(void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);
356 
357 		ret = ibt_close_hca(hcap->hca_hdl);
358 		ASSERT(ret == IBT_SUCCESS);
359 
360 		kmem_free(hcap, sizeof (rds_hca_t));
361 		hcap = nextp;
362 	}
363 
364 	/* Deregister with IBTF */
365 	if (rdsib_statep->rds_ibhdl != NULL) {
366 		(void) ibt_detach(rdsib_statep->rds_ibhdl);
367 		rdsib_statep->rds_ibhdl = NULL;
368 	}
369 
370 	RDS_DPRINTF2("rds_close_ib", "return: statep %p", rdsib_statep);
371 }
372 
373 /* Return hcap, given the hca guid */
374 rds_hca_t *
375 rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
376 {
377 	rds_hca_t	*hcap;
378 
379 	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
380 	    "guid: %llx", statep, hca_guid);
381 
382 	rw_enter(&statep->rds_hca_lock, RW_READER);
383 
384 	hcap = statep->rds_hcalistp;
385 	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
386 		hcap = hcap->hca_nextp;
387 	}
388 
389 	rw_exit(&statep->rds_hca_lock);
390 
391 	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");
392 
393 	return (hcap);
394 }
395 
396 /* Return hcap, given a gid */
397 rds_hca_t *
398 rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
399 {
400 	rds_hca_t	*hcap;
401 	uint_t		ix;
402 
403 	RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
404 	    statep, gid.gid_prefix, gid.gid_guid);
405 
406 	rw_enter(&statep->rds_hca_lock, RW_READER);
407 
408 	hcap = statep->rds_hcalistp;
409 	while (hcap != NULL) {
410 		for (ix = 0; ix < hcap->hca_nports; ix++) {
411 			if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
412 			    gid.gid_prefix) &&
413 			    (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
414 			    gid.gid_guid)) {
415 				RDS_DPRINTF4("rds_gid_to_hcap",
416 				    "gid found in hcap: 0x%p", hcap);
417 				rw_exit(&statep->rds_hca_lock);
418 				return (hcap);
419 			}
420 		}
421 		hcap = hcap->hca_nextp;
422 	}
423 
424 	rw_exit(&statep->rds_hca_lock);
425 
426 	return (NULL);
427 }
428 
429 /* This is called from the send CQ handler */
430 void
431 rds_send_acknowledgement(rds_ep_t *ep)
432 {
433 	int	ret;
434 	uint_t	ix;
435 
436 	RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);
437 
438 	mutex_enter(&ep->ep_lock);
439 
440 	ASSERT(ep->ep_rdmacnt != 0);
441 
442 	/*
443 	 * The previous ACK completed successfully; send the next one
444 	 * if more messages were received after the last ACK was sent.
445 	 */
446 	if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
447 		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
448 		mutex_exit(&ep->ep_lock);
449 
450 		/* send acknowledgement */
451 		RDS_INCR_TXACKS();
452 		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
453 		if (ret != IBT_SUCCESS) {
454 			RDS_DPRINTF1("rds_send_acknowledgement",
455 			    "EP(%p): ibt_post_send for acknowledgement "
456 			    "failed: %d, SQ depth: %d",
457 			    ep, ret, ep->ep_sndpool.pool_nbusy);
458 			mutex_enter(&ep->ep_lock);
459 			ep->ep_rdmacnt--;
460 			mutex_exit(&ep->ep_lock);
461 		}
462 	} else {
463 		/* ACKed all messages, no more to ACK */
464 		ep->ep_rdmacnt--;
465 		mutex_exit(&ep->ep_lock);
466 		return;
467 	}
468 
469 	RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
470 }
471 
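/*
 * Poll one completion off the control channel's receive CQ.  A successful
 * completion is handed to rds_handle_control_message() and the buffer is
 * returned to the pool; when the RQ drains below its low-water mark, a
 * taskq job is dispatched to repost receive buffers.
 */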
472 static int
473 rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
474 {
475 	ibt_wc_t	wc;
476 	uint_t		npolled;
477 	rds_buf_t	*bp;
478 	rds_ctrl_pkt_t	*cpkt;
479 	rds_qp_t	*recvqp;
480 	int		ret = IBT_SUCCESS;
481 
482 	RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);
483 
484 	bzero(&wc, sizeof (ibt_wc_t));
485 	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
486 	if (ret != IBT_SUCCESS) {
487 		if (ret != IBT_CQ_EMPTY) {
488 			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
489 			    "returned: %d", ep, cq, ret);
490 		} else {
491 			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
492 			    "returned: IBT_CQ_EMPTY", ep, cq);
493 		}
494 		return (ret);
495 	}
496 
497 	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
498 
499 	if (wc.wc_status != IBT_WC_SUCCESS) {
500 		mutex_enter(&ep->ep_recvqp.qp_lock);
501 		ep->ep_recvqp.qp_level--;
502 		mutex_exit(&ep->ep_recvqp.qp_lock);
503 
504 		/* Free the buffer */
505 		bp->buf_state = RDS_RCVBUF_FREE;
506 		rds_free_recv_buf(bp, 1);
507 
508 		/* Receive completion failure */
509 		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
510 			RDS_DPRINTF2("rds_poll_ctrl_completions",
511 			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
512 			    ep, cq, wc.wc_id, wc.wc_status);
513 		}
514 		return (ret);
515 	}
516 
517 	/* there is one less in the RQ */
518 	recvqp = &ep->ep_recvqp;
519 	mutex_enter(&recvqp->qp_lock);
520 	recvqp->qp_level--;
521 	if ((recvqp->qp_taskqpending == B_FALSE) &&
522 	    (recvqp->qp_level <= recvqp->qp_lwm)) {
523 		/* Time to post more buffers into the RQ */
524 		recvqp->qp_taskqpending = B_TRUE;
525 		mutex_exit(&recvqp->qp_lock);
526 
527 		ret = ddi_taskq_dispatch(rds_taskq,
528 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
529 		if (ret != DDI_SUCCESS) {
530 			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
531 			    ret);
532 			mutex_enter(&recvqp->qp_lock);
533 			recvqp->qp_taskqpending = B_FALSE;
534 			mutex_exit(&recvqp->qp_lock);
535 		}
536 	} else {
537 		mutex_exit(&recvqp->qp_lock);
538 	}
539 
540 	cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
541 	rds_handle_control_message(ep->ep_sp, cpkt);
542 
543 	bp->buf_state = RDS_RCVBUF_FREE;
544 	rds_free_recv_buf(bp, 1);
545 
546 	RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);
547 
548 	return (ret);
549 }
550 
551 #define	RDS_POST_FEW_ATATIME	100
552 /* Post recv WRs into the RQ. Assumes the ep->refcnt is already incremented */
553 void
554 rds_post_recv_buf(void *arg)
555 {
556 	ibt_channel_hdl_t	chanhdl;
557 	rds_ep_t		*ep;
558 	rds_session_t		*sp;
559 	rds_qp_t		*recvqp;
560 	rds_bufpool_t		*gp;
561 	rds_buf_t		*bp, *bp1;
562 	ibt_recv_wr_t		*wrp, wr[RDS_POST_FEW_ATATIME];
563 	rds_hca_t		*hcap;
564 	uint_t			npost, nspace, rcv_len;
565 	uint_t			ix, jx, kx;
566 	int			ret;
567 
568 	chanhdl = (ibt_channel_hdl_t)arg;
569 	RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
570 	RDS_INCR_POST_RCV_BUF_CALLS();
571 
572 	ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
573 	ASSERT(ep != NULL);
574 	sp = ep->ep_sp;
575 	recvqp = &ep->ep_recvqp;
576 
577 	RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);
578 
579 	/* get the hcap for the HCA hosting this channel */
580 	hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid);
581 	if (hcap == NULL) {
582 		RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
583 		    ep->ep_hca_guid);
584 		return;
585 	}
586 
587 	/* Make sure the session is still connected */
588 	rw_enter(&sp->session_lock, RW_READER);
589 	if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
590 	    (sp->session_state != RDS_SESSION_STATE_CONNECTED)) {
591 		RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
592 		    "in active state (%d)", ep, sp->session_state);
593 		rw_exit(&sp->session_lock);
594 		return;
595 	}
596 	rw_exit(&sp->session_lock);
597 
598 	/* how many can be posted */
599 	mutex_enter(&recvqp->qp_lock);
600 	nspace = recvqp->qp_depth - recvqp->qp_level;
601 	if (nspace == 0) {
602 		RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
603 		recvqp->qp_taskqpending = B_FALSE;
604 		mutex_exit(&recvqp->qp_lock);
605 		return;
606 	}
607 	mutex_exit(&recvqp->qp_lock);
608 
609 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
610 		gp = &rds_dpool;
611 		rcv_len = RdsPktSize;
612 	} else {
613 		gp = &rds_cpool;
614 		rcv_len = RDS_CTRLPKT_SIZE;
615 	}
616 
617 	bp = rds_get_buf(gp, nspace, &jx);
618 	if (bp == NULL) {
619 		RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
620 		/* try again later */
621 		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
622 		    (void *)ep->ep_chanhdl, DDI_NOSLEEP);
623 		if (ret != DDI_SUCCESS) {
624 			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
625 			    ret);
626 			mutex_enter(&recvqp->qp_lock);
627 			recvqp->qp_taskqpending = B_FALSE;
628 			mutex_exit(&recvqp->qp_lock);
629 		}
630 		return;
631 	}
632 
633 	if (jx != nspace) {
634 		RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
635 		    "needed: %d available: %d", ep, nspace, jx);
636 		nspace = jx;
637 	}
638 
639 	bp1 = bp;
640 	for (ix = 0; ix < nspace; ix++) {
641 		bp1->buf_ep = ep;
642 		ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
643 		bp1->buf_state = RDS_RCVBUF_POSTED;
644 		bp1->buf_ds.ds_key = hcap->hca_lkey;
645 		bp1->buf_ds.ds_len = rcv_len;
646 		bp1 = bp1->buf_nextp;
647 	}
648 
649 #if 0
650 	wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
651 	    KM_SLEEP);
652 #else
653 	wrp = &wr[0];
654 #endif
655 
656 	npost = nspace;
657 	while (npost) {
658 		jx = (npost > RDS_POST_FEW_ATATIME) ?
659 		    RDS_POST_FEW_ATATIME : npost;
660 		for (ix = 0; ix < jx; ix++) {
661 			wrp[ix].wr_id = (uintptr_t)bp;
662 			wrp[ix].wr_nds = 1;
663 			wrp[ix].wr_sgl = &bp->buf_ds;
664 			bp = bp->buf_nextp;
665 		}
666 
667 		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
668 		if ((ret != IBT_SUCCESS) || (kx != jx)) {
669 			RDS_DPRINTF1(LABEL, "ibt_post_recv for %d WRs failed: "
670 			    "%d", npost, ret);
671 			npost -= kx;
672 			break;
673 		}
674 
675 		npost -= jx;
676 	}
677 
678 	mutex_enter(&recvqp->qp_lock);
679 	if (npost != 0) {
680 		RDS_DPRINTF2("rds_post_recv_buf",
681 		    "EP(%p) Failed to post %d WRs", ep, npost);
682 		recvqp->qp_level += (nspace - npost);
683 	} else {
684 		recvqp->qp_level += nspace;
685 	}
686 
687 	/*
688 	 * Sometimes the recv WRs can be consumed as soon as they are
689 	 * posted. In that case, the taskq thread that posts more WRs to the
690 	 * RQ will not be scheduled, as the taskqpending flag is still set.
691 	 */
692 	if (recvqp->qp_level == 0) {
693 		mutex_exit(&recvqp->qp_lock);
694 		ret = ddi_taskq_dispatch(rds_taskq,
695 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
696 		if (ret != DDI_SUCCESS) {
697 			RDS_DPRINTF1("rds_post_recv_buf",
698 			    "ddi_taskq_dispatch failed: %d", ret);
699 			mutex_enter(&recvqp->qp_lock);
700 			recvqp->qp_taskqpending = B_FALSE;
701 			mutex_exit(&recvqp->qp_lock);
702 		}
703 	} else {
704 		recvqp->qp_taskqpending = B_FALSE;
705 		mutex_exit(&recvqp->qp_lock);
706 	}
707 
708 #if 0
709 	kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
710 #endif
711 
712 	RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
713 }
714 
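/*
 * Poll one completion off a data channel's receive CQ and, when the RQ
 * drains below its low-water mark, dispatch a taskq job to repost receive
 * buffers.  Multi-packet messages are reassembled using the RDS data
 * header: dh_psn is the packet's sequence number within the message (0
 * for the first packet) and dh_npkts == 1 marks the last (or only)
 * packet, at which point the chained buffers are passed up through
 * rds_received_msg().
 */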
715 static int
716 rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
717 {
718 	ibt_wc_t	wc;
719 	rds_buf_t	*bp;
720 	rds_data_hdr_t	*pktp;
721 	rds_qp_t	*recvqp;
722 	uint_t		npolled;
723 	int		ret = IBT_SUCCESS;
724 
725 
726 	RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);
727 
728 	bzero(&wc, sizeof (ibt_wc_t));
729 	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
730 	if (ret != IBT_SUCCESS) {
731 		if (ret != IBT_CQ_EMPTY) {
732 			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
733 			    "returned: %d", ep, cq, ret);
734 		} else {
735 			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
736 			    "returned: IBT_CQ_EMPTY", ep, cq);
737 		}
738 		return (ret);
739 	}
740 
741 	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
742 	ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
743 	bp->buf_state = RDS_RCVBUF_ONSOCKQ;
744 	bp->buf_nextp = NULL;
745 
746 	if (wc.wc_status != IBT_WC_SUCCESS) {
747 		mutex_enter(&ep->ep_recvqp.qp_lock);
748 		ep->ep_recvqp.qp_level--;
749 		mutex_exit(&ep->ep_recvqp.qp_lock);
750 
751 		/* free the buffer */
752 		bp->buf_state = RDS_RCVBUF_FREE;
753 		rds_free_recv_buf(bp, 1);
754 
755 		/* Receive completion failure */
756 		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
757 			RDS_DPRINTF2("rds_poll_data_completions",
758 			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
759 			    ep, cq, wc.wc_id, wc.wc_status);
760 			RDS_INCR_RXERRS();
761 		}
762 		return (ret);
763 	}
764 
765 	/* there is one less in the RQ */
766 	recvqp = &ep->ep_recvqp;
767 	mutex_enter(&recvqp->qp_lock);
768 	recvqp->qp_level--;
769 	if ((recvqp->qp_taskqpending == B_FALSE) &&
770 	    (recvqp->qp_level <= recvqp->qp_lwm)) {
771 		/* Time to post more buffers into the RQ */
772 		recvqp->qp_taskqpending = B_TRUE;
773 		mutex_exit(&recvqp->qp_lock);
774 
775 		ret = ddi_taskq_dispatch(rds_taskq,
776 		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
777 		if (ret != DDI_SUCCESS) {
778 			RDS_DPRINTF1(LABEL, "ddi_taskq_dispatch failed: %d",
779 			    ret);
780 			mutex_enter(&recvqp->qp_lock);
781 			recvqp->qp_taskqpending = B_FALSE;
782 			mutex_exit(&recvqp->qp_lock);
783 		}
784 	} else {
785 		mutex_exit(&recvqp->qp_lock);
786 	}
787 
788 	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
789 	ASSERT(pktp->dh_datalen != 0);
790 
791 	RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
792 	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
793 	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
794 	    pktp->dh_npkts, pktp->dh_psn);
795 
796 	RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
797 	    pktp->dh_npkts, pktp->dh_psn);
798 
799 	if (pktp->dh_npkts == 1) {
800 		/* single pkt or last packet */
801 		if (pktp->dh_psn != 0) {
802 			/* last packet of a segmented message */
803 			ASSERT(ep->ep_seglbp != NULL);
804 			ep->ep_seglbp->buf_nextp = bp;
805 			ep->ep_seglbp = bp;
806 			rds_received_msg(ep, ep->ep_segfbp);
807 			ep->ep_segfbp = NULL;
808 			ep->ep_seglbp = NULL;
809 		} else {
810 			/* single packet */
811 			rds_received_msg(ep, bp);
812 		}
813 	} else {
814 		/* multi-pkt msg */
815 		if (pktp->dh_psn == 0) {
816 			/* first packet */
817 			ASSERT(ep->ep_segfbp == NULL);
818 			ep->ep_segfbp = bp;
819 			ep->ep_seglbp = bp;
820 		} else {
821 			/* intermediate packet */
822 			ASSERT(ep->ep_segfbp != NULL);
823 			ep->ep_seglbp->buf_nextp = bp;
824 			ep->ep_seglbp = bp;
825 		}
826 	}
827 
828 	RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);
829 
830 	return (ret);
831 }
832 
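/*
 * Receive CQ handler, shared by data and control channels.  The CQ is
 * drained, re-armed with ibt_enable_cq_notify(), and then drained again
 * so that completions which arrived while the CQ was being re-armed are
 * not missed.
 */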
833 void
834 rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
835 {
836 	rds_ep_t	*ep;
837 	int		ret = IBT_SUCCESS;
838 	int		(*func)(ibt_cq_hdl_t, rds_ep_t *);
839 
840 	ep = (rds_ep_t *)arg;
841 
842 	RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);
843 
844 	if (ep->ep_type == RDS_EP_TYPE_DATA) {
845 		func = rds_poll_data_completions;
846 	} else {
847 		func = rds_poll_ctrl_completions;
848 	}
849 
850 	do {
851 		ret = func(cq, ep);
852 	} while (ret != IBT_CQ_EMPTY);
853 
854 	/* enable the CQ */
855 	ret = ibt_enable_cq_notify(cq, rds_wc_signal);
856 	if (ret != IBT_SUCCESS) {
857 		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
858 		    "failed: %d", ep, cq, ret);
859 		return;
860 	}
861 
862 	do {
863 		ret = func(cq, ep);
864 	} while (ret != IBT_CQ_EMPTY);
865 
866 	RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
867 }
868 
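/*
 * Drain the send CQ.  Successfully completed send buffers are collected
 * and returned to the send pool, and a completed RDMA write triggers the
 * next acknowledgement via rds_send_acknowledgement().  On any failure
 * other than a flush, the session is moved to RDS_SESSION_STATE_ERROR
 * (and made the active end) so that no further sends are attempted;
 * rds_handle_send_error() is called once the CQ has been drained.
 */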
869 void
870 rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
871 {
872 	ibt_wc_t	wc[RDS_NUM_DATA_SEND_WCS];
873 	uint_t		npolled, nret, send_error = 0;
874 	rds_buf_t	*headp, *tailp, *bp;
875 	int		ret, ix;
876 
877 	RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);
878 
879 	headp = NULL;
880 	tailp = NULL;
881 	npolled = 0;
882 	do {
883 		ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
884 		if (ret != IBT_SUCCESS) {
885 			if (ret != IBT_CQ_EMPTY) {
886 				RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
887 				    "ibt_poll_cq returned: %d", ep, cq, ret);
888 			} else {
889 				RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
890 				    "ibt_poll_cq returned: IBT_CQ_EMPTY",
891 				    ep, cq);
892 			}
893 
894 			break;
895 		}
896 
897 		for (ix = 0; ix < nret; ix++) {
898 			if (wc[ix].wc_status == IBT_WC_SUCCESS) {
899 				if (wc[ix].wc_type == IBT_WRC_RDMAW) {
900 					rds_send_acknowledgement(ep);
901 					continue;
902 				}
903 
904 				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
905 				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
906 				bp->buf_state = RDS_SNDBUF_FREE;
907 			} else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
908 				RDS_INCR_TXERRS();
909 				RDS_DPRINTF5("rds_poll_send_completions",
910 				    "EP(%p): WC ID: %p ERROR: %d", ep,
911 				    wc[ix].wc_id, wc[ix].wc_status);
912 
913 				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
914 					mutex_enter(&ep->ep_lock);
915 					ep->ep_rdmacnt--;
916 					mutex_exit(&ep->ep_lock);
917 					continue;
918 				}
919 
920 				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
921 				bp->buf_state = RDS_SNDBUF_ERROR;
922 			} else {
923 				RDS_INCR_TXERRS();
924 				RDS_DPRINTF2("rds_poll_send_completions",
925 				    "EP(%p): WC ID: %p ERROR: %d", ep,
926 				    wc[ix].wc_id, wc[ix].wc_status);
927 				if (send_error == 0) {
928 					rds_session_t	*sp = ep->ep_sp;
929 
930 					/* don't let anyone send anymore */
931 					rw_enter(&sp->session_lock, RW_WRITER);
932 					if (sp->session_state !=
933 					    RDS_SESSION_STATE_ERROR) {
934 						sp->session_state =
935 						    RDS_SESSION_STATE_ERROR;
936 						/* Make this the active end */
937 						sp->session_type =
938 						    RDS_SESSION_ACTIVE;
939 					}
940 					rw_exit(&sp->session_lock);
941 				}
942 
943 				send_error++;
944 
945 				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
946 					mutex_enter(&ep->ep_lock);
947 					ep->ep_rdmacnt--;
948 					mutex_exit(&ep->ep_lock);
949 					continue;
950 				}
951 
952 				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
953 				bp->buf_state = RDS_SNDBUF_ERROR;
954 			}
955 
956 			bp->buf_nextp = NULL;
957 			if (headp) {
958 				tailp->buf_nextp = bp;
959 				tailp = bp;
960 			} else {
961 				headp = bp;
962 				tailp = bp;
963 			}
964 
965 			npolled++;
966 		}
967 
968 		if (rds_no_interrupts && (npolled > 100)) {
969 			break;
970 		}
971 
972 		if (rds_no_interrupts == 1) {
973 			break;
974 		}
975 	} while (ret != IBT_CQ_EMPTY);
976 
977 	RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
978 	    npolled, send_error);
979 
980 	/* put the buffers to the pool */
981 	if (npolled != 0) {
982 		rds_free_send_buf(ep, headp, tailp, npolled, lock);
983 	}
984 
985 	if (send_error != 0) {
986 		rds_handle_send_error(ep);
987 	}
988 
989 	RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
990 }
991 
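/*
 * Send CQ handler.  The CQ is re-armed before it is drained so that a
 * completion arriving during the poll still generates a notification.
 */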
992 void
993 rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
994 {
995 	rds_ep_t	*ep;
996 	int		ret;
997 
998 	ep = (rds_ep_t *)arg;
999 
1000 	RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);
1001 
1002 	/* enable the CQ */
1003 	ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
1004 	if (ret != IBT_SUCCESS) {
1005 		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
1006 		    "failed: %d", ep, cq, ret);
1007 		return;
1008 	}
1009 
1010 	rds_poll_send_completions(cq, ep, B_FALSE);
1011 
1012 	RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
1013 }
1014 
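/*
 * Tear down an endpoint's RC resources: flush the channel and wait for
 * its RQ to drain, then free the channel and both CQs.  The caller must
 * hold ep->ep_lock.
 */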
1015 void
1016 rds_ep_free_rc_channel(rds_ep_t *ep)
1017 {
1018 	int ret;
1019 
1020 	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);
1021 
1022 	ASSERT(mutex_owned(&ep->ep_lock));
1023 
1024 	/* free the QP */
1025 	if (ep->ep_chanhdl != NULL) {
1026 		/* wait until the RQ is empty */
1027 		(void) ibt_flush_channel(ep->ep_chanhdl);
1028 		(void) rds_is_recvq_empty(ep, B_TRUE);
1029 		ret = ibt_free_channel(ep->ep_chanhdl);
1030 		if (ret != IBT_SUCCESS) {
1031 			RDS_DPRINTF1("rds_ep_free_rc_channel", "EP(%p) "
1032 			    "ibt_free_channel returned: %d", ep, ret);
1033 		}
1034 		ep->ep_chanhdl = NULL;
1035 	} else {
1036 		RDS_DPRINTF2("rds_ep_free_rc_channel",
1037 		    "EP(%p) Channel is ALREADY FREE", ep);
1038 	}
1039 
1040 	/* free the Send CQ */
1041 	if (ep->ep_sendcq != NULL) {
1042 		ret = ibt_free_cq(ep->ep_sendcq);
1043 		if (ret != IBT_SUCCESS) {
1044 			RDS_DPRINTF1("rds_ep_free_rc_channel",
1045 			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
1046 			    ep, ret);
1047 		}
1048 		ep->ep_sendcq = NULL;
1049 	} else {
1050 		RDS_DPRINTF2("rds_ep_free_rc_channel",
1051 		    "EP(%p) SendCQ is ALREADY FREE", ep);
1052 	}
1053 
1054 	/* free the Recv CQ */
1055 	if (ep->ep_recvcq != NULL) {
1056 		ret = ibt_free_cq(ep->ep_recvcq);
1057 		if (ret != IBT_SUCCESS) {
1058 			RDS_DPRINTF1("rds_ep_free_rc_channel",
1059 			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
1060 			    ep, ret);
1061 		}
1062 		ep->ep_recvcq = NULL;
1063 	} else {
1064 		RDS_DPRINTF2("rds_ep_free_rc_channel",
1065 		    "EP(%p) RecvCQ is ALREADY FREE", ep);
1066 	}
1067 
1068 	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
1069 }
1070 
1071 /* Allocate resources for RC channel */
1072 ibt_channel_hdl_t
1073 rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
1074 {
1075 	int				ret = IBT_SUCCESS;
1076 	ibt_cq_attr_t			scqattr, rcqattr;
1077 	ibt_rc_chan_alloc_args_t	chanargs;
1078 	ibt_channel_hdl_t		chanhdl;
1079 	rds_session_t			*sp;
1080 	rds_hca_t			*hcap;
1081 
1082 	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
1083 	    ep, hca_port);
1084 
1085 	/* Update the EP with the right IP address and HCA guid */
1086 	sp = ep->ep_sp;
1087 	ASSERT(sp != NULL);
1088 	rw_enter(&sp->session_lock, RW_READER);
1089 	mutex_enter(&ep->ep_lock);
1090 	ep->ep_myip = sp->session_myip;
1091 	ep->ep_remip = sp->session_remip;
1092 	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
1093 	ep->ep_hca_guid = hcap->hca_guid;
1094 	mutex_exit(&ep->ep_lock);
1095 	rw_exit(&sp->session_lock);
1096 
1097 	/* reset taskqpending flag here */
1098 	ep->ep_recvqp.qp_taskqpending = B_FALSE;
1099 
1100 	if (ep->ep_type == RDS_EP_TYPE_CTRL) {
1101 		scqattr.cq_size = MaxCtrlSendBuffers;
1102 		scqattr.cq_sched = NULL;
1103 		scqattr.cq_flags = IBT_CQ_NO_FLAGS;
1104 
1105 		rcqattr.cq_size = MaxCtrlRecvBuffers;
1106 		rcqattr.cq_sched = NULL;
1107 		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
1108 
1109 		chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
1110 		chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
1111 		chanargs.rc_sizes.cs_sq_sgl = 1;
1112 		chanargs.rc_sizes.cs_rq_sgl = 1;
1113 	} else {
1114 		scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
1115 		scqattr.cq_sched = NULL;
1116 		scqattr.cq_flags = IBT_CQ_NO_FLAGS;
1117 
1118 		rcqattr.cq_size = MaxDataRecvBuffers;
1119 		rcqattr.cq_sched = NULL;
1120 		rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
1121 
1122 		chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
1123 		chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers;
1124 		chanargs.rc_sizes.cs_sq_sgl = 1;
1125 		chanargs.rc_sizes.cs_rq_sgl = 1;
1126 	}
1127 
1128 	if (ep->ep_sendcq == NULL) {
1129 		/* returned size is always greater than the requested size */
1130 		ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr,
1131 		    &ep->ep_sendcq, NULL);
1132 		if (ret != IBT_SUCCESS) {
1133 			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ "
1134 			    "failed, size = %d: %d", scqattr.cq_size, ret);
1135 			return (NULL);
1136 		}
1137 
1138 		(void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler,
1139 		    ep);
1140 
1141 		if (rds_no_interrupts == 0) {
1142 			ret = ibt_enable_cq_notify(ep->ep_sendcq,
1143 			    IBT_NEXT_COMPLETION);
1144 			if (ret != IBT_SUCCESS) {
1145 				RDS_DPRINTF2(LABEL,
1146 				    "ibt_enable_cq_notify failed: %d", ret);
1147 				(void) ibt_free_cq(ep->ep_sendcq);
1148 				ep->ep_sendcq = NULL;
1149 				return (NULL);
1150 			}
1151 		}
1152 	}
1153 
1154 	if (ep->ep_recvcq == NULL) {
1155 		/* returned size is always greater than the requested size */
1156 		ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr,
1157 		    &ep->ep_recvcq, NULL);
1158 		if (ret != IBT_SUCCESS) {
1159 			RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ "
1160 			    "failed, size = %d: %d", rcqattr.cq_size, ret);
1161 			(void) ibt_free_cq(ep->ep_sendcq);
1162 			ep->ep_sendcq = NULL;
1163 			return (NULL);
1164 		}
1165 
1166 		(void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler,
1167 		    ep);
1168 
1169 		ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal);
1170 		if (ret != IBT_SUCCESS) {
1171 			RDS_DPRINTF2(LABEL,
1172 			    "ibt_enable_cq_notify failed: %d", ret);
1173 			(void) ibt_free_cq(ep->ep_recvcq);
1174 			ep->ep_recvcq = NULL;
1175 			(void) ibt_free_cq(ep->ep_sendcq);
1176 			ep->ep_sendcq = NULL;
1177 			return (NULL);
1178 		}
1179 	}
1180 
1181 	chanargs.rc_flags = IBT_ALL_SIGNALED;
1182 	chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1183 	    IBT_CEP_ATOMIC;
1184 	chanargs.rc_hca_port_num = hca_port;
1185 	chanargs.rc_scq = ep->ep_sendcq;
1186 	chanargs.rc_rcq = ep->ep_recvcq;
1187 	chanargs.rc_pd = hcap->hca_pdhdl;
1188 	chanargs.rc_srq = NULL;
1189 
1190 	ret = ibt_alloc_rc_channel(hcap->hca_hdl,
1191 	    IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL);
1192 	if (ret != IBT_SUCCESS) {
1193 		RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d",
1194 		    ret);
1195 		(void) ibt_free_cq(ep->ep_recvcq);
1196 		ep->ep_recvcq = NULL;
1197 		(void) ibt_free_cq(ep->ep_sendcq);
1198 		ep->ep_sendcq = NULL;
1199 		return (NULL);
1200 	}
1201 
1202 	/* Chan private should contain the ep */
1203 	(void) ibt_set_chan_private(chanhdl, ep);
1204 
1205 	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl);
1206 
1207 	return (chanhdl);
1208 }
1209 
1210 
1211 #if 0
1212 
1213 /* Return node guid given a port gid */
1214 ib_guid_t
1215 rds_gid_to_node_guid(ib_gid_t gid)
1216 {
1217 	ibt_node_info_t	nodeinfo;
1218 	int		ret;
1219 
1220 	RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx",
1221 	    gid.gid_prefix, gid.gid_guid);
1222 
1223 	ret = ibt_gid_to_node_info(gid, &nodeinfo);
1224 	if (ret != IBT_SUCCESS) {
1225 		RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx "
1226 		    "failed", gid.gid_prefix, gid.gid_guid);
1227 		return (0LL);
1228 	}
1229 
1230 	RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx",
1231 	    nodeinfo.n_node_guid);
1232 
1233 	return (nodeinfo.n_node_guid);
1234 }
1235 
1236 #endif
1237 
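/*
 * IBT_EVENT_PORT_UP handler: re-query the HCA's port information, swap
 * it into the rds_hca_t, and bind the RDS service to the SGID of the
 * port that came up so connections can be accepted on it.
 */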
1238 static void
1239 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl,
1240     ibt_async_event_t *event)
1241 {
1242 	rds_hca_t		*hcap;
1243 	ibt_hca_portinfo_t	*newpinfop, *oldpinfop;
1244 	uint_t			newsize, oldsize, nport;
1245 	ib_gid_t		gid;
1246 	int			ret;
1247 
1248 	RDS_DPRINTF2("rds_handle_portup_event",
1249 	    "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep);
1250 
1251 	hcap = rds_get_hcap(statep, event->ev_hca_guid);
1252 	if (hcap == NULL) {
1253 		RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is "
1254 		    "not in our list", event->ev_hca_guid);
1255 		return;
1256 	}
1257 
1258 	ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize);
1259 	if (ret != IBT_SUCCESS) {
1260 		RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret);
1261 		return;
1262 	}
1263 
1264 	oldpinfop = hcap->hca_pinfop;
1265 	oldsize = hcap->hca_pinfo_sz;
1266 	hcap->hca_pinfop = newpinfop;
1267 	hcap->hca_pinfo_sz = newsize;
1268 
1269 	/* structure copy */
1270 	gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0];
1271 
1272 	/* bind RDS service on the port, pass statep as cm_private */
1273 	ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, NULL);
1274 	if (ret != IBT_SUCCESS) {
1275 		RDS_DPRINTF2(LABEL, "Bind service for HCA: 0x%llx Port: %d "
1276 		    "gid %llx:%llx returned: %d", event->ev_hca_guid,
1277 		    event->ev_port, gid.gid_prefix, gid.gid_guid, ret);
1278 	}
1279 
1280 	(void) ibt_free_portinfo(oldpinfop, oldsize);
1281 
1282 	RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx",
1283 	    event->ev_hca_guid);
1284 }
1285 
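/*
 * IBTF asynchronous event handler registered through rds_ib_modinfo.
 * Only IBT_EVENT_PORT_UP is acted upon; all other events are just
 * logged.
 */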
1286 static void
1287 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
1288     ibt_async_event_t *event)
1289 {
1290 	rds_state_t		*statep;
1291 
1292 	RDS_DPRINTF2("rds_async_handler", "Async code: %d", code);
1293 
1294 	switch (code) {
1295 	case IBT_EVENT_PORT_UP:
1296 		statep = (rds_state_t *)clntp;
1297 		rds_handle_portup_event(statep, hdl, event);
1298 		break;
1299 
1300 	default:
1301 		RDS_DPRINTF2(LABEL, "Async event: %d not handled", code);
1302 	}
1303 
1304 	RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code);
1305 }
1306