1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
26 *
27 * This software is available to you under a choice of one of two
28 * licenses. You may choose to be licensed under the terms of the GNU
29 * General Public License (GPL) Version 2, available from the file
30 * COPYING in the main directory of this source tree, or the
31 * OpenIB.org BSD license below:
32 *
33 * Redistribution and use in source and binary forms, with or
34 * without modification, are permitted provided that the following
35 * conditions are met:
36 *
37 * - Redistributions of source code must retain the above
38 * copyright notice, this list of conditions and the following
39 * disclaimer.
40 *
41 * - Redistributions in binary form must reproduce the above
42 * copyright notice, this list of conditions and the following
43 * disclaimer in the documentation and/or other materials
44 * provided with the distribution.
45 *
46 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
47 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
48 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
49 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
50 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
51 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
52 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
53 * SOFTWARE.
54 *
55 */
56 /*
57 * Sun elects to include this software in Sun product
58 * under the OpenIB BSD license.
59 *
60 *
61 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
62 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
63 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
64 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
65 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
66 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
67 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
69 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
70 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
71 * POSSIBILITY OF SUCH DAMAGE.
72 */
73
74 #include <sys/types.h>
75 #include <sys/ddi.h>
76 #include <sys/sunddi.h>
77 #include <sys/ib/clients/rds/rdsib_cm.h>
78 #include <sys/ib/clients/rds/rdsib_ib.h>
79 #include <sys/ib/clients/rds/rdsib_buf.h>
80 #include <sys/ib/clients/rds/rdsib_ep.h>
81 #include <sys/ib/clients/rds/rds_kstat.h>
82
/* Asynchronous event callback registered with IBTF through rds_ib_modinfo. */
static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

/*
 * Client registration record handed to ibt_attach() by rdsib_initialize_ib().
 * The initializer is positional, so the entries have to stay in the member
 * order of struct ibt_clnt_modinfo_s.
 */
static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
	IBTI_V_CURR,
	IBT_NETWORK,
	rds_async_handler,
	NULL,
	"RDS"
};

/* performance tunables */
/* non-zero makes rds_poll_send_completions() leave its polling loop early */
uint_t rds_no_interrupts = 0;
uint_t rds_poll_percent_full = 25;
/* notification type passed to ibt_enable_cq_notify() by rds_recvcq_handler() */
uint_t rds_wc_signal = IBT_NEXT_SOLICITED;
uint_t rds_waittime_ms = 100; /* ms */

/* dev_info pointer given to ibt_attach(); defined outside this file */
extern dev_info_t *rdsib_dev_info;
/* called from rdsib_deinitialize_ib() before the HCA resources are released */
extern void rds_close_sessions();
102
103 static void
rdsib_validate_chan_sizes(ibt_hca_attr_t * hattrp)104 rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
105 {
106 /* The SQ size should not be more than that supported by the HCA */
107 if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
108 ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
109 RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater "
110 "than that supported by the HCA driver "
111 "(%d + %d > %d or %d), lowering it to a supported value.",
112 RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
113 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
114
115 MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
116 hattrp->hca_max_cq_sz) ?
117 hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
118 hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
119 }
120
121 /* The RQ size should not be more than that supported by the HCA */
122 if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
123 (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
124 RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that "
125 "supported by the HCA driver (%d > %d or %d), lowering it "
126 "to a supported value.", MaxDataRecvBuffers,
127 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
128
129 MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
130 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
131 hattrp->hca_max_chan_sz;
132 }
133
134 /* The SQ size should not be more than that supported by the HCA */
135 if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
136 (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
137 RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that "
138 "supported by the HCA driver (%d > %d or %d), lowering it "
139 "to a supported value.", MaxCtrlSendBuffers,
140 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
141
142 MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
143 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
144 hattrp->hca_max_chan_sz;
145 }
146
147 /* The RQ size should not be more than that supported by the HCA */
148 if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
149 (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
150 RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that "
151 "supported by the HCA driver (%d > %d or %d), lowering it "
152 "to a supported value.", MaxCtrlRecvBuffers,
153 hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);
154
155 MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
156 hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
157 hattrp->hca_max_chan_sz;
158 }
159
160 /* The MaxRecvMemory should be less than that supported by the HCA */
161 if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
162 RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that "
163 "supported by the HCA driver (%d > %d), lowering it to %d",
164 NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
165 hattrp->hca_max_memr_len);
166
167 NDataRX = hattrp->hca_max_memr_len/RdsPktSize;
168 }
169 }
170
171 /* Return hcap, given the hca guid */
172 rds_hca_t *
rds_lkup_hca(ib_guid_t hca_guid)173 rds_lkup_hca(ib_guid_t hca_guid)
174 {
175 rds_hca_t *hcap;
176
177 RDS_DPRINTF4("rds_lkup_hca", "Enter: statep: 0x%p "
178 "guid: %llx", rdsib_statep, hca_guid);
179
180 rw_enter(&rdsib_statep->rds_hca_lock, RW_READER);
181
182 hcap = rdsib_statep->rds_hcalistp;
183 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
184 hcap = hcap->hca_nextp;
185 }
186
187 rw_exit(&rdsib_statep->rds_hca_lock);
188
189 RDS_DPRINTF4("rds_lkup_hca", "return");
190
191 return (hcap);
192 }
193
194 void rds_randomize_qps(rds_hca_t *hcap);
195
196 static rds_hca_t *
rdsib_init_hca(ib_guid_t hca_guid)197 rdsib_init_hca(ib_guid_t hca_guid)
198 {
199 rds_hca_t *hcap;
200 boolean_t alloc = B_FALSE;
201 int ret;
202
203 RDS_DPRINTF2("rdsib_init_hca", "enter: HCA 0x%llx", hca_guid);
204
205 /* Do a HCA lookup */
206 hcap = rds_lkup_hca(hca_guid);
207
208 if (hcap != NULL && hcap->hca_hdl != NULL) {
209 /*
210 * This can happen if we get IBT_HCA_ATTACH_EVENT on an HCA
211 * that we have already opened. Just return NULL so that
212 * we'll not end up reinitializing the HCA again.
213 */
214 RDS_DPRINTF2("rdsib_init_hca", "HCA already initialized");
215 return (NULL);
216 }
217
218 if (hcap == NULL) {
219 RDS_DPRINTF2("rdsib_init_hca", "New HCA is added");
220 hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);
221 alloc = B_TRUE;
222 }
223
224 hcap->hca_guid = hca_guid;
225 ret = ibt_open_hca(rdsib_statep->rds_ibhdl, hca_guid,
226 &hcap->hca_hdl);
227 if (ret != IBT_SUCCESS) {
228 if (ret == IBT_HCA_IN_USE) {
229 RDS_DPRINTF2("rdsib_init_hca",
230 "ibt_open_hca: 0x%llx returned IBT_HCA_IN_USE",
231 hca_guid);
232 } else {
233 RDS_DPRINTF2("rdsib_init_hca",
234 "ibt_open_hca: 0x%llx failed: %d", hca_guid, ret);
235 }
236 if (alloc == B_TRUE) {
237 kmem_free(hcap, sizeof (rds_hca_t));
238 }
239 return (NULL);
240 }
241
242 ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
243 if (ret != IBT_SUCCESS) {
244 RDS_DPRINTF2("rdsib_init_hca",
245 "Query HCA: 0x%llx failed: %d", hca_guid, ret);
246 ret = ibt_close_hca(hcap->hca_hdl);
247 ASSERT(ret == IBT_SUCCESS);
248 if (alloc == B_TRUE) {
249 kmem_free(hcap, sizeof (rds_hca_t));
250 } else {
251 hcap->hca_hdl = NULL;
252 }
253 return (NULL);
254 }
255
256 ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
257 &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
258 if (ret != IBT_SUCCESS) {
259 RDS_DPRINTF2("rdsib_init_hca",
260 "Query HCA 0x%llx ports failed: %d", hca_guid,
261 ret);
262 ret = ibt_close_hca(hcap->hca_hdl);
263 hcap->hca_hdl = NULL;
264 ASSERT(ret == IBT_SUCCESS);
265 if (alloc == B_TRUE) {
266 kmem_free(hcap, sizeof (rds_hca_t));
267 } else {
268 hcap->hca_hdl = NULL;
269 }
270 return (NULL);
271 }
272
273 /* Only one PD per HCA is allocated, so do it here */
274 ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
275 &hcap->hca_pdhdl);
276 if (ret != IBT_SUCCESS) {
277 RDS_DPRINTF2("rdsib_init_hca",
278 "ibt_alloc_pd 0x%llx failed: %d", hca_guid, ret);
279 (void) ibt_free_portinfo(hcap->hca_pinfop,
280 hcap->hca_pinfo_sz);
281 ret = ibt_close_hca(hcap->hca_hdl);
282 ASSERT(ret == IBT_SUCCESS);
283 hcap->hca_hdl = NULL;
284 if (alloc == B_TRUE) {
285 kmem_free(hcap, sizeof (rds_hca_t));
286 } else {
287 hcap->hca_hdl = NULL;
288 }
289 return (NULL);
290 }
291
292 rdsib_validate_chan_sizes(&hcap->hca_attr);
293
294 /* To minimize stale connections after ungraceful reboots */
295 rds_randomize_qps(hcap);
296
297 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
298 hcap->hca_state = RDS_HCA_STATE_OPEN;
299 if (alloc == B_TRUE) {
300 /* this is a new HCA, add it to the list */
301 rdsib_statep->rds_nhcas++;
302 hcap->hca_nextp = rdsib_statep->rds_hcalistp;
303 rdsib_statep->rds_hcalistp = hcap;
304 }
305 rw_exit(&rdsib_statep->rds_hca_lock);
306
307 RDS_DPRINTF2("rdsib_init_hca", "return: HCA 0x%llx", hca_guid);
308
309 return (hcap);
310 }
311
312 /*
313 * Called from attach
314 */
315 int
rdsib_initialize_ib()316 rdsib_initialize_ib()
317 {
318 ib_guid_t *guidp;
319 rds_hca_t *hcap;
320 uint_t ix, hcaix, nhcas;
321 int ret;
322
323 RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep);
324
325 ASSERT(rdsib_statep != NULL);
326 if (rdsib_statep == NULL) {
327 RDS_DPRINTF1("rdsib_initialize_ib",
328 "RDS Statep not initialized");
329 return (-1);
330 }
331
332 /* How many hcas are there? */
333 nhcas = ibt_get_hca_list(&guidp);
334 if (nhcas == 0) {
335 RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available");
336 return (-1);
337 }
338
339 RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas);
340
341 /* Register with IBTF */
342 ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
343 &rdsib_statep->rds_ibhdl);
344 if (ret != IBT_SUCCESS) {
345 RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d",
346 ret);
347 (void) ibt_free_hca_list(guidp, nhcas);
348 return (-1);
349 }
350
351 /*
352 * Open each HCA and gather its information. Don't care about HCAs
353 * that cannot be opened. It is OK as long as atleast one HCA can be
354 * opened.
355 * Initialize a HCA only if all the information is available.
356 */
357 for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
358 RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);
359
360 hcap = rdsib_init_hca(guidp[ix]);
361 if (hcap != NULL) hcaix++;
362 }
363
364 /* free the HCA list, we are done with it */
365 (void) ibt_free_hca_list(guidp, nhcas);
366
367 if (hcaix == 0) {
368 /* Failed to Initialize even one HCA */
369 RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized");
370 (void) ibt_detach(rdsib_statep->rds_ibhdl);
371 rdsib_statep->rds_ibhdl = NULL;
372 return (-1);
373 }
374
375 if (hcaix < nhcas) {
376 RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
377 (nhcas - hcaix), nhcas);
378 }
379
380 RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep);
381
382 return (0);
383 }
384
385 /*
386 * Called from detach
387 */
388 void
rdsib_deinitialize_ib()389 rdsib_deinitialize_ib()
390 {
391 rds_hca_t *hcap, *nextp;
392 int ret;
393
394 RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep);
395
396 /* close and destroy all the sessions */
397 rds_close_sessions(NULL);
398
399 /* Release all HCA resources */
400 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
401 RDS_DPRINTF2("rdsib_deinitialize_ib", "HCA List: %p, NHCA: %d",
402 rdsib_statep->rds_hcalistp, rdsib_statep->rds_nhcas);
403 hcap = rdsib_statep->rds_hcalistp;
404 rdsib_statep->rds_hcalistp = NULL;
405 rdsib_statep->rds_nhcas = 0;
406 rw_exit(&rdsib_statep->rds_hca_lock);
407
408 while (hcap != NULL) {
409 nextp = hcap->hca_nextp;
410
411 if (hcap->hca_hdl != NULL) {
412 ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
413 ASSERT(ret == IBT_SUCCESS);
414
415 (void) ibt_free_portinfo(hcap->hca_pinfop,
416 hcap->hca_pinfo_sz);
417
418 ret = ibt_close_hca(hcap->hca_hdl);
419 ASSERT(ret == IBT_SUCCESS);
420 }
421
422 kmem_free(hcap, sizeof (rds_hca_t));
423 hcap = nextp;
424 }
425
426 /* Deregister with IBTF */
427 if (rdsib_statep->rds_ibhdl != NULL) {
428 (void) ibt_detach(rdsib_statep->rds_ibhdl);
429 rdsib_statep->rds_ibhdl = NULL;
430 }
431
432 RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p",
433 rdsib_statep);
434 }
435
436 /*
437 * Called on open of first RDS socket
438 */
439 int
rdsib_open_ib()440 rdsib_open_ib()
441 {
442 int ret;
443
444 RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep);
445
446 /* Enable incoming connection requests */
447 if (rdsib_statep->rds_srvhdl == NULL) {
448 rdsib_statep->rds_srvhdl =
449 rds_register_service(rdsib_statep->rds_ibhdl);
450 if (rdsib_statep->rds_srvhdl == NULL) {
451 RDS_DPRINTF2("rdsib_open_ib",
452 "Service registration failed");
453 return (-1);
454 } else {
455 /* bind the service on all available ports */
456 ret = rds_bind_service(rdsib_statep);
457 if (ret != 0) {
458 RDS_DPRINTF2("rdsib_open_ib",
459 "Bind service failed: %d", ret);
460 }
461 }
462 }
463
464 RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep);
465
466 return (0);
467 }
468
469 /*
470 * Called when all ports are closed.
471 */
472 void
rdsib_close_ib()473 rdsib_close_ib()
474 {
475 int ret;
476
477 RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep);
478
479 /* Disable incoming connection requests */
480 if (rdsib_statep->rds_srvhdl != NULL) {
481 ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
482 if (ret != 0) {
483 RDS_DPRINTF2("rdsib_close_ib",
484 "ibt_unbind_all_services failed: %d\n", ret);
485 }
486 ret = ibt_deregister_service(rdsib_statep->rds_ibhdl,
487 rdsib_statep->rds_srvhdl);
488 if (ret != 0) {
489 RDS_DPRINTF2("rdsib_close_ib",
490 "ibt_deregister_service failed: %d\n", ret);
491 } else {
492 rdsib_statep->rds_srvhdl = NULL;
493 }
494 }
495
496 RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep);
497 }
498
499 /* Return hcap, given the hca guid */
500 rds_hca_t *
rds_get_hcap(rds_state_t * statep,ib_guid_t hca_guid)501 rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
502 {
503 rds_hca_t *hcap;
504
505 RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
506 "guid: %llx", statep, hca_guid);
507
508 rw_enter(&statep->rds_hca_lock, RW_READER);
509
510 hcap = statep->rds_hcalistp;
511 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
512 hcap = hcap->hca_nextp;
513 }
514
515 /*
516 * don't let anyone use this HCA until the RECV memory
517 * is registered with this HCA
518 */
519 if ((hcap != NULL) &&
520 (hcap->hca_state == RDS_HCA_STATE_MEM_REGISTERED)) {
521 ASSERT(hcap->hca_mrhdl != NULL);
522 rw_exit(&statep->rds_hca_lock);
523 return (hcap);
524 }
525
526 RDS_DPRINTF2("rds_get_hcap",
527 "HCA (0x%p, 0x%llx) is not initialized", hcap, hca_guid);
528 rw_exit(&statep->rds_hca_lock);
529
530 RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");
531
532 return (NULL);
533 }
534
535 /* Return hcap, given a gid */
536 rds_hca_t *
rds_gid_to_hcap(rds_state_t * statep,ib_gid_t gid)537 rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
538 {
539 rds_hca_t *hcap;
540 uint_t ix;
541
542 RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
543 statep, gid.gid_prefix, gid.gid_guid);
544
545 rw_enter(&statep->rds_hca_lock, RW_READER);
546
547 hcap = statep->rds_hcalistp;
548 while (hcap != NULL) {
549
550 /*
551 * don't let anyone use this HCA until the RECV memory
552 * is registered with this HCA
553 */
554 if (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED) {
555 RDS_DPRINTF3("rds_gid_to_hcap",
556 "HCA (0x%p, 0x%llx) is not initialized",
557 hcap, gid.gid_guid);
558 hcap = hcap->hca_nextp;
559 continue;
560 }
561
562 for (ix = 0; ix < hcap->hca_nports; ix++) {
563 if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
564 gid.gid_prefix) &&
565 (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
566 gid.gid_guid)) {
567 RDS_DPRINTF4("rds_gid_to_hcap",
568 "gid found in hcap: 0x%p", hcap);
569 rw_exit(&statep->rds_hca_lock);
570 return (hcap);
571 }
572 }
573 hcap = hcap->hca_nextp;
574 }
575
576 rw_exit(&statep->rds_hca_lock);
577
578 return (NULL);
579 }
580
581 /* This is called from the send CQ handler */
582 void
rds_send_acknowledgement(rds_ep_t * ep)583 rds_send_acknowledgement(rds_ep_t *ep)
584 {
585 int ret;
586 uint_t ix;
587
588 RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);
589
590 mutex_enter(&ep->ep_lock);
591
592 ASSERT(ep->ep_rdmacnt != 0);
593
594 /*
595 * The previous ACK completed successfully, send the next one
596 * if more messages were received after sending the last ACK
597 */
598 if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
599 *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
600 mutex_exit(&ep->ep_lock);
601
602 /* send acknowledgement */
603 RDS_INCR_TXACKS();
604 ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
605 if (ret != IBT_SUCCESS) {
606 RDS_DPRINTF2("rds_send_acknowledgement",
607 "EP(%p): ibt_post_send for acknowledgement "
608 "failed: %d, SQ depth: %d",
609 ep, ret, ep->ep_sndpool.pool_nbusy);
610 mutex_enter(&ep->ep_lock);
611 ep->ep_rdmacnt--;
612 mutex_exit(&ep->ep_lock);
613 }
614 } else {
615 /* ACKed all messages, no more to ACK */
616 ep->ep_rdmacnt--;
617 mutex_exit(&ep->ep_lock);
618 return;
619 }
620
621 RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
622 }
623
624 static int
rds_poll_ctrl_completions(ibt_cq_hdl_t cq,rds_ep_t * ep)625 rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
626 {
627 ibt_wc_t wc;
628 uint_t npolled;
629 rds_buf_t *bp;
630 rds_ctrl_pkt_t *cpkt;
631 rds_qp_t *recvqp;
632 int ret = IBT_SUCCESS;
633
634 RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);
635
636 bzero(&wc, sizeof (ibt_wc_t));
637 ret = ibt_poll_cq(cq, &wc, 1, &npolled);
638 if (ret != IBT_SUCCESS) {
639 if (ret != IBT_CQ_EMPTY) {
640 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
641 "returned: %d", ep, cq, ret);
642 } else {
643 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
644 "returned: IBT_CQ_EMPTY", ep, cq);
645 }
646 return (ret);
647 }
648
649 bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
650
651 if (wc.wc_status != IBT_WC_SUCCESS) {
652 mutex_enter(&ep->ep_recvqp.qp_lock);
653 ep->ep_recvqp.qp_level--;
654 mutex_exit(&ep->ep_recvqp.qp_lock);
655
656 /* Free the buffer */
657 bp->buf_state = RDS_RCVBUF_FREE;
658 rds_free_recv_buf(bp, 1);
659
660 /* Receive completion failure */
661 if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
662 RDS_DPRINTF2("rds_poll_ctrl_completions",
663 "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
664 ep, cq, wc.wc_id, wc.wc_status);
665 }
666 return (ret);
667 }
668
669 /* there is one less in the RQ */
670 recvqp = &ep->ep_recvqp;
671 mutex_enter(&recvqp->qp_lock);
672 recvqp->qp_level--;
673 if ((recvqp->qp_taskqpending == B_FALSE) &&
674 (recvqp->qp_level <= recvqp->qp_lwm)) {
675 /* Time to post more buffers into the RQ */
676 recvqp->qp_taskqpending = B_TRUE;
677 mutex_exit(&recvqp->qp_lock);
678
679 ret = ddi_taskq_dispatch(rds_taskq,
680 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
681 if (ret != DDI_SUCCESS) {
682 RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
683 ret);
684 mutex_enter(&recvqp->qp_lock);
685 recvqp->qp_taskqpending = B_FALSE;
686 mutex_exit(&recvqp->qp_lock);
687 }
688 } else {
689 mutex_exit(&recvqp->qp_lock);
690 }
691
692 cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
693 rds_handle_control_message(ep->ep_sp, cpkt);
694
695 bp->buf_state = RDS_RCVBUF_FREE;
696 rds_free_recv_buf(bp, 1);
697
698 RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);
699
700 return (ret);
701 }
702
703 #define RDS_POST_FEW_ATATIME 100
704 /* Post recv WRs into the RQ. Assumes the ep->refcnt is already incremented */
705 void
rds_post_recv_buf(void * arg)706 rds_post_recv_buf(void *arg)
707 {
708 ibt_channel_hdl_t chanhdl;
709 rds_ep_t *ep;
710 rds_session_t *sp;
711 rds_qp_t *recvqp;
712 rds_bufpool_t *gp;
713 rds_buf_t *bp, *bp1;
714 ibt_recv_wr_t *wrp, wr[RDS_POST_FEW_ATATIME];
715 rds_hca_t *hcap;
716 uint_t npost, nspace, rcv_len;
717 uint_t ix, jx, kx;
718 int ret;
719
720 chanhdl = (ibt_channel_hdl_t)arg;
721 RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
722 RDS_INCR_POST_RCV_BUF_CALLS();
723
724 ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
725 ASSERT(ep != NULL);
726 sp = ep->ep_sp;
727 recvqp = &ep->ep_recvqp;
728
729 RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);
730
731 /* get the hcap for the HCA hosting this channel */
732 hcap = rds_lkup_hca(ep->ep_hca_guid);
733 if (hcap == NULL) {
734 RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
735 ep->ep_hca_guid);
736 return;
737 }
738
739 /* Make sure the session is still connected */
740 rw_enter(&sp->session_lock, RW_READER);
741 if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
742 (sp->session_state != RDS_SESSION_STATE_CONNECTED) &&
743 (sp->session_state != RDS_SESSION_STATE_HCA_CLOSING)) {
744 RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
745 "in active state (%d)", ep, sp->session_state);
746 rw_exit(&sp->session_lock);
747 return;
748 }
749 rw_exit(&sp->session_lock);
750
751 /* how many can be posted */
752 mutex_enter(&recvqp->qp_lock);
753 nspace = recvqp->qp_depth - recvqp->qp_level;
754 if (nspace == 0) {
755 RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
756 recvqp->qp_taskqpending = B_FALSE;
757 mutex_exit(&recvqp->qp_lock);
758 return;
759 }
760 mutex_exit(&recvqp->qp_lock);
761
762 if (ep->ep_type == RDS_EP_TYPE_DATA) {
763 gp = &rds_dpool;
764 rcv_len = RdsPktSize;
765 } else {
766 gp = &rds_cpool;
767 rcv_len = RDS_CTRLPKT_SIZE;
768 }
769
770 bp = rds_get_buf(gp, nspace, &jx);
771 if (bp == NULL) {
772 RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
773 /* try again later */
774 ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
775 (void *)chanhdl, DDI_NOSLEEP);
776 if (ret != DDI_SUCCESS) {
777 RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
778 ret);
779 mutex_enter(&recvqp->qp_lock);
780 recvqp->qp_taskqpending = B_FALSE;
781 mutex_exit(&recvqp->qp_lock);
782 }
783 return;
784 }
785
786 if (jx != nspace) {
787 RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
788 "needed: %d available: %d", ep, nspace, jx);
789 nspace = jx;
790 }
791
792 bp1 = bp;
793 for (ix = 0; ix < nspace; ix++) {
794 bp1->buf_ep = ep;
795 ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
796 bp1->buf_state = RDS_RCVBUF_POSTED;
797 bp1->buf_ds.ds_key = hcap->hca_lkey;
798 bp1->buf_ds.ds_len = rcv_len;
799 bp1 = bp1->buf_nextp;
800 }
801
802 #if 0
803 wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
804 KM_SLEEP);
805 #else
806 wrp = &wr[0];
807 #endif
808
809 npost = nspace;
810 while (npost) {
811 jx = (npost > RDS_POST_FEW_ATATIME) ?
812 RDS_POST_FEW_ATATIME : npost;
813 for (ix = 0; ix < jx; ix++) {
814 wrp[ix].wr_id = (uintptr_t)bp;
815 wrp[ix].wr_nds = 1;
816 wrp[ix].wr_sgl = &bp->buf_ds;
817 bp = bp->buf_nextp;
818 }
819
820 ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
821 if ((ret != IBT_SUCCESS) || (kx != jx)) {
822 RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: "
823 "%d", npost, ret);
824 npost -= kx;
825 break;
826 }
827
828 npost -= jx;
829 }
830
831 mutex_enter(&recvqp->qp_lock);
832 if (npost != 0) {
833 RDS_DPRINTF2("rds_post_recv_buf",
834 "EP(%p) Failed to post %d WRs", ep, npost);
835 recvqp->qp_level += (nspace - npost);
836 } else {
837 recvqp->qp_level += nspace;
838 }
839
840 /*
841 * sometimes, the recv WRs can get consumed as soon as they are
842 * posted. In that case, taskq thread to post more WRs to the RQ will
843 * not be scheduled as the taskqpending flag is still set.
844 */
845 if (recvqp->qp_level == 0) {
846 mutex_exit(&recvqp->qp_lock);
847 ret = ddi_taskq_dispatch(rds_taskq,
848 rds_post_recv_buf, (void *)chanhdl, DDI_NOSLEEP);
849 if (ret != DDI_SUCCESS) {
850 RDS_DPRINTF2("rds_post_recv_buf",
851 "ddi_taskq_dispatch failed: %d", ret);
852 mutex_enter(&recvqp->qp_lock);
853 recvqp->qp_taskqpending = B_FALSE;
854 mutex_exit(&recvqp->qp_lock);
855 }
856 } else {
857 recvqp->qp_taskqpending = B_FALSE;
858 mutex_exit(&recvqp->qp_lock);
859 }
860
861 #if 0
862 kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
863 #endif
864
865 RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
866 }
867
868 static int
rds_poll_data_completions(ibt_cq_hdl_t cq,rds_ep_t * ep)869 rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
870 {
871 ibt_wc_t wc;
872 rds_buf_t *bp;
873 rds_data_hdr_t *pktp;
874 rds_qp_t *recvqp;
875 uint_t npolled;
876 int ret = IBT_SUCCESS;
877
878
879 RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);
880
881 bzero(&wc, sizeof (ibt_wc_t));
882 ret = ibt_poll_cq(cq, &wc, 1, &npolled);
883 if (ret != IBT_SUCCESS) {
884 if (ret != IBT_CQ_EMPTY) {
885 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
886 "returned: %d", ep, cq, ret);
887 } else {
888 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
889 "returned: IBT_CQ_EMPTY", ep, cq);
890 }
891 return (ret);
892 }
893
894 bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
895 ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
896 bp->buf_state = RDS_RCVBUF_ONSOCKQ;
897 bp->buf_nextp = NULL;
898
899 if (wc.wc_status != IBT_WC_SUCCESS) {
900 mutex_enter(&ep->ep_recvqp.qp_lock);
901 ep->ep_recvqp.qp_level--;
902 mutex_exit(&ep->ep_recvqp.qp_lock);
903
904 /* free the buffer */
905 bp->buf_state = RDS_RCVBUF_FREE;
906 rds_free_recv_buf(bp, 1);
907
908 /* Receive completion failure */
909 if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
910 RDS_DPRINTF2("rds_poll_data_completions",
911 "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
912 ep, cq, wc.wc_id, wc.wc_status);
913 RDS_INCR_RXERRS();
914 }
915 return (ret);
916 }
917
918 /* there is one less in the RQ */
919 recvqp = &ep->ep_recvqp;
920 mutex_enter(&recvqp->qp_lock);
921 recvqp->qp_level--;
922 if ((recvqp->qp_taskqpending == B_FALSE) &&
923 (recvqp->qp_level <= recvqp->qp_lwm)) {
924 /* Time to post more buffers into the RQ */
925 recvqp->qp_taskqpending = B_TRUE;
926 mutex_exit(&recvqp->qp_lock);
927
928 ret = ddi_taskq_dispatch(rds_taskq,
929 rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
930 if (ret != DDI_SUCCESS) {
931 RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
932 ret);
933 mutex_enter(&recvqp->qp_lock);
934 recvqp->qp_taskqpending = B_FALSE;
935 mutex_exit(&recvqp->qp_lock);
936 }
937 } else {
938 mutex_exit(&recvqp->qp_lock);
939 }
940
941 pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
942 ASSERT(pktp->dh_datalen != 0);
943
944 RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
945 "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
946 ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
947 pktp->dh_npkts, pktp->dh_psn);
948
949 RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
950 pktp->dh_npkts, pktp->dh_psn);
951
952 if (pktp->dh_npkts == 1) {
953 /* single pkt or last packet */
954 if (pktp->dh_psn != 0) {
955 /* last packet of a segmented message */
956 ASSERT(ep->ep_seglbp != NULL);
957 ep->ep_seglbp->buf_nextp = bp;
958 ep->ep_seglbp = bp;
959 rds_received_msg(ep, ep->ep_segfbp);
960 ep->ep_segfbp = NULL;
961 ep->ep_seglbp = NULL;
962 } else {
963 /* single packet */
964 rds_received_msg(ep, bp);
965 }
966 } else {
967 /* multi-pkt msg */
968 if (pktp->dh_psn == 0) {
969 /* first packet */
970 ASSERT(ep->ep_segfbp == NULL);
971 ep->ep_segfbp = bp;
972 ep->ep_seglbp = bp;
973 } else {
974 /* intermediate packet */
975 ASSERT(ep->ep_segfbp != NULL);
976 ep->ep_seglbp->buf_nextp = bp;
977 ep->ep_seglbp = bp;
978 }
979 }
980
981 RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);
982
983 return (ret);
984 }
985
986 void
rds_recvcq_handler(ibt_cq_hdl_t cq,void * arg)987 rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
988 {
989 rds_ep_t *ep;
990 int ret = IBT_SUCCESS;
991 int (*func)(ibt_cq_hdl_t, rds_ep_t *);
992
993 ep = (rds_ep_t *)arg;
994
995 RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);
996
997 if (ep->ep_type == RDS_EP_TYPE_DATA) {
998 func = rds_poll_data_completions;
999 } else {
1000 func = rds_poll_ctrl_completions;
1001 }
1002
1003 do {
1004 ret = func(cq, ep);
1005 } while (ret != IBT_CQ_EMPTY);
1006
1007 /* enable the CQ */
1008 ret = ibt_enable_cq_notify(cq, rds_wc_signal);
1009 if (ret != IBT_SUCCESS) {
1010 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
1011 "failed: %d", ep, cq, ret);
1012 return;
1013 }
1014
1015 do {
1016 ret = func(cq, ep);
1017 } while (ret != IBT_CQ_EMPTY);
1018
1019 RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
1020 }
1021
1022 void
rds_poll_send_completions(ibt_cq_hdl_t cq,rds_ep_t * ep,boolean_t lock)1023 rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
1024 {
1025 ibt_wc_t wc[RDS_NUM_DATA_SEND_WCS];
1026 uint_t npolled, nret, send_error = 0;
1027 rds_buf_t *headp, *tailp, *bp;
1028 int ret, ix;
1029
1030 RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);
1031
1032 headp = NULL;
1033 tailp = NULL;
1034 npolled = 0;
1035 do {
1036 ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
1037 if (ret != IBT_SUCCESS) {
1038 if (ret != IBT_CQ_EMPTY) {
1039 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
1040 "ibt_poll_cq returned: %d", ep, cq, ret);
1041 } else {
1042 RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
1043 "ibt_poll_cq returned: IBT_CQ_EMPTY",
1044 ep, cq);
1045 }
1046
1047 break;
1048 }
1049
1050 for (ix = 0; ix < nret; ix++) {
1051 if (wc[ix].wc_status == IBT_WC_SUCCESS) {
1052 if (wc[ix].wc_type == IBT_WRC_RDMAW) {
1053 rds_send_acknowledgement(ep);
1054 continue;
1055 }
1056
1057 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
1058 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
1059 bp->buf_state = RDS_SNDBUF_FREE;
1060 } else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
1061 RDS_INCR_TXERRS();
1062 RDS_DPRINTF5("rds_poll_send_completions",
1063 "EP(%p): WC ID: %p ERROR: %d", ep,
1064 wc[ix].wc_id, wc[ix].wc_status);
1065
1066 send_error = 1;
1067
1068 if (wc[ix].wc_id == RDS_RDMAW_WRID) {
1069 mutex_enter(&ep->ep_lock);
1070 ep->ep_rdmacnt--;
1071 mutex_exit(&ep->ep_lock);
1072 continue;
1073 }
1074
1075 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
1076 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
1077 bp->buf_state = RDS_SNDBUF_FREE;
1078 } else {
1079 RDS_INCR_TXERRS();
1080 RDS_DPRINTF2("rds_poll_send_completions",
1081 "EP(%p): WC ID: %p ERROR: %d", ep,
1082 wc[ix].wc_id, wc[ix].wc_status);
1083 if (send_error == 0) {
1084 rds_session_t *sp = ep->ep_sp;
1085
1086 /* don't let anyone send anymore */
1087 rw_enter(&sp->session_lock, RW_WRITER);
1088 if (sp->session_state !=
1089 RDS_SESSION_STATE_ERROR) {
1090 sp->session_state =
1091 RDS_SESSION_STATE_ERROR;
1092 /* Make this the active end */
1093 sp->session_type =
1094 RDS_SESSION_ACTIVE;
1095 }
1096 rw_exit(&sp->session_lock);
1097 }
1098
1099 send_error = 1;
1100
1101 if (wc[ix].wc_id == RDS_RDMAW_WRID) {
1102 mutex_enter(&ep->ep_lock);
1103 ep->ep_rdmacnt--;
1104 mutex_exit(&ep->ep_lock);
1105 continue;
1106 }
1107
1108 bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
1109 ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
1110 bp->buf_state = RDS_SNDBUF_FREE;
1111 }
1112
1113 bp->buf_nextp = NULL;
1114 if (headp) {
1115 tailp->buf_nextp = bp;
1116 tailp = bp;
1117 } else {
1118 headp = bp;
1119 tailp = bp;
1120 }
1121
1122 npolled++;
1123 }
1124
1125 if (rds_no_interrupts && (npolled > 100)) {
1126 break;
1127 }
1128
1129 if (rds_no_interrupts == 1) {
1130 break;
1131 }
1132 } while (ret != IBT_CQ_EMPTY);
1133
1134 RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
1135 npolled, send_error);
1136
1137 /* put the buffers to the pool */
1138 if (npolled != 0) {
1139 rds_free_send_buf(ep, headp, tailp, npolled, lock);
1140 }
1141
1142 if (send_error != 0) {
1143 rds_handle_send_error(ep);
1144 }
1145
1146 RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
1147 }
1148
1149 void
rds_sendcq_handler(ibt_cq_hdl_t cq,void * arg)1150 rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
1151 {
1152 rds_ep_t *ep;
1153 int ret;
1154
1155 ep = (rds_ep_t *)arg;
1156
1157 RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);
1158
1159 /* enable the CQ */
1160 ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
1161 if (ret != IBT_SUCCESS) {
1162 RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
1163 "failed: %d", ep, cq, ret);
1164 return;
1165 }
1166
1167 rds_poll_send_completions(cq, ep, B_FALSE);
1168
1169 RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
1170 }
1171
1172 void
rds_ep_free_rc_channel(rds_ep_t * ep)1173 rds_ep_free_rc_channel(rds_ep_t *ep)
1174 {
1175 int ret;
1176
1177 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);
1178
1179 ASSERT(mutex_owned(&ep->ep_lock));
1180
1181 /* free the QP */
1182 if (ep->ep_chanhdl != NULL) {
1183 /* wait until the RQ is empty */
1184 (void) ibt_flush_channel(ep->ep_chanhdl);
1185 (void) rds_is_recvq_empty(ep, B_TRUE);
1186 ret = ibt_free_channel(ep->ep_chanhdl);
1187 if (ret != IBT_SUCCESS) {
1188 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) "
1189 "ibt_free_channel returned: %d", ep, ret);
1190 }
1191 ep->ep_chanhdl = NULL;
1192 } else {
1193 RDS_DPRINTF2("rds_ep_free_rc_channel",
1194 "EP(%p) Channel is ALREADY FREE", ep);
1195 }
1196
1197 /* free the Send CQ */
1198 if (ep->ep_sendcq != NULL) {
1199 ret = ibt_free_cq(ep->ep_sendcq);
1200 if (ret != IBT_SUCCESS) {
1201 RDS_DPRINTF2("rds_ep_free_rc_channel",
1202 "EP(%p) - for sendcq, ibt_free_cq returned %d",
1203 ep, ret);
1204 }
1205 ep->ep_sendcq = NULL;
1206 } else {
1207 RDS_DPRINTF2("rds_ep_free_rc_channel",
1208 "EP(%p) SendCQ is ALREADY FREE", ep);
1209 }
1210
1211 /* free the Recv CQ */
1212 if (ep->ep_recvcq != NULL) {
1213 ret = ibt_free_cq(ep->ep_recvcq);
1214 if (ret != IBT_SUCCESS) {
1215 RDS_DPRINTF2("rds_ep_free_rc_channel",
1216 "EP(%p) - for recvcq, ibt_free_cq returned %d",
1217 ep, ret);
1218 }
1219 ep->ep_recvcq = NULL;
1220 } else {
1221 RDS_DPRINTF2("rds_ep_free_rc_channel",
1222 "EP(%p) RecvCQ is ALREADY FREE", ep);
1223 }
1224
1225 RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
1226 }
1227
1228 /* Allocate resources for RC channel */
1229 ibt_channel_hdl_t
rds_ep_alloc_rc_channel(rds_ep_t * ep,uint8_t hca_port)1230 rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
1231 {
1232 int ret = IBT_SUCCESS;
1233 ibt_cq_attr_t scqattr, rcqattr;
1234 ibt_rc_chan_alloc_args_t chanargs;
1235 ibt_channel_hdl_t chanhdl;
1236 rds_session_t *sp;
1237 rds_hca_t *hcap;
1238
1239 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
1240 ep, hca_port);
1241
1242 /* Update the EP with the right IP address and HCA guid */
1243 sp = ep->ep_sp;
1244 ASSERT(sp != NULL);
1245 rw_enter(&sp->session_lock, RW_READER);
1246 mutex_enter(&ep->ep_lock);
1247 ep->ep_myip = sp->session_myip;
1248 ep->ep_remip = sp->session_remip;
1249 hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
1250 ep->ep_hca_guid = hcap->hca_guid;
1251 mutex_exit(&ep->ep_lock);
1252 rw_exit(&sp->session_lock);
1253
1254 /* reset taskqpending flag here */
1255 ep->ep_recvqp.qp_taskqpending = B_FALSE;
1256
1257 if (ep->ep_type == RDS_EP_TYPE_CTRL) {
1258 scqattr.cq_size = MaxCtrlSendBuffers;
1259 scqattr.cq_sched = NULL;
1260 scqattr.cq_flags = IBT_CQ_NO_FLAGS;
1261
1262 rcqattr.cq_size = MaxCtrlRecvBuffers;
1263 rcqattr.cq_sched = NULL;
1264 rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
1265
1266 chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers;
1267 chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers;
1268 chanargs.rc_sizes.cs_sq_sgl = 1;
1269 chanargs.rc_sizes.cs_rq_sgl = 1;
1270 } else {
1271 scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS;
1272 scqattr.cq_sched = NULL;
1273 scqattr.cq_flags = IBT_CQ_NO_FLAGS;
1274
1275 rcqattr.cq_size = MaxDataRecvBuffers;
1276 rcqattr.cq_sched = NULL;
1277 rcqattr.cq_flags = IBT_CQ_NO_FLAGS;
1278
1279 chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS;
1280 chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers;
1281 chanargs.rc_sizes.cs_sq_sgl = 1;
1282 chanargs.rc_sizes.cs_rq_sgl = 1;
1283 }
1284
1285 mutex_enter(&ep->ep_lock);
1286 if (ep->ep_sendcq == NULL) {
1287 /* returned size is always greater than the requested size */
1288 ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr,
1289 &ep->ep_sendcq, NULL);
1290 if (ret != IBT_SUCCESS) {
1291 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ "
1292 "failed, size = %d: %d", scqattr.cq_size, ret);
1293 mutex_exit(&ep->ep_lock);
1294 return (NULL);
1295 }
1296
1297 (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler,
1298 ep);
1299
1300 if (rds_no_interrupts == 0) {
1301 ret = ibt_enable_cq_notify(ep->ep_sendcq,
1302 IBT_NEXT_COMPLETION);
1303 if (ret != IBT_SUCCESS) {
1304 RDS_DPRINTF2(LABEL,
1305 "ibt_enable_cq_notify failed: %d", ret);
1306 (void) ibt_free_cq(ep->ep_sendcq);
1307 ep->ep_sendcq = NULL;
1308 mutex_exit(&ep->ep_lock);
1309 return (NULL);
1310 }
1311 }
1312 }
1313
1314 if (ep->ep_recvcq == NULL) {
1315 /* returned size is always greater than the requested size */
1316 ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr,
1317 &ep->ep_recvcq, NULL);
1318 if (ret != IBT_SUCCESS) {
1319 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ "
1320 "failed, size = %d: %d", rcqattr.cq_size, ret);
1321 (void) ibt_free_cq(ep->ep_sendcq);
1322 ep->ep_sendcq = NULL;
1323 mutex_exit(&ep->ep_lock);
1324 return (NULL);
1325 }
1326
1327 (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler,
1328 ep);
1329
1330 ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal);
1331 if (ret != IBT_SUCCESS) {
1332 RDS_DPRINTF2(LABEL,
1333 "ibt_enable_cq_notify failed: %d", ret);
1334 (void) ibt_free_cq(ep->ep_recvcq);
1335 ep->ep_recvcq = NULL;
1336 (void) ibt_free_cq(ep->ep_sendcq);
1337 ep->ep_sendcq = NULL;
1338 mutex_exit(&ep->ep_lock);
1339 return (NULL);
1340 }
1341 }
1342
1343 chanargs.rc_flags = IBT_ALL_SIGNALED;
1344 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1345 IBT_CEP_ATOMIC;
1346 chanargs.rc_hca_port_num = hca_port;
1347 chanargs.rc_scq = ep->ep_sendcq;
1348 chanargs.rc_rcq = ep->ep_recvcq;
1349 chanargs.rc_pd = hcap->hca_pdhdl;
1350 chanargs.rc_srq = NULL;
1351
1352 ret = ibt_alloc_rc_channel(hcap->hca_hdl,
1353 IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL);
1354 if (ret != IBT_SUCCESS) {
1355 RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d",
1356 ret);
1357 (void) ibt_free_cq(ep->ep_recvcq);
1358 ep->ep_recvcq = NULL;
1359 (void) ibt_free_cq(ep->ep_sendcq);
1360 ep->ep_sendcq = NULL;
1361 mutex_exit(&ep->ep_lock);
1362 return (NULL);
1363 }
1364 mutex_exit(&ep->ep_lock);
1365
1366 /* Chan private should contain the ep */
1367 (void) ibt_set_chan_private(chanhdl, ep);
1368
1369 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl);
1370
1371 return (chanhdl);
1372 }
1373
1374
1375 #if 0
1376
1377 /* Return node guid given a port gid */
1378 ib_guid_t
1379 rds_gid_to_node_guid(ib_gid_t gid)
1380 {
1381 ibt_node_info_t nodeinfo;
1382 int ret;
1383
1384 RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx",
1385 gid.gid_prefix, gid.gid_guid);
1386
1387 ret = ibt_gid_to_node_info(gid, &nodeinfo);
1388 if (ret != IBT_SUCCESS) {
1389 RDS_DPRINTF2(LABEL, "ibt_gid_node_info for gid: %llx:%llx "
1390 "failed", gid.gid_prefix, gid.gid_guid);
1391 return (0LL);
1392 }
1393
1394 RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx",
1395 nodeinfo.n_node_guid);
1396
1397 return (nodeinfo.n_node_guid);
1398 }
1399
1400 #endif
1401
1402 static void
rds_handle_portup_event(rds_state_t * statep,ibt_hca_hdl_t hdl,ibt_async_event_t * event)1403 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl,
1404 ibt_async_event_t *event)
1405 {
1406 rds_hca_t *hcap;
1407 ibt_hca_portinfo_t *newpinfop, *oldpinfop;
1408 uint_t newsize, oldsize, nport;
1409 ib_gid_t gid;
1410 int ret;
1411
1412 RDS_DPRINTF2("rds_handle_portup_event",
1413 "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep);
1414
1415 rw_enter(&statep->rds_hca_lock, RW_WRITER);
1416
1417 hcap = statep->rds_hcalistp;
1418 while ((hcap != NULL) && (hcap->hca_guid != event->ev_hca_guid)) {
1419 hcap = hcap->hca_nextp;
1420 }
1421
1422 if (hcap == NULL) {
1423 RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is "
1424 "not in our list", event->ev_hca_guid);
1425 rw_exit(&statep->rds_hca_lock);
1426 return;
1427 }
1428
1429 ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize);
1430 if (ret != IBT_SUCCESS) {
1431 RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret);
1432 rw_exit(&statep->rds_hca_lock);
1433 return;
1434 }
1435
1436 oldpinfop = hcap->hca_pinfop;
1437 oldsize = hcap->hca_pinfo_sz;
1438 hcap->hca_pinfop = newpinfop;
1439 hcap->hca_pinfo_sz = newsize;
1440
1441 (void) ibt_free_portinfo(oldpinfop, oldsize);
1442
1443 /* If RDS service is not registered then no bind is needed */
1444 if (statep->rds_srvhdl == NULL) {
1445 RDS_DPRINTF2("rds_handle_portup_event",
1446 "RDS Service is not registered, so no action needed");
1447 rw_exit(&statep->rds_hca_lock);
1448 return;
1449 }
1450
1451 /*
1452 * If the service was previously bound on this port and
1453 * if this port has changed state down and now up, we do not
1454 * need to bind the service again. The bind is expected to
1455 * persist across state changes. If the service was never bound
1456 * before then we bind it this time.
1457 */
1458 if (hcap->hca_bindhdl[event->ev_port - 1] == NULL) {
1459
1460 /* structure copy */
1461 gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0];
1462
1463 /* bind RDS service on the port, pass statep as cm_private */
1464 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep,
1465 &hcap->hca_bindhdl[event->ev_port - 1]);
1466 if (ret != IBT_SUCCESS) {
1467 RDS_DPRINTF2("rds_handle_portup_event",
1468 "Bind service for HCA: 0x%llx Port: %d "
1469 "gid %llx:%llx returned: %d", event->ev_hca_guid,
1470 event->ev_port, gid.gid_prefix, gid.gid_guid, ret);
1471 }
1472 }
1473
1474 rw_exit(&statep->rds_hca_lock);
1475
1476 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 0x%llx",
1477 event->ev_hca_guid);
1478 }
1479
1480 static void
rdsib_add_hca(ib_guid_t hca_guid)1481 rdsib_add_hca(ib_guid_t hca_guid)
1482 {
1483 rds_hca_t *hcap;
1484 ibt_mr_attr_t mem_attr;
1485 ibt_mr_desc_t mem_desc;
1486 int ret;
1487
1488 RDS_DPRINTF2("rdsib_add_hca", "Enter: GUID: 0x%llx", hca_guid);
1489
1490 hcap = rdsib_init_hca(hca_guid);
1491 if (hcap == NULL)
1492 return;
1493
1494 /* register the recv memory with this hca */
1495 mutex_enter(&rds_dpool.pool_lock);
1496 if (rds_dpool.pool_memp == NULL) {
1497 /* no memory to register */
1498 RDS_DPRINTF2("rdsib_add_hca", "No memory to register");
1499 mutex_exit(&rds_dpool.pool_lock);
1500 return;
1501 }
1502
1503 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)rds_dpool.pool_memp;
1504 mem_attr.mr_len = rds_dpool.pool_memsize;
1505 mem_attr.mr_as = NULL;
1506 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
1507
1508 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, &mem_attr,
1509 &hcap->hca_mrhdl, &mem_desc);
1510
1511 mutex_exit(&rds_dpool.pool_lock);
1512
1513 if (ret != IBT_SUCCESS) {
1514 RDS_DPRINTF2("rdsib_add_hca", "ibt_register_mr failed: %d",
1515 ret);
1516 } else {
1517 rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
1518 hcap->hca_state = RDS_HCA_STATE_MEM_REGISTERED;
1519 hcap->hca_lkey = mem_desc.md_lkey;
1520 hcap->hca_rkey = mem_desc.md_rkey;
1521 rw_exit(&rdsib_statep->rds_hca_lock);
1522 }
1523
1524 RDS_DPRINTF2("rdsib_add_hca", "Retrun: GUID: 0x%llx", hca_guid);
1525 }
1526
1527 void rds_close_this_session(rds_session_t *sp, uint8_t wait);
1528 int rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port);
1529
1530 static void
rdsib_del_hca(rds_state_t * statep,ib_guid_t hca_guid)1531 rdsib_del_hca(rds_state_t *statep, ib_guid_t hca_guid)
1532 {
1533 rds_session_t *sp;
1534 rds_hca_t *hcap;
1535 rds_hca_state_t saved_state;
1536 int ret, ix;
1537
1538 RDS_DPRINTF2("rdsib_del_hca", "Enter: GUID: 0x%llx", hca_guid);
1539
1540 /*
1541 * This should be a write lock as we don't want anyone to get access
1542 * to the hcap while we are modifing its contents
1543 */
1544 rw_enter(&statep->rds_hca_lock, RW_WRITER);
1545
1546 hcap = statep->rds_hcalistp;
1547 while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
1548 hcap = hcap->hca_nextp;
1549 }
1550
1551 /* Prevent initiating any new activity on this HCA */
1552 ASSERT(hcap != NULL);
1553 saved_state = hcap->hca_state;
1554 hcap->hca_state = RDS_HCA_STATE_STOPPING;
1555
1556 rw_exit(&statep->rds_hca_lock);
1557
1558 /*
1559 * stop the outgoing traffic and close any active sessions on this hca.
1560 * Any pending messages in the SQ will be allowed to complete.
1561 */
1562 rw_enter(&statep->rds_sessionlock, RW_READER);
1563 sp = statep->rds_sessionlistp;
1564 while (sp) {
1565 if (sp->session_hca_guid != hca_guid) {
1566 sp = sp->session_nextp;
1567 continue;
1568 }
1569
1570 rw_enter(&sp->session_lock, RW_WRITER);
1571 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
1572 sp->session_state);
1573 /*
1574 * We are changing the session state in advance. This prevents
1575 * further messages to be posted to the SQ. We then
1576 * send a control message to the remote and tell it close
1577 * the session.
1578 */
1579 sp->session_state = RDS_SESSION_STATE_HCA_CLOSING;
1580 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State "
1581 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp);
1582 rw_exit(&sp->session_lock);
1583
1584 /*
1585 * wait until the sendq is empty then tell the remote to
1586 * close this session. This enables for graceful shutdown of
1587 * the session
1588 */
1589 (void) rds_is_sendq_empty(&sp->session_dataep, 2);
1590 (void) rds_post_control_message(sp,
1591 RDS_CTRL_CODE_CLOSE_SESSION, 0);
1592
1593 sp = sp->session_nextp;
1594 }
1595
1596 /* wait until all the sessions are off this HCA */
1597 sp = statep->rds_sessionlistp;
1598 while (sp) {
1599 if (sp->session_hca_guid != hca_guid) {
1600 sp = sp->session_nextp;
1601 continue;
1602 }
1603
1604 rw_enter(&sp->session_lock, RW_READER);
1605 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
1606 sp->session_state);
1607
1608 while ((sp->session_state == RDS_SESSION_STATE_HCA_CLOSING) ||
1609 (sp->session_state == RDS_SESSION_STATE_ERROR) ||
1610 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING) ||
1611 (sp->session_state == RDS_SESSION_STATE_CLOSED)) {
1612 rw_exit(&sp->session_lock);
1613 delay(drv_usectohz(1000000));
1614 rw_enter(&sp->session_lock, RW_READER);
1615 RDS_DPRINTF2("rdsib_del_hca", "SP(%p) State: %d", sp,
1616 sp->session_state);
1617 }
1618
1619 rw_exit(&sp->session_lock);
1620
1621 sp = sp->session_nextp;
1622 }
1623 rw_exit(&statep->rds_sessionlock);
1624
1625 /*
1626 * if rdsib_close_ib was called before this, then that would have
1627 * unbound the service on all ports. In that case, the HCA structs
1628 * will contain stale bindhdls. Hence, we do not call unbind unless
1629 * the service is still registered.
1630 */
1631 if (statep->rds_srvhdl != NULL) {
1632 /* unbind RDS service on all ports on this HCA */
1633 for (ix = 0; ix < hcap->hca_nports; ix++) {
1634 if (hcap->hca_bindhdl[ix] == NULL) {
1635 continue;
1636 }
1637
1638 RDS_DPRINTF2("rdsib_del_hca",
1639 "Unbinding Service: port: %d, bindhdl: %p",
1640 ix + 1, hcap->hca_bindhdl[ix]);
1641 (void) ibt_unbind_service(rdsib_statep->rds_srvhdl,
1642 hcap->hca_bindhdl[ix]);
1643 hcap->hca_bindhdl[ix] = NULL;
1644 }
1645 }
1646
1647 RDS_DPRINTF2("rdsib_del_hca", "HCA(%p) State: %d", hcap,
1648 hcap->hca_state);
1649
1650 switch (saved_state) {
1651 case RDS_HCA_STATE_MEM_REGISTERED:
1652 ASSERT(hcap->hca_mrhdl != NULL);
1653 ret = ibt_deregister_mr(hcap->hca_hdl, hcap->hca_mrhdl);
1654 if (ret != IBT_SUCCESS) {
1655 RDS_DPRINTF2("rdsib_del_hca",
1656 "ibt_deregister_mr failed: %d", ret);
1657 return;
1658 }
1659 hcap->hca_mrhdl = NULL;
1660 /* FALLTHRU */
1661 case RDS_HCA_STATE_OPEN:
1662 ASSERT(hcap->hca_hdl != NULL);
1663 ASSERT(hcap->hca_pdhdl != NULL);
1664
1665
1666 ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
1667 if (ret != IBT_SUCCESS) {
1668 RDS_DPRINTF2("rdsib_del_hca",
1669 "ibt_free_pd failed: %d", ret);
1670 }
1671
1672 (void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);
1673
1674 ret = ibt_close_hca(hcap->hca_hdl);
1675 if (ret != IBT_SUCCESS) {
1676 RDS_DPRINTF2("rdsib_del_hca",
1677 "ibt_close_hca failed: %d", ret);
1678 }
1679
1680 hcap->hca_hdl = NULL;
1681 hcap->hca_pdhdl = NULL;
1682 hcap->hca_lkey = 0;
1683 hcap->hca_rkey = 0;
1684 }
1685
1686 /*
1687 * This should be a write lock as we don't want anyone to get access
1688 * to the hcap while we are modifing its contents
1689 */
1690 rw_enter(&statep->rds_hca_lock, RW_WRITER);
1691 hcap->hca_state = RDS_HCA_STATE_REMOVED;
1692 rw_exit(&statep->rds_hca_lock);
1693
1694 RDS_DPRINTF2("rdsib_del_hca", "Return: GUID: 0x%llx", hca_guid);
1695 }
1696
1697 static void
rds_async_handler(void * clntp,ibt_hca_hdl_t hdl,ibt_async_code_t code,ibt_async_event_t * event)1698 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
1699 ibt_async_event_t *event)
1700 {
1701 rds_state_t *statep = (rds_state_t *)clntp;
1702
1703 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code);
1704
1705 switch (code) {
1706 case IBT_EVENT_PORT_UP:
1707 rds_handle_portup_event(statep, hdl, event);
1708 break;
1709 case IBT_HCA_ATTACH_EVENT:
1710 /*
1711 * NOTE: In some error recovery paths, it is possible to
1712 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
1713 */
1714 (void) rdsib_add_hca(event->ev_hca_guid);
1715 break;
1716 case IBT_HCA_DETACH_EVENT:
1717 (void) rdsib_del_hca(statep, event->ev_hca_guid);
1718 break;
1719
1720 default:
1721 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code);
1722 }
1723
1724 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code);
1725 }
1726
1727 /*
1728 * This routine exists to minimize stale connections across ungraceful
1729 * reboots of nodes in a cluster.
1730 */
1731 void
rds_randomize_qps(rds_hca_t * hcap)1732 rds_randomize_qps(rds_hca_t *hcap)
1733 {
1734 ibt_cq_attr_t cqattr;
1735 ibt_rc_chan_alloc_args_t chanargs;
1736 ibt_channel_hdl_t qp1, qp2;
1737 ibt_cq_hdl_t cq_hdl;
1738 hrtime_t nsec;
1739 uint8_t i, j, rand1, rand2;
1740 int ret;
1741
1742 bzero(&cqattr, sizeof (ibt_cq_attr_t));
1743 cqattr.cq_size = 1;
1744 cqattr.cq_sched = NULL;
1745 cqattr.cq_flags = IBT_CQ_NO_FLAGS;
1746 ret = ibt_alloc_cq(hcap->hca_hdl, &cqattr, &cq_hdl, NULL);
1747 if (ret != IBT_SUCCESS) {
1748 RDS_DPRINTF2("rds_randomize_qps",
1749 "ibt_alloc_cq failed: %d", ret);
1750 return;
1751 }
1752
1753 bzero(&chanargs, sizeof (ibt_rc_chan_alloc_args_t));
1754 chanargs.rc_flags = IBT_ALL_SIGNALED;
1755 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1756 IBT_CEP_ATOMIC;
1757 chanargs.rc_hca_port_num = 1;
1758 chanargs.rc_scq = cq_hdl;
1759 chanargs.rc_rcq = cq_hdl;
1760 chanargs.rc_pd = hcap->hca_pdhdl;
1761 chanargs.rc_srq = NULL;
1762
1763 nsec = gethrtime();
1764 rand1 = (nsec & 0xF);
1765 rand2 = (nsec >> 4) & 0xF;
1766 RDS_DPRINTF2("rds_randomize_qps", "rand1: %d rand2: %d",
1767 rand1, rand2);
1768
1769 for (i = 0; i < rand1 + 3; i++) {
1770 if (ibt_alloc_rc_channel(hcap->hca_hdl,
1771 IBT_ACHAN_NO_FLAGS, &chanargs, &qp1, NULL) !=
1772 IBT_SUCCESS) {
1773 RDS_DPRINTF2("rds_randomize_qps",
1774 "Bailing at i: %d", i);
1775 (void) ibt_free_cq(cq_hdl);
1776 return;
1777 }
1778 for (j = 0; j < rand2 + 3; j++) {
1779 if (ibt_alloc_rc_channel(hcap->hca_hdl,
1780 IBT_ACHAN_NO_FLAGS, &chanargs, &qp2,
1781 NULL) != IBT_SUCCESS) {
1782 RDS_DPRINTF2("rds_randomize_qps",
1783 "Bailing at i: %d j: %d", i, j);
1784 (void) ibt_free_channel(qp1);
1785 (void) ibt_free_cq(cq_hdl);
1786 return;
1787 }
1788 (void) ibt_free_channel(qp2);
1789 }
1790 (void) ibt_free_channel(qp1);
1791 }
1792
1793 (void) ibt_free_cq(cq_hdl);
1794 }
1795