1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/types.h>
26 #include <sys/ddi.h>
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <netinet/in.h>
30 #include <sys/sunddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/iscsi_protocol.h>
33
34 #include <sys/ib/clients/iser/iser.h>
35 #include <sys/ib/clients/iser/iser_idm.h>
36
37 /*
38 * iser_ib.c
39 * Routines for InfiniBand transport for iSER
40 *
41 * This file contains the routines to interface with the IBT API to attach and
42 * allocate IB resources, handle async events, and post recv work requests.
43 *
44 */
45
46 static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
47 static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);
48
49 static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
50 static int iser_ib_free_hca(iser_hca_t *hca);
51 static int iser_ib_update_hcaports(iser_hca_t *hca);
52 static int iser_ib_init_hcas(void);
53 static int iser_ib_fini_hcas(void);
54
55 static iser_sbind_t *iser_ib_get_bind(
56 iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
57 static int iser_ib_activate_port(
58 idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
59 static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);
60
61 static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
62 static void iser_ib_fini_qp(iser_qp_t *qp);
63
64 static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
65 ibt_cq_hdl_t *cq_hdl);
66
67 static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
68 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
69 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);
70
71 static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
72 ibt_async_event_t *event);
73 static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
74 ibt_async_event_t *event);
75 static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
76 ibt_async_event_t *event);
77
78 static void iser_ib_post_recv_task(void *arg);
79
/*
 * IBTF client registration info, passed to ibt_attach() in
 * iser_ib_init().  Fields are positional: interface version, client
 * class, unaffiliated async event handler, reserved pointer, and the
 * client name string.
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	iser_ib_async_handler,
	NULL,
	"iSER"
};
87
88 /*
89 * iser_ib_init
90 *
91 * This function registers the HCA drivers with IBTF and registers and binds
92 * iSER as a service with IBTF.
93 */
94 int
iser_ib_init(void)95 iser_ib_init(void)
96 {
97 int status;
98
99 /* Register with IBTF */
100 status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
101 &iser_state->is_ibhdl);
102 if (status != DDI_SUCCESS) {
103 ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
104 status);
105 return (DDI_FAILURE);
106 }
107
108 /* Create the global work request kmem_cache */
109 iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
110 sizeof (iser_wr_t), 0, NULL, NULL, NULL,
111 iser_state, NULL, KM_SLEEP);
112
113 /* Populate our list of HCAs */
114 status = iser_ib_init_hcas();
115 if (status != DDI_SUCCESS) {
116 /* HCAs failed to initialize, tear it down */
117 kmem_cache_destroy(iser_state->iser_wr_cache);
118 (void) ibt_detach(iser_state->is_ibhdl);
119 iser_state->is_ibhdl = NULL;
120 ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
121 return (DDI_FAILURE);
122 }
123
124 /* Target will register iSER as a service with IBTF when required */
125
126 /* Target will bind this service when it comes online */
127
128 return (DDI_SUCCESS);
129 }
130
131 /*
132 * iser_ib_fini
133 *
134 * This function unbinds and degisters the iSER service from IBTF
135 */
136 int
iser_ib_fini(void)137 iser_ib_fini(void)
138 {
139 /* IDM would have already disabled all the services */
140
141 /* Teardown the HCA list and associated resources */
142 if (iser_ib_fini_hcas() != DDI_SUCCESS)
143 return (DDI_FAILURE);
144
145 /* Teardown the global work request kmem_cache */
146 kmem_cache_destroy(iser_state->iser_wr_cache);
147
148 /* Deregister with IBTF */
149 if (iser_state->is_ibhdl != NULL) {
150 (void) ibt_detach(iser_state->is_ibhdl);
151 iser_state->is_ibhdl = NULL;
152 }
153
154 return (DDI_SUCCESS);
155 }
156
157 /*
158 * iser_ib_register_service
159 *
160 * This function registers the iSER service using the RDMA-Aware Service ID.
161 */
162 int
iser_ib_register_service(idm_svc_t * idm_svc)163 iser_ib_register_service(idm_svc_t *idm_svc)
164 {
165 ibt_srv_desc_t srvdesc;
166 iser_svc_t *iser_svc;
167 int status;
168
169 bzero(&srvdesc, sizeof (ibt_srv_desc_t));
170
171 /* Set up IBTI client callback handler from the CM */
172 srvdesc.sd_handler = iser_ib_cm_handler;
173
174 srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
175
176 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
177
178 /* Register the service on the specified port */
179 status = ibt_register_service(
180 iser_state->is_ibhdl, &srvdesc,
181 iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);
182
183 return (status);
184 }
185
/*
 * iser_ib_bind_service
 *
 * This function binds a given iSER service on all available HCA ports. The
 * current specification does not allow user to specify transport bindings
 * for each iscsi target. The ULP invokes this function to bind the target
 * to all available iser ports after checking for the presence of an IB HCA.
 * iSER is "configured" whenever an IB-capable IP address exists. The lack
 * of active IB ports is a less-fatal condition, and sockets would be used
 * as the transport even though an Infiniband HCA is configured but unusable.
 *
 * Returns ISER_STATUS_SUCCESS if at least one port was bound (or is
 * already bound), or if all ports are merely inactive (the async
 * handler will bind them on PORT_UP); ISER_STATUS_FAIL otherwise.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	int		num_ports = 0;
	int		num_binds = 0;
	int		num_inactive_binds = 0; /* if HCA ports inactive */
	int		status;
	int		i;

	ASSERT(idm_svc != NULL);
	ASSERT(idm_svc->is_iser_svc != NULL);

	/*
	 * Register the iSER service on all available ports; the HCA
	 * list lock is held across the whole walk.
	 */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = list_next(&iser_state->is_hcalist, hca)) {

		for (i = 0; i < hca->hca_num_ports; i++) {
			num_ports++;
			if (hca->hca_port_info[i].p_linkstate !=
			    IBT_PORT_ACTIVE) {
				/*
				 * Move on. We will attempt to bind service
				 * in our async handler if the port comes up
				 * at a later time.
				 */
				num_inactive_binds++;
				continue;
			}

			/* Use the first GID of this active port */
			gid = hca->hca_port_info[i].p_sgid_tbl[0];

			/*
			 * If the port is already bound, skip the bind but
			 * still count it below as a successful binding.
			 */
			if (iser_ib_get_bind(
			    idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) {

				status = iser_ib_activate_port(
				    idm_svc, hca->hca_guid, gid);
				if (status != IBT_SUCCESS) {
					ISER_LOG(CE_NOTE,
					    "iser_ib_bind_service: "
					    "iser_ib_activate_port failure "
					    "(0x%x)", status);
					continue;
				}
			}
			num_binds++;
		}
	}
	mutex_exit(&iser_state->is_hcalist_lock);

	if (num_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
		    "(%d) of (%d) ports", num_binds, num_ports);
		return (ISER_STATUS_SUCCESS);
	} else if (num_inactive_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind "
		    "service, HCA ports are not active.");
		/*
		 * still considered success, the async handler will bind
		 * the service when the port comes up at a later time
		 */
		return (ISER_STATUS_SUCCESS);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
		return (ISER_STATUS_FAIL);
	}
}
270
271 /*
272 * iser_ib_unbind_service
273 *
274 * This function unbinds a given service on a all HCA ports
275 */
276 void
iser_ib_unbind_service(idm_svc_t * idm_svc)277 iser_ib_unbind_service(idm_svc_t *idm_svc)
278 {
279 iser_svc_t *iser_svc;
280 iser_sbind_t *is_sbind, *next_sb;
281
282 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {
283
284 iser_svc = idm_svc->is_iser_svc;
285
286 for (is_sbind = list_head(&iser_svc->is_sbindlist);
287 is_sbind != NULL;
288 is_sbind = next_sb) {
289 next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
290 (void) ibt_unbind_service(iser_svc->is_srvhdl,
291 is_sbind->is_sbindhdl);
292 list_remove(&iser_svc->is_sbindlist, is_sbind);
293 kmem_free(is_sbind, sizeof (iser_sbind_t));
294 }
295 }
296 }
297
298 /* ARGSUSED */
299 void
iser_ib_deregister_service(idm_svc_t * idm_svc)300 iser_ib_deregister_service(idm_svc_t *idm_svc)
301 {
302 iser_svc_t *iser_svc;
303
304 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {
305
306 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
307 (void) ibt_deregister_service(iser_state->is_ibhdl,
308 iser_svc->is_srvhdl);
309 (void) ibt_release_ip_sid(iser_svc->is_svcid);
310 }
311 }
312
313 /*
314 * iser_ib_get_paths
315 * This function finds the IB path between the local and the remote address.
316 *
317 */
318 int
iser_ib_get_paths(ibt_ip_addr_t * local_ip,ibt_ip_addr_t * remote_ip,ibt_path_info_t * path,ibt_path_ip_src_t * path_src_ip)319 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
320 ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
321 {
322 ibt_ip_path_attr_t ipattr;
323 int status;
324
325 (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
326 ipattr.ipa_dst_ip = remote_ip;
327 ipattr.ipa_src_ip = *local_ip;
328 ipattr.ipa_max_paths = 1;
329 ipattr.ipa_ndst = 1;
330
331 (void) bzero(path, sizeof (ibt_path_info_t));
332 status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
333 &ipattr, path, NULL, path_src_ip);
334 if (status != IBT_SUCCESS) {
335 ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths "
336 "failure: status (%d)", status);
337 return (status);
338 }
339
340 if (local_ip != NULL) {
341 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
342 local_ip->un.ip4addr, remote_ip->un.ip4addr);
343 } else {
344 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
345 "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
346 }
347
348 return (ISER_STATUS_SUCCESS);
349 }
350
351 /*
352 * iser_ib_alloc_channel_nopathlookup
353 *
354 * This function allocates a reliable connected channel. This function does
355 * not invoke ibt_get_ip_paths() to do the path lookup. The HCA GUID and
356 * port are input to this function.
357 */
358 iser_chan_t *
iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid,uint8_t hca_port)359 iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port)
360 {
361 iser_hca_t *hca;
362 iser_chan_t *chan;
363
364 /* Lookup the hca using the gid in the path info */
365 hca = iser_ib_guid2hca(hca_guid);
366 if (hca == NULL) {
367 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
368 "to lookup HCA(%llx) handle", (longlong_t)hca_guid);
369 return (NULL);
370 }
371
372 chan = iser_ib_alloc_rc_channel(hca, hca_port);
373 if (chan == NULL) {
374 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
375 "to alloc channel on HCA(%llx) %d",
376 (longlong_t)hca_guid, hca_port);
377 return (NULL);
378 }
379
380 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: "
381 "chanhdl (0x%p), HCA(%llx) %d",
382 (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port);
383
384 return (chan);
385 }
386
387 /*
388 * iser_ib_alloc_channel_pathlookup
389 *
390 * This function allocates a reliable connected channel but first invokes
391 * ibt_get_ip_paths() with the given local and remote addres to get the
392 * HCA lgid and the port number.
393 */
394 iser_chan_t *
iser_ib_alloc_channel_pathlookup(ibt_ip_addr_t * local_ip,ibt_ip_addr_t * remote_ip)395 iser_ib_alloc_channel_pathlookup(
396 ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
397 {
398 ibt_path_info_t ibt_path;
399 ibt_path_ip_src_t path_src_ip;
400 ib_gid_t lgid;
401 uint8_t hca_port; /* from path */
402 iser_hca_t *hca;
403 iser_chan_t *chan;
404 int status;
405
406 /* Lookup a path to the given destination */
407 status = iser_ib_get_paths(
408 local_ip, remote_ip, &ibt_path, &path_src_ip);
409
410 if (status != ISER_STATUS_SUCCESS) {
411 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: faild "
412 "Path lookup IP:[%llx to %llx] failed: status (%d)",
413 (longlong_t)local_ip->un.ip4addr,
414 (longlong_t)remote_ip->un.ip4addr,
415 status);
416 return (NULL);
417 }
418
419 /* get the local gid from the path info */
420 lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;
421
422 /* get the hca port from the path info */
423 hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num;
424
425 /* Lookup the hca using the gid in the path info */
426 hca = iser_ib_gid2hca(lgid);
427 if (hca == NULL) {
428 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
429 "to lookup HCA (%llx) handle",
430 (longlong_t)hca->hca_guid);
431 return (NULL);
432 }
433
434 chan = iser_ib_alloc_rc_channel(hca, hca_port);
435 if (chan == NULL) {
436 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
437 "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d",
438 (longlong_t)local_ip->un.ip4addr,
439 (longlong_t)remote_ip->un.ip4addr,
440 (longlong_t)hca->hca_guid, hca_port);
441 return (NULL);
442 }
443
444 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: "
445 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
446 (void *)chan->ic_chanhdl,
447 (longlong_t)local_ip->un.ip4addr,
448 (longlong_t)remote_ip->un.ip4addr,
449 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
450 (longlong_t)hca->hca_guid, hca_port);
451
452 chan->ic_ibt_path = ibt_path;
453 chan->ic_localip = path_src_ip.ip_primary;
454 chan->ic_remoteip = *remote_ip;
455
456 return (chan);
457 }
458
459 /*
460 * iser_ib_alloc_rc_channel
461 *
462 * This function allocates a reliable communication channel using the specified
463 * channel attributes.
464 */
465 iser_chan_t *
iser_ib_alloc_rc_channel(iser_hca_t * hca,uint8_t hca_port)466 iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port)
467 {
468
469 iser_chan_t *chan;
470 ibt_rc_chan_alloc_args_t chanargs;
471 uint_t sq_size, rq_size;
472 int status;
473
474 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);
475
476 mutex_init(&chan->ic_chan_lock, NULL, MUTEX_DRIVER, NULL);
477 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);
478
479 /* Set up the iSER channel handle with HCA */
480 chan->ic_hca = hca;
481
482 /*
483 * Determine the queue sizes, based upon the HCA query data.
484 * For our Work Queues, we will use either our default value,
485 * or the HCA's maximum value, whichever is smaller.
486 */
487 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
488 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);
489
490 /*
491 * For our Completion Queues, we again check the device maximum.
492 * We want to end up with CQs that are the next size up from the
493 * WQs they are servicing so that they have some overhead.
494 */
495 if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
496 chan->ic_sendcq_sz = sq_size + 1;
497 } else {
498 chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
499 sq_size = chan->ic_sendcq_sz - 1;
500 }
501
502 if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
503 chan->ic_recvcq_sz = rq_size + 1;
504 } else {
505 chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
506 rq_size = chan->ic_recvcq_sz - 1;
507 }
508
509 /* Initialize the iSER channel's QP handle */
510 iser_ib_init_qp(chan, sq_size, rq_size);
511
512 /* Set up the Send Completion Queue */
513 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
514 &chan->ic_sendcq);
515 if (status != ISER_STATUS_SUCCESS) {
516 iser_ib_fini_qp(&chan->ic_qp);
517 mutex_destroy(&chan->ic_chan_lock);
518 mutex_destroy(&chan->ic_sq_post_lock);
519 kmem_free(chan, sizeof (iser_chan_t));
520 return (NULL);
521 }
522 ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
523 (void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);
524
525 /* Set up the Receive Completion Queue */
526 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
527 &chan->ic_recvcq);
528 if (status != ISER_STATUS_SUCCESS) {
529 (void) ibt_free_cq(chan->ic_sendcq);
530 iser_ib_fini_qp(&chan->ic_qp);
531 mutex_destroy(&chan->ic_chan_lock);
532 mutex_destroy(&chan->ic_sq_post_lock);
533 kmem_free(chan, sizeof (iser_chan_t));
534 return (NULL);
535 }
536 ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
537 (void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);
538
539 /* Setup the channel arguments */
540 iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
541 sq_size, rq_size, hca->hca_pdhdl, &chanargs);
542
543 status = ibt_alloc_rc_channel(hca->hca_hdl,
544 IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
545 if (status != IBT_SUCCESS) {
546 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
547 "ibt_alloc_rc_channel: status (%d)", status);
548 (void) ibt_free_cq(chan->ic_sendcq);
549 (void) ibt_free_cq(chan->ic_recvcq);
550 iser_ib_fini_qp(&chan->ic_qp);
551 mutex_destroy(&chan->ic_chan_lock);
552 mutex_destroy(&chan->ic_sq_post_lock);
553 kmem_free(chan, sizeof (iser_chan_t));
554 return (NULL);
555 }
556
557 /* Set the 'channel' as the client private data */
558 (void) ibt_set_chan_private(chan->ic_chanhdl, chan);
559
560 return (chan);
561 }
562
/*
 * iser_ib_open_rc_channel
 *
 * This function opens a RC connection on the given allocated RC channel,
 * sending a CM REQ (blocking) with iSER's IP CM info and private data.
 * Returns IDM_STATUS_SUCCESS, or the IBT error status on failure.
 */
int
iser_ib_open_rc_channel(iser_chan_t *chan)
{
	ibt_ip_cm_info_t	ipcm_info;
	iser_private_data_t	iser_priv_data;
	ibt_chan_open_args_t	ocargs;
	ibt_rc_returns_t	ocreturns;
	int			status;

	mutex_enter(&chan->ic_chan_lock);

	/*
	 * For connection establishment, the initiator sends a CM REQ using the
	 * iSER RDMA-Aware Service ID. Included are the source and destination
	 * IP addresses, and the src port.
	 */
	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
	ipcm_info.src_addr = chan->ic_localip;
	ipcm_info.dst_addr = chan->ic_remoteip;
	ipcm_info.src_port = chan->ic_lport;

	/*
	 * The CM Private Data field defines the iSER connection parameters
	 * such as zero based virtual address exception (ZBVAE) and Send with
	 * invalidate Exception (SIE).
	 *
	 * Solaris IBT does not currently support ZBVAE or SIE; setting
	 * these bits to 1 advertises the "exception" (i.e. that the
	 * feature is not supported) to the peer.
	 */
	iser_priv_data.rsvd1 = 0;
	iser_priv_data.sie = 1;
	iser_priv_data.zbvae = 1;

	/* Pack the IP CM info together with the iSER private data */
	status = ibt_format_ip_private_data(&ipcm_info,
	    sizeof (iser_private_data_t), &iser_priv_data);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_chan_lock);
		return (status);
	}

	/*
	 * Set the SID we are attempting to connect to, based upon the
	 * remote port number.
	 */
	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);

	/* Set up the args for the channel open */
	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
	ocargs.oc_path = &chan->ic_ibt_path;
	ocargs.oc_cm_handler = iser_ib_cm_handler;
	ocargs.oc_cm_clnt_private = iser_state;
	ocargs.oc_rdma_ra_out = 4;
	ocargs.oc_rdma_ra_in = 4;
	ocargs.oc_path_retry_cnt = 2;
	ocargs.oc_path_rnr_retry_cnt = 2;
	ocargs.oc_priv_data_len = sizeof (iser_private_data_t);
	ocargs.oc_priv_data = &iser_priv_data;

	bzero(&ocreturns, sizeof (ibt_rc_returns_t));

	/* Blocking open: returns once the connection attempt resolves */
	status = ibt_open_rc_channel(chan->ic_chanhdl,
	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);

	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_chan_lock);
		return (status);
	}

	/*
	 * NOTE(review): success returns IDM_STATUS_SUCCESS while failures
	 * return raw IBT status codes; callers appear to rely on this
	 * mixed convention — confirm before changing.
	 */
	mutex_exit(&chan->ic_chan_lock);
	return (IDM_STATUS_SUCCESS);
}
639
/*
 * iser_ib_close_rc_channel
 * This function closes the RC channel related to this iser_chan handle.
 * The close is issued in blocking mode with no callback data (the old
 * header comment claimed "non-blocking", but IBT_BLOCKING is passed).
 * A close failure is logged and otherwise ignored.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
	int status;

	mutex_enter(&chan->ic_chan_lock);
	status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
	    0, NULL, NULL, 0);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
		    "ibt_close_rc_channel failed: status (%d)", status);
	}
	mutex_exit(&chan->ic_chan_lock);
}
659
/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occuring. Once
 * complete, we are free to free the IBTF channel, WQ and CQ resources, and
 * our own related resources.
 *
 * The caller must hold chan->ic_conn->ic_lock on entry (this function
 * drops and re-acquires it while waiting for the queues to drain, and
 * returns with it held).
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
	iser_qp_t	*iser_qp;

	iser_qp = &chan->ic_qp;

	/*
	 * Ensure the SQ is empty: drop the conn lock so completions can
	 * make progress, poll every half second.
	 */
	while (chan->ic_sq_post_count != 0) {
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
	}
	mutex_destroy(&chan->ic_sq_post_lock);

	/*
	 * Ensure the RQ is empty: flush outstanding recv WRs, then wait
	 * for rq_level to drain to zero, again releasing both locks
	 * while sleeping.
	 */
	(void) ibt_flush_channel(chan->ic_chanhdl);
	mutex_enter(&iser_qp->qp_lock);
	while (iser_qp->rq_level != 0) {
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
		mutex_enter(&iser_qp->qp_lock);
	}

	/* Free our QP handle */
	mutex_exit(&iser_qp->qp_lock);
	(void) iser_ib_fini_qp(iser_qp);

	/* Free the IBT channel resources */
	(void) ibt_free_channel(chan->ic_chanhdl);
	chan->ic_chanhdl = NULL;

	/* Free the CQs */
	(void) ibt_free_cq(chan->ic_sendcq);
	(void) ibt_free_cq(chan->ic_recvcq);

	/* Free the chan handle */
	mutex_destroy(&chan->ic_chan_lock);
	kmem_free(chan, sizeof (iser_chan_t));
}
711
712 /*
713 * iser_ib_post_recv
714 *
715 * This function handles keeping the RQ full on a given channel.
716 * This routine will mostly be run on a taskq, and will check the
717 * current fill level of the RQ, and post as many WRs as necessary
718 * to fill it again.
719 */
720
721 int
iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)722 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
723 {
724 iser_chan_t *chan;
725 int status;
726
727 /* Pull our iSER channel handle from the private data */
728 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
729
730 /*
731 * Caller must check that chan->ic_conn->ic_stage indicates
732 * the connection is active (not closing, not closed) and
733 * it must hold the mutex cross the check and the call to this function
734 */
735 ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
736 ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_ALLOCATED) &&
737 (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));
738 idm_conn_hold(chan->ic_conn->ic_idmc);
739 status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
740 (void *)chanhdl, DDI_NOSLEEP);
741 if (status != DDI_SUCCESS) {
742 idm_conn_rele(chan->ic_conn->ic_idmc);
743 }
744
745 return (status);
746 }
747
748 static void
iser_ib_post_recv_task(void * arg)749 iser_ib_post_recv_task(void *arg)
750 {
751 ibt_channel_hdl_t chanhdl = arg;
752 iser_chan_t *chan;
753
754 /* Pull our iSER channel handle from the private data */
755 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);
756
757 iser_ib_post_recv(chanhdl);
758 idm_conn_rele(chan->ic_conn->ic_idmc);
759 }
760
/*
 * iser_ib_post_recv
 *
 * Refills the RQ of the given channel: computes the free RQ space,
 * pulls that many message handles from the HCA's message cache, and
 * posts them as recv WRs in batches of ISER_IB_RQ_POST_MAX.  Runs
 * with ic_conn->ic_lock held for the duration; qp_lock protects the
 * RQ accounting.  If the RQ is already full, or the connection is
 * closing/closed, this returns after clearing state as appropriate.
 * The rq_taskqpending flag is cleared exactly once on every path out.
 */
void
iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t	*chan;
	iser_hca_t	*hca;
	iser_msg_t	*msg;
	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
	int		rq_space, msg_ret;
	int		total_num, npost;
	uint_t		nposted;
	int		status, i;
	iser_qp_t	*iser_qp;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	ASSERT(chan != NULL);

	mutex_enter(&chan->ic_conn->ic_lock);

	/* Bail out if the connection is closed; no need for more recv WRs */
	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* get the QP handle from the iser_chan */
	iser_qp = &chan->ic_qp;

	hca = chan->ic_hca;

	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
		    "HCA handle");
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* check for space to post on the RQ */
	mutex_enter(&iser_qp->qp_lock);
	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
	if (rq_space == 0) {
		/* The RQ is full, clear the pending flag and return */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* Keep track of the lowest value for rq_min_post_level */
	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
		iser_qp->rq_min_post_level = iser_qp->rq_level;

	mutex_exit(&iser_qp->qp_lock);

	/* we've room to post, so pull from the msg cache */
	msg = iser_msg_get(hca, rq_space, &msg_ret);
	if (msg == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
		    "available in msg cache currently");
		/*
		 * There are no messages on the cache. Wait a half-
		 * second, then try again via a redispatched task.
		 */
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "redispatch routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	if (msg_ret != rq_space) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
		    "messages not allocated: requested (%d) allocated (%d)",
		    rq_space, msg_ret);
		/* We got some, but not all, of our requested depth */
		rq_space = msg_ret;
	}

	/*
	 * Now, walk through the allocated WRs and post them,
	 * ISER_IB_RQ_POST_MAX (or less) at a time.
	 */
	wrlist = &wr[0];
	total_num = rq_space;

	while (total_num) {
		/* determine the number to post on this iteration */
		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
		    ISER_IB_RQ_POST_MAX : total_num;

		/* build a list of WRs from the msg list */
		for (i = 0; i < npost; i++) {
			wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg;
			wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE;
			wrlist[i].wr_sgl = &msg->msg_ds;
			msg = msg->nextp;
		}

		/* post the list to the RQ */
		nposted = 0;
		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
		if ((status != IBT_SUCCESS) || (nposted != npost)) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
			    "failed: requested (%d) posted (%d) status (%d)",
			    npost, nposted, status);
			total_num -= nposted;
			break;
		}

		/* decrement total number to post by the number posted */
		total_num -= nposted;
	}

	/* Account for what actually went onto the RQ */
	mutex_enter(&iser_qp->qp_lock);
	if (total_num != 0) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
		    "failed to post (%d) WRs", total_num);
		iser_qp->rq_level += rq_space - total_num;
	} else {
		iser_qp->rq_level += rq_space;
	}

	/*
	 * Now that we've filled the RQ, check that all of the recv WRs
	 * haven't just been immediately consumed. If so, taskqpending is
	 * still B_TRUE, so we need to fire off a taskq thread to post
	 * more WRs.
	 */
	if (iser_qp->rq_level == 0) {
		mutex_exit(&iser_qp->qp_lock);
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "dispatch followup routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
	} else {
		/*
		 * We're done, we've filled the RQ. Clear the taskq
		 * flag so that we can run again.
		 */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
	}

	mutex_exit(&chan->ic_conn->ic_lock);
}
920
/*
 * iser_ib_handle_portup_event()
 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
 *
 * To facilitate a seamless bringover of the port and configure the CM service
 * for inbound iSER service requests on this newly active port, the existing
 * IDM services will be checked for iSER support.
 * If an iSER service was already created, then this service will simply be
 * bound to the gid of the newly active port. If on the other hand, the CM
 * service did not exist, i.e. only socket communication, then a new CM
 * service will be first registered with the saved service parameters and
 * then bound to the newly active port.
 *
 */
/* ARGSUSED */
static void
iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	idm_svc_t	*idm_svc;
	int		status;

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
	    (longlong_t)event->ev_hca_guid, event->ev_port);

	/*
	 * Query all ports on the HCA and update the port information
	 * maintained in the iser_hca_t structure
	 */
	hca = iser_ib_guid2hca(event->ev_hca_guid);
	if (hca == NULL) {

		/* HCA is just made available, first port on that HCA */
		hca = iser_ib_alloc_hca(event->ev_hca_guid);
		if (hca == NULL) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)",
			    (longlong_t)event->ev_hca_guid, event->ev_port);
			return;
		}
		/* Add the new HCA to the global list */
		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	} else {

		/* Known HCA: refresh its cached port information */
		status = iser_ib_update_hcaports(hca);

		if (status != IBT_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): iser_ib_update_hcaports failed: "
			    "HCA(0x%llx) port(%d)", status,
			    (longlong_t)event->ev_hca_guid, event->ev_port);
			return;
		}
	}

	/* First GID of the port that just came up (ports are 1-based) */
	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

	/*
	 * Iterate through the global list of IDM target services
	 * and check for existing iSER CM service.
	 */
	mutex_enter(&idm.idm_global_mutex);
	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
	    idm_svc != NULL;
	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {


		if (idm_svc->is_iser_svc == NULL) {

			/* Establish a new CM service for iSER requests */
			status = iser_tgt_svc_create(
			    &idm_svc->is_svc_req, idm_svc);

			if (status != IBT_SUCCESS) {
				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
				    "status(0x%x): iser_tgt_svc_create failed: "
				    "HCA(0x%llx) port(%d)", status,
				    (longlong_t)event->ev_hca_guid,
				    event->ev_port);

				/* Move on to the next service */
				continue;
			}
		}

		/* Bind the (new or existing) service to the new port's gid */
		status = iser_ib_activate_port(
		    idm_svc, event->ev_hca_guid, gid);
		if (status != IBT_SUCCESS) {

			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): Bind service on port "
			    "(%llx:%llx) failed",
			    status, (longlong_t)gid.gid_prefix,
			    (longlong_t)gid.gid_guid);

			continue;
		}
		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
		    event->ev_port);
	}
	mutex_exit(&idm.idm_global_mutex);

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
	    event->ev_port);
}
1031
1032 /*
1033 * iser_ib_handle_portdown_event()
1034 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error.
1035 *
1036 * Unconfigure the CM service on the deactivated port and teardown the
1037 * connections that are using the CM service.
1038 */
1039 /* ARGSUSED */
1040 static void
iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,ibt_async_event_t * event)1041 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
1042 {
1043 iser_hca_t *hca;
1044 ib_gid_t gid;
1045 int status;
1046
1047 /*
1048 * Query all ports on the HCA and update the port information
1049 * maintainted in the iser_hca_t structure
1050 */
1051 hca = iser_ib_guid2hca(event->ev_hca_guid);
1052 ASSERT(hca != NULL);
1053
1054 status = iser_ib_update_hcaports(hca);
1055 if (status != IBT_SUCCESS) {
1056 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
1057 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
1058 status, (longlong_t)event->ev_hca_guid, event->ev_port);
1059 return;
1060 }
1061
1062 /* get the gid of the new port */
1063 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
1064 iser_ib_deactivate_port(event->ev_hca_guid, gid);
1065
1066 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
1067 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
1068 event->ev_port);
1069 }
1070
1071 /*
1072 * iser_ib_handle_hca_detach_event()
1073 * Quiesce all activity bound for the port, teardown the connection, unbind
1074 * iSER services on all ports and release the HCA handle.
1075 */
1076 /* ARGSUSED */
1077 static void
iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,ibt_async_event_t * event)1078 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
1079 {
1080 iser_hca_t *nexthca, *hca;
1081 int i, status;
1082
1083 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
1084 (longlong_t)event->ev_hca_guid);
1085
1086 hca = iser_ib_guid2hca(event->ev_hca_guid);
1087 for (i = 0; i < hca->hca_num_ports; i++) {
1088 iser_ib_deactivate_port(hca->hca_guid,
1089 hca->hca_port_info[i].p_sgid_tbl[0]);
1090 }
1091
1092 /*
1093 * Update the HCA list maintained in the iser_state. Free the
1094 * resources allocated to the HCA, i.e. caches, protection domain
1095 */
1096 mutex_enter(&iser_state->is_hcalist_lock);
1097
1098 for (hca = list_head(&iser_state->is_hcalist);
1099 hca != NULL;
1100 hca = nexthca) {
1101
1102 nexthca = list_next(&iser_state->is_hcalist, hca);
1103
1104 if (hca->hca_guid == event->ev_hca_guid) {
1105
1106 list_remove(&iser_state->is_hcalist, hca);
1107 iser_state->is_num_hcas--;
1108
1109 status = iser_ib_free_hca(hca);
1110 if (status != DDI_SUCCESS) {
1111 ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
1112 "Failed to free hca(%p)", (void *)hca);
1113 list_insert_tail(&iser_state->is_hcalist, hca);
1114 iser_state->is_num_hcas++;
1115 }
1116 /* No way to return status to IBT if this fails */
1117 }
1118 }
1119 mutex_exit(&iser_state->is_hcalist_lock);
1120
1121 }
1122
1123 /*
1124 * iser_ib_async_handler
1125 * An IBT Asynchronous Event handler is registered it with the framework and
1126 * passed via the ibt_attach() routine. This function handles the following
1127 * asynchronous events.
1128 * IBT_EVENT_PORT_UP
1129 * IBT_ERROR_PORT_DOWN
1130 * IBT_HCA_ATTACH_EVENT
1131 * IBT_HCA_DETACH_EVENT
1132 */
1133 /* ARGSUSED */
1134 void
iser_ib_async_handler(void * clntp,ibt_hca_hdl_t hdl,ibt_async_code_t code,ibt_async_event_t * event)1135 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
1136 ibt_async_event_t *event)
1137 {
1138 switch (code) {
1139 case IBT_EVENT_PORT_UP:
1140 iser_ib_handle_portup_event(hdl, event);
1141 break;
1142
1143 case IBT_ERROR_PORT_DOWN:
1144 iser_ib_handle_portdown_event(hdl, event);
1145 break;
1146
1147 case IBT_HCA_ATTACH_EVENT:
1148 /*
1149 * A new HCA device is available for use, ignore this
1150 * event because the corresponding IBT_EVENT_PORT_UP
1151 * events will get triggered and handled accordingly.
1152 */
1153 break;
1154
1155 case IBT_HCA_DETACH_EVENT:
1156 iser_ib_handle_hca_detach_event(hdl, event);
1157 break;
1158
1159 default:
1160 break;
1161 }
1162 }
1163
/*
 * iser_ib_init_hcas
 *
 * This function opens all the HCA devices, gathers the HCA state information
 * and adds the HCA handle for each HCA found in the iser_soft_state.
 *
 * Returns DDI_SUCCESS if at least one HCA was initialized, otherwise
 * DDI_FAILURE (the list and lock are torn back down on failure).
 */
static int
iser_ib_init_hcas(void)
{
	ib_guid_t	*guid;
	int		num_hcas;
	int		i;
	iser_hca_t	*hca;

	/* Retrieve the HCA list */
	num_hcas = ibt_get_hca_list(&guid);
	if (num_hcas == 0) {
		/*
		 * This shouldn't happen, but might if we have all HCAs
		 * detach prior to initialization.
		 */
		return (DDI_FAILURE);
	}

	/* Initialize the hcalist lock */
	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);

	/* Create the HCA list */
	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
	    offsetof(iser_hca_t, hca_node));

	for (i = 0; i < num_hcas; i++) {

		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
		    "(0x%llx)", (longlong_t)guid[i]);

		/* Open and query this HCA, allocating its PD and caches */
		hca = iser_ib_alloc_hca(guid[i]);
		if (hca == NULL) {
			/* This shouldn't happen, teardown and fail */
			(void) iser_ib_fini_hcas();
			(void) ibt_free_hca_list(guid, num_hcas);
			return (DDI_FAILURE);
		}

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	}

	/* Free the IBT HCA list */
	(void) ibt_free_hca_list(guid, num_hcas);

	/*
	 * Check that we've initialized at least one HCA. Defensive: with
	 * the failure returns above this branch should be unreachable.
	 */
	mutex_enter(&iser_state->is_hcalist_lock);
	if (list_is_empty(&iser_state->is_hcalist)) {
		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
		    "any HCAs");

		/* Drop the lock first; fini re-acquires and destroys it */
		mutex_exit(&iser_state->is_hcalist_lock);
		(void) iser_ib_fini_hcas();
		return (DDI_FAILURE);
	}
	mutex_exit(&iser_state->is_hcalist_lock);

	return (DDI_SUCCESS);
}
1232
1233 /*
1234 * iser_ib_fini_hcas
1235 *
1236 * Teardown the iSER HCA list initialized above.
1237 */
1238 static int
iser_ib_fini_hcas(void)1239 iser_ib_fini_hcas(void)
1240 {
1241 iser_hca_t *nexthca, *hca;
1242 int status;
1243
1244 mutex_enter(&iser_state->is_hcalist_lock);
1245 for (hca = list_head(&iser_state->is_hcalist);
1246 hca != NULL;
1247 hca = nexthca) {
1248
1249 nexthca = list_next(&iser_state->is_hcalist, hca);
1250
1251 list_remove(&iser_state->is_hcalist, hca);
1252
1253 status = iser_ib_free_hca(hca);
1254 if (status != IBT_SUCCESS) {
1255 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
1256 "HCA during fini");
1257 list_insert_tail(&iser_state->is_hcalist, hca);
1258 return (DDI_FAILURE);
1259 }
1260
1261 iser_state->is_num_hcas--;
1262
1263 }
1264 mutex_exit(&iser_state->is_hcalist_lock);
1265 list_destroy(&iser_state->is_hcalist);
1266 mutex_destroy(&iser_state->is_hcalist_lock);
1267
1268 return (DDI_SUCCESS);
1269 }
1270
/*
 * iser_ib_alloc_hca
 *
 * This function opens the given HCA device, gathers the HCA state information
 * and adds the HCA handle.
 *
 * On success returns a fully initialized iser_hca_t (opened HCA, queried
 * attributes and port info, allocated PD, initialized MR caches). On any
 * failure the partially built state is unwound and NULL is returned.
 */
static iser_hca_t *
iser_ib_alloc_hca(ib_guid_t guid)
{
	iser_hca_t	*hca;
	int		status;

	/* Allocate an iser_hca_t HCA handle */
	hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP);

	/* Open this HCA */
	status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
		    " guid (0x%llx) status (0x%x)", (longlong_t)guid, status);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	hca->hca_guid		= guid;
	hca->hca_clnt_hdl	= iser_state->is_ibhdl;

	/* Query the HCA */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
		    "failure: guid (0x%llx) status (0x%x)",
		    (longlong_t)guid, status);
		(void) ibt_close_hca(hca->hca_hdl);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	/* Query all ports on the HCA (port 0 means "all ports") */
	status = ibt_query_hca_ports(hca->hca_hdl, 0,
	    &hca->hca_port_info, &hca->hca_num_ports,
	    &hca->hca_port_info_sz);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
		    "ibt_query_hca_ports failure: guid (0x%llx) "
		    "status (0x%x)", (longlong_t)guid, status);
		(void) ibt_close_hca(hca->hca_hdl);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	/* Allocate a single PD on this HCA */
	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS,
	    &hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
		    "failure: guid (0x%llx) status (0x%x)",
		    (longlong_t)guid, status);
		/* Unwind: close the HCA and release the queried port info */
		(void) ibt_close_hca(hca->hca_hdl);
		ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	/* Initialize the message and data MR caches for this HCA */
	iser_init_hca_caches(hca);

	return (hca);
}
1340
/*
 * iser_ib_free_hca
 *
 * Release all resources held by an iser_hca_t: the MR caches, the PD,
 * the HCA handle, the port info, and finally the structure itself.
 * Returns DDI_SUCCESS on full teardown. If teardown fails partway, an
 * attempt is made to restore the HCA to its prior state and DDI_FAILURE
 * is returned; if even that fails, hca_failed is set and the HCA is
 * permanently unusable (subsequent calls fail immediately).
 */
static int
iser_ib_free_hca(iser_hca_t *hca)
{
	int			status;
	ibt_hca_portinfo_t	*hca_port_info;
	uint_t			hca_port_info_sz;

	ASSERT(hca != NULL);
	/* A previously failed partial teardown cannot be retried */
	if (hca->hca_failed)
		return (DDI_FAILURE);

	hca_port_info = hca->hca_port_info;
	hca_port_info_sz = hca->hca_port_info_sz;

	/*
	 * Free the memory regions before freeing
	 * the associated protection domain
	 */
	iser_fini_hca_caches(hca);

	status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
		    "status=0x%x", status);
		goto out_caches;
	}

	status = ibt_close_hca(hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA "
		    "status=0x%x", status);
		goto out_pd;
	}

	ibt_free_portinfo(hca_port_info, hca_port_info_sz);

	kmem_free(hca, sizeof (iser_hca_t));
	return (DDI_SUCCESS);

	/*
	 * We only managed to partially tear down the HCA, try to put it back
	 * like it was before returning.
	 */
out_pd:
	/* The PD was freed above; re-allocate it on the still-open HCA */
	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		hca->hca_failed = B_TRUE;
		/* Report error and exit */
		ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
		    "status=0x%x", status);
		return (DDI_FAILURE);
	}

out_caches:
	/* Rebuild the MR caches torn down at the top of this function */
	iser_init_hca_caches(hca);

	return (DDI_FAILURE);
}
1399
1400 static int
iser_ib_update_hcaports(iser_hca_t * hca)1401 iser_ib_update_hcaports(iser_hca_t *hca)
1402 {
1403 ibt_hca_portinfo_t *pinfop, *oldpinfop;
1404 uint_t size, oldsize, nport;
1405 int status;
1406
1407 ASSERT(hca != NULL);
1408
1409 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
1410 if (status != IBT_SUCCESS) {
1411 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
1412 return (status);
1413 }
1414
1415 oldpinfop = hca->hca_port_info;
1416 oldsize = hca->hca_port_info_sz;
1417 hca->hca_port_info = pinfop;
1418 hca->hca_port_info_sz = size;
1419
1420 (void) ibt_free_portinfo(oldpinfop, oldsize);
1421
1422 return (IBT_SUCCESS);
1423 }
1424
1425 /*
1426 * iser_ib_gid2hca
1427 * Given a gid, find the corresponding hca
1428 */
1429 iser_hca_t *
iser_ib_gid2hca(ib_gid_t gid)1430 iser_ib_gid2hca(ib_gid_t gid)
1431 {
1432
1433 iser_hca_t *hca;
1434 int i;
1435
1436 mutex_enter(&iser_state->is_hcalist_lock);
1437 for (hca = list_head(&iser_state->is_hcalist);
1438 hca != NULL;
1439 hca = list_next(&iser_state->is_hcalist, hca)) {
1440
1441 for (i = 0; i < hca->hca_num_ports; i++) {
1442 if ((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix ==
1443 gid.gid_prefix) &&
1444 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid ==
1445 gid.gid_guid)) {
1446
1447 mutex_exit(&iser_state->is_hcalist_lock);
1448
1449 return (hca);
1450 }
1451 }
1452 }
1453 mutex_exit(&iser_state->is_hcalist_lock);
1454 return (NULL);
1455 }
1456
1457 /*
1458 * iser_ib_guid2hca
1459 * Given a HCA guid, find the corresponding HCA
1460 */
1461 iser_hca_t *
iser_ib_guid2hca(ib_guid_t guid)1462 iser_ib_guid2hca(ib_guid_t guid)
1463 {
1464
1465 iser_hca_t *hca;
1466
1467 mutex_enter(&iser_state->is_hcalist_lock);
1468 for (hca = list_head(&iser_state->is_hcalist);
1469 hca != NULL;
1470 hca = list_next(&iser_state->is_hcalist, hca)) {
1471
1472 if (hca->hca_guid == guid) {
1473 mutex_exit(&iser_state->is_hcalist_lock);
1474 return (hca);
1475 }
1476 }
1477 mutex_exit(&iser_state->is_hcalist_lock);
1478 return (NULL);
1479 }
1480
1481 /*
1482 * iser_ib_conv_sockaddr2ibtaddr
1483 * This function converts a socket address into the IBT format
1484 */
iser_ib_conv_sockaddr2ibtaddr(idm_sockaddr_t * saddr,ibt_ip_addr_t * ibt_addr)1485 void iser_ib_conv_sockaddr2ibtaddr(
1486 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
1487 {
1488 if (saddr == NULL) {
1489 ibt_addr->family = AF_UNSPEC;
1490 ibt_addr->un.ip4addr = 0;
1491 } else {
1492 switch (saddr->sin.sa_family) {
1493 case AF_INET:
1494
1495 ibt_addr->family = saddr->sin4.sin_family;
1496 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr;
1497 break;
1498
1499 case AF_INET6:
1500
1501 ibt_addr->family = saddr->sin6.sin6_family;
1502 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr;
1503 break;
1504
1505 default:
1506 ibt_addr->family = AF_UNSPEC;
1507 }
1508
1509 }
1510 }
1511
/*
 * iser_ib_conv_ibtaddr2sockaddr
 * This function converts an IBT ip address handle to a sockaddr.
 * The given port is written into the resulting sockaddr; an unknown
 * address family only logs a note and leaves *ss untouched.
 */
void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
    ibt_ip_addr_t *ibt_addr, in_port_t port)
{
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;

	switch (ibt_addr->family) {
	case AF_INET:
	case AF_UNSPEC:

		/*
		 * NOTE(review): the ibt_ip_addr_t is cast in place to a
		 * sockaddr_in and the port written back into it before the
		 * copy -- this relies on the two structs being layout
		 * compatible; confirm against the ibt_ip_addr_t definition.
		 * Also, ntohs() is used where htons() reads more naturally
		 * (host->network); the two perform the identical byte swap
		 * on all supported platforms, so behavior is unchanged.
		 */
		sin = (struct sockaddr_in *)ibt_addr;
		sin->sin_port = ntohs(port);
		bcopy(sin, ss, sizeof (struct sockaddr_in));
		break;

	case AF_INET6:

		/* Same in-place cast pattern for the IPv6 case */
		sin6 = (struct sockaddr_in6 *)ibt_addr;
		sin6->sin6_port = ntohs(port);
		bcopy(sin6, ss, sizeof (struct sockaddr_in6));
		break;

	default:
		ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
		    "unknown family type: 0x%x", ibt_addr->family);
	}
}
1543
1544 /*
1545 * iser_ib_setup_cq
1546 * This function sets up the Completion Queue size and allocates the specified
1547 * Completion Queue
1548 */
1549 static int
iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl,uint_t cq_size,ibt_cq_hdl_t * cq_hdl)1550 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
1551 {
1552
1553 ibt_cq_attr_t cq_attr;
1554 int status;
1555
1556 cq_attr.cq_size = cq_size;
1557 cq_attr.cq_sched = 0;
1558 cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
1559
1560 /* Allocate a Completion Queue */
1561 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL);
1562 if (status != IBT_SUCCESS) {
1563 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
1564 status);
1565 return (status);
1566 }
1567
1568 return (ISER_STATUS_SUCCESS);
1569 }
1570
1571 /*
1572 * iser_ib_setup_chanargs
1573 *
1574 */
1575 static void
iser_ib_setup_chanargs(uint8_t hca_port,ibt_cq_hdl_t scq_hdl,ibt_cq_hdl_t rcq_hdl,uint_t sq_size,uint_t rq_size,ibt_pd_hdl_t hca_pdhdl,ibt_rc_chan_alloc_args_t * cargs)1576 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
1577 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
1578 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
1579 {
1580
1581 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));
1582
1583 /*
1584 * Set up the size of the channels send queue, receive queue and the
1585 * maximum number of elements in a scatter gather list of work requests
1586 * posted to the send and receive queues.
1587 */
1588 cargs->rc_sizes.cs_sq = sq_size;
1589 cargs->rc_sizes.cs_rq = rq_size;
1590 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE;
1591 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE;
1592
1593 /*
1594 * All Work requests signaled on a WR basis will receive a send
1595 * request completion.
1596 */
1597 cargs->rc_flags = IBT_ALL_SIGNALED;
1598
1599 /* Enable RDMA read and RDMA write on the channel end points */
1600 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1601
1602 /* Set the local hca port on which the channel is allocated */
1603 cargs->rc_hca_port_num = hca_port;
1604
1605 /* Set the Send and Receive Completion Queue handles */
1606 cargs->rc_scq = scq_hdl;
1607 cargs->rc_rcq = rcq_hdl;
1608
1609 /* Set the protection domain associated with the channel */
1610 cargs->rc_pd = hca_pdhdl;
1611
1612 /* No SRQ usage */
1613 cargs->rc_srq = NULL;
1614 }
1615
/*
 * iser_ib_init_qp
 * Initialize the QP handle embedded in the channel: its lock, queue
 * sizes, and the receive-queue level monitoring fields.
 */
void
iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
{
	/* Initialize the handle lock */
	mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL);

	/* Record queue sizes */
	chan->ic_qp.sq_size = sq_size;
	chan->ic_qp.rq_size = rq_size;

	/*
	 * Initialize the RQ monitoring data.
	 * NOTE(review): the low-water mark is computed from the receive CQ
	 * size (ic_recvcq_sz) rather than rq_size; presumably the two are
	 * sized together -- confirm against the channel allocation code.
	 */
	chan->ic_qp.rq_depth = rq_size;
	chan->ic_qp.rq_level = 0;
	chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;

	/* Initialize the taskq flag: no RQ refill work pending yet */
	chan->ic_qp.rq_taskqpending = B_FALSE;
}
1638
/*
 * iser_ib_fini_qp
 * Teardown the QP handle; counterpart of iser_ib_init_qp().
 */
void
iser_ib_fini_qp(iser_qp_t *qp)
{
	/* Destroy the handle lock */
	mutex_destroy(&qp->qp_lock);
}
1649
1650 static int
iser_ib_activate_port(idm_svc_t * idm_svc,ib_guid_t guid,ib_gid_t gid)1651 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
1652 {
1653 iser_svc_t *iser_svc;
1654 iser_sbind_t *is_sbind;
1655 int status;
1656
1657 iser_svc = idm_svc->is_iser_svc;
1658
1659 /*
1660 * Save the address of the service bind handle in the
1661 * iser_svc_t to undo the service binding at a later time
1662 */
1663 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
1664 is_sbind->is_gid = gid;
1665 is_sbind->is_guid = guid;
1666
1667 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL,
1668 idm_svc, &is_sbind->is_sbindhdl);
1669
1670 if (status != IBT_SUCCESS) {
1671 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
1672 "Bind service(%llx) on port(%llx:%llx) failed",
1673 status, (longlong_t)iser_svc->is_svcid,
1674 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);
1675
1676 kmem_free(is_sbind, sizeof (iser_sbind_t));
1677
1678 return (status);
1679 }
1680
1681 list_insert_tail(&iser_svc->is_sbindlist, is_sbind);
1682
1683 return (IBT_SUCCESS);
1684 }
1685
/*
 * iser_ib_deactivate_port
 * Fail every iSER target connection running on the port identified by
 * (hca_guid, gid) and, if the corresponding service is still bound on
 * that port, unbind and free the binding.
 */
static void
iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
{
	iser_svc_t	*iser_svc;
	iser_conn_t	*iser_conn;
	iser_sbind_t	*is_sbind;
	idm_conn_t	*idm_conn;

	/*
	 * Iterate through the global list of IDM target connections.
	 * Issue a TRANSPORT_FAIL for any connections on this port, and
	 * if there is a bound service running on the port, tear it down.
	 */
	mutex_enter(&idm.idm_global_mutex);
	for (idm_conn = list_head(&idm.idm_tgt_conn_list);
	    idm_conn != NULL;
	    idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) {

		if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) {
			/* this is not an iSER connection, skip it */
			continue;
		}

		/* Match the connection's HCA against the deactivated port */
		iser_conn = idm_conn->ic_transport_private;
		if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) {
			/* this iSER connection is on a different port */
			continue;
		}

		/* Fail the transport for this connection */
		idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);

		if (idm_conn->ic_conn_type == CONN_TYPE_INI) {
			/* initiator connection, nothing else to do */
			continue;
		}

		/*
		 * Check for a service binding.
		 * NOTE(review): assumes ic_svc_binding is non-NULL for every
		 * target-side iSER connection -- confirm with IDM.
		 */
		iser_svc = idm_conn->ic_svc_binding->is_iser_svc;
		is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid);
		if (is_sbind != NULL) {
			/* This service is still bound, tear it down */
			(void) ibt_unbind_service(iser_svc->is_srvhdl,
			    is_sbind->is_sbindhdl);
			list_remove(&iser_svc->is_sbindlist, is_sbind);
			kmem_free(is_sbind, sizeof (iser_sbind_t));
		}
	}
	mutex_exit(&idm.idm_global_mutex);
}
1736
1737 static iser_sbind_t *
iser_ib_get_bind(iser_svc_t * iser_svc,ib_guid_t hca_guid,ib_gid_t gid)1738 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
1739 {
1740 iser_sbind_t *is_sbind;
1741
1742 for (is_sbind = list_head(&iser_svc->is_sbindlist);
1743 is_sbind != NULL;
1744 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) {
1745
1746 if ((is_sbind->is_guid == hca_guid) &&
1747 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) &&
1748 (is_sbind->is_gid.gid_guid == gid.gid_guid)) {
1749 return (is_sbind);
1750 }
1751 }
1752 return (NULL);
1753 }
1754