1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /* Solaris Open Fabric kernel verbs */
26
27 #include <sys/types.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/modctl.h>
31 #include <sys/ib/clients/of/rdma/ib_verbs.h>
32 #include <sys/ib/clients/of/rdma/ib_addr.h>
33 #include <sys/ib/clients/of/rdma/rdma_cm.h>
34 #include <sys/ib/clients/of/sol_ofs/sol_kverb_impl.h>
35
36 static void *statep;
37 char *sol_kverbs_dbg_str = "sol_kverbs";
38
39 static llist_head_t client_list = LLIST_HEAD_INIT(client_list);
40 kmutex_t clist_lock; /* mutex for client_list */
41
42 static void ofs_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
43 ibt_async_event_t *);
44
45 /*
46 * set ibt_client_t members. clnt->ib_client must be set before
47 * this func is called.
48 */
49 static int
alloc_ibt_client(ofs_client_t * clnt)50 alloc_ibt_client(ofs_client_t *clnt)
51 {
52 int namelen;
53 ASSERT(clnt->ib_client != NULL);
54
55 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
56 "alloc_ibt_client: client: 0x%p", clnt);
57
58 /*
59 * double-check the name string. if it's longer than MAXNAMELEN
60 * including the string terminator, assuming the name is invalid,
61 * return EINVAL.
62 */
63 namelen = strlen(clnt->ib_client->name);
64 if (namelen >= MAXNAMELEN) {
65 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
66 "alloc_ibt_client: client: 0x%p => "
67 "namelen(%d) is larger than MAXNAMELEN", clnt, namelen);
68 return (-EINVAL);
69 }
70 clnt->ibt_client.mi_clnt_name = kmem_zalloc(namelen + 1, KM_NOSLEEP);
71 if (clnt->ibt_client.mi_clnt_name == NULL) {
72 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
73 "alloc_ibt_client: client: 0x%p => "
74 "no sufficient memory", clnt);
75 return (-ENOMEM);
76 }
77 bcopy(clnt->ib_client->name, clnt->ibt_client.mi_clnt_name, namelen);
78 clnt->ibt_client.mi_ibt_version = IBTI_V_CURR;
79 if (clnt->ib_client->dip) {
80 clnt->ibt_client.mi_clnt_class = IBT_GENERIC;
81 } else {
82 clnt->ibt_client.mi_clnt_class = IBT_GENERIC_MISC;
83 }
84 clnt->ibt_client.mi_async_handler = ofs_async_handler;
85
86 return (0);
87 }
88
89 static void
free_ibt_client(ofs_client_t * clnt)90 free_ibt_client(ofs_client_t *clnt)
91 {
92 int namelen = strlen(clnt->ib_client->name);
93 ASSERT(namelen < MAXNAMELEN);
94
95 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
96 "free_ibt_client: client: 0x%p", clnt);
97
98 kmem_free(clnt->ibt_client.mi_clnt_name, namelen + 1);
99 clnt->ibt_client.mi_clnt_name = NULL;
100 }
101
102 /*
103 * get_device() returns a pointer to struct ib_devcie with
104 * the same guid as one passed to the function.
105 */
106 static ib_device_t *
get_device(ofs_client_t * ofs_client,ib_guid_t guid)107 get_device(ofs_client_t *ofs_client, ib_guid_t guid)
108 {
109 ib_device_t *device;
110 llist_head_t *entry;
111
112 ASSERT(RW_LOCK_HELD(&ofs_client->lock));
113
114 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
115 "get_device: client: 0x%p, guid:0x%p",
116 ofs_client, (void *)(uintptr_t)htonll(guid));
117
118 list_for_each(entry, &ofs_client->device_list) {
119 device = entry->ptr;
120 if (device->node_guid == htonll(guid)) {
121 ASSERT(device->reg_state == IB_DEV_CLOSE);
122 ASSERT(device->node_type == RDMA_NODE_IB_CA);
123 ASSERT(device->clnt_hdl == (ofs_client_p_t)ofs_client);
124 return (device);
125 }
126 }
127
128 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
129 "get_device: client: 0x%p, guid:0x%p => no match guid",
130 ofs_client, (void *)(uintptr_t)htonll(guid));
131
132 return (NULL);
133 }
134
135 /*
136 * ofs_async_handler() is a delegated function to handle asynchrnonous events,
137 * which dispatches each event to corresponding qp/cq handlers registered
138 * with ib_create_qp() and/or ib_create_cq().
139 */
140 static void
ofs_async_handler(void * clntp,ibt_hca_hdl_t hdl,ibt_async_code_t code,ibt_async_event_t * event)141 ofs_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
142 ibt_async_event_t *event)
143 {
144 ofs_client_t *ofs_client = (ofs_client_t *)clntp;
145 struct ib_event ib_event;
146 struct ib_qp *qpp;
147 struct ib_cq *cqp;
148
149
150 ASSERT(ofs_client != NULL);
151
152 cqp = event->ev_cq_hdl ? ibt_get_cq_private(event->ev_cq_hdl) : NULL;
153 qpp = event->ev_chan_hdl ?
154 ibt_get_qp_private(event->ev_chan_hdl) : NULL;
155
156 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
157 "ofs_async_handler: client: 0x%p, hca_hdl: 0x%p, code:0x%x, "
158 "event->qp: 0x%p, event->cq: 0x%p, event->srq: 0x%p "
159 "event->guid: 0x%p, event->port: 0x%x",
160 clntp, hdl, code, qpp, cqp, event->ev_srq_hdl,
161 (void *)(uintptr_t)event->ev_hca_guid, event->ev_port);
162
163 bzero(&ib_event, sizeof (struct ib_event));
164 switch (code) {
165 case IBT_EVENT_PATH_MIGRATED:
166 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
167 IB_EVENT_PATH_MIG);
168 return;
169 case IBT_EVENT_SQD:
170 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
171 IB_EVENT_SQ_DRAINED);
172 return;
173 case IBT_EVENT_COM_EST:
174 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
175 IB_EVENT_COMM_EST);
176 return;
177 case IBT_ERROR_CATASTROPHIC_CHAN:
178 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
179 IB_EVENT_QP_FATAL);
180 return;
181 case IBT_ERROR_INVALID_REQUEST_CHAN:
182 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
183 IB_EVENT_QP_REQ_ERR);
184 return;
185 case IBT_ERROR_ACCESS_VIOLATION_CHAN:
186 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
187 IB_EVENT_QP_ACCESS_ERR);
188 return;
189 case IBT_ERROR_PATH_MIGRATE_REQ:
190 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
191 IB_EVENT_PATH_MIG);
192 return;
193 case IBT_EVENT_EMPTY_CHAN:
194 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
195 IB_EVENT_QP_LAST_WQE_REACHED);
196 return;
197 case IBT_ERROR_CQ:
198 FIRE_CQ_EVENT(ofs_client, hdl, ib_event, cqp,
199 IB_EVENT_CQ_ERR);
200 return;
201 case IBT_HCA_ATTACH_EVENT:
202 {
203 ib_device_t *device;
204 int rtn;
205
206 /* re-use the device once it was created */
207 rw_enter(&ofs_client->lock, RW_WRITER);
208 device = get_device(ofs_client, event->ev_hca_guid);
209 if (device == NULL) {
210 device = kmem_alloc(sizeof (ib_device_t), KM_SLEEP);
211 device->node_type = RDMA_NODE_IB_CA;
212 device->reg_state = IB_DEV_CLOSE;
213 device->clnt_hdl = (ofs_client_p_t)ofs_client;
214 device->node_guid = htonll(event->ev_hca_guid);
215 device->data = NULL;
216 /* add this HCA */
217 ofs_client->hca_num++;
218 llist_head_init(&device->list, device);
219 llist_add_tail(&device->list, &ofs_client->device_list);
220 }
221 device->hca_hdl = NULL;
222 device->local_dma_lkey = 0;
223 device->phys_port_cnt = 0;
224
225 /* open this HCA */
226 rtn = ibt_open_hca(ofs_client->ibt_hdl, event->ev_hca_guid,
227 &device->hca_hdl);
228 if (rtn == IBT_SUCCESS) {
229 ibt_hca_attr_t hattr;
230
231 ofs_client->hca_open_num++;
232 device->reg_state = IB_DEV_OPEN;
233 ibt_set_hca_private(device->hca_hdl, device);
234
235 rtn = ibt_query_hca(device->hca_hdl, &hattr);
236 if (rtn != IBT_SUCCESS) {
237 device->reg_state = IB_DEV_CLOSE;
238 rtn = ibt_close_hca(device->hca_hdl);
239 ASSERT(rtn == IBT_SUCCESS);
240 ofs_client->hca_open_num--;
241 return;
242 }
243
244 (void) sprintf(device->name, "%x:%x:%x",
245 hattr.hca_vendor_id, hattr.hca_device_id,
246 hattr.hca_version_id);
247 device->local_dma_lkey = hattr.hca_reserved_lkey;
248 device->phys_port_cnt = hattr.hca_nports;
249 ibt_set_hca_private(device->hca_hdl, device);
250
251 /* invoke client's callback */
252 if (ofs_client->ib_client->add) {
253 ofs_client->ib_client->add(device);
254 }
255 }
256 rw_exit(&ofs_client->lock);
257
258 return;
259 }
260 case IBT_HCA_DETACH_EVENT:
261 {
262 struct ib_device *device;
263
264 rw_enter(&ofs_client->lock, RW_WRITER);
265 device = ibt_get_hca_private(hdl);
266 if (device->reg_state == IB_DEV_OPEN) {
267 ibt_status_t rtn;
268 /* invoke client's callback */
269 if (ofs_client->ib_client->remove) {
270 ofs_client->ib_client->remove(device);
271 }
272 /* change the state only */
273 device->reg_state = IB_DEV_CLOSE;
274 /* close this HCA */
275 rtn = ibt_close_hca(device->hca_hdl);
276 ASSERT(rtn == IBT_SUCCESS);
277 ofs_client->hca_open_num--;
278 }
279 rw_exit(&ofs_client->lock);
280
281 return;
282 }
283 case IBT_EVENT_LIMIT_REACHED_SRQ:
284 case IBT_ERROR_CATASTROPHIC_SRQ:
285 default:
286 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
287 "sol_ofs does not support this event(0x%x).\n"
288 "\t clntp=0x%p, hca_hdl=0x%p, code=%d, eventp=0x%p\n",
289 code, clntp, hdl, code, event);
290 return;
291 }
292 }
293
294 /*
295 * ib_register_client - Register an IB client
296 * @client:Client to register
297 *
298 * Upper level users of the IB drivers can use ib_register_client() to
299 * register callbacks for IB device addition and removal. When an IB
300 * device is added, each registered client's add method will be called
301 * (in the order the clients were registered), and when a device is
302 * removed, each client's remove method will be called (in the reverse
303 * order that clients were registered). In addition, when
304 * ib_register_client() is called, the client will receive an add
305 * callback for all devices already registered.
306 *
307 * Note that struct ib_client should have a dip pointer to the client,
308 * which is different from the Linux implementation.
309 */
310 int
ib_register_client(struct ib_client * client)311 ib_register_client(struct ib_client *client)
312 {
313 uint_t i, nhcas; /* number of HCAs */
314 ib_guid_t *guidp;
315 ofs_client_t *ofs_client;
316 llist_head_t *entry, *tmp;
317 ib_device_t *device;
318 int rtn;
319
320 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
321 "ib_register_client: client: 0x%p", client);
322
323 /* get the number of HCAs on this system */
324 if ((nhcas = ibt_get_hca_list(&guidp)) == 0) {
325 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
326 "ib_register_client: client: 0x%p => no HCA", client);
327 return (-ENXIO);
328 }
329
330 /* allocate a new sol_ofs_client structure */
331 ofs_client = kmem_zalloc(sizeof (ofs_client_t), KM_NOSLEEP);
332 if (ofs_client == NULL) {
333 (void) ibt_free_hca_list(guidp, nhcas);
334 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
335 "ib_register_client: client: 0x%p => "
336 "no sufficient memory for ofs_client", client);
337 return (-ENOMEM);
338 }
339
340 /* set members */
341 ofs_client->ib_client = client;
342 if ((rtn = alloc_ibt_client(ofs_client)) != 0) {
343 kmem_free(ofs_client, sizeof (ofs_client_t));
344 (void) ibt_free_hca_list(guidp, nhcas);
345 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
346 "ib_register_client: client: 0x%p => "
347 "alloc_ibt_client failed w/ 0x%x", client, rtn);
348 return (rtn);
349 }
350 ofs_client->state = IB_OFS_CLNT_INITIALIZED;
351 llist_head_init(&ofs_client->device_list, NULL);
352 llist_head_init(&ofs_client->client_list, ofs_client);
353 rw_init(&ofs_client->lock, NULL, RW_DEFAULT, NULL);
354
355 /* initialize IB client */
356 rw_enter(&ofs_client->lock, RW_WRITER);
357 if (client->state != IB_CLNT_UNINITIALIZED) {
358 rw_exit(&ofs_client->lock);
359 kmem_free(ofs_client, sizeof (ofs_client_t));
360 (void) ibt_free_hca_list(guidp, nhcas);
361 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
362 "ib_register_client: client: 0x%p => "
363 "invalid client state(%d)", client, client->state);
364 return (-EPERM);
365 }
366
367 /* attach this client to IBTF */
368 rtn = ibt_attach(&ofs_client->ibt_client, client->dip, ofs_client,
369 &ofs_client->ibt_hdl);
370 if (rtn != IBT_SUCCESS) {
371 rw_exit(&ofs_client->lock);
372 free_ibt_client(ofs_client);
373 kmem_free(ofs_client, sizeof (ofs_client_t));
374 (void) ibt_free_hca_list(guidp, nhcas);
375 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
376 "ib_register_client: client: 0x%p => "
377 "ibt_attach failed w/ 0x%x", client, rtn);
378 return (-EINVAL);
379 }
380 client->clnt_hdl = (ofs_client_p_t)ofs_client;
381 client->state = IB_CLNT_INITIALIZED;
382
383 /* link this client */
384 mutex_enter(&clist_lock);
385 llist_add_tail(&ofs_client->client_list, &client_list);
386 mutex_exit(&clist_lock);
387
388 /* Open HCAs */
389 ofs_client->hca_num = nhcas;
390 for (i = 0; i < ofs_client->hca_num; i++) {
391 /* allocate the ib_device structure */
392 device = kmem_zalloc(sizeof (ib_device_t), KM_NOSLEEP);
393 if (device == NULL) {
394 rtn = -ENOMEM;
395 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
396 "ib_register_client: client: 0x%p => "
397 "no sufficient memory for ib_device", client);
398 goto err;
399 }
400 device->node_guid = htonll(guidp[i]);
401 device->node_type = RDMA_NODE_IB_CA;
402 device->reg_state = IB_DEV_CLOSE;
403 device->clnt_hdl = (ofs_client_p_t)ofs_client;
404 llist_head_init(&device->list, device);
405 llist_add_tail(&device->list, &ofs_client->device_list);
406
407 rtn = ibt_open_hca(ofs_client->ibt_hdl, guidp[i],
408 &device->hca_hdl);
409 if (rtn == IBT_SUCCESS) {
410 ibt_hca_attr_t hattr;
411
412 ofs_client->hca_open_num++;
413 device->reg_state = IB_DEV_OPEN;
414
415 rtn = ibt_query_hca(device->hca_hdl, &hattr);
416 if (rtn != IBT_SUCCESS) {
417 rtn = -EIO;
418 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
419 "ib_register_client: client: 0x%p,"
420 "hca_hdl: 0x%p ==> "
421 "ibt_query_hca() failed w/ %d",
422 client, device->hca_hdl, rtn);
423 goto err;
424 }
425
426 (void) sprintf(device->name, "%x:%x:%x",
427 hattr.hca_vendor_id, hattr.hca_device_id,
428 hattr.hca_version_id);
429 device->local_dma_lkey = hattr.hca_reserved_lkey;
430 device->phys_port_cnt = hattr.hca_nports;
431 ibt_set_hca_private(device->hca_hdl, device);
432
433 /* invoke client's callback */
434 if (client->add) {
435 client->add(device);
436 }
437 }
438 }
439 if (ofs_client->hca_open_num == 0) {
440 rtn = -ENXIO;
441 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
442 "ib_register_client: client: 0x%p => "
443 "no available HCA", client);
444 goto err;
445 }
446 rw_exit(&ofs_client->lock);
447
448 (void) ibt_free_hca_list(guidp, nhcas);
449 return (0);
450
451 err:
452 /* first close all open HCAs */
453 list_for_each(entry, &ofs_client->device_list) {
454 device = entry->ptr;
455 /*
456 * If it's open already, close it after the remove
457 * callback.
458 */
459 if (device->reg_state == IB_DEV_OPEN) {
460 ibt_status_t rtn;
461 /* invoke client's callback */
462 if (client->remove) {
463 client->remove(device);
464 }
465 device->reg_state = IB_DEV_CLOSE;
466 rtn = ibt_close_hca(device->hca_hdl);
467 ASSERT(rtn == IBT_SUCCESS);
468 ofs_client->hca_open_num--;
469 }
470 }
471 ASSERT(ofs_client->hca_open_num == 0);
472
473 /* then free the devices */
474 list_for_each_safe(entry, tmp, &ofs_client->device_list) {
475 device = entry->ptr;
476 /* de-link and free the device */
477 llist_del(entry);
478 kmem_free(device, sizeof (ib_device_t));
479 ofs_client->hca_num--;
480 }
481 ASSERT(ofs_client->hca_num == 0);
482
483 /* delink this client */
484 mutex_enter(&clist_lock);
485 llist_del(&ofs_client->client_list);
486 mutex_exit(&clist_lock);
487
488 /* detach the client */
489 client->clnt_hdl = NULL;
490 client->state = IB_CLNT_UNINITIALIZED;
491 (void) ibt_detach(ofs_client->ibt_hdl);
492 rw_exit(&ofs_client->lock);
493
494 /* free sol_ofs_client */
495 free_ibt_client(ofs_client);
496 kmem_free(ofs_client, sizeof (ofs_client_t));
497
498 (void) ibt_free_hca_list(guidp, nhcas);
499 return (rtn);
500 }
501
/*
 * ib_unregister_client - Unregister an IB client
 * @client:Client to unregister
 *
 * Upper level users use ib_unregister_client() to remove their client
 * registration. When ib_unregister_client() is called, the client
 * will receive a remove callback for each IB device still registered.
 */
void
ib_unregister_client(struct ib_client *client)
{
	ofs_client_t	*ofs_client;
	ib_device_t	*device;
	llist_head_t	*entry, *tmp;

	ASSERT(client->state == IB_CLNT_INITIALIZED &&
	    client->clnt_hdl != NULL);

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_unregister_client: client: 0x%p", client);

	ofs_client = (ofs_client_t *)client->clnt_hdl;
	/*
	 * Hold the writer lock across the whole teardown; client
	 * callbacks invoked below go through ofs_lock_enter(), which
	 * skips the reader lock when the writer lock is already held,
	 * so this does not self-deadlock.
	 */
	rw_enter(&ofs_client->lock, RW_WRITER);

	/* first close all open HCAs */
	list_for_each(entry, &ofs_client->device_list) {
		device = entry->ptr;
		/*
		 * If it's open already, close it after the remove
		 * callback.
		 */
		if (device->reg_state == IB_DEV_OPEN) {
			ibt_status_t rtn;
			/* invoke client's callback */
			if (client->remove) {
				client->remove(device);
			}
			device->reg_state = IB_DEV_CLOSE;
			/* best effort: log but continue on close failure */
			rtn = ibt_close_hca(device->hca_hdl);
			if (rtn != IBT_SUCCESS)
				SOL_OFS_DPRINTF_L3(
				    sol_kverbs_dbg_str,
				    "ib_unregister_client(%p) - "
				    "ibt_close_hca failed %d",
				    client, rtn);

			ofs_client->hca_open_num--;
		}
	}
	ASSERT(ofs_client->hca_open_num == 0);

	/* then free the devices */
	list_for_each_safe(entry, tmp, &ofs_client->device_list) {
		device = entry->ptr;
		/* de-link and free the device */
		llist_del(entry);
		kmem_free(device, sizeof (ib_device_t));
		ofs_client->hca_num--;
	}
	ASSERT(ofs_client->hca_num == 0);

	/* delink this client from the global client list */
	mutex_enter(&clist_lock);
	llist_del(&ofs_client->client_list);
	mutex_exit(&clist_lock);

	/* detach the client from IBTF, then drop the lock */
	client->clnt_hdl = NULL;
	client->state = IB_CLNT_UNINITIALIZED;
	(void) ibt_detach(ofs_client->ibt_hdl);
	rw_exit(&ofs_client->lock);

	/* free sol_ofs_client (name buffer first, then the struct) */
	free_ibt_client(ofs_client);
	kmem_free(ofs_client, sizeof (ofs_client_t));
}
578
579 /*
580 * ofs_lock_enter() and ofs_lock_exit() are used to avoid the recursive
581 * rwlock while the client callbacks are invoked.
582 *
583 * Note that the writer lock is used only in the client callback case,
584 * so that the kverb functions wanting to acquire the reader lock can
585 * safely ignore the reader lock if the writer lock is already held.
586 * The writer lock shouldn't be used in no other plances.
587 */
588 static inline void
ofs_lock_enter(krwlock_t * lock)589 ofs_lock_enter(krwlock_t *lock)
590 {
591 if (!RW_WRITE_HELD(lock)) {
592 rw_enter(lock, RW_READER);
593 }
594 }
595
596 static inline void
ofs_lock_exit(krwlock_t * lock)597 ofs_lock_exit(krwlock_t *lock)
598 {
599 if (!RW_WRITE_HELD(lock)) {
600 rw_exit(lock);
601 }
602 }
603
604 /*
605 * ib_get_client_data - Get IB client context
606 * @device:Device to get context for
607 * @client:Client to get context for
608 *
609 * ib_get_client_data() returns client context set with
610 * ib_set_client_data() and returns NULL if it's not found.
611 */
ib_get_client_data(struct ib_device * device,struct ib_client * client)612 void *ib_get_client_data(struct ib_device *device,
613 struct ib_client *client)
614 {
615 ofs_client_t *ofs_client;
616 struct ib_device *ib_device;
617 boolean_t found = B_FALSE;
618 llist_head_t *entry;
619 void *data;
620
621 ASSERT(device != 0 && client != 0);
622
623 ofs_client = (ofs_client_t *)client->clnt_hdl;
624 if (ofs_client == 0) {
625 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
626 "ib_get_client_data: device: 0x%p, client: 0x%p => "
627 "no ofs_client", device, client);
628 return (NULL);
629 }
630
631 ofs_lock_enter(&ofs_client->lock);
632 list_for_each(entry, &ofs_client->device_list) {
633 ib_device = entry->ptr;
634 if (ib_device->node_guid == device->node_guid) {
635 found = B_TRUE;
636 break;
637 }
638 }
639 if (!found) {
640 ofs_lock_exit(&ofs_client->lock);
641 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
642 "ib_get_client_data: device: 0x%p, client: 0x%p => "
643 "no ib_device found", device, client);
644 return (NULL);
645 }
646 data = ib_device->data;
647 ofs_lock_exit(&ofs_client->lock);
648
649 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
650 "ib_get_client_data: device: 0x%p, client: 0x%p",
651 device, client);
652
653 return (data);
654 }
655
656 /*
657 * ib_set_client_data - Set IB client context
658 * @device:Device to set context for
659 * @client:Client to set context for
660 * @data:Context to set
661 *
662 * ib_set_client_data() sets client context that can be retrieved with
663 * ib_get_client_data(). If the specified device is not found, the function
664 * returns w/o any operations.
665 */
ib_set_client_data(struct ib_device * device,struct ib_client * client,void * data)666 void ib_set_client_data(struct ib_device *device, struct ib_client *client,
667 void *data)
668 {
669 ofs_client_t *ofs_client;
670 struct ib_device *ib_device;
671 boolean_t found = B_FALSE;
672 llist_head_t *entry;
673
674 ASSERT(device != 0 && client != 0);
675
676 ofs_client = (ofs_client_t *)client->clnt_hdl;
677 if (ofs_client == 0) {
678 cmn_err(CE_WARN, "No client context found for %s/%s\n",
679 device->name, client->name);
680 return;
681 }
682
683 ofs_lock_enter(&ofs_client->lock);
684 list_for_each(entry, &ofs_client->device_list) {
685 ib_device = entry->ptr;
686 if (ib_device->node_guid == device->node_guid) {
687 found = B_TRUE;
688 break;
689 }
690 }
691 if (!found) {
692 cmn_err(CE_WARN, "No client context found for %s/%s\n",
693 device->name, client->name);
694 ofs_lock_exit(&ofs_client->lock);
695 return;
696 }
697 ib_device->data = data;
698 ofs_lock_exit(&ofs_client->lock);
699
700 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
701 "ib_set_client_data: device: 0x%p, client: 0x%p, "
702 "data: 0x%p", device, client, data);
703 }
704
705 /*
706 * ib_query_device - Query IB device attributes
707 * @device:Device to query
708 * @device_attr:Device attributes
709 *
710 * ib_query_device() returns the attributes of a device through the
711 * @device_attr pointer.
712 */
713 int
ib_query_device(struct ib_device * device,struct ib_device_attr * attr)714 ib_query_device(struct ib_device *device, struct ib_device_attr *attr)
715 {
716 ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl;
717 ibt_hca_attr_t hattr;
718 int rtn;
719
720 ofs_lock_enter(&ofs_client->lock);
721 if (device->reg_state != IB_DEV_OPEN) {
722 ofs_lock_exit(&ofs_client->lock);
723 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
724 "ib_query_device: device: 0x%p => "
725 "invalid device state (%d)", device, device->reg_state);
726 return (-ENXIO);
727 }
728 if ((rtn = ibt_query_hca(device->hca_hdl, &hattr)) != IBT_SUCCESS) {
729 ofs_lock_exit(&ofs_client->lock);
730 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
731 "ib_query_device: device: 0x%p => "
732 "ibt_query_hca failed w/ 0x%x", device, rtn);
733 return (-EIO);
734 }
735 ofs_lock_exit(&ofs_client->lock);
736
737 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
738 "ib_query_device: device: 0x%p, attr: 0x%p, rtn: 0x%p",
739 device, attr, rtn);
740
741 /* OF order is major.micro.minor, so keep it here */
742 attr->fw_ver = (uint64_t)hattr.hca_fw_major_version << 32 |
743 hattr.hca_fw_micro_version << 16 & 0xFFFF0000 |
744 hattr.hca_fw_minor_version & 0xFFFF;
745
746 attr->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
747 IB_DEVICE_PORT_ACTIVE_EVENT |
748 IB_DEVICE_SYS_IMAGE_GUID |
749 IB_DEVICE_RC_RNR_NAK_GEN;
750 if (hattr.hca_flags & IBT_HCA_PKEY_CNTR) {
751 attr->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
752 }
753 if (hattr.hca_flags & IBT_HCA_QKEY_CNTR) {
754 attr->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
755 }
756 if (hattr.hca_flags & IBT_HCA_AUTO_PATH_MIG) {
757 attr->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
758 }
759 if (hattr.hca_flags & IBT_HCA_AH_PORT_CHECK) {
760 attr->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
761 }
762
763 attr->vendor_id = hattr.hca_vendor_id;
764 attr->vendor_part_id = hattr.hca_device_id;
765 attr->hw_ver = hattr.hca_version_id;
766 attr->sys_image_guid = htonll(hattr.hca_si_guid);
767 attr->max_mr_size = ~0ull;
768 attr->page_size_cap = IBTF2OF_PGSZ(hattr.hca_page_sz);
769 attr->max_qp = hattr.hca_max_qp;
770 attr->max_qp_wr = hattr.hca_max_qp_sz;
771 attr->max_sge = hattr.hca_max_sgl;
772 attr->max_sge_rd = hattr.hca_max_rd_sgl;
773 attr->max_cq = hattr.hca_max_cq;
774 attr->max_cqe = hattr.hca_max_cq_sz;
775 attr->max_mr = hattr.hca_max_memr;
776 attr->max_pd = hattr.hca_max_pd;
777 attr->max_qp_rd_atom = hattr.hca_max_rdma_in_qp;
778 attr->max_qp_init_rd_atom = hattr.hca_max_rdma_in_qp;
779 attr->max_ee_rd_atom = hattr.hca_max_rdma_in_ee;
780 attr->max_ee_init_rd_atom = hattr.hca_max_rdma_in_ee;
781 attr->max_res_rd_atom = hattr.hca_max_rsc;
782 attr->max_srq = hattr.hca_max_srqs;
783 attr->max_srq_wr = hattr.hca_max_srqs_sz -1;
784 attr->max_srq_sge = hattr.hca_max_srq_sgl;
785 attr->local_ca_ack_delay = hattr.hca_local_ack_delay;
786 attr->atomic_cap = hattr.hca_flags & IBT_HCA_ATOMICS_GLOBAL ?
787 IB_ATOMIC_GLOB : (hattr.hca_flags & IBT_HCA_ATOMICS_HCA ?
788 IB_ATOMIC_HCA : IB_ATOMIC_NONE);
789 attr->max_ee = hattr.hca_max_eec;
790 attr->max_rdd = hattr.hca_max_rdd;
791 attr->max_mw = hattr.hca_max_mem_win;
792 attr->max_pkeys = hattr.hca_max_port_pkey_tbl_sz;
793 attr->max_raw_ipv6_qp = hattr.hca_max_ipv6_qp;
794 attr->max_raw_ethy_qp = hattr.hca_max_ether_qp;
795 attr->max_mcast_grp = hattr.hca_max_mcg;
796 attr->max_mcast_qp_attach = hattr.hca_max_qp_per_mcg;
797 attr->max_total_mcast_qp_attach = hattr.hca_max_mcg_qps;
798 attr->max_ah = hattr.hca_max_ah;
799 attr->max_fmr = hattr.hca_max_fmrs;
800 attr->max_map_per_fmr = hattr.hca_opaque9; /* hca_max_map_per_fmr */
801
802 return (0);
803 }
804
805 /* Protection domains */
806 struct ib_pd *
ib_alloc_pd(struct ib_device * device)807 ib_alloc_pd(struct ib_device *device)
808 {
809 ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl;
810 struct ib_pd *pd;
811 int rtn;
812
813 if ((pd = kmem_alloc(sizeof (struct ib_pd), KM_NOSLEEP)) == NULL) {
814 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
815 "ib_alloc_pd: device: 0x%p => no sufficient memory",
816 device);
817 return ((struct ib_pd *)-ENOMEM);
818 }
819
820 ofs_lock_enter(&ofs_client->lock);
821 if (device->reg_state != IB_DEV_OPEN) {
822 ofs_lock_exit(&ofs_client->lock);
823 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
824 "ib_alloc_pd: device: 0x%p => invalid device state (%d)",
825 device, device->reg_state);
826 return ((struct ib_pd *)-ENXIO);
827 }
828
829 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
830 "ib_alloc_pd: device: 0x%p", device);
831
832 rtn = ibt_alloc_pd(device->hca_hdl, IBT_PD_NO_FLAGS, &pd->ibt_pd);
833 ofs_lock_exit(&ofs_client->lock);
834
835 if (rtn == IBT_SUCCESS) {
836 pd->device = device;
837 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
838 "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p, "
839 "rtn: 0x%x", device, pd, pd->ibt_pd, rtn);
840 return (pd);
841 }
842 kmem_free(pd, sizeof (struct ib_pd));
843
844 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
845 "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p => "
846 "ibt_alloc_pd failed w/ 0x%x", device, pd, pd->ibt_pd, rtn);
847
848 switch (rtn) {
849 case IBT_INSUFF_RESOURCE:
850 return ((struct ib_pd *)-ENOMEM);
851 case IBT_HCA_HDL_INVALID:
852 return ((struct ib_pd *)-EFAULT);
853 default:
854 return ((struct ib_pd *)-EIO);
855 }
856 }
857
858 int
ib_dealloc_pd(struct ib_pd * pd)859 ib_dealloc_pd(struct ib_pd *pd)
860 {
861 ofs_client_t *ofs_client = (ofs_client_t *)pd->device->clnt_hdl;
862 int rtn;
863
864 ofs_lock_enter(&ofs_client->lock);
865 if (pd->device->reg_state != IB_DEV_OPEN) {
866 ofs_lock_exit(&ofs_client->lock);
867 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
868 "ib_dealloc_pd: pd: 0x%p => invalid device state (%d)",
869 pd, pd->device->reg_state);
870 return (-ENXIO);
871 }
872
873 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
874 "ib_dealloc_pd: pd: 0x%p", pd);
875
876 rtn = ibt_free_pd(pd->device->hca_hdl, pd->ibt_pd);
877 ofs_lock_exit(&ofs_client->lock);
878
879 if (rtn == IBT_SUCCESS) {
880 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
881 "ib_dealloc_pd: pd: 0x%p, device: 0x%p, ibt_pd: 0x%p, "
882 "rtn: 0x%x", pd, pd->device, pd->ibt_pd, rtn);
883 kmem_free(pd, sizeof (struct ib_pd));
884 return (0);
885 }
886
887 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
888 "ib_dealloc_pd: pd: 0x%p => ibt_free_pd failed w/ 0x%x",
889 pd, rtn);
890
891 switch (rtn) {
892 case IBT_PD_IN_USE:
893 return (-EBUSY);
894 case IBT_HCA_HDL_INVALID:
895 return (-EFAULT);
896 default:
897 return (-EIO);
898 }
899 }
900
901 /*
902 * ofs_cq_handler() is a delegated function to handle CQ events,
903 * which dispatches them to corresponding cq handlers registered
904 * with ib_create_cq().
905 */
906 static void
ofs_cq_handler(ibt_cq_hdl_t ibt_cq,void * arg)907 ofs_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
908 {
909 struct ib_cq *cq = (struct ib_cq *)ibt_get_cq_private(ibt_cq);
910
911 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
912 "ofs_cq_handler: ibt_cq: 0x%p, ib_cq: 0x%p, comp_handler: 0x%p, "
913 "arg: 0x%p", ibt_cq, cq, cq->comp_handler, arg);
914
915 if (cq->comp_handler) {
916 cq->comp_handler(cq, cq->cq_context);
917 }
918 }
919
920 /*
921 * ib_create_cq - Creates a CQ on the specified device.
922 * @device: The device on which to create the CQ.
923 * @comp_handler: A user-specified callback that is invoked when a
924 * completion event occurs on the CQ.
925 * @event_handler: A user-specified callback that is invoked when an
926 * asynchronous event not associated with a completion occurs on the CQ.
927 * @cq_context: Context associated with the CQ returned to the user via
928 * the associated completion and event handlers.
929 * @cqe: The minimum size of the CQ.
930 * @comp_vector - Completion vector used to signal completion events.
931 * Must be >= 0 and < context->num_comp_vectors.
932 *
933 * Users can examine the cq structure to determine the actual CQ size.
934 *
935 * Note that comp_vector is not supported currently.
936 */
937 struct ib_cq *
ib_create_cq(struct ib_device * device,ib_comp_handler comp_handler,void (* event_handler)(struct ib_event *,void *),void * cq_context,int cqe,void * comp_vector)938 ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler,
939 void (*event_handler)(struct ib_event *, void *), void *cq_context,
940 int cqe, void *comp_vector)
941 {
942 ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl;
943 ibt_cq_attr_t cq_attr;
944 uint32_t real_size;
945 struct ib_cq *cq;
946 int rtn;
947
948 if ((cq = kmem_alloc(sizeof (struct ib_cq), KM_NOSLEEP)) == NULL) {
949 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
950 "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
951 "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
952 "comp_vector: %p => no sufficient memory", device,
953 comp_handler, event_handler, cq_context, cqe, comp_vector);
954 return ((struct ib_cq *)-ENOMEM);
955 }
956
957 ofs_lock_enter(&ofs_client->lock);
958 if (device->reg_state != IB_DEV_OPEN) {
959 ofs_lock_exit(&ofs_client->lock);
960 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
961 "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
962 "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
963 "comp_vector: %p => invalid device state (%d)", device,
964 comp_handler, event_handler, cq_context, cqe, comp_vector,
965 device->reg_state);
966 return ((struct ib_cq *)-ENXIO);
967 }
968
969 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
970 "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
971 "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
972 "comp_vector: %d", device, comp_handler, event_handler,
973 cq_context, cqe, comp_vector);
974
975 cq_attr.cq_size = cqe;
976 cq_attr.cq_sched = comp_vector;
977 cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
978 rtn = ibt_alloc_cq(device->hca_hdl, &cq_attr, &cq->ibt_cq, &real_size);
979 ofs_lock_exit(&ofs_client->lock);
980
981 if (rtn == IBT_SUCCESS) {
982 cq->device = device;
983 cq->comp_handler = comp_handler;
984 cq->event_handler = event_handler;
985 cq->cq_context = cq_context;
986 cq->cqe = real_size;
987 ibt_set_cq_private(cq->ibt_cq, cq);
988 ibt_set_cq_handler(cq->ibt_cq, ofs_cq_handler, cq_context);
989 mutex_init(&cq->lock, NULL, MUTEX_DEFAULT, NULL);
990 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
991 "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p, "
992 "rtn: 0x%x", device, cqe, cq->ibt_cq, rtn);
993 return (cq);
994 }
995 kmem_free(cq, sizeof (struct ib_cq));
996
997 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
998 "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p => "
999 "ibt_alloc_cq failed w/ 0x%x", device, cqe, cq->ibt_cq, rtn);
1000
1001 switch (rtn) {
1002 case IBT_HCA_CQ_EXCEEDED:
1003 case IBT_INVALID_PARAM:
1004 case IBT_HCA_HDL_INVALID:
1005 return ((struct ib_cq *)-EINVAL);
1006 case IBT_INSUFF_RESOURCE:
1007 return ((struct ib_cq *)-ENOMEM);
1008 default:
1009 return ((struct ib_cq *)-EIO);
1010 }
1011 }
1012
/*
 * ib_destroy_cq - Destroys the specified CQ and frees its OF wrapper.
 *
 * Returns 0 on success or a negative errno (-ENXIO if the device is no
 * longer open, -EBUSY/-EINVAL/-EIO mapped from the IBTF status).
 */
int
ib_destroy_cq(struct ib_cq *cq)
{
	ofs_client_t *ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
	int rtn;

	ofs_lock_enter(&ofs_client->lock);
	if (cq->device->reg_state != IB_DEV_OPEN) {
		ofs_lock_exit(&ofs_client->lock);
		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
		    "ib_destroy_cq: cq: 0x%p => invalid device state (%d)",
		    cq, cq->device->reg_state);
		return (-ENXIO);
	}

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_destroy_cq: cq: 0x%p", cq);

	/*
	 * If IBTL_ASYNC_PENDING is set, the ibt_cq handle is not freed
	 * at this moment, but stays alive for a while. There is then a
	 * possibility that this CQ is referenced even after
	 * ib_destroy_cq() is called. To distinguish this case from
	 * others, clear the CQ private pointer here.
	 * (The original comment said "qp"; this is the CQ path.)
	 */
	ibt_set_cq_private(cq->ibt_cq, NULL);

	rtn = ibt_free_cq(cq->ibt_cq);
	if (rtn == IBT_SUCCESS) {
		ofs_lock_exit(&ofs_client->lock);
		kmem_free(cq, sizeof (struct ib_cq));
		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
		    "ib_destroy_cq: cq: 0x%p, rtn: 0x%x", cq, rtn);
		return (0);
	}
	/* the CQ wasn't freed; restore the private pointer cleared above */
	ibt_set_cq_private(cq->ibt_cq, cq);
	ofs_lock_exit(&ofs_client->lock);

	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
	    "ib_destroy_cq: cq: 0x%p => ibt_free_cq failed w/ 0x%x", cq, rtn);

	/* map the IBTF status to a negative errno, Linux OF style */
	switch (rtn) {
	case IBT_CQ_BUSY:
		return (-EBUSY);
	case IBT_HCA_HDL_INVALID:
	case IBT_CQ_HDL_INVALID:
		return (-EINVAL);
	default:
		return (-EIO);
	}
}
1064
1065 struct ib_qp *
ib_create_qp(struct ib_pd * pd,struct ib_qp_init_attr * qp_init_attr)1066 ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr)
1067 {
1068 ofs_client_t *ofs_client = pd->device->clnt_hdl;
1069 ibt_qp_alloc_attr_t attrs;
1070 ibt_chan_sizes_t sizes;
1071 ib_qpn_t qpn;
1072 ibt_qp_hdl_t ibt_qp;
1073 struct ib_qp *qp;
1074 int rtn;
1075
1076 /* sanity check */
1077 if (!(qp_init_attr->send_cq && qp_init_attr->recv_cq)) {
1078 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1079 "ib_create_qp: pd: 0x%p => invalid cqs "
1080 "(send_cq=0x%p, recv_cq=0x%p)", pd,
1081 qp_init_attr->send_cq, qp_init_attr->recv_cq);
1082 return ((struct ib_qp *)-EINVAL);
1083 }
1084
1085 /* UC, Raw IPv6 and Raw Ethernet are not supported */
1086 if (qp_init_attr->qp_type == IB_QPT_UC ||
1087 qp_init_attr->qp_type == IB_QPT_RAW_IPV6 ||
1088 qp_init_attr->qp_type == IB_QPT_RAW_ETY) {
1089 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1090 "ib_create_qp: pd: 0x%p => invalid qp_type",
1091 pd, qp_init_attr->qp_type);
1092 return ((struct ib_qp *)-EINVAL);
1093 }
1094
1095 if ((qp = kmem_alloc(sizeof (struct ib_qp), KM_NOSLEEP)) == NULL) {
1096 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1097 "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1098 "no sufficient memory", pd, qp_init_attr);
1099 return ((struct ib_qp *)-ENOMEM);
1100 }
1101
1102 ofs_lock_enter(&ofs_client->lock);
1103 if (pd->device->reg_state != IB_DEV_OPEN) {
1104 ofs_lock_exit(&ofs_client->lock);
1105 kmem_free(qp, sizeof (struct ib_qp));
1106 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1107 "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1108 "invalid device state (%d)", pd, qp_init_attr,
1109 pd->device->reg_state);
1110 return ((struct ib_qp *)-ENXIO);
1111 }
1112
1113 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1114 "ib_create_qp: pd: 0x%p, event_handler: 0x%p, qp_context: 0x%p, "
1115 "send_cq: 0x%p, recv_cq: 0x%p, srq: 0x%p, max_send_wr: 0x%x, "
1116 "max_recv_wr: 0x%x, max_send_sge: 0x%x, max_recv_sge: 0x%x, "
1117 "max_inline_data: 0x%x, sq_sig_type: %d, qp_type: %d, "
1118 "port_num: %d",
1119 pd, qp_init_attr->event_handler, qp_init_attr->qp_context,
1120 qp_init_attr->send_cq, qp_init_attr->recv_cq, qp_init_attr->srq,
1121 qp_init_attr->cap.max_send_wr, qp_init_attr->cap.max_recv_wr,
1122 qp_init_attr->cap.max_send_sge, qp_init_attr->cap.max_recv_sge,
1123 qp_init_attr->cap.max_inline_data, qp_init_attr->sq_sig_type,
1124 qp_init_attr->qp_type, qp_init_attr->port_num);
1125
1126 attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1127 if (qp_init_attr->srq) {
1128 attrs.qp_alloc_flags |= IBT_QP_USES_SRQ;
1129 }
1130
1131 attrs.qp_flags = IBT_ALL_SIGNALED | IBT_FAST_REG_RES_LKEY;
1132 if (qp_init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) {
1133 attrs.qp_flags |= IBT_WR_SIGNALED;
1134 }
1135
1136 attrs.qp_scq_hdl = qp_init_attr->send_cq->ibt_cq;
1137 attrs.qp_rcq_hdl = qp_init_attr->recv_cq->ibt_cq;
1138 attrs.qp_pd_hdl = pd->ibt_pd;
1139
1140 attrs.qp_sizes.cs_sq = qp_init_attr->cap.max_send_wr;
1141 attrs.qp_sizes.cs_rq = qp_init_attr->cap.max_recv_wr;
1142 attrs.qp_sizes.cs_sq_sgl = qp_init_attr->cap.max_send_sge;
1143 attrs.qp_sizes.cs_rq_sgl = qp_init_attr->cap.max_recv_sge;
1144 attrs.qp_sizes.cs_inline = qp_init_attr->cap.max_inline_data;
1145
1146 switch (qp_init_attr->qp_type) {
1147 case IB_QPT_RC:
1148 rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_RC_RQP, &attrs,
1149 &sizes, &qpn, &ibt_qp);
1150 break;
1151 case IB_QPT_UD:
1152 rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_UD_RQP, &attrs,
1153 &sizes, &qpn, &ibt_qp);
1154 break;
1155 case IB_QPT_SMI:
1156 rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1157 qp_init_attr->port_num, IBT_SMI_SQP, &attrs, &sizes,
1158 &ibt_qp);
1159 break;
1160 case IB_QPT_GSI:
1161 rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1162 qp_init_attr->port_num, IBT_GSI_SQP, &attrs, &sizes,
1163 &ibt_qp);
1164 break;
1165 default:
1166 /* this should never happens */
1167 ofs_lock_exit(&ofs_client->lock);
1168 kmem_free(qp, sizeof (struct ib_qp));
1169 return ((struct ib_qp *)-EINVAL);
1170 }
1171 ofs_lock_exit(&ofs_client->lock);
1172
1173 if (rtn == IBT_SUCCESS) {
1174 /* fill in ib_qp_cap w/ the real values */
1175 qp_init_attr->cap.max_send_wr = sizes.cs_sq;
1176 qp_init_attr->cap.max_recv_wr = sizes.cs_rq;
1177 qp_init_attr->cap.max_send_sge = sizes.cs_sq_sgl;
1178 qp_init_attr->cap.max_recv_sge = sizes.cs_rq_sgl;
1179 /* max_inline_data is not supported */
1180 qp_init_attr->cap.max_inline_data = 0;
1181 /* fill in ib_qp */
1182 qp->device = pd->device;
1183 qp->pd = pd;
1184 qp->send_cq = qp_init_attr->send_cq;
1185 qp->recv_cq = qp_init_attr->recv_cq;
1186 qp->srq = qp_init_attr->srq;
1187 qp->event_handler = qp_init_attr->event_handler;
1188 qp->qp_context = qp_init_attr->qp_context;
1189 qp->qp_num = qp_init_attr->qp_type == IB_QPT_SMI ? 0 :
1190 qp_init_attr->qp_type == IB_QPT_GSI ? 1 : qpn;
1191 qp->qp_type = qp_init_attr->qp_type;
1192 qp->ibt_qp = ibt_qp;
1193 ibt_set_qp_private(qp->ibt_qp, qp);
1194 mutex_init(&qp->lock, NULL, MUTEX_DEFAULT, NULL);
1195 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1196 "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p, "
1197 "rtn: 0x%x", pd->device, pd, qp_init_attr, rtn);
1198 return (qp);
1199 }
1200 kmem_free(qp, sizeof (struct ib_qp));
1201
1202 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1203 "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p => "
1204 "ibt_alloc_(special)_qp failed w/ rtn: 0x%x", pd->device, pd,
1205 qp_init_attr, rtn);
1206
1207 switch (rtn) {
1208 case IBT_NOT_SUPPORTED:
1209 case IBT_QP_SRV_TYPE_INVALID:
1210 case IBT_CQ_HDL_INVALID:
1211 case IBT_HCA_HDL_INVALID:
1212 case IBT_INVALID_PARAM:
1213 case IBT_SRQ_HDL_INVALID:
1214 case IBT_PD_HDL_INVALID:
1215 case IBT_HCA_SGL_EXCEEDED:
1216 case IBT_HCA_WR_EXCEEDED:
1217 return ((struct ib_qp *)-EINVAL);
1218 case IBT_INSUFF_RESOURCE:
1219 return ((struct ib_qp *)-ENOMEM);
1220 default:
1221 return ((struct ib_qp *)-EIO);
1222 }
1223 }
1224
1225 int
ib_destroy_qp(struct ib_qp * qp)1226 ib_destroy_qp(struct ib_qp *qp)
1227 {
1228 ofs_client_t *ofs_client = (ofs_client_t *)qp->device->clnt_hdl;
1229 int rtn;
1230
1231 ofs_lock_enter(&ofs_client->lock);
1232 if (qp->device->reg_state != IB_DEV_OPEN) {
1233 ofs_lock_exit(&ofs_client->lock);
1234 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1235 "ib_destroy_qp: qp: 0x%p => invalid device state (%d)",
1236 qp, qp->device->reg_state);
1237 return (-ENXIO);
1238 }
1239
1240 /*
1241 * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed
1242 * at this moment, but yet alive for a while. Then
1243 * there is a possibility that this qp is used even after
1244 * ib_destroy_qp() is called. To distinguish this case from
1245 * others, clear ibt_qp here.
1246 */
1247 ibt_set_qp_private(qp->ibt_qp, NULL);
1248
1249 rtn = ibt_free_qp(qp->ibt_qp);
1250 if (rtn == IBT_SUCCESS) {
1251 ofs_lock_exit(&ofs_client->lock);
1252 kmem_free(qp, sizeof (struct ib_qp));
1253 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1254 "ib_destroy_qp: qp: 0x%p, rtn: 0x%x", qp, rtn);
1255 return (0);
1256 }
1257 ibt_set_qp_private(qp->ibt_qp, qp);
1258 ofs_lock_exit(&ofs_client->lock);
1259
1260 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1261 "ib_destroy_qp: qp: 0x%p => ibt_free_qp failed w/ 0x%x", qp, rtn);
1262
1263 switch (rtn) {
1264 case IBT_CHAN_STATE_INVALID:
1265 case IBT_HCA_HDL_INVALID:
1266 case IBT_QP_HDL_INVALID:
1267 return (-EINVAL);
1268 default:
1269 return (-EIO);
1270 }
1271 }
1272
1273 /*
1274 * ib_req_notify_cq - Request completion notification on a CQ.
1275 * @cq: The CQ to generate an event for.
1276 * @flags:
1277 * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
1278 * to request an event on the next solicited event or next work
1279 * completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
1280 * may also be |ed in to request a hint about missed events, as
1281 * described below.
1282 *
1283 * Return Value:
1284 * < 0 means an error occurred while requesting notification
1285 * == 0 means notification was requested successfully, and if
1286 * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
1287 * were missed and it is safe to wait for another event. In
1288 * this case is it guaranteed that any work completions added
1289 * to the CQ since the last CQ poll will trigger a completion
1290 * notification event.
1291 * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
1292 * in. It means that the consumer must poll the CQ again to
1293 * make sure it is empty to avoid missing an event because of a
1294 * race between requesting notification and an entry being
1295 * added to the CQ. This return value means it is possible
1296 * (but not guaranteed) that a work completion has been added
1297 * to the CQ since the last poll without triggering a
1298 * completion notification event.
1299 *
1300 * Note that IB_CQ_REPORT_MISSED_EVENTS is currently not supported.
1301 */
1302 int
ib_req_notify_cq(struct ib_cq * cq,enum ib_cq_notify_flags flags)1303 ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
1304 {
1305 ibt_cq_notify_flags_t notify_type;
1306 int rtn;
1307 ofs_client_t *ofs_client = cq->device->clnt_hdl;
1308
1309 ofs_lock_enter(&ofs_client->lock);
1310 if (cq->device->reg_state != IB_DEV_OPEN) {
1311 ofs_lock_exit(&ofs_client->lock);
1312 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1313 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1314 return (-ENXIO);
1315 }
1316
1317 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1318 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1319
1320 switch (flags & IB_CQ_SOLICITED_MASK) {
1321 case IB_CQ_SOLICITED:
1322 notify_type = IBT_NEXT_SOLICITED;
1323 break;
1324 case IB_CQ_NEXT_COMP:
1325 notify_type = IBT_NEXT_COMPLETION;
1326 break;
1327 default:
1328 /* Currently only two flags are supported */
1329 ofs_lock_exit(&ofs_client->lock);
1330 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1331 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => invalid flag",
1332 cq, flags);
1333 return (-EINVAL);
1334 }
1335
1336 rtn = ibt_enable_cq_notify(cq->ibt_cq, notify_type);
1337 ofs_lock_exit(&ofs_client->lock);
1338
1339 if (rtn == IBT_SUCCESS) {
1340 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1341 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x rtn: 0x%x",
1342 cq, flags, rtn);
1343 return (0);
1344 }
1345
1346 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1347 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => ibt_enable_cq_notify "
1348 "failed w/ 0x%x", cq, flags, rtn);
1349
1350 switch (rtn) {
1351 case IBT_HCA_HDL_INVALID:
1352 case IBT_CQ_HDL_INVALID:
1353 case IBT_CQ_NOTIFY_TYPE_INVALID:
1354 return (-EINVAL);
1355 default:
1356 return (-EIO);
1357 }
1358 }
1359
/*
 * Legal QP state transitions, indexed by [current state][next state]
 * (mirrors the Linux OFED transition table used by ib_modify_qp_is_ok()).
 * For each legal transition, req_param/opt_param give, per QP type, the
 * ib_qp_attr_mask bits that are required/optional for that transition.
 * Entries not listed default to zero-initialized, i.e. .valid == 0.
 */
static const struct {
	int			valid;
	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETY + 1];
	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETY + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {

	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
	},
	[IB_QPS_INIT]  = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
		[IB_QPS_RTR]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UC]  = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN),
				[IB_QPT_RC]  = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN |
				    IB_QP_MAX_DEST_RD_ATOMIC |
				    IB_QP_MIN_RNR_TIMER),
			},
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_RC]  = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTR]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD]  = IB_QP_SQ_PSN,
				[IB_QPT_UC]  = IB_QP_SQ_PSN,
				[IB_QPT_RC]  = (IB_QP_TIMEOUT |
				    IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
				    IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC),
				[IB_QPT_SMI] = IB_QP_SQ_PSN,
				[IB_QPT_GSI] = IB_QP_SQ_PSN,
			},
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC]  = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTS]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC]  = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
			}
		},
	},
	[IB_QPS_SQD]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC]  = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_AV | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC]  = (IB_QP_PORT | IB_QP_AV |
				    IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
				    IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC |
				    IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_SQE]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD]  = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC]  = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 }
	}
};
1528
1529 static inline int
ib_modify_qp_is_ok(enum ib_qp_state cur_state,enum ib_qp_state next_state,enum ib_qp_type type,enum ib_qp_attr_mask mask)1530 ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1531 enum ib_qp_type type, enum ib_qp_attr_mask mask)
1532 {
1533 enum ib_qp_attr_mask req_param, opt_param;
1534
1535 if (cur_state < 0 || cur_state > IB_QPS_ERR ||
1536 next_state < 0 || next_state > IB_QPS_ERR) {
1537 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1538 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1539 "qp_type: %d, attr_mask: 0x%x => invalid state(1)",
1540 cur_state, next_state, type, mask);
1541 return (0);
1542 }
1543
1544 if (mask & IB_QP_CUR_STATE &&
1545 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1546 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) {
1547 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1548 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1549 "qp_type: %d, attr_mask: 0x%x => invalid state(2)",
1550 cur_state, next_state, type, mask);
1551 return (0);
1552 }
1553
1554 if (!qp_state_table[cur_state][next_state].valid) {
1555 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1556 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1557 "qp_type: %d, attr_mask: 0x%x => state is not valid",
1558 cur_state, next_state, type, mask);
1559 return (0);
1560 }
1561
1562 req_param = qp_state_table[cur_state][next_state].req_param[type];
1563 opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1564
1565 if ((mask & req_param) != req_param) {
1566 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1567 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1568 "qp_type: %d, attr_mask: 0x%x => "
1569 "required param doesn't match. req_param = 0x%x",
1570 cur_state, next_state, type, mask, req_param);
1571 return (0);
1572 }
1573
1574 if (mask & ~(req_param | opt_param | IB_QP_STATE)) {
1575 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1576 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1577 "qp_type: %d, attr_mask: 0x%x => "
1578 "unsupported options. req_param = 0x%x, opt_param = 0x%x",
1579 cur_state, next_state, type, mask, req_param, opt_param);
1580 return (0);
1581 }
1582
1583 return (1);
1584 }
1585
1586 static inline enum ib_qp_state
qp_current_state(ibt_qp_query_attr_t * qp_attr)1587 qp_current_state(ibt_qp_query_attr_t *qp_attr)
1588 {
1589 ASSERT(qp_attr->qp_info.qp_state != IBT_STATE_SQDRAIN);
1590 return (enum ib_qp_state)(qp_attr->qp_info.qp_state);
1591 }
1592
1593 static inline ibt_tran_srv_t
of2ibtf_qp_type(enum ib_qp_type type)1594 of2ibtf_qp_type(enum ib_qp_type type)
1595 {
1596 switch (type) {
1597 case IB_QPT_SMI:
1598 case IB_QPT_GSI:
1599 case IB_QPT_UD:
1600 return (IBT_UD_SRV);
1601 case IB_QPT_RC:
1602 return (IBT_RC_SRV);
1603 case IB_QPT_UC:
1604 return (IBT_UC_SRV);
1605 case IB_QPT_RAW_IPV6:
1606 return (IBT_RAWIP_SRV);
1607 case IB_QPT_RAW_ETY:
1608 default:
1609 ASSERT(type == IB_QPT_RAW_ETY);
1610 return (IBT_RAWETHER_SRV);
1611 }
1612 }
1613
1614 static inline void
set_av(struct ib_ah_attr * attr,ibt_cep_path_t * pathp)1615 set_av(struct ib_ah_attr *attr, ibt_cep_path_t *pathp)
1616 {
1617 ibt_adds_vect_t *av = &pathp->cep_adds_vect;
1618
1619 pathp->cep_hca_port_num = attr->port_num;
1620 av->av_srate = OF2IBTF_SRATE(attr->static_rate);
1621 av->av_srvl = attr->sl & 0xF;
1622 av->av_send_grh = attr->ah_flags & IB_AH_GRH ? 1 : 0;
1623
1624 if (av->av_send_grh) {
1625 av->av_dgid.gid_prefix =
1626 attr->grh.dgid.global.subnet_prefix;
1627 av->av_dgid.gid_guid =
1628 attr->grh.dgid.global.interface_id;
1629 av->av_flow = attr->grh.flow_label & 0xFFFFF;
1630 av->av_tclass = attr->grh.traffic_class;
1631 av->av_hop = attr->grh.hop_limit;
1632 av->av_sgid_ix = attr->grh.sgid_index;
1633 }
1634 av->av_dlid = attr->dlid;
1635 av->av_src_path = attr->src_path_bits;
1636 }
1637
1638 int
ib_modify_qp(struct ib_qp * qp,struct ib_qp_attr * attr,int attr_mask)1639 ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask)
1640 {
1641 enum ib_qp_state cur_state, new_state;
1642 ibt_hca_attr_t hattr;
1643 ibt_qp_query_attr_t qp_attr;
1644 ibt_qp_info_t modify_attr;
1645 ibt_cep_modify_flags_t flags;
1646 int rtn;
1647 ofs_client_t *ofs_client = qp->device->clnt_hdl;
1648
1649 ofs_lock_enter(&ofs_client->lock);
1650 if (qp->device->reg_state != IB_DEV_OPEN) {
1651 ofs_lock_exit(&ofs_client->lock);
1652 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1653 "ib_modify_qp: qp: 0x%p => invalid device state (%d)",
1654 qp, qp->device->reg_state);
1655 return (-ENXIO);
1656 }
1657
1658 rtn = ibt_query_hca(qp->device->hca_hdl, &hattr);
1659 if (rtn != IBT_SUCCESS) {
1660 ofs_lock_exit(&ofs_client->lock);
1661 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1662 "ib_modify_qp: qp: 0x%p, hca_hdl: 0x%p => "
1663 "ibt_query_hca() failed w/ %d",
1664 qp, qp->device->hca_hdl, rtn);
1665 return (-EIO);
1666 }
1667
1668 /* only one thread per qp is allowed during the qp modification */
1669 mutex_enter(&qp->lock);
1670
1671 /* Get the current QP attributes first */
1672 bzero(&qp_attr, sizeof (ibt_qp_query_attr_t));
1673 if ((rtn = ibt_query_qp(qp->ibt_qp, &qp_attr)) != IBT_SUCCESS) {
1674 mutex_exit(&qp->lock);
1675 ofs_lock_exit(&ofs_client->lock);
1676 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1677 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1678 "ibt_query_qp failed w/ 0x%x", qp, attr, attr_mask, rtn);
1679 return (-EIO);
1680 }
1681
1682 /* Get the current and new state for this QP */
1683 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state :
1684 qp_current_state(&qp_attr);
1685 new_state = attr_mask & IB_QP_STATE ? attr->qp_state :
1686 cur_state;
1687
1688 /* Sanity check of the current/new states */
1689 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1690 /* Linux OF returns 0 in this case */
1691 mutex_exit(&qp->lock);
1692 ofs_lock_exit(&ofs_client->lock);
1693 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1694 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1695 "invalid state (both of current/new states are RESET)",
1696 qp, attr, attr_mask);
1697 return (0);
1698 }
1699
1700 /*
1701 * Check if this modification request is supported with the new
1702 * and/or current state.
1703 */
1704 if (!ib_modify_qp_is_ok(cur_state, new_state, qp->qp_type, attr_mask)) {
1705 mutex_exit(&qp->lock);
1706 ofs_lock_exit(&ofs_client->lock);
1707 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1708 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1709 "invalid arguments",
1710 qp, attr, attr_mask);
1711 return (-EINVAL);
1712 }
1713
1714 /* Sanity checks */
1715 if (attr_mask & IB_QP_PORT && (attr->port_num == 0 ||
1716 attr->port_num > hattr.hca_nports)) {
1717 mutex_exit(&qp->lock);
1718 ofs_lock_exit(&ofs_client->lock);
1719 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1720 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1721 "invalid attr->port_num(%d), max_nports(%d)",
1722 qp, attr, attr_mask, attr->port_num, hattr.hca_nports);
1723 return (-EINVAL);
1724 }
1725
1726 if (attr_mask & IB_QP_PKEY_INDEX &&
1727 attr->pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1728 mutex_exit(&qp->lock);
1729 ofs_lock_exit(&ofs_client->lock);
1730 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1731 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1732 "invalid attr->pkey_index(%d), max_pkey_index(%d)",
1733 qp, attr, attr_mask, attr->pkey_index,
1734 hattr.hca_max_port_pkey_tbl_sz);
1735 return (-EINVAL);
1736 }
1737
1738 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1739 attr->max_rd_atomic > hattr.hca_max_rdma_out_qp) {
1740 mutex_exit(&qp->lock);
1741 ofs_lock_exit(&ofs_client->lock);
1742 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1743 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1744 "invalid attr->max_rd_atomic(0x%x), max_rdma_out_qp(0x%x)",
1745 qp, attr, attr_mask, attr->max_rd_atomic,
1746 hattr.hca_max_rdma_out_qp);
1747 return (-EINVAL);
1748 }
1749
1750 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1751 attr->max_dest_rd_atomic > hattr.hca_max_rdma_in_qp) {
1752 mutex_exit(&qp->lock);
1753 ofs_lock_exit(&ofs_client->lock);
1754 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1755 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1756 "invalid attr->max_dest_rd_atomic(0x%x), "
1757 "max_rdma_in_qp(0x%x)", qp, attr, attr_mask,
1758 attr->max_dest_rd_atomic, hattr.hca_max_rdma_in_qp);
1759 return (-EINVAL);
1760 }
1761
1762 /* copy the current setting */
1763 modify_attr = qp_attr.qp_info;
1764
1765 /*
1766 * Since it's already checked if the modification request matches
1767 * the new and/or current states, just assign both of states to
1768 * modify_attr here. The current state is required if qp_state
1769 * is RTR, but it's harmelss otherwise, so it's set always.
1770 */
1771 modify_attr.qp_current_state = OF2IBTF_STATE(cur_state);
1772 modify_attr.qp_state = OF2IBTF_STATE(new_state);
1773 modify_attr.qp_trans = of2ibtf_qp_type(qp->qp_type);
1774
1775 /* Convert OF modification requests into IBTF ones */
1776 flags = IBT_CEP_SET_STATE; /* IBTF needs IBT_CEP_SET_STATE */
1777 if (cur_state == IB_QPS_RESET &&
1778 new_state == IB_QPS_INIT) {
1779 flags |= IBT_CEP_SET_RESET_INIT;
1780 } else if (cur_state == IB_QPS_INIT &&
1781 new_state == IB_QPS_RTR) {
1782 flags |= IBT_CEP_SET_INIT_RTR;
1783 } else if (cur_state == IB_QPS_RTR &&
1784 new_state == IB_QPS_RTS) {
1785 flags |= IBT_CEP_SET_RTR_RTS;
1786 }
1787 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
1788 flags |= IBT_CEP_SET_SQD_EVENT;
1789 }
1790 if (attr_mask & IB_QP_ACCESS_FLAGS) {
1791 modify_attr.qp_flags &= ~(IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1792 IBT_CEP_ATOMIC);
1793 if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
1794 flags |= IBT_CEP_SET_RDMA_R;
1795 modify_attr.qp_flags |= IBT_CEP_RDMA_RD;
1796 }
1797 if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
1798 flags |= IBT_CEP_SET_RDMA_W;
1799 modify_attr.qp_flags |= IBT_CEP_RDMA_WR;
1800 }
1801 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
1802 flags |= IBT_CEP_SET_ATOMIC;
1803 modify_attr.qp_flags |= IBT_CEP_ATOMIC;
1804 }
1805 }
1806 if (attr_mask & IB_QP_PKEY_INDEX) {
1807 flags |= IBT_CEP_SET_PKEY_IX;
1808 switch (qp->qp_type) {
1809 case IB_QPT_SMI:
1810 case IB_QPT_GSI:
1811 case IB_QPT_UD:
1812 modify_attr.qp_transport.ud.ud_pkey_ix =
1813 attr->pkey_index;
1814 break;
1815 case IB_QPT_RC:
1816 modify_attr.qp_transport.rc.rc_path.cep_pkey_ix =
1817 attr->pkey_index;
1818 break;
1819 case IB_QPT_UC:
1820 modify_attr.qp_transport.uc.uc_path.cep_pkey_ix =
1821 attr->pkey_index;
1822 break;
1823 default:
1824 /* This should never happen */
1825 mutex_exit(&qp->lock);
1826 ofs_lock_exit(&ofs_client->lock);
1827 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1828 "ib_modify_qp(IB_QP_PKEY_INDEX): qp: 0x%p, "
1829 "attr: 0x%p, attr_mask: 0x%x => "
1830 "invalid qp->qp_type(%d)",
1831 qp, attr, attr_mask, qp->qp_type);
1832 return (-EINVAL);
1833 }
1834 }
1835 if (attr_mask & IB_QP_PORT) {
1836 flags |= IBT_CEP_SET_PORT;
1837 switch (qp->qp_type) {
1838 case IB_QPT_SMI:
1839 case IB_QPT_GSI:
1840 case IB_QPT_UD:
1841 modify_attr.qp_transport.ud.ud_port = attr->port_num;
1842 break;
1843 case IB_QPT_RC:
1844 modify_attr.qp_transport.rc.rc_path.cep_hca_port_num =
1845 attr->port_num;
1846 break;
1847 case IB_QPT_UC:
1848 modify_attr.qp_transport.uc.uc_path.cep_hca_port_num =
1849 attr->port_num;
1850 break;
1851 default:
1852 /* This should never happen */
1853 mutex_exit(&qp->lock);
1854 ofs_lock_exit(&ofs_client->lock);
1855 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1856 "ib_modify_qp(IB_QP_PORT): qp: 0x%p, "
1857 "attr: 0x%p, attr_mask: 0x%x => "
1858 "invalid qp->qp_type(%d)",
1859 qp, attr, attr_mask, qp->qp_type);
1860 return (-EINVAL);
1861 }
1862 }
1863 if (attr_mask & IB_QP_QKEY) {
1864 ASSERT(qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_SMI ||
1865 qp->qp_type == IB_QPT_GSI);
1866 flags |= IBT_CEP_SET_QKEY;
1867 modify_attr.qp_transport.ud.ud_qkey = attr->qkey;
1868 }
1869 if (attr_mask & IB_QP_AV) {
1870 flags |= IBT_CEP_SET_ADDS_VECT;
1871 switch (qp->qp_type) {
1872 case IB_QPT_RC:
1873 set_av(&attr->ah_attr,
1874 &modify_attr.qp_transport.rc.rc_path);
1875 break;
1876 case IB_QPT_UC:
1877 set_av(&attr->ah_attr,
1878 &modify_attr.qp_transport.uc.uc_path);
1879 break;
1880 case IB_QPT_SMI:
1881 case IB_QPT_GSI:
1882 case IB_QPT_UD:
1883 default:
1884 /* This should never happen */
1885 mutex_exit(&qp->lock);
1886 ofs_lock_exit(&ofs_client->lock);
1887 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1888 "ib_modify_qp(IB_QP_AV): qp: 0x%p, "
1889 "attr: 0x%p, attr_mask: 0x%x => "
1890 "invalid qp->qp_type(%d)",
1891 qp, attr, attr_mask, qp->qp_type);
1892 return (-EINVAL);
1893 }
1894 }
1895 if (attr_mask & IB_QP_PATH_MTU) {
1896 switch (qp->qp_type) {
1897 case IB_QPT_RC:
1898 modify_attr.qp_transport.rc.rc_path_mtu =
1899 OF2IBTF_PATH_MTU(attr->path_mtu);
1900 break;
1901 case IB_QPT_UC:
1902 modify_attr.qp_transport.uc.uc_path_mtu =
1903 OF2IBTF_PATH_MTU(attr->path_mtu);
1904 break;
1905 case IB_QPT_SMI:
1906 case IB_QPT_GSI:
1907 case IB_QPT_UD:
1908 default:
1909 /* nothing to do */
1910 break;
1911 }
1912 }
1913 if (attr_mask & IB_QP_TIMEOUT && qp->qp_type == IB_QPT_RC) {
1914 flags |= IBT_CEP_SET_TIMEOUT;
1915 modify_attr.qp_transport.rc.rc_path.cep_timeout =
1916 attr->timeout;
1917 }
1918 if (attr_mask & IB_QP_RETRY_CNT && qp->qp_type == IB_QPT_RC) {
1919 flags |= IBT_CEP_SET_RETRY;
1920 modify_attr.qp_transport.rc.rc_retry_cnt =
1921 attr->retry_cnt & 0x7;
1922 }
1923 if (attr_mask & IB_QP_RNR_RETRY && qp->qp_type == IB_QPT_RC) {
1924 flags |= IBT_CEP_SET_RNR_NAK_RETRY;
1925 modify_attr.qp_transport.rc.rc_rnr_retry_cnt =
1926 attr->rnr_retry & 0x7;
1927 }
1928 if (attr_mask & IB_QP_RQ_PSN) {
1929 switch (qp->qp_type) {
1930 case IB_QPT_RC:
1931 modify_attr.qp_transport.rc.rc_rq_psn =
1932 attr->rq_psn & 0xFFFFFF;
1933 break;
1934 case IB_QPT_UC:
1935 modify_attr.qp_transport.uc.uc_rq_psn =
1936 attr->rq_psn & 0xFFFFFF;
1937 break;
1938 case IB_QPT_SMI:
1939 case IB_QPT_GSI:
1940 case IB_QPT_UD:
1941 default:
1942 /* nothing to do */
1943 break;
1944 }
1945 }
1946 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
1947 if (attr->max_rd_atomic) {
1948 flags |= IBT_CEP_SET_RDMARA_OUT;
1949 modify_attr.qp_transport.rc.rc_rdma_ra_out =
1950 attr->max_rd_atomic;
1951 }
1952 }
1953 if (attr_mask & IB_QP_ALT_PATH) {
1954 /* Sanity checks */
1955 if (attr->alt_port_num == 0 ||
1956 attr->alt_port_num > hattr.hca_nports) {
1957 mutex_exit(&qp->lock);
1958 ofs_lock_exit(&ofs_client->lock);
1959 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1960 "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1961 "attr_mask: 0x%x => invalid attr->alt_port_num"
1962 "(%d), max_nports(%d)",
1963 qp, attr, attr_mask, attr->alt_port_num,
1964 hattr.hca_nports);
1965 return (-EINVAL);
1966 }
1967 if (attr->alt_pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1968 mutex_exit(&qp->lock);
1969 ofs_lock_exit(&ofs_client->lock);
1970 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1971 "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1972 "attr_mask: 0x%x => invalid attr->alt_pkey_index"
1973 "(%d), max_port_key_index(%d)",
1974 qp, attr, attr_mask, attr->alt_pkey_index,
1975 hattr.hca_max_port_pkey_tbl_sz);
1976 return (-EINVAL);
1977 }
1978 flags |= IBT_CEP_SET_ALT_PATH;
1979 switch (qp->qp_type) {
1980 case IB_QPT_RC:
1981 modify_attr.qp_transport.rc.rc_alt_path.
1982 cep_pkey_ix = attr->alt_pkey_index;
1983 modify_attr.qp_transport.rc.rc_alt_path.
1984 cep_hca_port_num = attr->alt_port_num;
1985 set_av(&attr->alt_ah_attr,
1986 &modify_attr.qp_transport.rc.rc_alt_path);
1987 modify_attr.qp_transport.rc.rc_alt_path.
1988 cep_timeout = attr->alt_timeout;
1989 break;
1990 case IB_QPT_UC:
1991 modify_attr.qp_transport.uc.uc_alt_path.
1992 cep_pkey_ix = attr->alt_pkey_index;
1993 modify_attr.qp_transport.uc.uc_alt_path.
1994 cep_hca_port_num = attr->alt_port_num;
1995 set_av(&attr->alt_ah_attr,
1996 &modify_attr.qp_transport.uc.uc_alt_path);
1997 modify_attr.qp_transport.uc.uc_alt_path.
1998 cep_timeout = attr->alt_timeout;
1999 break;
2000 case IB_QPT_SMI:
2001 case IB_QPT_GSI:
2002 case IB_QPT_UD:
2003 default:
2004 /* This should never happen */
2005 mutex_exit(&qp->lock);
2006 ofs_lock_exit(&ofs_client->lock);
2007 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2008 "ib_modify_qp(IB_QP_ALT_PATH): qp: 0x%p, "
2009 "attr: 0x%p, attr_mask: 0x%x => "
2010 "invalid qp->qp_type(%d)",
2011 qp, attr, attr_mask, qp->qp_type);
2012 return (-EINVAL);
2013 }
2014 }
2015 if (attr_mask & IB_QP_MIN_RNR_TIMER && qp->qp_type == IB_QPT_RC) {
2016 flags |= IBT_CEP_SET_MIN_RNR_NAK;
2017 modify_attr.qp_transport.rc.rc_min_rnr_nak =
2018 attr->min_rnr_timer & 0x1F;
2019 }
2020 if (attr_mask & IB_QP_SQ_PSN) {
2021 switch (qp->qp_type) {
2022 case IB_QPT_SMI:
2023 case IB_QPT_GSI:
2024 case IB_QPT_UD:
2025 modify_attr.qp_transport.ud.ud_sq_psn =
2026 attr->sq_psn;
2027 break;
2028 case IB_QPT_RC:
2029 modify_attr.qp_transport.rc.rc_sq_psn =
2030 attr->sq_psn;
2031 break;
2032 case IB_QPT_UC:
2033 modify_attr.qp_transport.uc.uc_sq_psn =
2034 attr->sq_psn;
2035 break;
2036 default:
2037 /* This should never happen */
2038 mutex_exit(&qp->lock);
2039 ofs_lock_exit(&ofs_client->lock);
2040 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2041 "ib_modify_qp(IB_QP_SQ_PSN): qp: 0x%p, "
2042 "attr: 0x%p, attr_mask: 0x%x => "
2043 "invalid qp->qp_type(%d)",
2044 qp, attr, attr_mask, qp->qp_type);
2045 return (-EINVAL);
2046 }
2047 }
2048 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
2049 /* Linux OF sets the value if max_dest_rd_atomic is not zero */
2050 if (attr->max_dest_rd_atomic) {
2051 flags |= IBT_CEP_SET_RDMARA_IN;
2052 modify_attr.qp_transport.rc.rc_rdma_ra_in =
2053 attr->max_dest_rd_atomic;
2054 }
2055 }
2056 if (attr_mask & IB_QP_PATH_MIG_STATE) {
2057 flags |= IBT_CEP_SET_MIG;
2058 switch (qp->qp_type) {
2059 case IB_QPT_RC:
2060 modify_attr.qp_transport.rc.rc_mig_state =
2061 OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2062 break;
2063 case IB_QPT_UC:
2064 modify_attr.qp_transport.uc.uc_mig_state =
2065 OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2066 break;
2067 case IB_QPT_SMI:
2068 case IB_QPT_GSI:
2069 case IB_QPT_UD:
2070 default:
2071 /* This should never happen */
2072 mutex_exit(&qp->lock);
2073 ofs_lock_exit(&ofs_client->lock);
2074 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2075 "ib_modify_qp(IB_QP_PATH_MIG_STATE): qp: 0x%p, "
2076 "attr: 0x%p, attr_mask: 0x%x => "
2077 "invalid qp->qp_type(%d)",
2078 qp, attr, attr_mask, qp->qp_type);
2079 return (-EINVAL);
2080 }
2081 }
2082 if (attr_mask & IB_QP_CAP) {
2083 /* IB_QP_CAP is not supported */
2084 mutex_exit(&qp->lock);
2085 ofs_lock_exit(&ofs_client->lock);
2086 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2087 "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
2088 "attr_mask: 0x%x => IB_QP_CAP is not supported",
2089 qp, attr, attr_mask);
2090 return (-EINVAL);
2091 }
2092 if (attr_mask & IB_QP_DEST_QPN) {
2093 switch (qp->qp_type) {
2094 case IB_QPT_RC:
2095 modify_attr.qp_transport.rc.rc_dst_qpn =
2096 attr->dest_qp_num;
2097 break;
2098 case IB_QPT_UC:
2099 modify_attr.qp_transport.uc.uc_dst_qpn =
2100 attr->dest_qp_num;
2101 break;
2102 case IB_QPT_SMI:
2103 case IB_QPT_GSI:
2104 case IB_QPT_UD:
2105 default:
2106 /* This should never happen */
2107 mutex_exit(&qp->lock);
2108 ofs_lock_exit(&ofs_client->lock);
2109 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2110 "ib_modify_qp(IB_QP_DEST_PSN): qp: 0x%p, "
2111 "attr: 0x%p, attr_mask: 0x%x => "
2112 "invalid qp->qp_type(%d)",
2113 qp, attr, attr_mask, qp->qp_type);
2114 return (-EINVAL);
2115 }
2116 }
2117
2118 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2119 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x, "
2120 "flags: 0x%x, modify_attr: 0x%p",
2121 qp, attr, attr_mask, flags, &modify_attr);
2122
2123 /* Modify the QP attributes */
2124 rtn = ibt_modify_qp(qp->ibt_qp, flags, &modify_attr, NULL);
2125 if (rtn == IBT_SUCCESS) {
2126 mutex_exit(&qp->lock);
2127 ofs_lock_exit(&ofs_client->lock);
2128 return (0);
2129 }
2130 mutex_exit(&qp->lock);
2131 ofs_lock_exit(&ofs_client->lock);
2132
2133 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2134 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
2135 "ibt_modify_qp failed w/ %d, flags: 0x%x",
2136 qp, attr, attr_mask, rtn, flags);
2137
2138 switch (rtn) {
2139 case IBT_HCA_HDL_INVALID:
2140 case IBT_QP_HDL_INVALID:
2141 case IBT_QP_SRV_TYPE_INVALID:
2142 case IBT_QP_STATE_INVALID:
2143 case IBT_HCA_PORT_INVALID:
2144 case IBT_PKEY_IX_ILLEGAL:
2145 return (-EINVAL);
2146 default:
2147 return (-EIO);
2148 }
2149 }
2150
2151 static inline enum ib_wc_status
ibt2of_wc_status(ibt_wc_status_t status)2152 ibt2of_wc_status(ibt_wc_status_t status)
2153 {
2154 switch (status) {
2155 case IBT_WC_LOCAL_LEN_ERR:
2156 return (IB_WC_LOC_LEN_ERR);
2157 case IBT_WC_LOCAL_CHAN_OP_ERR:
2158 return (IB_WC_LOC_QP_OP_ERR);
2159 case IBT_WC_LOCAL_PROTECT_ERR:
2160 return (IB_WC_LOC_PROT_ERR);
2161 case IBT_WC_WR_FLUSHED_ERR:
2162 return (IB_WC_WR_FLUSH_ERR);
2163 case IBT_WC_MEM_WIN_BIND_ERR:
2164 return (IB_WC_MW_BIND_ERR);
2165 case IBT_WC_BAD_RESPONSE_ERR:
2166 return (IB_WC_BAD_RESP_ERR);
2167 case IBT_WC_LOCAL_ACCESS_ERR:
2168 return (IB_WC_LOC_ACCESS_ERR);
2169 case IBT_WC_REMOTE_INVALID_REQ_ERR:
2170 return (IB_WC_REM_INV_REQ_ERR);
2171 case IBT_WC_REMOTE_ACCESS_ERR:
2172 return (IB_WC_REM_ACCESS_ERR);
2173 case IBT_WC_REMOTE_OP_ERR:
2174 return (IB_WC_REM_OP_ERR);
2175 case IBT_WC_TRANS_TIMEOUT_ERR:
2176 return (IB_WC_RETRY_EXC_ERR);
2177 case IBT_WC_RNR_NAK_TIMEOUT_ERR:
2178 return (IB_WC_RNR_RETRY_EXC_ERR);
2179 case IBT_WC_SUCCESS:
2180 default:
2181 /* Hermon doesn't support EEC yet */
2182 ASSERT(status == IBT_WC_SUCCESS);
2183 return (IB_WC_SUCCESS);
2184 }
2185 }
2186
2187 static inline enum ib_wc_opcode
ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)2188 ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)
2189 {
2190 switch (wc_type) {
2191 case IBT_WRC_SEND:
2192 return (IB_WC_SEND);
2193 case IBT_WRC_RDMAR:
2194 return (IB_WC_RDMA_READ);
2195 case IBT_WRC_RDMAW:
2196 return (IB_WC_RDMA_WRITE);
2197 case IBT_WRC_CSWAP:
2198 return (IB_WC_COMP_SWAP);
2199 case IBT_WRC_FADD:
2200 return (IB_WC_FETCH_ADD);
2201 case IBT_WRC_BIND:
2202 return (IB_WC_BIND_MW);
2203 case IBT_WRC_RECV:
2204 return (IB_WC_RECV);
2205 case IBT_WRC_RECV_RDMAWI:
2206 default:
2207 ASSERT(wc_type == IBT_WRC_RECV_RDMAWI);
2208 return (IB_WC_RECV_RDMA_WITH_IMM);
2209 }
2210 }
2211
2212 static inline int
ibt2of_wc_flags(ibt_wc_flags_t wc_flags)2213 ibt2of_wc_flags(ibt_wc_flags_t wc_flags)
2214 {
2215 return (wc_flags & ~IBT_WC_CKSUM_OK);
2216 }
2217
2218 static inline void
set_wc(ibt_wc_t * ibt_wc,struct ib_wc * wc)2219 set_wc(ibt_wc_t *ibt_wc, struct ib_wc *wc)
2220 {
2221 wc->wr_id = ibt_wc->wc_id;
2222 wc->status = ibt2of_wc_status(ibt_wc->wc_status);
2223 /* opcode can be undefined if status is not success */
2224 if (wc->status == IB_WC_SUCCESS) {
2225 wc->opcode = ibt2of_wc_opcode(ibt_wc->wc_type);
2226 }
2227 wc->vendor_err = 0; /* not supported */
2228 wc->byte_len = ibt_wc->wc_bytes_xfer;
2229 wc->qp = NULL; /* not supported */
2230 wc->imm_data = htonl(ibt_wc->wc_immed_data);
2231 wc->src_qp = ibt_wc->wc_qpn;
2232 wc->wc_flags = ibt2of_wc_flags(ibt_wc->wc_flags);
2233 wc->pkey_index = ibt_wc->wc_pkey_ix;
2234 wc->slid = ibt_wc->wc_slid;
2235 wc->sl = ibt_wc->wc_sl;
2236 wc->dlid_path_bits = ibt_wc->wc_path_bits;
2237 wc->port_num = 0; /* not supported */
2238 }
2239
2240 /*
2241 * ib_poll_cq - poll a CQ for completion(s)
2242 * @cq:the CQ being polled
2243 * @num_entries:maximum number of completions to return
2244 * @wc:array of at least @num_entries &struct ib_wc where completions
2245 * will be returned
2246 *
2247 * Poll a CQ for (possibly multiple) completions. If the return value
2248 * is < 0, an error occurred. If the return value is >= 0, it is the
2249 * number of completions returned. If the return value is
2250 * non-negative and < num_entries, then the CQ was emptied.
2251 *
 * Note that the following three members in struct ib_wc are not supported
 * currently, and their values are always either 0 or NULL.
2254 * u32 vendor_err;
2255 * struct ib_qp *qp;
2256 * u8 port_num;
2257 */
2258 int
ib_poll_cq(struct ib_cq * cq,int num_entries,struct ib_wc * wc)2259 ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
2260 {
2261 ibt_wc_t ibt_wc;
2262 int npolled;
2263 ibt_status_t rtn;
2264 ofs_client_t *ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
2265
2266 ofs_lock_enter(&ofs_client->lock);
2267 if (cq->device->reg_state != IB_DEV_OPEN) {
2268 ofs_lock_exit(&ofs_client->lock);
2269 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2270 "ib_poll_cq: cq: 0x%p => invalid device state (%d)",
2271 cq, cq->device->reg_state);
2272 return (-ENXIO);
2273 }
2274
2275 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2276 "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p, "
2277 "ibt_cq: 0x%p, ibt_wc: 0x%p",
2278 cq, num_entries, wc, cq->ibt_cq, &ibt_wc);
2279
2280 /* only one thread per cq is allowed during ibt_poll_cq() */
2281 mutex_enter(&cq->lock);
2282 for (npolled = 0; npolled < num_entries; ++npolled) {
2283 bzero(&ibt_wc, sizeof (ibt_wc_t));
2284 rtn = ibt_poll_cq(cq->ibt_cq, &ibt_wc, 1, NULL);
2285 if (rtn != IBT_SUCCESS) {
2286 break;
2287 }
2288 /* save this result to struct ib_wc */
2289 set_wc(&ibt_wc, wc + npolled);
2290 }
2291 mutex_exit(&cq->lock);
2292 ofs_lock_exit(&ofs_client->lock);
2293
2294 if (rtn == IBT_SUCCESS || rtn == IBT_CQ_EMPTY) {
2295 return (npolled);
2296 }
2297
2298 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2299 "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p => "
2300 "ibt_poll_cq failed w/ %d, npolled = %d",
2301 cq, num_entries, wc, rtn, npolled);
2302
2303 switch (rtn) {
2304 case IBT_HCA_HDL_INVALID:
2305 case IBT_CQ_HDL_INVALID:
2306 case IBT_INVALID_PARAM:
2307 return (-EINVAL);
2308 default:
2309 return (-EIO);
2310 }
2311 }
2312
2313 ibt_hca_hdl_t
ib_get_ibt_hca_hdl(struct ib_device * device)2314 ib_get_ibt_hca_hdl(struct ib_device *device)
2315 {
2316 return (device->hca_hdl);
2317 }
2318
2319 ibt_channel_hdl_t
ib_get_ibt_channel_hdl(struct rdma_cm_id * cm)2320 ib_get_ibt_channel_hdl(struct rdma_cm_id *cm)
2321 {
2322 return (cm->qp == NULL ? NULL : cm->qp->ibt_qp);
2323 }
2324