1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /* Solaris Open Fabric kernel verbs */
26
27 #include <sys/types.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/modctl.h>
31 #include <sys/ib/clients/of/rdma/ib_verbs.h>
32 #include <sys/ib/clients/of/rdma/ib_addr.h>
33 #include <sys/ib/clients/of/rdma/rdma_cm.h>
34 #include <sys/ib/clients/of/sol_ofs/sol_kverb_impl.h>
35
36 static void *statep;
37 char *sol_kverbs_dbg_str = "sol_kverbs";
38
39 static llist_head_t client_list = LLIST_HEAD_INIT(client_list);
40 kmutex_t clist_lock; /* mutex for client_list */
41
42 static void ofs_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
43 ibt_async_event_t *);
44
45 /*
46 * set ibt_client_t members. clnt->ib_client must be set before
47 * this func is called.
48 */
49 static int
alloc_ibt_client(ofs_client_t * clnt)50 alloc_ibt_client(ofs_client_t *clnt)
51 {
52 int namelen;
53 ASSERT(clnt->ib_client != NULL);
54
55 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
56 "alloc_ibt_client: client: 0x%p", clnt);
57
58 /*
59 * double-check the name string. if it's longer than MAXNAMELEN
60 * including the string terminator, assuming the name is invalid,
61 * return EINVAL.
62 */
63 namelen = strlen(clnt->ib_client->name);
64 if (namelen >= MAXNAMELEN) {
65 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
66 "alloc_ibt_client: client: 0x%p => "
67 "namelen(%d) is larger than MAXNAMELEN", clnt, namelen);
68 return (-EINVAL);
69 }
70 clnt->ibt_client.mi_clnt_name = kmem_zalloc(namelen + 1, KM_NOSLEEP);
71 if (clnt->ibt_client.mi_clnt_name == NULL) {
72 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
73 "alloc_ibt_client: client: 0x%p => "
74 "no sufficient memory", clnt);
75 return (-ENOMEM);
76 }
77 bcopy(clnt->ib_client->name, clnt->ibt_client.mi_clnt_name, namelen);
78 clnt->ibt_client.mi_ibt_version = IBTI_V_CURR;
79 if (clnt->ib_client->dip) {
80 clnt->ibt_client.mi_clnt_class = IBT_GENERIC;
81 } else {
82 clnt->ibt_client.mi_clnt_class = IBT_GENERIC_MISC;
83 }
84 clnt->ibt_client.mi_async_handler = ofs_async_handler;
85
86 return (0);
87 }
88
89 static void
free_ibt_client(ofs_client_t * clnt)90 free_ibt_client(ofs_client_t *clnt)
91 {
92 int namelen = strlen(clnt->ib_client->name);
93 ASSERT(namelen < MAXNAMELEN);
94
95 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
96 "free_ibt_client: client: 0x%p", clnt);
97
98 kmem_free(clnt->ibt_client.mi_clnt_name, namelen + 1);
99 clnt->ibt_client.mi_clnt_name = NULL;
100 }
101
102 /*
103 * get_device() returns a pointer to struct ib_devcie with
104 * the same guid as one passed to the function.
105 */
106 static ib_device_t *
get_device(ofs_client_t * ofs_client,ib_guid_t guid)107 get_device(ofs_client_t *ofs_client, ib_guid_t guid)
108 {
109 ib_device_t *device;
110 llist_head_t *entry;
111
112 ASSERT(RW_LOCK_HELD(&ofs_client->lock));
113
114 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
115 "get_device: client: 0x%p, guid:0x%p",
116 ofs_client, (void *)(uintptr_t)htonll(guid));
117
118 list_for_each(entry, &ofs_client->device_list) {
119 device = entry->ptr;
120 if (device->node_guid == htonll(guid)) {
121 ASSERT(device->reg_state == IB_DEV_CLOSE);
122 ASSERT(device->node_type == RDMA_NODE_IB_CA);
123 ASSERT(device->clnt_hdl == (ofs_client_p_t)ofs_client);
124 return (device);
125 }
126 }
127
128 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
129 "get_device: client: 0x%p, guid:0x%p => no match guid",
130 ofs_client, (void *)(uintptr_t)htonll(guid));
131
132 return (NULL);
133 }
134
135 /*
136 * ofs_async_handler() is a delegated function to handle asynchrnonous events,
137 * which dispatches each event to corresponding qp/cq handlers registered
138 * with ib_create_qp() and/or ib_create_cq().
139 */
140 static void
ofs_async_handler(void * clntp,ibt_hca_hdl_t hdl,ibt_async_code_t code,ibt_async_event_t * event)141 ofs_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
142 ibt_async_event_t *event)
143 {
144 ofs_client_t *ofs_client = (ofs_client_t *)clntp;
145 struct ib_event ib_event;
146 struct ib_qp *qpp;
147 struct ib_cq *cqp;
148
149
150 ASSERT(ofs_client != NULL);
151
152 cqp = event->ev_cq_hdl ? ibt_get_cq_private(event->ev_cq_hdl) : NULL;
153 qpp = event->ev_chan_hdl ?
154 ibt_get_qp_private(event->ev_chan_hdl) : NULL;
155
156 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
157 "ofs_async_handler: client: 0x%p, hca_hdl: 0x%p, code:0x%x, "
158 "event->qp: 0x%p, event->cq: 0x%p, event->srq: 0x%p "
159 "event->guid: 0x%p, event->port: 0x%x",
160 clntp, hdl, code, qpp, cqp, event->ev_srq_hdl,
161 (void *)(uintptr_t)event->ev_hca_guid, event->ev_port);
162
163 bzero(&ib_event, sizeof (struct ib_event));
164 switch (code) {
165 case IBT_EVENT_PATH_MIGRATED:
166 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
167 IB_EVENT_PATH_MIG);
168 return;
169 case IBT_EVENT_SQD:
170 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
171 IB_EVENT_SQ_DRAINED);
172 return;
173 case IBT_EVENT_COM_EST:
174 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
175 IB_EVENT_COMM_EST);
176 return;
177 case IBT_ERROR_CATASTROPHIC_CHAN:
178 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
179 IB_EVENT_QP_FATAL);
180 return;
181 case IBT_ERROR_INVALID_REQUEST_CHAN:
182 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
183 IB_EVENT_QP_REQ_ERR);
184 return;
185 case IBT_ERROR_ACCESS_VIOLATION_CHAN:
186 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
187 IB_EVENT_QP_ACCESS_ERR);
188 return;
189 case IBT_ERROR_PATH_MIGRATE_REQ:
190 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
191 IB_EVENT_PATH_MIG);
192 return;
193 case IBT_EVENT_EMPTY_CHAN:
194 FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
195 IB_EVENT_QP_LAST_WQE_REACHED);
196 return;
197 case IBT_ERROR_CQ:
198 FIRE_CQ_EVENT(ofs_client, hdl, ib_event, cqp,
199 IB_EVENT_CQ_ERR);
200 return;
201 case IBT_HCA_ATTACH_EVENT:
202 {
203 ib_device_t *device;
204 int rtn;
205
206 /* re-use the device once it was created */
207 rw_enter(&ofs_client->lock, RW_WRITER);
208 device = get_device(ofs_client, event->ev_hca_guid);
209 if (device == NULL) {
210 device = kmem_alloc(sizeof (ib_device_t), KM_SLEEP);
211 device->node_type = RDMA_NODE_IB_CA;
212 device->reg_state = IB_DEV_CLOSE;
213 device->clnt_hdl = (ofs_client_p_t)ofs_client;
214 device->node_guid = htonll(event->ev_hca_guid);
215 device->data = NULL;
216 /* add this HCA */
217 ofs_client->hca_num++;
218 llist_head_init(&device->list, device);
219 llist_add_tail(&device->list, &ofs_client->device_list);
220 }
221 device->hca_hdl = NULL;
222 device->local_dma_lkey = 0;
223 device->phys_port_cnt = 0;
224
225 /* open this HCA */
226 rtn = ibt_open_hca(ofs_client->ibt_hdl, event->ev_hca_guid,
227 &device->hca_hdl);
228 if (rtn == IBT_SUCCESS) {
229 ibt_hca_attr_t hattr;
230
231 ofs_client->hca_open_num++;
232 device->reg_state = IB_DEV_OPEN;
233 ibt_set_hca_private(device->hca_hdl, device);
234
235 rtn = ibt_query_hca(device->hca_hdl, &hattr);
236 if (rtn != IBT_SUCCESS) {
237 device->reg_state = IB_DEV_CLOSE;
238 rtn = ibt_close_hca(device->hca_hdl);
239 ASSERT(rtn == IBT_SUCCESS);
240 ofs_client->hca_open_num--;
241 return;
242 }
243
244 (void) sprintf(device->name, "%x:%x:%x",
245 hattr.hca_vendor_id, hattr.hca_device_id,
246 hattr.hca_version_id);
247 device->local_dma_lkey = hattr.hca_reserved_lkey;
248 device->phys_port_cnt = hattr.hca_nports;
249 ibt_set_hca_private(device->hca_hdl, device);
250
251 /* invoke client's callback */
252 if (ofs_client->ib_client->add) {
253 ofs_client->ib_client->add(device);
254 }
255 }
256 rw_exit(&ofs_client->lock);
257
258 return;
259 }
260 case IBT_HCA_DETACH_EVENT:
261 {
262 struct ib_device *device;
263
264 rw_enter(&ofs_client->lock, RW_WRITER);
265 device = ibt_get_hca_private(hdl);
266 if (device->reg_state == IB_DEV_OPEN) {
267 ibt_status_t rtn;
268 /* invoke client's callback */
269 if (ofs_client->ib_client->remove) {
270 ofs_client->ib_client->remove(device);
271 }
272 /* change the state only */
273 device->reg_state = IB_DEV_CLOSE;
274 /* close this HCA */
275 rtn = ibt_close_hca(device->hca_hdl);
276 ASSERT(rtn == IBT_SUCCESS);
277 ofs_client->hca_open_num--;
278 }
279 rw_exit(&ofs_client->lock);
280
281 return;
282 }
283 case IBT_EVENT_LIMIT_REACHED_SRQ:
284 case IBT_ERROR_CATASTROPHIC_SRQ:
285 default:
286 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
287 "sol_ofs does not support this event(0x%x).\n"
288 "\t clntp=0x%p, hca_hdl=0x%p, code=%d, eventp=0x%p\n",
289 code, clntp, hdl, code, event);
290 return;
291 }
292 }
293
294 /*
295 * ib_register_client - Register an IB client
296 * @client:Client to register
297 *
298 * Upper level users of the IB drivers can use ib_register_client() to
299 * register callbacks for IB device addition and removal. When an IB
300 * device is added, each registered client's add method will be called
301 * (in the order the clients were registered), and when a device is
302 * removed, each client's remove method will be called (in the reverse
303 * order that clients were registered). In addition, when
304 * ib_register_client() is called, the client will receive an add
305 * callback for all devices already registered.
306 *
307 * Note that struct ib_client should have a dip pointer to the client,
308 * which is different from the Linux implementation.
309 */
310 int
ib_register_client(struct ib_client * client)311 ib_register_client(struct ib_client *client)
312 {
313 uint_t i, nhcas; /* number of HCAs */
314 ib_guid_t *guidp;
315 ofs_client_t *ofs_client;
316 llist_head_t *entry, *tmp;
317 ib_device_t *device;
318 int rtn;
319
320 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
321 "ib_register_client: client: 0x%p", client);
322
323 /* get the number of HCAs on this system */
324 if ((nhcas = ibt_get_hca_list(&guidp)) == 0) {
325 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
326 "ib_register_client: client: 0x%p => no HCA", client);
327 return (-ENXIO);
328 }
329
330 /* allocate a new sol_ofs_client structure */
331 ofs_client = kmem_zalloc(sizeof (ofs_client_t), KM_NOSLEEP);
332 if (ofs_client == NULL) {
333 (void) ibt_free_hca_list(guidp, nhcas);
334 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
335 "ib_register_client: client: 0x%p => "
336 "no sufficient memory for ofs_client", client);
337 return (-ENOMEM);
338 }
339
340 /* set members */
341 ofs_client->ib_client = client;
342 if ((rtn = alloc_ibt_client(ofs_client)) != 0) {
343 kmem_free(ofs_client, sizeof (ofs_client_t));
344 (void) ibt_free_hca_list(guidp, nhcas);
345 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
346 "ib_register_client: client: 0x%p => "
347 "alloc_ibt_client failed w/ 0x%x", client, rtn);
348 return (rtn);
349 }
350 ofs_client->state = IB_OFS_CLNT_INITIALIZED;
351 llist_head_init(&ofs_client->device_list, NULL);
352 llist_head_init(&ofs_client->client_list, ofs_client);
353 rw_init(&ofs_client->lock, NULL, RW_DEFAULT, NULL);
354
355 /* initialize IB client */
356 rw_enter(&ofs_client->lock, RW_WRITER);
357 if (client->state != IB_CLNT_UNINITIALIZED) {
358 rw_exit(&ofs_client->lock);
359 kmem_free(ofs_client, sizeof (ofs_client_t));
360 (void) ibt_free_hca_list(guidp, nhcas);
361 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
362 "ib_register_client: client: 0x%p => "
363 "invalid client state(%d)", client, client->state);
364 return (-EPERM);
365 }
366
367 /* attach this client to IBTF */
368 rtn = ibt_attach(&ofs_client->ibt_client, client->dip, ofs_client,
369 &ofs_client->ibt_hdl);
370 if (rtn != IBT_SUCCESS) {
371 rw_exit(&ofs_client->lock);
372 free_ibt_client(ofs_client);
373 kmem_free(ofs_client, sizeof (ofs_client_t));
374 (void) ibt_free_hca_list(guidp, nhcas);
375 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
376 "ib_register_client: client: 0x%p => "
377 "ibt_attach failed w/ 0x%x", client, rtn);
378 return (-EINVAL);
379 }
380 client->clnt_hdl = (ofs_client_p_t)ofs_client;
381 client->state = IB_CLNT_INITIALIZED;
382
383 /* link this client */
384 mutex_enter(&clist_lock);
385 llist_add_tail(&ofs_client->client_list, &client_list);
386 mutex_exit(&clist_lock);
387
388 /* Open HCAs */
389 ofs_client->hca_num = nhcas;
390 for (i = 0; i < ofs_client->hca_num; i++) {
391 /* allocate the ib_device structure */
392 device = kmem_zalloc(sizeof (ib_device_t), KM_NOSLEEP);
393 if (device == NULL) {
394 rtn = -ENOMEM;
395 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
396 "ib_register_client: client: 0x%p => "
397 "no sufficient memory for ib_device", client);
398 goto err;
399 }
400 device->node_guid = htonll(guidp[i]);
401 device->node_type = RDMA_NODE_IB_CA;
402 device->reg_state = IB_DEV_CLOSE;
403 device->clnt_hdl = (ofs_client_p_t)ofs_client;
404 llist_head_init(&device->list, device);
405 llist_add_tail(&device->list, &ofs_client->device_list);
406
407 rtn = ibt_open_hca(ofs_client->ibt_hdl, guidp[i],
408 &device->hca_hdl);
409 if (rtn == IBT_SUCCESS) {
410 ibt_hca_attr_t hattr;
411
412 ofs_client->hca_open_num++;
413 device->reg_state = IB_DEV_OPEN;
414
415 rtn = ibt_query_hca(device->hca_hdl, &hattr);
416 if (rtn != IBT_SUCCESS) {
417 rtn = -EIO;
418 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
419 "ib_register_client: client: 0x%p,"
420 "hca_hdl: 0x%p ==> "
421 "ibt_query_hca() failed w/ %d",
422 client, device->hca_hdl, rtn);
423 goto err;
424 }
425
426 (void) sprintf(device->name, "%x:%x:%x",
427 hattr.hca_vendor_id, hattr.hca_device_id,
428 hattr.hca_version_id);
429 device->local_dma_lkey = hattr.hca_reserved_lkey;
430 device->phys_port_cnt = hattr.hca_nports;
431 ibt_set_hca_private(device->hca_hdl, device);
432
433 /* invoke client's callback */
434 if (client->add) {
435 client->add(device);
436 }
437 }
438 }
439 if (ofs_client->hca_open_num == 0) {
440 rtn = -ENXIO;
441 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
442 "ib_register_client: client: 0x%p => "
443 "no available HCA", client);
444 goto err;
445 }
446 rw_exit(&ofs_client->lock);
447
448 (void) ibt_free_hca_list(guidp, nhcas);
449 return (0);
450
451 err:
452 /* first close all open HCAs */
453 list_for_each(entry, &ofs_client->device_list) {
454 device = entry->ptr;
455 /*
456 * If it's open already, close it after the remove
457 * callback.
458 */
459 if (device->reg_state == IB_DEV_OPEN) {
460 ibt_status_t rtn;
461 /* invoke client's callback */
462 if (client->remove) {
463 client->remove(device);
464 }
465 device->reg_state = IB_DEV_CLOSE;
466 rtn = ibt_close_hca(device->hca_hdl);
467 ASSERT(rtn == IBT_SUCCESS);
468 ofs_client->hca_open_num--;
469 }
470 }
471 ASSERT(ofs_client->hca_open_num == 0);
472
473 /* then free the devices */
474 list_for_each_safe(entry, tmp, &ofs_client->device_list) {
475 device = entry->ptr;
476 /* de-link and free the device */
477 llist_del(entry);
478 kmem_free(device, sizeof (ib_device_t));
479 ofs_client->hca_num--;
480 }
481 ASSERT(ofs_client->hca_num == 0);
482
483 /* delink this client */
484 mutex_enter(&clist_lock);
485 llist_del(&ofs_client->client_list);
486 mutex_exit(&clist_lock);
487
488 /* detach the client */
489 client->clnt_hdl = NULL;
490 client->state = IB_CLNT_UNINITIALIZED;
491 (void) ibt_detach(ofs_client->ibt_hdl);
492 rw_exit(&ofs_client->lock);
493
494 /* free sol_ofs_client */
495 free_ibt_client(ofs_client);
496 kmem_free(ofs_client, sizeof (ofs_client_t));
497
498 (void) ibt_free_hca_list(guidp, nhcas);
499 return (rtn);
500 }
501
502 /*
503 * ib_unregister_client - Unregister an IB client
504 * @client:Client to unregister
505 *
506 * Upper level users use ib_unregister_client() to remove their client
507 * registration. When ib_unregister_client() is called, the client
508 * will receive a remove callback for each IB device still registered.
509 */
510 void
ib_unregister_client(struct ib_client * client)511 ib_unregister_client(struct ib_client *client)
512 {
513 ofs_client_t *ofs_client;
514 ib_device_t *device;
515 llist_head_t *entry, *tmp;
516
517 ASSERT(client->state == IB_CLNT_INITIALIZED &&
518 client->clnt_hdl != NULL);
519
520 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
521 "ib_unregister_client: client: 0x%p", client);
522
523 ofs_client = (ofs_client_t *)client->clnt_hdl;
524 rw_enter(&ofs_client->lock, RW_WRITER);
525
526 /* first close all open HCAs */
527 list_for_each(entry, &ofs_client->device_list) {
528 device = entry->ptr;
529 /*
530 * If it's open already, close it after the remove
531 * callback.
532 */
533 if (device->reg_state == IB_DEV_OPEN) {
534 ibt_status_t rtn;
535 /* invoke client's callback */
536 if (client->remove) {
537 client->remove(device);
538 }
539 device->reg_state = IB_DEV_CLOSE;
540 rtn = ibt_close_hca(device->hca_hdl);
541 if (rtn != IBT_SUCCESS)
542 SOL_OFS_DPRINTF_L3(
543 sol_kverbs_dbg_str,
544 "ib_unregister_client(%p) - "
545 "ibt_close_hca failed %d",
546 client, rtn);
547
548 ofs_client->hca_open_num--;
549 }
550 }
551 ASSERT(ofs_client->hca_open_num == 0);
552
553 /* then free the devices */
554 list_for_each_safe(entry, tmp, &ofs_client->device_list) {
555 device = entry->ptr;
556 /* de-link and free the device */
557 llist_del(entry);
558 kmem_free(device, sizeof (ib_device_t));
559 ofs_client->hca_num--;
560 }
561 ASSERT(ofs_client->hca_num == 0);
562
563 /* delink this client */
564 mutex_enter(&clist_lock);
565 llist_del(&ofs_client->client_list);
566 mutex_exit(&clist_lock);
567
568 /* detach the client */
569 client->clnt_hdl = NULL;
570 client->state = IB_CLNT_UNINITIALIZED;
571 (void) ibt_detach(ofs_client->ibt_hdl);
572 rw_exit(&ofs_client->lock);
573
574 /* free sol_ofs_client */
575 free_ibt_client(ofs_client);
576 kmem_free(ofs_client, sizeof (ofs_client_t));
577 }
578
579 /*
580 * ofs_lock_enter() and ofs_lock_exit() are used to avoid the recursive
581 * rwlock while the client callbacks are invoked.
582 *
583 * Note that the writer lock is used only in the client callback case,
584 * so that the kverb functions wanting to acquire the reader lock can
585 * safely ignore the reader lock if the writer lock is already held.
 * The writer lock shouldn't be used in any other place.
587 */
588 static inline void
ofs_lock_enter(krwlock_t * lock)589 ofs_lock_enter(krwlock_t *lock)
590 {
591 if (!RW_WRITE_HELD(lock)) {
592 rw_enter(lock, RW_READER);
593 }
594 }
595
596 static inline void
ofs_lock_exit(krwlock_t * lock)597 ofs_lock_exit(krwlock_t *lock)
598 {
599 if (!RW_WRITE_HELD(lock)) {
600 rw_exit(lock);
601 }
602 }
603
604 /*
605 * ib_get_client_data - Get IB client context
606 * @device:Device to get context for
607 * @client:Client to get context for
608 *
609 * ib_get_client_data() returns client context set with
610 * ib_set_client_data() and returns NULL if it's not found.
611 */
ib_get_client_data(struct ib_device * device,struct ib_client * client)612 void *ib_get_client_data(struct ib_device *device,
613 struct ib_client *client)
614 {
615 ofs_client_t *ofs_client;
616 struct ib_device *ib_device;
617 boolean_t found = B_FALSE;
618 llist_head_t *entry;
619 void *data;
620
621 ASSERT(device != 0 && client != 0);
622
623 ofs_client = (ofs_client_t *)client->clnt_hdl;
624 if (ofs_client == 0) {
625 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
626 "ib_get_client_data: device: 0x%p, client: 0x%p => "
627 "no ofs_client", device, client);
628 return (NULL);
629 }
630
631 ofs_lock_enter(&ofs_client->lock);
632 list_for_each(entry, &ofs_client->device_list) {
633 ib_device = entry->ptr;
634 if (ib_device->node_guid == device->node_guid) {
635 found = B_TRUE;
636 break;
637 }
638 }
639 if (!found) {
640 ofs_lock_exit(&ofs_client->lock);
641 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
642 "ib_get_client_data: device: 0x%p, client: 0x%p => "
643 "no ib_device found", device, client);
644 return (NULL);
645 }
646 data = ib_device->data;
647 ofs_lock_exit(&ofs_client->lock);
648
649 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
650 "ib_get_client_data: device: 0x%p, client: 0x%p",
651 device, client);
652
653 return (data);
654 }
655
656 /*
657 * ib_set_client_data - Set IB client context
658 * @device:Device to set context for
659 * @client:Client to set context for
660 * @data:Context to set
661 *
662 * ib_set_client_data() sets client context that can be retrieved with
663 * ib_get_client_data(). If the specified device is not found, the function
664 * returns w/o any operations.
665 */
ib_set_client_data(struct ib_device * device,struct ib_client * client,void * data)666 void ib_set_client_data(struct ib_device *device, struct ib_client *client,
667 void *data)
668 {
669 ofs_client_t *ofs_client;
670 struct ib_device *ib_device;
671 boolean_t found = B_FALSE;
672 llist_head_t *entry;
673
674 ASSERT(device != 0 && client != 0);
675
676 ofs_client = (ofs_client_t *)client->clnt_hdl;
677 if (ofs_client == 0) {
678 cmn_err(CE_WARN, "No client context found for %s/%s\n",
679 device->name, client->name);
680 return;
681 }
682
683 ofs_lock_enter(&ofs_client->lock);
684 list_for_each(entry, &ofs_client->device_list) {
685 ib_device = entry->ptr;
686 if (ib_device->node_guid == device->node_guid) {
687 found = B_TRUE;
688 break;
689 }
690 }
691 if (!found) {
692 cmn_err(CE_WARN, "No client context found for %s/%s\n",
693 device->name, client->name);
694 ofs_lock_exit(&ofs_client->lock);
695 return;
696 }
697 ib_device->data = data;
698 ofs_lock_exit(&ofs_client->lock);
699
700 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
701 "ib_set_client_data: device: 0x%p, client: 0x%p, "
702 "data: 0x%p", device, client, data);
703 }
704
705 /*
706 * ib_query_device - Query IB device attributes
707 * @device:Device to query
708 * @device_attr:Device attributes
709 *
710 * ib_query_device() returns the attributes of a device through the
711 * @device_attr pointer.
712 */
713 int
ib_query_device(struct ib_device * device,struct ib_device_attr * attr)714 ib_query_device(struct ib_device *device, struct ib_device_attr *attr)
715 {
716 ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl;
717 ibt_hca_attr_t hattr;
718 int rtn;
719
720 ofs_lock_enter(&ofs_client->lock);
721 if (device->reg_state != IB_DEV_OPEN) {
722 ofs_lock_exit(&ofs_client->lock);
723 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
724 "ib_query_device: device: 0x%p => "
725 "invalid device state (%d)", device, device->reg_state);
726 return (-ENXIO);
727 }
728 if ((rtn = ibt_query_hca(device->hca_hdl, &hattr)) != IBT_SUCCESS) {
729 ofs_lock_exit(&ofs_client->lock);
730 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
731 "ib_query_device: device: 0x%p => "
732 "ibt_query_hca failed w/ 0x%x", device, rtn);
733 return (-EIO);
734 }
735 ofs_lock_exit(&ofs_client->lock);
736
737 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
738 "ib_query_device: device: 0x%p, attr: 0x%p, rtn: 0x%p",
739 device, attr, rtn);
740
741 /* OF order is major.micro.minor, so keep it here */
742 attr->fw_ver = (uint64_t)hattr.hca_fw_major_version << 32 |
743 hattr.hca_fw_micro_version << 16 & 0xFFFF0000 |
744 hattr.hca_fw_minor_version & 0xFFFF;
745
746 attr->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
747 IB_DEVICE_PORT_ACTIVE_EVENT |
748 IB_DEVICE_SYS_IMAGE_GUID |
749 IB_DEVICE_RC_RNR_NAK_GEN;
750 if (hattr.hca_flags & IBT_HCA_PKEY_CNTR) {
751 attr->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
752 }
753 if (hattr.hca_flags & IBT_HCA_QKEY_CNTR) {
754 attr->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
755 }
756 if (hattr.hca_flags & IBT_HCA_AUTO_PATH_MIG) {
757 attr->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
758 }
759 if (hattr.hca_flags & IBT_HCA_AH_PORT_CHECK) {
760 attr->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
761 }
762
763 attr->vendor_id = hattr.hca_vendor_id;
764 attr->vendor_part_id = hattr.hca_device_id;
765 attr->hw_ver = hattr.hca_version_id;
766 attr->sys_image_guid = htonll(hattr.hca_si_guid);
767 attr->max_mr_size = ~0ull;
768 attr->page_size_cap = IBTF2OF_PGSZ(hattr.hca_page_sz);
769 attr->max_qp = hattr.hca_max_qp;
770 attr->max_qp_wr = hattr.hca_max_qp_sz;
771 attr->max_sge = hattr.hca_max_sgl;
772 attr->max_sge_rd = hattr.hca_max_rd_sgl;
773 attr->max_cq = hattr.hca_max_cq;
774 attr->max_cqe = hattr.hca_max_cq_sz;
775 attr->max_mr = hattr.hca_max_memr;
776 attr->max_pd = hattr.hca_max_pd;
777 attr->max_qp_rd_atom = hattr.hca_max_rdma_in_qp;
778 attr->max_qp_init_rd_atom = hattr.hca_max_rdma_in_qp;
779 attr->max_ee_rd_atom = hattr.hca_max_rdma_in_ee;
780 attr->max_ee_init_rd_atom = hattr.hca_max_rdma_in_ee;
781 attr->max_res_rd_atom = hattr.hca_max_rsc;
782 attr->max_srq = hattr.hca_max_srqs;
783 attr->max_srq_wr = hattr.hca_max_srqs_sz -1;
784 attr->max_srq_sge = hattr.hca_max_srq_sgl;
785 attr->local_ca_ack_delay = hattr.hca_local_ack_delay;
786 attr->atomic_cap = hattr.hca_flags & IBT_HCA_ATOMICS_GLOBAL ?
787 IB_ATOMIC_GLOB : (hattr.hca_flags & IBT_HCA_ATOMICS_HCA ?
788 IB_ATOMIC_HCA : IB_ATOMIC_NONE);
789 attr->max_ee = hattr.hca_max_eec;
790 attr->max_rdd = hattr.hca_max_rdd;
791 attr->max_mw = hattr.hca_max_mem_win;
792 attr->max_pkeys = hattr.hca_max_port_pkey_tbl_sz;
793 attr->max_raw_ipv6_qp = hattr.hca_max_ipv6_qp;
794 attr->max_raw_ethy_qp = hattr.hca_max_ether_qp;
795 attr->max_mcast_grp = hattr.hca_max_mcg;
796 attr->max_mcast_qp_attach = hattr.hca_max_qp_per_mcg;
797 attr->max_total_mcast_qp_attach = hattr.hca_max_mcg_qps;
798 attr->max_ah = hattr.hca_max_ah;
799 attr->max_fmr = hattr.hca_max_fmrs;
800 attr->max_map_per_fmr = hattr.hca_opaque9; /* hca_max_map_per_fmr */
801
802 return (0);
803 }
804
805 /* Protection domains */
806 struct ib_pd *
ib_alloc_pd(struct ib_device * device)807 ib_alloc_pd(struct ib_device *device)
808 {
809 ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl;
810 struct ib_pd *pd;
811 int rtn;
812
813 if ((pd = kmem_alloc(sizeof (struct ib_pd), KM_NOSLEEP)) == NULL) {
814 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
815 "ib_alloc_pd: device: 0x%p => no sufficient memory",
816 device);
817 return ((struct ib_pd *)-ENOMEM);
818 }
819
820 ofs_lock_enter(&ofs_client->lock);
821 if (device->reg_state != IB_DEV_OPEN) {
822 ofs_lock_exit(&ofs_client->lock);
823 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
824 "ib_alloc_pd: device: 0x%p => invalid device state (%d)",
825 device, device->reg_state);
826 kmem_free(pd, sizeof (struct ib_pd));
827 return ((struct ib_pd *)-ENXIO);
828 }
829
830 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
831 "ib_alloc_pd: device: 0x%p", device);
832
833 rtn = ibt_alloc_pd(device->hca_hdl, IBT_PD_NO_FLAGS, &pd->ibt_pd);
834 ofs_lock_exit(&ofs_client->lock);
835
836 if (rtn == IBT_SUCCESS) {
837 pd->device = device;
838 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
839 "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p, "
840 "rtn: 0x%x", device, pd, pd->ibt_pd, rtn);
841 return (pd);
842 }
843
844 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
845 "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p => "
846 "ibt_alloc_pd failed w/ 0x%x", device, pd, pd->ibt_pd, rtn);
847 kmem_free(pd, sizeof (struct ib_pd));
848
849 switch (rtn) {
850 case IBT_INSUFF_RESOURCE:
851 return ((struct ib_pd *)-ENOMEM);
852 case IBT_HCA_HDL_INVALID:
853 return ((struct ib_pd *)-EFAULT);
854 default:
855 return ((struct ib_pd *)-EIO);
856 }
857 }
858
859 int
ib_dealloc_pd(struct ib_pd * pd)860 ib_dealloc_pd(struct ib_pd *pd)
861 {
862 ofs_client_t *ofs_client = (ofs_client_t *)pd->device->clnt_hdl;
863 int rtn;
864
865 ofs_lock_enter(&ofs_client->lock);
866 if (pd->device->reg_state != IB_DEV_OPEN) {
867 ofs_lock_exit(&ofs_client->lock);
868 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
869 "ib_dealloc_pd: pd: 0x%p => invalid device state (%d)",
870 pd, pd->device->reg_state);
871 return (-ENXIO);
872 }
873
874 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
875 "ib_dealloc_pd: pd: 0x%p", pd);
876
877 rtn = ibt_free_pd(pd->device->hca_hdl, pd->ibt_pd);
878 ofs_lock_exit(&ofs_client->lock);
879
880 if (rtn == IBT_SUCCESS) {
881 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
882 "ib_dealloc_pd: pd: 0x%p, device: 0x%p, ibt_pd: 0x%p, "
883 "rtn: 0x%x", pd, pd->device, pd->ibt_pd, rtn);
884 kmem_free(pd, sizeof (struct ib_pd));
885 return (0);
886 }
887
888 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
889 "ib_dealloc_pd: pd: 0x%p => ibt_free_pd failed w/ 0x%x",
890 pd, rtn);
891
892 switch (rtn) {
893 case IBT_PD_IN_USE:
894 return (-EBUSY);
895 case IBT_HCA_HDL_INVALID:
896 return (-EFAULT);
897 default:
898 return (-EIO);
899 }
900 }
901
902 /*
903 * ofs_cq_handler() is a delegated function to handle CQ events,
904 * which dispatches them to corresponding cq handlers registered
905 * with ib_create_cq().
906 */
907 static void
ofs_cq_handler(ibt_cq_hdl_t ibt_cq,void * arg)908 ofs_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
909 {
910 struct ib_cq *cq = (struct ib_cq *)ibt_get_cq_private(ibt_cq);
911
912 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
913 "ofs_cq_handler: ibt_cq: 0x%p, ib_cq: 0x%p, comp_handler: 0x%p, "
914 "arg: 0x%p", ibt_cq, cq, cq->comp_handler, arg);
915
916 if (cq->comp_handler) {
917 cq->comp_handler(cq, cq->cq_context);
918 }
919 }
920
921 /*
922 * ib_create_cq - Creates a CQ on the specified device.
923 * @device: The device on which to create the CQ.
924 * @comp_handler: A user-specified callback that is invoked when a
925 * completion event occurs on the CQ.
926 * @event_handler: A user-specified callback that is invoked when an
927 * asynchronous event not associated with a completion occurs on the CQ.
928 * @cq_context: Context associated with the CQ returned to the user via
929 * the associated completion and event handlers.
930 * @cqe: The minimum size of the CQ.
931 * @comp_vector - Completion vector used to signal completion events.
932 * Must be >= 0 and < context->num_comp_vectors.
933 *
934 * Users can examine the cq structure to determine the actual CQ size.
935 *
936 * Note that comp_vector is not supported currently.
937 */
938 struct ib_cq *
ib_create_cq(struct ib_device * device,ib_comp_handler comp_handler,void (* event_handler)(struct ib_event *,void *),void * cq_context,int cqe,void * comp_vector)939 ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler,
940 void (*event_handler)(struct ib_event *, void *), void *cq_context,
941 int cqe, void *comp_vector)
942 {
943 ofs_client_t *ofs_client = (ofs_client_t *)device->clnt_hdl;
944 ibt_cq_attr_t cq_attr;
945 uint32_t real_size;
946 struct ib_cq *cq;
947 int rtn;
948
949 if ((cq = kmem_alloc(sizeof (struct ib_cq), KM_NOSLEEP)) == NULL) {
950 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
951 "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
952 "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
953 "comp_vector: %p => no sufficient memory", device,
954 comp_handler, event_handler, cq_context, cqe, comp_vector);
955 return ((struct ib_cq *)-ENOMEM);
956 }
957
958 ofs_lock_enter(&ofs_client->lock);
959 if (device->reg_state != IB_DEV_OPEN) {
960 ofs_lock_exit(&ofs_client->lock);
961 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
962 "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
963 "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
964 "comp_vector: %p => invalid device state (%d)", device,
965 comp_handler, event_handler, cq_context, cqe, comp_vector,
966 device->reg_state);
967 kmem_free(cq, sizeof (struct ib_cq));
968 return ((struct ib_cq *)-ENXIO);
969 }
970
971 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
972 "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
973 "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
974 "comp_vector: %d", device, comp_handler, event_handler,
975 cq_context, cqe, comp_vector);
976
977 cq_attr.cq_size = cqe;
978 cq_attr.cq_sched = comp_vector;
979 cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
980 rtn = ibt_alloc_cq(device->hca_hdl, &cq_attr, &cq->ibt_cq, &real_size);
981 ofs_lock_exit(&ofs_client->lock);
982
983 if (rtn == IBT_SUCCESS) {
984 cq->device = device;
985 cq->comp_handler = comp_handler;
986 cq->event_handler = event_handler;
987 cq->cq_context = cq_context;
988 cq->cqe = real_size;
989 ibt_set_cq_private(cq->ibt_cq, cq);
990 ibt_set_cq_handler(cq->ibt_cq, ofs_cq_handler, cq_context);
991 mutex_init(&cq->lock, NULL, MUTEX_DEFAULT, NULL);
992 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
993 "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p, "
994 "rtn: 0x%x", device, cqe, cq->ibt_cq, rtn);
995 return (cq);
996 }
997
998 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
999 "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p => "
1000 "ibt_alloc_cq failed w/ 0x%x", device, cqe, cq->ibt_cq, rtn);
1001 kmem_free(cq, sizeof (struct ib_cq));
1002
1003 switch (rtn) {
1004 case IBT_HCA_CQ_EXCEEDED:
1005 case IBT_INVALID_PARAM:
1006 case IBT_HCA_HDL_INVALID:
1007 return ((struct ib_cq *)-EINVAL);
1008 case IBT_INSUFF_RESOURCE:
1009 return ((struct ib_cq *)-ENOMEM);
1010 default:
1011 return ((struct ib_cq *)-EIO);
1012 }
1013 }
1014
/*
 * ib_destroy_cq - Destroy the specified CQ and free its shadow structure.
 * Returns 0 on success or a negative errno on failure.
 */
int
ib_destroy_cq(struct ib_cq *cq)
{
	ofs_client_t *ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
	int rtn;

	ofs_lock_enter(&ofs_client->lock);
	if (cq->device->reg_state != IB_DEV_OPEN) {
		ofs_lock_exit(&ofs_client->lock);
		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
		    "ib_destroy_cq: cq: 0x%p => invalid device state (%d)",
		    cq, cq->device->reg_state);
		return (-ENXIO);
	}

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_destroy_cq: cq: 0x%p", cq);

	/*
	 * if IBTL_ASYNC_PENDING is set, the underlying ibt_cq is not
	 * freed at this moment, but is yet alive for a while. Then
	 * there is a possibility that this cq is used even after
	 * ib_destroy_cq() is called. To distinguish this case from
	 * others, clear the CQ private pointer here.
	 */
	ibt_set_cq_private(cq->ibt_cq, NULL);

	rtn = ibt_free_cq(cq->ibt_cq);
	if (rtn == IBT_SUCCESS) {
		ofs_lock_exit(&ofs_client->lock);
		kmem_free(cq, sizeof (struct ib_cq));
		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
		    "ib_destroy_cq: cq: 0x%p, rtn: 0x%x", cq, rtn);
		return (0);
	}
	/* the free failed: the CQ is still live, so restore its private ptr */
	ibt_set_cq_private(cq->ibt_cq, cq);
	ofs_lock_exit(&ofs_client->lock);

	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
	    "ib_destroy_cq: cq: 0x%p => ibt_free_cq failed w/ 0x%x", cq, rtn);

	/* map the IBTF status onto the Linux-style negative errno return */
	switch (rtn) {
	case IBT_CQ_BUSY:
		return (-EBUSY);
	case IBT_HCA_HDL_INVALID:
	case IBT_CQ_HDL_INVALID:
		return (-EINVAL);
	default:
		return (-EIO);
	}
}
1066
1067 struct ib_qp *
ib_create_qp(struct ib_pd * pd,struct ib_qp_init_attr * qp_init_attr)1068 ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr)
1069 {
1070 ofs_client_t *ofs_client = pd->device->clnt_hdl;
1071 ibt_qp_alloc_attr_t attrs;
1072 ibt_chan_sizes_t sizes;
1073 ib_qpn_t qpn;
1074 ibt_qp_hdl_t ibt_qp;
1075 struct ib_qp *qp;
1076 int rtn;
1077
1078 /* sanity check */
1079 if (!(qp_init_attr->send_cq && qp_init_attr->recv_cq)) {
1080 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1081 "ib_create_qp: pd: 0x%p => invalid cqs "
1082 "(send_cq=0x%p, recv_cq=0x%p)", pd,
1083 qp_init_attr->send_cq, qp_init_attr->recv_cq);
1084 return ((struct ib_qp *)-EINVAL);
1085 }
1086
1087 /* UC, Raw IPv6 and Raw Ethernet are not supported */
1088 if (qp_init_attr->qp_type == IB_QPT_UC ||
1089 qp_init_attr->qp_type == IB_QPT_RAW_IPV6 ||
1090 qp_init_attr->qp_type == IB_QPT_RAW_ETY) {
1091 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1092 "ib_create_qp: pd: 0x%p => invalid qp_type",
1093 pd, qp_init_attr->qp_type);
1094 return ((struct ib_qp *)-EINVAL);
1095 }
1096
1097 if ((qp = kmem_alloc(sizeof (struct ib_qp), KM_NOSLEEP)) == NULL) {
1098 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1099 "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1100 "no sufficient memory", pd, qp_init_attr);
1101 return ((struct ib_qp *)-ENOMEM);
1102 }
1103
1104 ofs_lock_enter(&ofs_client->lock);
1105 if (pd->device->reg_state != IB_DEV_OPEN) {
1106 ofs_lock_exit(&ofs_client->lock);
1107 kmem_free(qp, sizeof (struct ib_qp));
1108 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1109 "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1110 "invalid device state (%d)", pd, qp_init_attr,
1111 pd->device->reg_state);
1112 return ((struct ib_qp *)-ENXIO);
1113 }
1114
1115 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1116 "ib_create_qp: pd: 0x%p, event_handler: 0x%p, qp_context: 0x%p, "
1117 "send_cq: 0x%p, recv_cq: 0x%p, srq: 0x%p, max_send_wr: 0x%x, "
1118 "max_recv_wr: 0x%x, max_send_sge: 0x%x, max_recv_sge: 0x%x, "
1119 "max_inline_data: 0x%x, sq_sig_type: %d, qp_type: %d, "
1120 "port_num: %d",
1121 pd, qp_init_attr->event_handler, qp_init_attr->qp_context,
1122 qp_init_attr->send_cq, qp_init_attr->recv_cq, qp_init_attr->srq,
1123 qp_init_attr->cap.max_send_wr, qp_init_attr->cap.max_recv_wr,
1124 qp_init_attr->cap.max_send_sge, qp_init_attr->cap.max_recv_sge,
1125 qp_init_attr->cap.max_inline_data, qp_init_attr->sq_sig_type,
1126 qp_init_attr->qp_type, qp_init_attr->port_num);
1127
1128 attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1129 if (qp_init_attr->srq) {
1130 attrs.qp_alloc_flags |= IBT_QP_USES_SRQ;
1131 }
1132
1133 attrs.qp_flags = IBT_ALL_SIGNALED | IBT_FAST_REG_RES_LKEY;
1134 if (qp_init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) {
1135 attrs.qp_flags |= IBT_WR_SIGNALED;
1136 }
1137
1138 attrs.qp_scq_hdl = qp_init_attr->send_cq->ibt_cq;
1139 attrs.qp_rcq_hdl = qp_init_attr->recv_cq->ibt_cq;
1140 attrs.qp_pd_hdl = pd->ibt_pd;
1141
1142 attrs.qp_sizes.cs_sq = qp_init_attr->cap.max_send_wr;
1143 attrs.qp_sizes.cs_rq = qp_init_attr->cap.max_recv_wr;
1144 attrs.qp_sizes.cs_sq_sgl = qp_init_attr->cap.max_send_sge;
1145 attrs.qp_sizes.cs_rq_sgl = qp_init_attr->cap.max_recv_sge;
1146 attrs.qp_sizes.cs_inline = qp_init_attr->cap.max_inline_data;
1147
1148 switch (qp_init_attr->qp_type) {
1149 case IB_QPT_RC:
1150 rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_RC_RQP, &attrs,
1151 &sizes, &qpn, &ibt_qp);
1152 break;
1153 case IB_QPT_UD:
1154 rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_UD_RQP, &attrs,
1155 &sizes, &qpn, &ibt_qp);
1156 break;
1157 case IB_QPT_SMI:
1158 rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1159 qp_init_attr->port_num, IBT_SMI_SQP, &attrs, &sizes,
1160 &ibt_qp);
1161 break;
1162 case IB_QPT_GSI:
1163 rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1164 qp_init_attr->port_num, IBT_GSI_SQP, &attrs, &sizes,
1165 &ibt_qp);
1166 break;
1167 default:
1168 /* this should never happens */
1169 ofs_lock_exit(&ofs_client->lock);
1170 kmem_free(qp, sizeof (struct ib_qp));
1171 return ((struct ib_qp *)-EINVAL);
1172 }
1173 ofs_lock_exit(&ofs_client->lock);
1174
1175 if (rtn == IBT_SUCCESS) {
1176 /* fill in ib_qp_cap w/ the real values */
1177 qp_init_attr->cap.max_send_wr = sizes.cs_sq;
1178 qp_init_attr->cap.max_recv_wr = sizes.cs_rq;
1179 qp_init_attr->cap.max_send_sge = sizes.cs_sq_sgl;
1180 qp_init_attr->cap.max_recv_sge = sizes.cs_rq_sgl;
1181 /* max_inline_data is not supported */
1182 qp_init_attr->cap.max_inline_data = 0;
1183 /* fill in ib_qp */
1184 qp->device = pd->device;
1185 qp->pd = pd;
1186 qp->send_cq = qp_init_attr->send_cq;
1187 qp->recv_cq = qp_init_attr->recv_cq;
1188 qp->srq = qp_init_attr->srq;
1189 qp->event_handler = qp_init_attr->event_handler;
1190 qp->qp_context = qp_init_attr->qp_context;
1191 qp->qp_num = qp_init_attr->qp_type == IB_QPT_SMI ? 0 :
1192 qp_init_attr->qp_type == IB_QPT_GSI ? 1 : qpn;
1193 qp->qp_type = qp_init_attr->qp_type;
1194 qp->ibt_qp = ibt_qp;
1195 ibt_set_qp_private(qp->ibt_qp, qp);
1196 mutex_init(&qp->lock, NULL, MUTEX_DEFAULT, NULL);
1197 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1198 "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p, "
1199 "rtn: 0x%x", pd->device, pd, qp_init_attr, rtn);
1200 return (qp);
1201 }
1202 kmem_free(qp, sizeof (struct ib_qp));
1203
1204 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1205 "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p => "
1206 "ibt_alloc_(special)_qp failed w/ rtn: 0x%x", pd->device, pd,
1207 qp_init_attr, rtn);
1208
1209 switch (rtn) {
1210 case IBT_NOT_SUPPORTED:
1211 case IBT_QP_SRV_TYPE_INVALID:
1212 case IBT_CQ_HDL_INVALID:
1213 case IBT_HCA_HDL_INVALID:
1214 case IBT_INVALID_PARAM:
1215 case IBT_SRQ_HDL_INVALID:
1216 case IBT_PD_HDL_INVALID:
1217 case IBT_HCA_SGL_EXCEEDED:
1218 case IBT_HCA_WR_EXCEEDED:
1219 return ((struct ib_qp *)-EINVAL);
1220 case IBT_INSUFF_RESOURCE:
1221 return ((struct ib_qp *)-ENOMEM);
1222 default:
1223 return ((struct ib_qp *)-EIO);
1224 }
1225 }
1226
1227 int
ib_destroy_qp(struct ib_qp * qp)1228 ib_destroy_qp(struct ib_qp *qp)
1229 {
1230 ofs_client_t *ofs_client = (ofs_client_t *)qp->device->clnt_hdl;
1231 int rtn;
1232
1233 ofs_lock_enter(&ofs_client->lock);
1234 if (qp->device->reg_state != IB_DEV_OPEN) {
1235 ofs_lock_exit(&ofs_client->lock);
1236 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1237 "ib_destroy_qp: qp: 0x%p => invalid device state (%d)",
1238 qp, qp->device->reg_state);
1239 return (-ENXIO);
1240 }
1241
1242 /*
1243 * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed
1244 * at this moment, but yet alive for a while. Then
1245 * there is a possibility that this qp is used even after
1246 * ib_destroy_qp() is called. To distinguish this case from
1247 * others, clear ibt_qp here.
1248 */
1249 ibt_set_qp_private(qp->ibt_qp, NULL);
1250
1251 rtn = ibt_free_qp(qp->ibt_qp);
1252 if (rtn == IBT_SUCCESS) {
1253 ofs_lock_exit(&ofs_client->lock);
1254 kmem_free(qp, sizeof (struct ib_qp));
1255 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1256 "ib_destroy_qp: qp: 0x%p, rtn: 0x%x", qp, rtn);
1257 return (0);
1258 }
1259 ibt_set_qp_private(qp->ibt_qp, qp);
1260 ofs_lock_exit(&ofs_client->lock);
1261
1262 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1263 "ib_destroy_qp: qp: 0x%p => ibt_free_qp failed w/ 0x%x", qp, rtn);
1264
1265 switch (rtn) {
1266 case IBT_CHAN_STATE_INVALID:
1267 case IBT_HCA_HDL_INVALID:
1268 case IBT_QP_HDL_INVALID:
1269 return (-EINVAL);
1270 default:
1271 return (-EIO);
1272 }
1273 }
1274
1275 /*
1276 * ib_req_notify_cq - Request completion notification on a CQ.
1277 * @cq: The CQ to generate an event for.
1278 * @flags:
1279 * Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
1280 * to request an event on the next solicited event or next work
 * completion of any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
1282 * may also be |ed in to request a hint about missed events, as
1283 * described below.
1284 *
1285 * Return Value:
1286 * < 0 means an error occurred while requesting notification
1287 * == 0 means notification was requested successfully, and if
1288 * IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
1289 * were missed and it is safe to wait for another event. In
1290 * this case is it guaranteed that any work completions added
1291 * to the CQ since the last CQ poll will trigger a completion
1292 * notification event.
1293 * > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
1294 * in. It means that the consumer must poll the CQ again to
1295 * make sure it is empty to avoid missing an event because of a
1296 * race between requesting notification and an entry being
1297 * added to the CQ. This return value means it is possible
1298 * (but not guaranteed) that a work completion has been added
1299 * to the CQ since the last poll without triggering a
1300 * completion notification event.
1301 *
1302 * Note that IB_CQ_REPORT_MISSED_EVENTS is currently not supported.
1303 */
1304 int
ib_req_notify_cq(struct ib_cq * cq,enum ib_cq_notify_flags flags)1305 ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
1306 {
1307 ibt_cq_notify_flags_t notify_type;
1308 int rtn;
1309 ofs_client_t *ofs_client = cq->device->clnt_hdl;
1310
1311 ofs_lock_enter(&ofs_client->lock);
1312 if (cq->device->reg_state != IB_DEV_OPEN) {
1313 ofs_lock_exit(&ofs_client->lock);
1314 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1315 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1316 return (-ENXIO);
1317 }
1318
1319 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1320 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1321
1322 switch (flags & IB_CQ_SOLICITED_MASK) {
1323 case IB_CQ_SOLICITED:
1324 notify_type = IBT_NEXT_SOLICITED;
1325 break;
1326 case IB_CQ_NEXT_COMP:
1327 notify_type = IBT_NEXT_COMPLETION;
1328 break;
1329 default:
1330 /* Currently only two flags are supported */
1331 ofs_lock_exit(&ofs_client->lock);
1332 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1333 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => invalid flag",
1334 cq, flags);
1335 return (-EINVAL);
1336 }
1337
1338 rtn = ibt_enable_cq_notify(cq->ibt_cq, notify_type);
1339 ofs_lock_exit(&ofs_client->lock);
1340
1341 if (rtn == IBT_SUCCESS) {
1342 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1343 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x rtn: 0x%x",
1344 cq, flags, rtn);
1345 return (0);
1346 }
1347
1348 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1349 "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => ibt_enable_cq_notify "
1350 "failed w/ 0x%x", cq, flags, rtn);
1351
1352 switch (rtn) {
1353 case IBT_HCA_HDL_INVALID:
1354 case IBT_CQ_HDL_INVALID:
1355 case IBT_CQ_NOTIFY_TYPE_INVALID:
1356 return (-EINVAL);
1357 default:
1358 return (-EIO);
1359 }
1360 }
1361
/*
 * Legal QP state transitions and their attribute-mask requirements,
 * indexed as qp_state_table[cur_state][next_state]. For each valid
 * transition, req_param[qp_type] lists the attribute bits that MUST be
 * present in the modify mask and opt_param[qp_type] the bits that MAY
 * be present; ib_modify_qp_is_ok() consults this table. Entries not
 * listed default to zero, i.e. .valid == 0 (invalid transition).
 * NOTE(review): this mirrors the Linux OFED qp_state_table layout.
 */
static const struct {
	int valid;
	enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETY + 1];
	enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETY + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {

	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT] = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
	},
	[IB_QPS_INIT] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_INIT] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
		[IB_QPS_RTR] = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN),
				[IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN |
				    IB_QP_MAX_DEST_RD_ATOMIC |
				    IB_QP_MIN_RNR_TIMER),
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_RC] = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = IB_QP_SQ_PSN,
				[IB_QPT_UC] = IB_QP_SQ_PSN,
				[IB_QPT_RC] = (IB_QP_TIMEOUT |
				    IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
				    IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC),
				[IB_QPT_SMI] = IB_QP_SQ_PSN,
				[IB_QPT_GSI] = IB_QP_SQ_PSN,
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_RTS] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
			}
		},
	},
	[IB_QPS_SQD] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV |
				    IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
				    IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC |
				    IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_SQE] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 }
	}
};
1530
1531 static inline int
ib_modify_qp_is_ok(enum ib_qp_state cur_state,enum ib_qp_state next_state,enum ib_qp_type type,enum ib_qp_attr_mask mask)1532 ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1533 enum ib_qp_type type, enum ib_qp_attr_mask mask)
1534 {
1535 enum ib_qp_attr_mask req_param, opt_param;
1536
1537 if (cur_state < 0 || cur_state > IB_QPS_ERR ||
1538 next_state < 0 || next_state > IB_QPS_ERR) {
1539 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1540 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1541 "qp_type: %d, attr_mask: 0x%x => invalid state(1)",
1542 cur_state, next_state, type, mask);
1543 return (0);
1544 }
1545
1546 if (mask & IB_QP_CUR_STATE &&
1547 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1548 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) {
1549 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1550 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1551 "qp_type: %d, attr_mask: 0x%x => invalid state(2)",
1552 cur_state, next_state, type, mask);
1553 return (0);
1554 }
1555
1556 if (!qp_state_table[cur_state][next_state].valid) {
1557 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1558 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1559 "qp_type: %d, attr_mask: 0x%x => state is not valid",
1560 cur_state, next_state, type, mask);
1561 return (0);
1562 }
1563
1564 req_param = qp_state_table[cur_state][next_state].req_param[type];
1565 opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1566
1567 if ((mask & req_param) != req_param) {
1568 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1569 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1570 "qp_type: %d, attr_mask: 0x%x => "
1571 "required param doesn't match. req_param = 0x%x",
1572 cur_state, next_state, type, mask, req_param);
1573 return (0);
1574 }
1575
1576 if (mask & ~(req_param | opt_param | IB_QP_STATE)) {
1577 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1578 "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1579 "qp_type: %d, attr_mask: 0x%x => "
1580 "unsupported options. req_param = 0x%x, opt_param = 0x%x",
1581 cur_state, next_state, type, mask, req_param, opt_param);
1582 return (0);
1583 }
1584
1585 return (1);
1586 }
1587
/*
 * Return the current OF QP state for a queried QP. The direct cast
 * assumes the IBTF and OF state enums share numeric encodings —
 * NOTE(review): confirmed only by this cast, verify against headers.
 * IBT_STATE_SQDRAIN has no OF counterpart and must not appear here.
 */
static inline enum ib_qp_state
qp_current_state(ibt_qp_query_attr_t *qp_attr)
{
	ASSERT(qp_attr->qp_info.qp_state != IBT_STATE_SQDRAIN);
	return (enum ib_qp_state)(qp_attr->qp_info.qp_state);
}
1594
1595 static inline ibt_tran_srv_t
of2ibtf_qp_type(enum ib_qp_type type)1596 of2ibtf_qp_type(enum ib_qp_type type)
1597 {
1598 switch (type) {
1599 case IB_QPT_SMI:
1600 case IB_QPT_GSI:
1601 case IB_QPT_UD:
1602 return (IBT_UD_SRV);
1603 case IB_QPT_RC:
1604 return (IBT_RC_SRV);
1605 case IB_QPT_UC:
1606 return (IBT_UC_SRV);
1607 case IB_QPT_RAW_IPV6:
1608 return (IBT_RAWIP_SRV);
1609 case IB_QPT_RAW_ETY:
1610 default:
1611 ASSERT(type == IB_QPT_RAW_ETY);
1612 return (IBT_RAWETHER_SRV);
1613 }
1614 }
1615
1616 static inline void
set_av(struct ib_ah_attr * attr,ibt_cep_path_t * pathp)1617 set_av(struct ib_ah_attr *attr, ibt_cep_path_t *pathp)
1618 {
1619 ibt_adds_vect_t *av = &pathp->cep_adds_vect;
1620
1621 pathp->cep_hca_port_num = attr->port_num;
1622 av->av_srate = OF2IBTF_SRATE(attr->static_rate);
1623 av->av_srvl = attr->sl & 0xF;
1624 av->av_send_grh = attr->ah_flags & IB_AH_GRH ? 1 : 0;
1625
1626 if (av->av_send_grh) {
1627 av->av_dgid.gid_prefix =
1628 attr->grh.dgid.global.subnet_prefix;
1629 av->av_dgid.gid_guid =
1630 attr->grh.dgid.global.interface_id;
1631 av->av_flow = attr->grh.flow_label & 0xFFFFF;
1632 av->av_tclass = attr->grh.traffic_class;
1633 av->av_hop = attr->grh.hop_limit;
1634 av->av_sgid_ix = attr->grh.sgid_index;
1635 }
1636 av->av_dlid = attr->dlid;
1637 av->av_src_path = attr->src_path_bits;
1638 }
1639
1640 int
ib_modify_qp(struct ib_qp * qp,struct ib_qp_attr * attr,int attr_mask)1641 ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask)
1642 {
1643 enum ib_qp_state cur_state, new_state;
1644 ibt_hca_attr_t hattr;
1645 ibt_qp_query_attr_t qp_attr;
1646 ibt_qp_info_t modify_attr;
1647 ibt_cep_modify_flags_t flags;
1648 int rtn;
1649 ofs_client_t *ofs_client = qp->device->clnt_hdl;
1650
1651 ofs_lock_enter(&ofs_client->lock);
1652 if (qp->device->reg_state != IB_DEV_OPEN) {
1653 ofs_lock_exit(&ofs_client->lock);
1654 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1655 "ib_modify_qp: qp: 0x%p => invalid device state (%d)",
1656 qp, qp->device->reg_state);
1657 return (-ENXIO);
1658 }
1659
1660 rtn = ibt_query_hca(qp->device->hca_hdl, &hattr);
1661 if (rtn != IBT_SUCCESS) {
1662 ofs_lock_exit(&ofs_client->lock);
1663 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1664 "ib_modify_qp: qp: 0x%p, hca_hdl: 0x%p => "
1665 "ibt_query_hca() failed w/ %d",
1666 qp, qp->device->hca_hdl, rtn);
1667 return (-EIO);
1668 }
1669
1670 /* only one thread per qp is allowed during the qp modification */
1671 mutex_enter(&qp->lock);
1672
1673 /* Get the current QP attributes first */
1674 bzero(&qp_attr, sizeof (ibt_qp_query_attr_t));
1675 if ((rtn = ibt_query_qp(qp->ibt_qp, &qp_attr)) != IBT_SUCCESS) {
1676 mutex_exit(&qp->lock);
1677 ofs_lock_exit(&ofs_client->lock);
1678 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1679 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1680 "ibt_query_qp failed w/ 0x%x", qp, attr, attr_mask, rtn);
1681 return (-EIO);
1682 }
1683
1684 /* Get the current and new state for this QP */
1685 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state :
1686 qp_current_state(&qp_attr);
1687 new_state = attr_mask & IB_QP_STATE ? attr->qp_state :
1688 cur_state;
1689
1690 /* Sanity check of the current/new states */
1691 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1692 /* Linux OF returns 0 in this case */
1693 mutex_exit(&qp->lock);
1694 ofs_lock_exit(&ofs_client->lock);
1695 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1696 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1697 "invalid state (both of current/new states are RESET)",
1698 qp, attr, attr_mask);
1699 return (0);
1700 }
1701
1702 /*
1703 * Check if this modification request is supported with the new
1704 * and/or current state.
1705 */
1706 if (!ib_modify_qp_is_ok(cur_state, new_state, qp->qp_type, attr_mask)) {
1707 mutex_exit(&qp->lock);
1708 ofs_lock_exit(&ofs_client->lock);
1709 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1710 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1711 "invalid arguments",
1712 qp, attr, attr_mask);
1713 return (-EINVAL);
1714 }
1715
1716 /* Sanity checks */
1717 if (attr_mask & IB_QP_PORT && (attr->port_num == 0 ||
1718 attr->port_num > hattr.hca_nports)) {
1719 mutex_exit(&qp->lock);
1720 ofs_lock_exit(&ofs_client->lock);
1721 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1722 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1723 "invalid attr->port_num(%d), max_nports(%d)",
1724 qp, attr, attr_mask, attr->port_num, hattr.hca_nports);
1725 return (-EINVAL);
1726 }
1727
1728 if (attr_mask & IB_QP_PKEY_INDEX &&
1729 attr->pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1730 mutex_exit(&qp->lock);
1731 ofs_lock_exit(&ofs_client->lock);
1732 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1733 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1734 "invalid attr->pkey_index(%d), max_pkey_index(%d)",
1735 qp, attr, attr_mask, attr->pkey_index,
1736 hattr.hca_max_port_pkey_tbl_sz);
1737 return (-EINVAL);
1738 }
1739
1740 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1741 attr->max_rd_atomic > hattr.hca_max_rdma_out_qp) {
1742 mutex_exit(&qp->lock);
1743 ofs_lock_exit(&ofs_client->lock);
1744 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1745 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1746 "invalid attr->max_rd_atomic(0x%x), max_rdma_out_qp(0x%x)",
1747 qp, attr, attr_mask, attr->max_rd_atomic,
1748 hattr.hca_max_rdma_out_qp);
1749 return (-EINVAL);
1750 }
1751
1752 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1753 attr->max_dest_rd_atomic > hattr.hca_max_rdma_in_qp) {
1754 mutex_exit(&qp->lock);
1755 ofs_lock_exit(&ofs_client->lock);
1756 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1757 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1758 "invalid attr->max_dest_rd_atomic(0x%x), "
1759 "max_rdma_in_qp(0x%x)", qp, attr, attr_mask,
1760 attr->max_dest_rd_atomic, hattr.hca_max_rdma_in_qp);
1761 return (-EINVAL);
1762 }
1763
1764 /* copy the current setting */
1765 modify_attr = qp_attr.qp_info;
1766
1767 /*
1768 * Since it's already checked if the modification request matches
1769 * the new and/or current states, just assign both of states to
1770 * modify_attr here. The current state is required if qp_state
1771 * is RTR, but it's harmelss otherwise, so it's set always.
1772 */
1773 modify_attr.qp_current_state = OF2IBTF_STATE(cur_state);
1774 modify_attr.qp_state = OF2IBTF_STATE(new_state);
1775 modify_attr.qp_trans = of2ibtf_qp_type(qp->qp_type);
1776
1777 /* Convert OF modification requests into IBTF ones */
1778 flags = IBT_CEP_SET_STATE; /* IBTF needs IBT_CEP_SET_STATE */
1779 if (cur_state == IB_QPS_RESET &&
1780 new_state == IB_QPS_INIT) {
1781 flags |= IBT_CEP_SET_RESET_INIT;
1782 } else if (cur_state == IB_QPS_INIT &&
1783 new_state == IB_QPS_RTR) {
1784 flags |= IBT_CEP_SET_INIT_RTR;
1785 } else if (cur_state == IB_QPS_RTR &&
1786 new_state == IB_QPS_RTS) {
1787 flags |= IBT_CEP_SET_RTR_RTS;
1788 }
1789 if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
1790 flags |= IBT_CEP_SET_SQD_EVENT;
1791 }
1792 if (attr_mask & IB_QP_ACCESS_FLAGS) {
1793 modify_attr.qp_flags &= ~(IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1794 IBT_CEP_ATOMIC);
1795 if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
1796 flags |= IBT_CEP_SET_RDMA_R;
1797 modify_attr.qp_flags |= IBT_CEP_RDMA_RD;
1798 }
1799 if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
1800 flags |= IBT_CEP_SET_RDMA_W;
1801 modify_attr.qp_flags |= IBT_CEP_RDMA_WR;
1802 }
1803 if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
1804 flags |= IBT_CEP_SET_ATOMIC;
1805 modify_attr.qp_flags |= IBT_CEP_ATOMIC;
1806 }
1807 }
1808 if (attr_mask & IB_QP_PKEY_INDEX) {
1809 flags |= IBT_CEP_SET_PKEY_IX;
1810 switch (qp->qp_type) {
1811 case IB_QPT_SMI:
1812 case IB_QPT_GSI:
1813 case IB_QPT_UD:
1814 modify_attr.qp_transport.ud.ud_pkey_ix =
1815 attr->pkey_index;
1816 break;
1817 case IB_QPT_RC:
1818 modify_attr.qp_transport.rc.rc_path.cep_pkey_ix =
1819 attr->pkey_index;
1820 break;
1821 case IB_QPT_UC:
1822 modify_attr.qp_transport.uc.uc_path.cep_pkey_ix =
1823 attr->pkey_index;
1824 break;
1825 default:
1826 /* This should never happen */
1827 mutex_exit(&qp->lock);
1828 ofs_lock_exit(&ofs_client->lock);
1829 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1830 "ib_modify_qp(IB_QP_PKEY_INDEX): qp: 0x%p, "
1831 "attr: 0x%p, attr_mask: 0x%x => "
1832 "invalid qp->qp_type(%d)",
1833 qp, attr, attr_mask, qp->qp_type);
1834 return (-EINVAL);
1835 }
1836 }
1837 if (attr_mask & IB_QP_PORT) {
1838 flags |= IBT_CEP_SET_PORT;
1839 switch (qp->qp_type) {
1840 case IB_QPT_SMI:
1841 case IB_QPT_GSI:
1842 case IB_QPT_UD:
1843 modify_attr.qp_transport.ud.ud_port = attr->port_num;
1844 break;
1845 case IB_QPT_RC:
1846 modify_attr.qp_transport.rc.rc_path.cep_hca_port_num =
1847 attr->port_num;
1848 break;
1849 case IB_QPT_UC:
1850 modify_attr.qp_transport.uc.uc_path.cep_hca_port_num =
1851 attr->port_num;
1852 break;
1853 default:
1854 /* This should never happen */
1855 mutex_exit(&qp->lock);
1856 ofs_lock_exit(&ofs_client->lock);
1857 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1858 "ib_modify_qp(IB_QP_PORT): qp: 0x%p, "
1859 "attr: 0x%p, attr_mask: 0x%x => "
1860 "invalid qp->qp_type(%d)",
1861 qp, attr, attr_mask, qp->qp_type);
1862 return (-EINVAL);
1863 }
1864 }
1865 if (attr_mask & IB_QP_QKEY) {
1866 ASSERT(qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_SMI ||
1867 qp->qp_type == IB_QPT_GSI);
1868 flags |= IBT_CEP_SET_QKEY;
1869 modify_attr.qp_transport.ud.ud_qkey = attr->qkey;
1870 }
1871 if (attr_mask & IB_QP_AV) {
1872 flags |= IBT_CEP_SET_ADDS_VECT;
1873 switch (qp->qp_type) {
1874 case IB_QPT_RC:
1875 set_av(&attr->ah_attr,
1876 &modify_attr.qp_transport.rc.rc_path);
1877 break;
1878 case IB_QPT_UC:
1879 set_av(&attr->ah_attr,
1880 &modify_attr.qp_transport.uc.uc_path);
1881 break;
1882 case IB_QPT_SMI:
1883 case IB_QPT_GSI:
1884 case IB_QPT_UD:
1885 default:
1886 /* This should never happen */
1887 mutex_exit(&qp->lock);
1888 ofs_lock_exit(&ofs_client->lock);
1889 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1890 "ib_modify_qp(IB_QP_AV): qp: 0x%p, "
1891 "attr: 0x%p, attr_mask: 0x%x => "
1892 "invalid qp->qp_type(%d)",
1893 qp, attr, attr_mask, qp->qp_type);
1894 return (-EINVAL);
1895 }
1896 }
1897 if (attr_mask & IB_QP_PATH_MTU) {
1898 switch (qp->qp_type) {
1899 case IB_QPT_RC:
1900 modify_attr.qp_transport.rc.rc_path_mtu =
1901 OF2IBTF_PATH_MTU(attr->path_mtu);
1902 break;
1903 case IB_QPT_UC:
1904 modify_attr.qp_transport.uc.uc_path_mtu =
1905 OF2IBTF_PATH_MTU(attr->path_mtu);
1906 break;
1907 case IB_QPT_SMI:
1908 case IB_QPT_GSI:
1909 case IB_QPT_UD:
1910 default:
1911 /* nothing to do */
1912 break;
1913 }
1914 }
1915 if (attr_mask & IB_QP_TIMEOUT && qp->qp_type == IB_QPT_RC) {
1916 flags |= IBT_CEP_SET_TIMEOUT;
1917 modify_attr.qp_transport.rc.rc_path.cep_timeout =
1918 attr->timeout;
1919 }
1920 if (attr_mask & IB_QP_RETRY_CNT && qp->qp_type == IB_QPT_RC) {
1921 flags |= IBT_CEP_SET_RETRY;
1922 modify_attr.qp_transport.rc.rc_retry_cnt =
1923 attr->retry_cnt & 0x7;
1924 }
1925 if (attr_mask & IB_QP_RNR_RETRY && qp->qp_type == IB_QPT_RC) {
1926 flags |= IBT_CEP_SET_RNR_NAK_RETRY;
1927 modify_attr.qp_transport.rc.rc_rnr_retry_cnt =
1928 attr->rnr_retry & 0x7;
1929 }
1930 if (attr_mask & IB_QP_RQ_PSN) {
1931 switch (qp->qp_type) {
1932 case IB_QPT_RC:
1933 modify_attr.qp_transport.rc.rc_rq_psn =
1934 attr->rq_psn & 0xFFFFFF;
1935 break;
1936 case IB_QPT_UC:
1937 modify_attr.qp_transport.uc.uc_rq_psn =
1938 attr->rq_psn & 0xFFFFFF;
1939 break;
1940 case IB_QPT_SMI:
1941 case IB_QPT_GSI:
1942 case IB_QPT_UD:
1943 default:
1944 /* nothing to do */
1945 break;
1946 }
1947 }
1948 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
1949 if (attr->max_rd_atomic) {
1950 flags |= IBT_CEP_SET_RDMARA_OUT;
1951 modify_attr.qp_transport.rc.rc_rdma_ra_out =
1952 attr->max_rd_atomic;
1953 }
1954 }
1955 if (attr_mask & IB_QP_ALT_PATH) {
1956 /* Sanity checks */
1957 if (attr->alt_port_num == 0 ||
1958 attr->alt_port_num > hattr.hca_nports) {
1959 mutex_exit(&qp->lock);
1960 ofs_lock_exit(&ofs_client->lock);
1961 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1962 "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1963 "attr_mask: 0x%x => invalid attr->alt_port_num"
1964 "(%d), max_nports(%d)",
1965 qp, attr, attr_mask, attr->alt_port_num,
1966 hattr.hca_nports);
1967 return (-EINVAL);
1968 }
1969 if (attr->alt_pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1970 mutex_exit(&qp->lock);
1971 ofs_lock_exit(&ofs_client->lock);
1972 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1973 "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1974 "attr_mask: 0x%x => invalid attr->alt_pkey_index"
1975 "(%d), max_port_key_index(%d)",
1976 qp, attr, attr_mask, attr->alt_pkey_index,
1977 hattr.hca_max_port_pkey_tbl_sz);
1978 return (-EINVAL);
1979 }
1980 flags |= IBT_CEP_SET_ALT_PATH;
1981 switch (qp->qp_type) {
1982 case IB_QPT_RC:
1983 modify_attr.qp_transport.rc.rc_alt_path.
1984 cep_pkey_ix = attr->alt_pkey_index;
1985 modify_attr.qp_transport.rc.rc_alt_path.
1986 cep_hca_port_num = attr->alt_port_num;
1987 set_av(&attr->alt_ah_attr,
1988 &modify_attr.qp_transport.rc.rc_alt_path);
1989 modify_attr.qp_transport.rc.rc_alt_path.
1990 cep_timeout = attr->alt_timeout;
1991 break;
1992 case IB_QPT_UC:
1993 modify_attr.qp_transport.uc.uc_alt_path.
1994 cep_pkey_ix = attr->alt_pkey_index;
1995 modify_attr.qp_transport.uc.uc_alt_path.
1996 cep_hca_port_num = attr->alt_port_num;
1997 set_av(&attr->alt_ah_attr,
1998 &modify_attr.qp_transport.uc.uc_alt_path);
1999 modify_attr.qp_transport.uc.uc_alt_path.
2000 cep_timeout = attr->alt_timeout;
2001 break;
2002 case IB_QPT_SMI:
2003 case IB_QPT_GSI:
2004 case IB_QPT_UD:
2005 default:
2006 /* This should never happen */
2007 mutex_exit(&qp->lock);
2008 ofs_lock_exit(&ofs_client->lock);
2009 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2010 "ib_modify_qp(IB_QP_ALT_PATH): qp: 0x%p, "
2011 "attr: 0x%p, attr_mask: 0x%x => "
2012 "invalid qp->qp_type(%d)",
2013 qp, attr, attr_mask, qp->qp_type);
2014 return (-EINVAL);
2015 }
2016 }
2017 if (attr_mask & IB_QP_MIN_RNR_TIMER && qp->qp_type == IB_QPT_RC) {
2018 flags |= IBT_CEP_SET_MIN_RNR_NAK;
2019 modify_attr.qp_transport.rc.rc_min_rnr_nak =
2020 attr->min_rnr_timer & 0x1F;
2021 }
2022 if (attr_mask & IB_QP_SQ_PSN) {
2023 switch (qp->qp_type) {
2024 case IB_QPT_SMI:
2025 case IB_QPT_GSI:
2026 case IB_QPT_UD:
2027 modify_attr.qp_transport.ud.ud_sq_psn =
2028 attr->sq_psn;
2029 break;
2030 case IB_QPT_RC:
2031 modify_attr.qp_transport.rc.rc_sq_psn =
2032 attr->sq_psn;
2033 break;
2034 case IB_QPT_UC:
2035 modify_attr.qp_transport.uc.uc_sq_psn =
2036 attr->sq_psn;
2037 break;
2038 default:
2039 /* This should never happen */
2040 mutex_exit(&qp->lock);
2041 ofs_lock_exit(&ofs_client->lock);
2042 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2043 "ib_modify_qp(IB_QP_SQ_PSN): qp: 0x%p, "
2044 "attr: 0x%p, attr_mask: 0x%x => "
2045 "invalid qp->qp_type(%d)",
2046 qp, attr, attr_mask, qp->qp_type);
2047 return (-EINVAL);
2048 }
2049 }
2050 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
2051 /* Linux OF sets the value if max_dest_rd_atomic is not zero */
2052 if (attr->max_dest_rd_atomic) {
2053 flags |= IBT_CEP_SET_RDMARA_IN;
2054 modify_attr.qp_transport.rc.rc_rdma_ra_in =
2055 attr->max_dest_rd_atomic;
2056 }
2057 }
2058 if (attr_mask & IB_QP_PATH_MIG_STATE) {
2059 flags |= IBT_CEP_SET_MIG;
2060 switch (qp->qp_type) {
2061 case IB_QPT_RC:
2062 modify_attr.qp_transport.rc.rc_mig_state =
2063 OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2064 break;
2065 case IB_QPT_UC:
2066 modify_attr.qp_transport.uc.uc_mig_state =
2067 OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2068 break;
2069 case IB_QPT_SMI:
2070 case IB_QPT_GSI:
2071 case IB_QPT_UD:
2072 default:
2073 /* This should never happen */
2074 mutex_exit(&qp->lock);
2075 ofs_lock_exit(&ofs_client->lock);
2076 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2077 "ib_modify_qp(IB_QP_PATH_MIG_STATE): qp: 0x%p, "
2078 "attr: 0x%p, attr_mask: 0x%x => "
2079 "invalid qp->qp_type(%d)",
2080 qp, attr, attr_mask, qp->qp_type);
2081 return (-EINVAL);
2082 }
2083 }
2084 if (attr_mask & IB_QP_CAP) {
2085 /* IB_QP_CAP is not supported */
2086 mutex_exit(&qp->lock);
2087 ofs_lock_exit(&ofs_client->lock);
2088 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2089 "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
2090 "attr_mask: 0x%x => IB_QP_CAP is not supported",
2091 qp, attr, attr_mask);
2092 return (-EINVAL);
2093 }
2094 if (attr_mask & IB_QP_DEST_QPN) {
2095 switch (qp->qp_type) {
2096 case IB_QPT_RC:
2097 modify_attr.qp_transport.rc.rc_dst_qpn =
2098 attr->dest_qp_num;
2099 break;
2100 case IB_QPT_UC:
2101 modify_attr.qp_transport.uc.uc_dst_qpn =
2102 attr->dest_qp_num;
2103 break;
2104 case IB_QPT_SMI:
2105 case IB_QPT_GSI:
2106 case IB_QPT_UD:
2107 default:
2108 /* This should never happen */
2109 mutex_exit(&qp->lock);
2110 ofs_lock_exit(&ofs_client->lock);
2111 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2112 "ib_modify_qp(IB_QP_DEST_PSN): qp: 0x%p, "
2113 "attr: 0x%p, attr_mask: 0x%x => "
2114 "invalid qp->qp_type(%d)",
2115 qp, attr, attr_mask, qp->qp_type);
2116 return (-EINVAL);
2117 }
2118 }
2119
2120 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2121 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x, "
2122 "flags: 0x%x, modify_attr: 0x%p",
2123 qp, attr, attr_mask, flags, &modify_attr);
2124
2125 /* Modify the QP attributes */
2126 rtn = ibt_modify_qp(qp->ibt_qp, flags, &modify_attr, NULL);
2127 if (rtn == IBT_SUCCESS) {
2128 mutex_exit(&qp->lock);
2129 ofs_lock_exit(&ofs_client->lock);
2130 return (0);
2131 }
2132 mutex_exit(&qp->lock);
2133 ofs_lock_exit(&ofs_client->lock);
2134
2135 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2136 "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
2137 "ibt_modify_qp failed w/ %d, flags: 0x%x",
2138 qp, attr, attr_mask, rtn, flags);
2139
2140 switch (rtn) {
2141 case IBT_HCA_HDL_INVALID:
2142 case IBT_QP_HDL_INVALID:
2143 case IBT_QP_SRV_TYPE_INVALID:
2144 case IBT_QP_STATE_INVALID:
2145 case IBT_HCA_PORT_INVALID:
2146 case IBT_PKEY_IX_ILLEGAL:
2147 return (-EINVAL);
2148 default:
2149 return (-EIO);
2150 }
2151 }
2152
2153 static inline enum ib_wc_status
ibt2of_wc_status(ibt_wc_status_t status)2154 ibt2of_wc_status(ibt_wc_status_t status)
2155 {
2156 switch (status) {
2157 case IBT_WC_LOCAL_LEN_ERR:
2158 return (IB_WC_LOC_LEN_ERR);
2159 case IBT_WC_LOCAL_CHAN_OP_ERR:
2160 return (IB_WC_LOC_QP_OP_ERR);
2161 case IBT_WC_LOCAL_PROTECT_ERR:
2162 return (IB_WC_LOC_PROT_ERR);
2163 case IBT_WC_WR_FLUSHED_ERR:
2164 return (IB_WC_WR_FLUSH_ERR);
2165 case IBT_WC_MEM_WIN_BIND_ERR:
2166 return (IB_WC_MW_BIND_ERR);
2167 case IBT_WC_BAD_RESPONSE_ERR:
2168 return (IB_WC_BAD_RESP_ERR);
2169 case IBT_WC_LOCAL_ACCESS_ERR:
2170 return (IB_WC_LOC_ACCESS_ERR);
2171 case IBT_WC_REMOTE_INVALID_REQ_ERR:
2172 return (IB_WC_REM_INV_REQ_ERR);
2173 case IBT_WC_REMOTE_ACCESS_ERR:
2174 return (IB_WC_REM_ACCESS_ERR);
2175 case IBT_WC_REMOTE_OP_ERR:
2176 return (IB_WC_REM_OP_ERR);
2177 case IBT_WC_TRANS_TIMEOUT_ERR:
2178 return (IB_WC_RETRY_EXC_ERR);
2179 case IBT_WC_RNR_NAK_TIMEOUT_ERR:
2180 return (IB_WC_RNR_RETRY_EXC_ERR);
2181 case IBT_WC_SUCCESS:
2182 default:
2183 /* Hermon doesn't support EEC yet */
2184 ASSERT(status == IBT_WC_SUCCESS);
2185 return (IB_WC_SUCCESS);
2186 }
2187 }
2188
2189 static inline enum ib_wc_opcode
ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)2190 ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)
2191 {
2192 switch (wc_type) {
2193 case IBT_WRC_SEND:
2194 return (IB_WC_SEND);
2195 case IBT_WRC_RDMAR:
2196 return (IB_WC_RDMA_READ);
2197 case IBT_WRC_RDMAW:
2198 return (IB_WC_RDMA_WRITE);
2199 case IBT_WRC_CSWAP:
2200 return (IB_WC_COMP_SWAP);
2201 case IBT_WRC_FADD:
2202 return (IB_WC_FETCH_ADD);
2203 case IBT_WRC_BIND:
2204 return (IB_WC_BIND_MW);
2205 case IBT_WRC_RECV:
2206 return (IB_WC_RECV);
2207 case IBT_WRC_RECV_RDMAWI:
2208 default:
2209 ASSERT(wc_type == IBT_WRC_RECV_RDMAWI);
2210 return (IB_WC_RECV_RDMA_WITH_IMM);
2211 }
2212 }
2213
2214 static inline int
ibt2of_wc_flags(ibt_wc_flags_t wc_flags)2215 ibt2of_wc_flags(ibt_wc_flags_t wc_flags)
2216 {
2217 return (wc_flags & ~IBT_WC_CKSUM_OK);
2218 }
2219
2220 static inline void
set_wc(ibt_wc_t * ibt_wc,struct ib_wc * wc)2221 set_wc(ibt_wc_t *ibt_wc, struct ib_wc *wc)
2222 {
2223 wc->wr_id = ibt_wc->wc_id;
2224 wc->status = ibt2of_wc_status(ibt_wc->wc_status);
2225 /* opcode can be undefined if status is not success */
2226 if (wc->status == IB_WC_SUCCESS) {
2227 wc->opcode = ibt2of_wc_opcode(ibt_wc->wc_type);
2228 }
2229 wc->vendor_err = 0; /* not supported */
2230 wc->byte_len = ibt_wc->wc_bytes_xfer;
2231 wc->qp = NULL; /* not supported */
2232 wc->imm_data = htonl(ibt_wc->wc_immed_data);
2233 wc->src_qp = ibt_wc->wc_qpn;
2234 wc->wc_flags = ibt2of_wc_flags(ibt_wc->wc_flags);
2235 wc->pkey_index = ibt_wc->wc_pkey_ix;
2236 wc->slid = ibt_wc->wc_slid;
2237 wc->sl = ibt_wc->wc_sl;
2238 wc->dlid_path_bits = ibt_wc->wc_path_bits;
2239 wc->port_num = 0; /* not supported */
2240 }
2241
2242 /*
2243 * ib_poll_cq - poll a CQ for completion(s)
2244 * @cq:the CQ being polled
2245 * @num_entries:maximum number of completions to return
2246 * @wc:array of at least @num_entries &struct ib_wc where completions
2247 * will be returned
2248 *
2249 * Poll a CQ for (possibly multiple) completions. If the return value
2250 * is < 0, an error occurred. If the return value is >= 0, it is the
2251 * number of completions returned. If the return value is
2252 * non-negative and < num_entries, then the CQ was emptied.
2253 *
 * Note that the three following members in struct ib_wc are not supported
2255 * currently, and the values are always either 0 or NULL.
2256 * u32 vendor_err;
2257 * struct ib_qp *qp;
2258 * u8 port_num;
2259 */
2260 int
ib_poll_cq(struct ib_cq * cq,int num_entries,struct ib_wc * wc)2261 ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
2262 {
2263 ibt_wc_t ibt_wc;
2264 int npolled;
2265 ibt_status_t rtn;
2266 ofs_client_t *ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
2267
2268 ofs_lock_enter(&ofs_client->lock);
2269 if (cq->device->reg_state != IB_DEV_OPEN) {
2270 ofs_lock_exit(&ofs_client->lock);
2271 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2272 "ib_poll_cq: cq: 0x%p => invalid device state (%d)",
2273 cq, cq->device->reg_state);
2274 return (-ENXIO);
2275 }
2276
2277 SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2278 "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p, "
2279 "ibt_cq: 0x%p, ibt_wc: 0x%p",
2280 cq, num_entries, wc, cq->ibt_cq, &ibt_wc);
2281
2282 /* only one thread per cq is allowed during ibt_poll_cq() */
2283 mutex_enter(&cq->lock);
2284 for (npolled = 0; npolled < num_entries; ++npolled) {
2285 bzero(&ibt_wc, sizeof (ibt_wc_t));
2286 rtn = ibt_poll_cq(cq->ibt_cq, &ibt_wc, 1, NULL);
2287 if (rtn != IBT_SUCCESS) {
2288 break;
2289 }
2290 /* save this result to struct ib_wc */
2291 set_wc(&ibt_wc, wc + npolled);
2292 }
2293 mutex_exit(&cq->lock);
2294 ofs_lock_exit(&ofs_client->lock);
2295
2296 if (rtn == IBT_SUCCESS || rtn == IBT_CQ_EMPTY) {
2297 return (npolled);
2298 }
2299
2300 SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2301 "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p => "
2302 "ibt_poll_cq failed w/ %d, npolled = %d",
2303 cq, num_entries, wc, rtn, npolled);
2304
2305 switch (rtn) {
2306 case IBT_HCA_HDL_INVALID:
2307 case IBT_CQ_HDL_INVALID:
2308 case IBT_INVALID_PARAM:
2309 return (-EINVAL);
2310 default:
2311 return (-EIO);
2312 }
2313 }
2314
2315 ibt_hca_hdl_t
ib_get_ibt_hca_hdl(struct ib_device * device)2316 ib_get_ibt_hca_hdl(struct ib_device *device)
2317 {
2318 return (device->hca_hdl);
2319 }
2320
2321 ibt_channel_hdl_t
ib_get_ibt_channel_hdl(struct rdma_cm_id * cm)2322 ib_get_ibt_channel_hdl(struct rdma_cm_id *cm)
2323 {
2324 return (cm->qp == NULL ? NULL : cm->qp->ibt_qp);
2325 }
2326