xref: /illumos-gate/usr/src/uts/common/io/ib/clients/of/sol_ofs/sol_kverbs.c (revision 45744051679350ee063cdc366b66bee5223a11ea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /* Solaris Open Fabric kernel verbs */
26 
27 #include <sys/types.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/modctl.h>
31 #include <sys/ib/clients/of/rdma/ib_verbs.h>
32 #include <sys/ib/clients/of/rdma/ib_addr.h>
33 #include <sys/ib/clients/of/rdma/rdma_cm.h>
34 #include <sys/ib/clients/of/sol_ofs/sol_kverb_impl.h>
35 
36 static void *statep;
37 char *sol_kverbs_dbg_str = "sol_kverbs";
38 
39 static llist_head_t client_list = LLIST_HEAD_INIT(client_list);
40 kmutex_t clist_lock; /* mutex for client_list */
41 
42 static void ofs_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
43     ibt_async_event_t *);
44 
45 /*
46  * set ibt_client_t members. clnt->ib_client must be set before
47  * this func is called.
48  */
49 static int
alloc_ibt_client(ofs_client_t * clnt)50 alloc_ibt_client(ofs_client_t *clnt)
51 {
52 	int namelen;
53 	ASSERT(clnt->ib_client != NULL);
54 
55 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
56 	    "alloc_ibt_client: client: 0x%p", clnt);
57 
58 	/*
59 	 * double-check the name string. if it's longer than MAXNAMELEN
60 	 * including the string terminator, assuming the name is invalid,
61 	 * return EINVAL.
62 	 */
63 	namelen = strlen(clnt->ib_client->name);
64 	if (namelen >= MAXNAMELEN) {
65 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
66 		    "alloc_ibt_client: client: 0x%p => "
67 		    "namelen(%d) is larger than MAXNAMELEN", clnt, namelen);
68 		return (-EINVAL);
69 	}
70 	clnt->ibt_client.mi_clnt_name = kmem_zalloc(namelen + 1, KM_NOSLEEP);
71 	if (clnt->ibt_client.mi_clnt_name == NULL) {
72 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
73 		    "alloc_ibt_client: client: 0x%p => "
74 		    "no sufficient memory", clnt);
75 		return (-ENOMEM);
76 	}
77 	bcopy(clnt->ib_client->name, clnt->ibt_client.mi_clnt_name, namelen);
78 	clnt->ibt_client.mi_ibt_version = IBTI_V_CURR;
79 	if (clnt->ib_client->dip) {
80 		clnt->ibt_client.mi_clnt_class = IBT_GENERIC;
81 	} else {
82 		clnt->ibt_client.mi_clnt_class = IBT_GENERIC_MISC;
83 	}
84 	clnt->ibt_client.mi_async_handler = ofs_async_handler;
85 
86 	return (0);
87 }
88 
89 static void
free_ibt_client(ofs_client_t * clnt)90 free_ibt_client(ofs_client_t *clnt)
91 {
92 	int namelen = strlen(clnt->ib_client->name);
93 	ASSERT(namelen < MAXNAMELEN);
94 
95 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
96 	    "free_ibt_client: client: 0x%p", clnt);
97 
98 	kmem_free(clnt->ibt_client.mi_clnt_name, namelen + 1);
99 	clnt->ibt_client.mi_clnt_name = NULL;
100 }
101 
102 /*
 * get_device() returns a pointer to struct ib_device with
104  * the same guid as one passed to the function.
105  */
106 static ib_device_t *
get_device(ofs_client_t * ofs_client,ib_guid_t guid)107 get_device(ofs_client_t *ofs_client, ib_guid_t guid)
108 {
109 	ib_device_t *device;
110 	llist_head_t *entry;
111 
112 	ASSERT(RW_LOCK_HELD(&ofs_client->lock));
113 
114 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
115 	    "get_device: client: 0x%p, guid:0x%p",
116 	    ofs_client, (void *)(uintptr_t)htonll(guid));
117 
118 	list_for_each(entry, &ofs_client->device_list) {
119 		device = entry->ptr;
120 		if (device->node_guid == htonll(guid)) {
121 			ASSERT(device->reg_state == IB_DEV_CLOSE);
122 			ASSERT(device->node_type == RDMA_NODE_IB_CA);
123 			ASSERT(device->clnt_hdl == (ofs_client_p_t)ofs_client);
124 			return (device);
125 		}
126 	}
127 
128 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
129 	    "get_device: client: 0x%p, guid:0x%p => no match guid",
130 	    ofs_client, (void *)(uintptr_t)htonll(guid));
131 
132 	return (NULL);
133 }
134 
135 /*
 * ofs_async_handler() is a delegated function to handle asynchronous events,
137  * which dispatches each event to corresponding qp/cq handlers registered
138  * with ib_create_qp() and/or ib_create_cq().
139  */
140 static void
ofs_async_handler(void * clntp,ibt_hca_hdl_t hdl,ibt_async_code_t code,ibt_async_event_t * event)141 ofs_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
142     ibt_async_event_t *event)
143 {
144 	ofs_client_t	*ofs_client = (ofs_client_t *)clntp;
145 	struct ib_event ib_event;
146 	struct ib_qp	*qpp;
147 	struct ib_cq	*cqp;
148 
149 
150 	ASSERT(ofs_client != NULL);
151 
152 	cqp = event->ev_cq_hdl ? ibt_get_cq_private(event->ev_cq_hdl) : NULL;
153 	qpp = event->ev_chan_hdl ?
154 	    ibt_get_qp_private(event->ev_chan_hdl) : NULL;
155 
156 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
157 	    "ofs_async_handler: client: 0x%p, hca_hdl: 0x%p, code:0x%x, "
158 	    "event->qp: 0x%p, event->cq: 0x%p, event->srq: 0x%p "
159 	    "event->guid: 0x%p, event->port: 0x%x",
160 	    clntp, hdl, code, qpp, cqp, event->ev_srq_hdl,
161 	    (void *)(uintptr_t)event->ev_hca_guid, event->ev_port);
162 
163 	bzero(&ib_event, sizeof (struct ib_event));
164 	switch (code) {
165 	case IBT_EVENT_PATH_MIGRATED:
166 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
167 		    IB_EVENT_PATH_MIG);
168 		return;
169 	case IBT_EVENT_SQD:
170 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
171 		    IB_EVENT_SQ_DRAINED);
172 		return;
173 	case IBT_EVENT_COM_EST:
174 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
175 		    IB_EVENT_COMM_EST);
176 		return;
177 	case IBT_ERROR_CATASTROPHIC_CHAN:
178 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
179 		    IB_EVENT_QP_FATAL);
180 		return;
181 	case IBT_ERROR_INVALID_REQUEST_CHAN:
182 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
183 		    IB_EVENT_QP_REQ_ERR);
184 		return;
185 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
186 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
187 		    IB_EVENT_QP_ACCESS_ERR);
188 		return;
189 	case IBT_ERROR_PATH_MIGRATE_REQ:
190 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
191 		    IB_EVENT_PATH_MIG);
192 		return;
193 	case IBT_EVENT_EMPTY_CHAN:
194 		FIRE_QP_EVENT(ofs_client, hdl, ib_event, qpp,
195 		    IB_EVENT_QP_LAST_WQE_REACHED);
196 		return;
197 	case IBT_ERROR_CQ:
198 		FIRE_CQ_EVENT(ofs_client, hdl, ib_event, cqp,
199 		    IB_EVENT_CQ_ERR);
200 		return;
201 	case IBT_HCA_ATTACH_EVENT:
202 	{
203 		ib_device_t	*device;
204 		int		rtn;
205 
206 		/* re-use the device once it was created */
207 		rw_enter(&ofs_client->lock, RW_WRITER);
208 		device = get_device(ofs_client, event->ev_hca_guid);
209 		if (device == NULL) {
210 			device = kmem_alloc(sizeof (ib_device_t), KM_SLEEP);
211 			device->node_type = RDMA_NODE_IB_CA;
212 			device->reg_state = IB_DEV_CLOSE;
213 			device->clnt_hdl = (ofs_client_p_t)ofs_client;
214 			device->node_guid = htonll(event->ev_hca_guid);
215 			device->data = NULL;
216 			/* add this HCA */
217 			ofs_client->hca_num++;
218 			llist_head_init(&device->list, device);
219 			llist_add_tail(&device->list, &ofs_client->device_list);
220 		}
221 		device->hca_hdl = NULL;
222 		device->local_dma_lkey = 0;
223 		device->phys_port_cnt = 0;
224 
225 		/* open this HCA */
226 		rtn = ibt_open_hca(ofs_client->ibt_hdl, event->ev_hca_guid,
227 		    &device->hca_hdl);
228 		if (rtn == IBT_SUCCESS) {
229 			ibt_hca_attr_t hattr;
230 
231 			ofs_client->hca_open_num++;
232 			device->reg_state = IB_DEV_OPEN;
233 			ibt_set_hca_private(device->hca_hdl, device);
234 
235 			rtn = ibt_query_hca(device->hca_hdl, &hattr);
236 			if (rtn != IBT_SUCCESS) {
237 				device->reg_state = IB_DEV_CLOSE;
238 				rtn = ibt_close_hca(device->hca_hdl);
239 				ASSERT(rtn == IBT_SUCCESS);
240 				ofs_client->hca_open_num--;
241 				return;
242 			}
243 
244 			(void) sprintf(device->name, "%x:%x:%x",
245 			    hattr.hca_vendor_id, hattr.hca_device_id,
246 			    hattr.hca_version_id);
247 			device->local_dma_lkey = hattr.hca_reserved_lkey;
248 			device->phys_port_cnt = hattr.hca_nports;
249 			ibt_set_hca_private(device->hca_hdl, device);
250 
251 			/* invoke client's callback */
252 			if (ofs_client->ib_client->add) {
253 				ofs_client->ib_client->add(device);
254 			}
255 		}
256 		rw_exit(&ofs_client->lock);
257 
258 		return;
259 	}
260 	case IBT_HCA_DETACH_EVENT:
261 	{
262 		struct ib_device *device;
263 
264 		rw_enter(&ofs_client->lock, RW_WRITER);
265 		device = ibt_get_hca_private(hdl);
266 		if (device->reg_state == IB_DEV_OPEN) {
267 			ibt_status_t rtn;
268 			/* invoke client's callback */
269 			if (ofs_client->ib_client->remove) {
270 				ofs_client->ib_client->remove(device);
271 			}
272 			/* change the state only */
273 			device->reg_state = IB_DEV_CLOSE;
274 			/* close this HCA */
275 			rtn = ibt_close_hca(device->hca_hdl);
276 			ASSERT(rtn == IBT_SUCCESS);
277 			ofs_client->hca_open_num--;
278 		}
279 		rw_exit(&ofs_client->lock);
280 
281 		return;
282 	}
283 	case IBT_EVENT_LIMIT_REACHED_SRQ:
284 	case IBT_ERROR_CATASTROPHIC_SRQ:
285 	default:
286 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
287 		    "sol_ofs does not support this event(0x%x).\n"
288 		    "\t clntp=0x%p, hca_hdl=0x%p, code=%d, eventp=0x%p\n",
289 		    code, clntp, hdl, code, event);
290 		return;
291 	}
292 }
293 
294 /*
295  * ib_register_client - Register an IB client
296  * @client:Client to register
297  *
298  * Upper level users of the IB drivers can use ib_register_client() to
299  * register callbacks for IB device addition and removal.  When an IB
300  * device is added, each registered client's add method will be called
301  * (in the order the clients were registered), and when a device is
302  * removed, each client's remove method will be called (in the reverse
303  * order that clients were registered).  In addition, when
304  * ib_register_client() is called, the client will receive an add
305  * callback for all devices already registered.
306  *
307  * Note that struct ib_client should have a dip pointer to the client,
308  * which is different from the Linux implementation.
309  */
310 int
ib_register_client(struct ib_client * client)311 ib_register_client(struct ib_client *client)
312 {
313 	uint_t		i, nhcas; /* number of HCAs */
314 	ib_guid_t	*guidp;
315 	ofs_client_t	*ofs_client;
316 	llist_head_t	*entry, *tmp;
317 	ib_device_t	*device;
318 	int		rtn;
319 
320 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
321 	    "ib_register_client: client: 0x%p", client);
322 
323 	/* get the number of HCAs on this system */
324 	if ((nhcas = ibt_get_hca_list(&guidp)) == 0) {
325 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
326 		    "ib_register_client: client: 0x%p => no HCA", client);
327 		return (-ENXIO);
328 	}
329 
330 	/* allocate a new sol_ofs_client structure */
331 	ofs_client = kmem_zalloc(sizeof (ofs_client_t), KM_NOSLEEP);
332 	if (ofs_client == NULL) {
333 		(void) ibt_free_hca_list(guidp, nhcas);
334 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
335 		    "ib_register_client: client: 0x%p => "
336 		    "no sufficient memory for ofs_client", client);
337 		return (-ENOMEM);
338 	}
339 
340 	/* set members */
341 	ofs_client->ib_client = client;
342 	if ((rtn = alloc_ibt_client(ofs_client)) != 0) {
343 		kmem_free(ofs_client, sizeof (ofs_client_t));
344 		(void) ibt_free_hca_list(guidp, nhcas);
345 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
346 		    "ib_register_client: client: 0x%p => "
347 		    "alloc_ibt_client failed w/ 0x%x", client, rtn);
348 		return (rtn);
349 	}
350 	ofs_client->state = IB_OFS_CLNT_INITIALIZED;
351 	llist_head_init(&ofs_client->device_list, NULL);
352 	llist_head_init(&ofs_client->client_list, ofs_client);
353 	rw_init(&ofs_client->lock, NULL, RW_DEFAULT, NULL);
354 
355 	/* initialize IB client */
356 	rw_enter(&ofs_client->lock, RW_WRITER);
357 	if (client->state != IB_CLNT_UNINITIALIZED) {
358 		rw_exit(&ofs_client->lock);
359 		kmem_free(ofs_client, sizeof (ofs_client_t));
360 		(void) ibt_free_hca_list(guidp, nhcas);
361 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
362 		    "ib_register_client: client: 0x%p => "
363 		    "invalid client state(%d)", client, client->state);
364 		return (-EPERM);
365 	}
366 
367 	/* attach this client to IBTF */
368 	rtn = ibt_attach(&ofs_client->ibt_client, client->dip, ofs_client,
369 	    &ofs_client->ibt_hdl);
370 	if (rtn != IBT_SUCCESS) {
371 		rw_exit(&ofs_client->lock);
372 		free_ibt_client(ofs_client);
373 		kmem_free(ofs_client, sizeof (ofs_client_t));
374 		(void) ibt_free_hca_list(guidp, nhcas);
375 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
376 		    "ib_register_client: client: 0x%p => "
377 		    "ibt_attach failed w/ 0x%x", client, rtn);
378 		return (-EINVAL);
379 	}
380 	client->clnt_hdl = (ofs_client_p_t)ofs_client;
381 	client->state = IB_CLNT_INITIALIZED;
382 
383 	/* link this client */
384 	mutex_enter(&clist_lock);
385 	llist_add_tail(&ofs_client->client_list, &client_list);
386 	mutex_exit(&clist_lock);
387 
388 	/* Open HCAs */
389 	ofs_client->hca_num = nhcas;
390 	for (i = 0; i < ofs_client->hca_num; i++) {
391 		/* allocate the ib_device structure */
392 		device = kmem_zalloc(sizeof (ib_device_t), KM_NOSLEEP);
393 		if (device == NULL) {
394 			rtn = -ENOMEM;
395 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
396 			    "ib_register_client: client: 0x%p => "
397 			    "no sufficient memory for ib_device", client);
398 			goto err;
399 		}
400 		device->node_guid = htonll(guidp[i]);
401 		device->node_type = RDMA_NODE_IB_CA;
402 		device->reg_state = IB_DEV_CLOSE;
403 		device->clnt_hdl = (ofs_client_p_t)ofs_client;
404 		llist_head_init(&device->list, device);
405 		llist_add_tail(&device->list, &ofs_client->device_list);
406 
407 		rtn = ibt_open_hca(ofs_client->ibt_hdl, guidp[i],
408 		    &device->hca_hdl);
409 		if (rtn == IBT_SUCCESS) {
410 			ibt_hca_attr_t hattr;
411 
412 			ofs_client->hca_open_num++;
413 			device->reg_state = IB_DEV_OPEN;
414 
415 			rtn = ibt_query_hca(device->hca_hdl, &hattr);
416 			if (rtn != IBT_SUCCESS) {
417 				rtn = -EIO;
418 				SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
419 				    "ib_register_client: client: 0x%p,"
420 				    "hca_hdl: 0x%p ==> "
421 				    "ibt_query_hca() failed w/ %d",
422 				    client, device->hca_hdl, rtn);
423 				goto err;
424 			}
425 
426 			(void) sprintf(device->name, "%x:%x:%x",
427 			    hattr.hca_vendor_id, hattr.hca_device_id,
428 			    hattr.hca_version_id);
429 			device->local_dma_lkey = hattr.hca_reserved_lkey;
430 			device->phys_port_cnt = hattr.hca_nports;
431 			ibt_set_hca_private(device->hca_hdl, device);
432 
433 			/* invoke client's callback */
434 			if (client->add) {
435 				client->add(device);
436 			}
437 		}
438 	}
439 	if (ofs_client->hca_open_num == 0) {
440 		rtn = -ENXIO;
441 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
442 		    "ib_register_client: client: 0x%p => "
443 		    "no available HCA", client);
444 		goto err;
445 	}
446 	rw_exit(&ofs_client->lock);
447 
448 	(void) ibt_free_hca_list(guidp, nhcas);
449 	return (0);
450 
451 err:
452 	/* first close all open HCAs */
453 	list_for_each(entry, &ofs_client->device_list) {
454 		device = entry->ptr;
455 		/*
456 		 * If it's open already, close it after the remove
457 		 * callback.
458 		 */
459 		if (device->reg_state == IB_DEV_OPEN) {
460 			ibt_status_t rtn;
461 			/* invoke client's callback */
462 			if (client->remove) {
463 				client->remove(device);
464 			}
465 			device->reg_state = IB_DEV_CLOSE;
466 			rtn = ibt_close_hca(device->hca_hdl);
467 			ASSERT(rtn == IBT_SUCCESS);
468 			ofs_client->hca_open_num--;
469 		}
470 	}
471 	ASSERT(ofs_client->hca_open_num == 0);
472 
473 	/* then free the devices */
474 	list_for_each_safe(entry, tmp, &ofs_client->device_list) {
475 		device = entry->ptr;
476 		/* de-link and free the device */
477 		llist_del(entry);
478 		kmem_free(device, sizeof (ib_device_t));
479 		ofs_client->hca_num--;
480 	}
481 	ASSERT(ofs_client->hca_num == 0);
482 
483 	/* delink this client */
484 	mutex_enter(&clist_lock);
485 	llist_del(&ofs_client->client_list);
486 	mutex_exit(&clist_lock);
487 
488 	/* detach the client */
489 	client->clnt_hdl = NULL;
490 	client->state = IB_CLNT_UNINITIALIZED;
491 	(void) ibt_detach(ofs_client->ibt_hdl);
492 	rw_exit(&ofs_client->lock);
493 
494 	/* free sol_ofs_client */
495 	free_ibt_client(ofs_client);
496 	kmem_free(ofs_client, sizeof (ofs_client_t));
497 
498 	(void) ibt_free_hca_list(guidp, nhcas);
499 	return (rtn);
500 }
501 
502 /*
503  * ib_unregister_client - Unregister an IB client
504  * @client:Client to unregister
505  *
506  * Upper level users use ib_unregister_client() to remove their client
507  * registration.  When ib_unregister_client() is called, the client
508  * will receive a remove callback for each IB device still registered.
509  */
void
ib_unregister_client(struct ib_client *client)
{
	ofs_client_t	*ofs_client;
	ib_device_t	*device;
	llist_head_t	*entry, *tmp;

	/* the client must have completed ib_register_client() */
	ASSERT(client->state == IB_CLNT_INITIALIZED &&
	    client->clnt_hdl != NULL);

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_unregister_client: client: 0x%p", client);

	ofs_client = (ofs_client_t *)client->clnt_hdl;
	/*
	 * Writer lock: per the ofs_lock_enter() convention, holding the
	 * writer lock lets the remove callbacks re-enter kverb functions
	 * without self-deadlocking on the reader lock.
	 */
	rw_enter(&ofs_client->lock, RW_WRITER);

	/* first close all open HCAs */
	list_for_each(entry, &ofs_client->device_list) {
		device = entry->ptr;
		/*
		 * If it's open already, close it after the remove
		 * callback.
		 */
		if (device->reg_state == IB_DEV_OPEN) {
			ibt_status_t rtn;
			/* invoke client's callback */
			if (client->remove) {
				client->remove(device);
			}
			device->reg_state = IB_DEV_CLOSE;
			/* close failure is logged but not fatal here */
			rtn = ibt_close_hca(device->hca_hdl);
			if (rtn != IBT_SUCCESS)
				SOL_OFS_DPRINTF_L3(
				    sol_kverbs_dbg_str,
				    "ib_unregister_client(%p) - "
				    "ibt_close_hca failed %d",
				    client, rtn);

			ofs_client->hca_open_num--;
		}
	}
	ASSERT(ofs_client->hca_open_num == 0);

	/* then free the devices */
	list_for_each_safe(entry, tmp, &ofs_client->device_list) {
		device = entry->ptr;
		/* de-link and free the device */
		llist_del(entry);
		kmem_free(device, sizeof (ib_device_t));
		ofs_client->hca_num--;
	}
	ASSERT(ofs_client->hca_num == 0);

	/* delink this client from the global client list */
	mutex_enter(&clist_lock);
	llist_del(&ofs_client->client_list);
	mutex_exit(&clist_lock);

	/* detach the client from IBTF */
	client->clnt_hdl = NULL;
	client->state = IB_CLNT_UNINITIALIZED;
	(void) ibt_detach(ofs_client->ibt_hdl);
	rw_exit(&ofs_client->lock);

	/*
	 * free sol_ofs_client
	 * NOTE(review): ofs_client->lock is rw_init()ed in
	 * ib_register_client() but never rw_destroy()ed here — confirm
	 * whether that is intentional.
	 */
	free_ibt_client(ofs_client);
	kmem_free(ofs_client, sizeof (ofs_client_t));
}
578 
579 /*
580  * ofs_lock_enter() and ofs_lock_exit() are used to avoid the recursive
581  * rwlock while the client callbacks are invoked.
582  *
583  * Note that the writer lock is used only in the client callback case,
584  * so that the kverb functions wanting to acquire the reader lock can
585  * safely ignore the reader lock if the writer lock is already held.
 * The writer lock shouldn't be used anywhere else.
587  */
588 static inline void
ofs_lock_enter(krwlock_t * lock)589 ofs_lock_enter(krwlock_t *lock)
590 {
591 	if (!RW_WRITE_HELD(lock)) {
592 		rw_enter(lock, RW_READER);
593 	}
594 }
595 
596 static inline void
ofs_lock_exit(krwlock_t * lock)597 ofs_lock_exit(krwlock_t *lock)
598 {
599 	if (!RW_WRITE_HELD(lock)) {
600 		rw_exit(lock);
601 	}
602 }
603 
604 /*
605  * ib_get_client_data - Get IB client context
606  * @device:Device to get context for
607  * @client:Client to get context for
608  *
609  * ib_get_client_data() returns client context set with
610  * ib_set_client_data() and returns NULL if it's not found.
611  */
ib_get_client_data(struct ib_device * device,struct ib_client * client)612 void *ib_get_client_data(struct ib_device *device,
613     struct ib_client *client)
614 {
615 	ofs_client_t		*ofs_client;
616 	struct ib_device	*ib_device;
617 	boolean_t		found = B_FALSE;
618 	llist_head_t		*entry;
619 	void			*data;
620 
621 	ASSERT(device != 0 && client != 0);
622 
623 	ofs_client = (ofs_client_t *)client->clnt_hdl;
624 	if (ofs_client == 0) {
625 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
626 		    "ib_get_client_data: device: 0x%p, client: 0x%p => "
627 		    "no ofs_client", device, client);
628 		return (NULL);
629 	}
630 
631 	ofs_lock_enter(&ofs_client->lock);
632 	list_for_each(entry, &ofs_client->device_list) {
633 		ib_device = entry->ptr;
634 		if (ib_device->node_guid == device->node_guid) {
635 			found = B_TRUE;
636 			break;
637 		}
638 	}
639 	if (!found) {
640 		ofs_lock_exit(&ofs_client->lock);
641 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
642 		    "ib_get_client_data: device: 0x%p, client: 0x%p => "
643 		    "no ib_device found", device, client);
644 		return (NULL);
645 	}
646 	data = ib_device->data;
647 	ofs_lock_exit(&ofs_client->lock);
648 
649 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
650 	    "ib_get_client_data: device: 0x%p, client: 0x%p",
651 	    device, client);
652 
653 	return (data);
654 }
655 
656 /*
657  * ib_set_client_data - Set IB client context
658  * @device:Device to set context for
659  * @client:Client to set context for
660  * @data:Context to set
661  *
662  * ib_set_client_data() sets client context that can be retrieved with
663  * ib_get_client_data(). If the specified device is not found, the function
664  * returns w/o any operations.
665  */
ib_set_client_data(struct ib_device * device,struct ib_client * client,void * data)666 void ib_set_client_data(struct ib_device *device, struct ib_client *client,
667     void *data)
668 {
669 	ofs_client_t		*ofs_client;
670 	struct ib_device	*ib_device;
671 	boolean_t		found = B_FALSE;
672 	llist_head_t		*entry;
673 
674 	ASSERT(device != 0 && client != 0);
675 
676 	ofs_client = (ofs_client_t *)client->clnt_hdl;
677 	if (ofs_client == 0) {
678 		cmn_err(CE_WARN, "No client context found for %s/%s\n",
679 		    device->name, client->name);
680 		return;
681 	}
682 
683 	ofs_lock_enter(&ofs_client->lock);
684 	list_for_each(entry, &ofs_client->device_list) {
685 		ib_device = entry->ptr;
686 		if (ib_device->node_guid == device->node_guid) {
687 			found = B_TRUE;
688 			break;
689 		}
690 	}
691 	if (!found) {
692 		cmn_err(CE_WARN, "No client context found for %s/%s\n",
693 		    device->name, client->name);
694 		ofs_lock_exit(&ofs_client->lock);
695 		return;
696 	}
697 	ib_device->data = data;
698 	ofs_lock_exit(&ofs_client->lock);
699 
700 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
701 	    "ib_set_client_data: device: 0x%p, client: 0x%p, "
702 	    "data: 0x%p", device, client, data);
703 }
704 
705 /*
706  * ib_query_device - Query IB device attributes
707  * @device:Device to query
708  * @device_attr:Device attributes
709  *
710  * ib_query_device() returns the attributes of a device through the
711  * @device_attr pointer.
712  */
int
ib_query_device(struct ib_device *device, struct ib_device_attr *attr)
{
	ofs_client_t	*ofs_client = (ofs_client_t *)device->clnt_hdl;
	ibt_hca_attr_t	hattr;
	int		rtn;

	/* reader lock unless a client callback already holds the writer */
	ofs_lock_enter(&ofs_client->lock);
	if (device->reg_state != IB_DEV_OPEN) {
		ofs_lock_exit(&ofs_client->lock);
		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
		    "ib_query_device: device: 0x%p => "
		    "invalid device state (%d)", device, device->reg_state);
		return (-ENXIO);
	}
	if ((rtn = ibt_query_hca(device->hca_hdl, &hattr)) != IBT_SUCCESS) {
		ofs_lock_exit(&ofs_client->lock);
		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
		    "ib_query_device: device: 0x%p => "
		    "ibt_query_hca failed w/ 0x%x", device, rtn);
		return (-EIO);
	}
	ofs_lock_exit(&ofs_client->lock);

	/* NOTE(review): rtn is an int printed with %p here — confirm */
	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_query_device: device: 0x%p, attr: 0x%p, rtn: 0x%p",
	    device, attr, rtn);

	/* OF order is major.micro.minor, so keep it here */
	/* (<< binds tighter than &, which binds tighter than |) */
	attr->fw_ver = (uint64_t)hattr.hca_fw_major_version << 32	|
	    hattr.hca_fw_micro_version << 16 & 0xFFFF0000		|
	    hattr.hca_fw_minor_version & 0xFFFF;

	/* baseline capabilities, plus the optional ones flagged below */
	attr->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT		|
	    IB_DEVICE_PORT_ACTIVE_EVENT					|
	    IB_DEVICE_SYS_IMAGE_GUID					|
	    IB_DEVICE_RC_RNR_NAK_GEN;
	if (hattr.hca_flags & IBT_HCA_PKEY_CNTR) {
		attr->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	}
	if (hattr.hca_flags & IBT_HCA_QKEY_CNTR) {
		attr->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	}
	if (hattr.hca_flags & IBT_HCA_AUTO_PATH_MIG) {
		attr->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	}
	if (hattr.hca_flags & IBT_HCA_AH_PORT_CHECK) {
		attr->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
	}

	/* straight IBTF-attribute to OF-attribute mapping */
	attr->vendor_id		= hattr.hca_vendor_id;
	attr->vendor_part_id	= hattr.hca_device_id;
	attr->hw_ver		= hattr.hca_version_id;
	attr->sys_image_guid	= htonll(hattr.hca_si_guid);
	attr->max_mr_size	= ~0ull;
	attr->page_size_cap	= IBTF2OF_PGSZ(hattr.hca_page_sz);
	attr->max_qp		= hattr.hca_max_qp;
	attr->max_qp_wr		= hattr.hca_max_qp_sz;
	attr->max_sge		= hattr.hca_max_sgl;
	attr->max_sge_rd	= hattr.hca_max_rd_sgl;
	attr->max_cq		= hattr.hca_max_cq;
	attr->max_cqe		= hattr.hca_max_cq_sz;
	attr->max_mr		= hattr.hca_max_memr;
	attr->max_pd		= hattr.hca_max_pd;
	attr->max_qp_rd_atom	= hattr.hca_max_rdma_in_qp;
	attr->max_qp_init_rd_atom	= hattr.hca_max_rdma_in_qp;
	attr->max_ee_rd_atom	= hattr.hca_max_rdma_in_ee;
	attr->max_ee_init_rd_atom	= hattr.hca_max_rdma_in_ee;
	attr->max_res_rd_atom	= hattr.hca_max_rsc;
	attr->max_srq		= hattr.hca_max_srqs;
	attr->max_srq_wr	= hattr.hca_max_srqs_sz -1;
	attr->max_srq_sge	= hattr.hca_max_srq_sgl;
	attr->local_ca_ack_delay	= hattr.hca_local_ack_delay;
	/* prefer GLOB over HCA-local atomics when both flags are set */
	attr->atomic_cap = hattr.hca_flags & IBT_HCA_ATOMICS_GLOBAL ?
	    IB_ATOMIC_GLOB : (hattr.hca_flags & IBT_HCA_ATOMICS_HCA ?
	    IB_ATOMIC_HCA : IB_ATOMIC_NONE);
	attr->max_ee		= hattr.hca_max_eec;
	attr->max_rdd		= hattr.hca_max_rdd;
	attr->max_mw		= hattr.hca_max_mem_win;
	attr->max_pkeys		= hattr.hca_max_port_pkey_tbl_sz;
	attr->max_raw_ipv6_qp	= hattr.hca_max_ipv6_qp;
	attr->max_raw_ethy_qp	= hattr.hca_max_ether_qp;
	attr->max_mcast_grp	= hattr.hca_max_mcg;
	attr->max_mcast_qp_attach	= hattr.hca_max_qp_per_mcg;
	attr->max_total_mcast_qp_attach = hattr.hca_max_mcg_qps;
	attr->max_ah		= hattr.hca_max_ah;
	attr->max_fmr		= hattr.hca_max_fmrs;
	attr->max_map_per_fmr	= hattr.hca_opaque9; /* hca_max_map_per_fmr */

	return (0);
}
804 
805 /* Protection domains */
806 struct ib_pd *
ib_alloc_pd(struct ib_device * device)807 ib_alloc_pd(struct ib_device *device)
808 {
809 	ofs_client_t	*ofs_client = (ofs_client_t *)device->clnt_hdl;
810 	struct ib_pd	*pd;
811 	int		rtn;
812 
813 	if ((pd = kmem_alloc(sizeof (struct ib_pd), KM_NOSLEEP)) == NULL) {
814 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
815 		    "ib_alloc_pd: device: 0x%p => no sufficient memory",
816 		    device);
817 		return ((struct ib_pd *)-ENOMEM);
818 	}
819 
820 	ofs_lock_enter(&ofs_client->lock);
821 	if (device->reg_state != IB_DEV_OPEN) {
822 		ofs_lock_exit(&ofs_client->lock);
823 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
824 		    "ib_alloc_pd: device: 0x%p => invalid device state (%d)",
825 		    device, device->reg_state);
826 		kmem_free(pd, sizeof (struct ib_pd));
827 		return ((struct ib_pd *)-ENXIO);
828 	}
829 
830 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
831 	    "ib_alloc_pd: device: 0x%p", device);
832 
833 	rtn = ibt_alloc_pd(device->hca_hdl, IBT_PD_NO_FLAGS, &pd->ibt_pd);
834 	ofs_lock_exit(&ofs_client->lock);
835 
836 	if (rtn == IBT_SUCCESS) {
837 		pd->device = device;
838 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
839 		    "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p, "
840 		    "rtn: 0x%x", device, pd, pd->ibt_pd, rtn);
841 		return (pd);
842 	}
843 
844 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
845 	    "ib_alloc_pd: device: 0x%p, pd: 0x%p, ibt_pd: 0x%p => "
846 	    "ibt_alloc_pd failed w/ 0x%x", device, pd, pd->ibt_pd, rtn);
847 	kmem_free(pd, sizeof (struct ib_pd));
848 
849 	switch (rtn) {
850 	case IBT_INSUFF_RESOURCE:
851 		return ((struct ib_pd *)-ENOMEM);
852 	case IBT_HCA_HDL_INVALID:
853 		return ((struct ib_pd *)-EFAULT);
854 	default:
855 		return ((struct ib_pd *)-EIO);
856 	}
857 }
858 
859 int
ib_dealloc_pd(struct ib_pd * pd)860 ib_dealloc_pd(struct ib_pd *pd)
861 {
862 	ofs_client_t *ofs_client = (ofs_client_t *)pd->device->clnt_hdl;
863 	int rtn;
864 
865 	ofs_lock_enter(&ofs_client->lock);
866 	if (pd->device->reg_state != IB_DEV_OPEN) {
867 		ofs_lock_exit(&ofs_client->lock);
868 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
869 		    "ib_dealloc_pd: pd: 0x%p => invalid device state (%d)",
870 		    pd, pd->device->reg_state);
871 		return (-ENXIO);
872 	}
873 
874 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
875 	    "ib_dealloc_pd: pd: 0x%p", pd);
876 
877 	rtn = ibt_free_pd(pd->device->hca_hdl, pd->ibt_pd);
878 	ofs_lock_exit(&ofs_client->lock);
879 
880 	if (rtn == IBT_SUCCESS) {
881 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
882 		    "ib_dealloc_pd: pd: 0x%p, device: 0x%p, ibt_pd: 0x%p, "
883 		    "rtn: 0x%x", pd, pd->device, pd->ibt_pd, rtn);
884 		kmem_free(pd, sizeof (struct ib_pd));
885 		return (0);
886 	}
887 
888 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
889 	    "ib_dealloc_pd: pd: 0x%p => ibt_free_pd failed w/ 0x%x",
890 	    pd, rtn);
891 
892 	switch (rtn) {
893 	case IBT_PD_IN_USE:
894 		return (-EBUSY);
895 	case IBT_HCA_HDL_INVALID:
896 		return (-EFAULT);
897 	default:
898 		return (-EIO);
899 	}
900 }
901 
902 /*
903  * ofs_cq_handler() is a delegated function to handle CQ events,
904  * which dispatches them to corresponding cq handlers registered
905  * with ib_create_cq().
906  */
907 static void
ofs_cq_handler(ibt_cq_hdl_t ibt_cq,void * arg)908 ofs_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
909 {
910 	struct ib_cq *cq = (struct ib_cq *)ibt_get_cq_private(ibt_cq);
911 
912 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
913 	    "ofs_cq_handler: ibt_cq: 0x%p, ib_cq: 0x%p, comp_handler: 0x%p, "
914 	    "arg: 0x%p", ibt_cq, cq, cq->comp_handler, arg);
915 
916 	if (cq->comp_handler) {
917 		cq->comp_handler(cq, cq->cq_context);
918 	}
919 }
920 
921 /*
922  * ib_create_cq - Creates a CQ on the specified device.
923  * @device: The device on which to create the CQ.
924  * @comp_handler: A user-specified callback that is invoked when a
925  *   completion event occurs on the CQ.
926  * @event_handler: A user-specified callback that is invoked when an
927  *   asynchronous event not associated with a completion occurs on the CQ.
928  * @cq_context: Context associated with the CQ returned to the user via
929  *   the associated completion and event handlers.
930  * @cqe: The minimum size of the CQ.
931  * @comp_vector - Completion vector used to signal completion events.
932  *     Must be >= 0 and < context->num_comp_vectors.
933  *
934  * Users can examine the cq structure to determine the actual CQ size.
935  *
936  * Note that comp_vector is not supported currently.
937  */
938 struct ib_cq *
ib_create_cq(struct ib_device * device,ib_comp_handler comp_handler,void (* event_handler)(struct ib_event *,void *),void * cq_context,int cqe,void * comp_vector)939 ib_create_cq(struct ib_device *device, ib_comp_handler comp_handler,
940     void (*event_handler)(struct ib_event *, void *), void *cq_context,
941     int cqe, void *comp_vector)
942 {
943 	ofs_client_t	*ofs_client = (ofs_client_t *)device->clnt_hdl;
944 	ibt_cq_attr_t	cq_attr;
945 	uint32_t	real_size;
946 	struct ib_cq	*cq;
947 	int		rtn;
948 
949 	if ((cq = kmem_alloc(sizeof (struct ib_cq), KM_NOSLEEP)) == NULL) {
950 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
951 		    "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
952 		    "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
953 		    "comp_vector: %p => no sufficient memory", device,
954 		    comp_handler, event_handler, cq_context, cqe, comp_vector);
955 		return ((struct ib_cq *)-ENOMEM);
956 	}
957 
958 	ofs_lock_enter(&ofs_client->lock);
959 	if (device->reg_state != IB_DEV_OPEN) {
960 		ofs_lock_exit(&ofs_client->lock);
961 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
962 		    "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
963 		    "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
964 		    "comp_vector: %p => invalid device state (%d)", device,
965 		    comp_handler, event_handler, cq_context, cqe, comp_vector,
966 		    device->reg_state);
967 		kmem_free(cq, sizeof (struct ib_cq));
968 		return ((struct ib_cq *)-ENXIO);
969 	}
970 
971 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
972 	    "ib_create_cq: device: 0x%p, comp_handler: 0x%p, "
973 	    "event_handler: 0x%p, cq_context: 0x%p, cqe: 0x%x, "
974 	    "comp_vector: %d", device, comp_handler, event_handler,
975 	    cq_context, cqe, comp_vector);
976 
977 	cq_attr.cq_size = cqe;
978 	cq_attr.cq_sched = comp_vector;
979 	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
980 	rtn = ibt_alloc_cq(device->hca_hdl, &cq_attr, &cq->ibt_cq, &real_size);
981 	ofs_lock_exit(&ofs_client->lock);
982 
983 	if (rtn == IBT_SUCCESS) {
984 		cq->device = device;
985 		cq->comp_handler = comp_handler;
986 		cq->event_handler = event_handler;
987 		cq->cq_context = cq_context;
988 		cq->cqe = real_size;
989 		ibt_set_cq_private(cq->ibt_cq, cq);
990 		ibt_set_cq_handler(cq->ibt_cq, ofs_cq_handler, cq_context);
991 		mutex_init(&cq->lock, NULL, MUTEX_DEFAULT, NULL);
992 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
993 		    "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p, "
994 		    "rtn: 0x%x", device, cqe, cq->ibt_cq, rtn);
995 		return (cq);
996 	}
997 
998 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
999 	    "ib_create_cq: device: 0x%p, cqe: 0x%x, ibt_cq: 0x%p => "
1000 	    "ibt_alloc_cq failed w/ 0x%x", device, cqe, cq->ibt_cq, rtn);
1001 	kmem_free(cq, sizeof (struct ib_cq));
1002 
1003 	switch (rtn) {
1004 	case IBT_HCA_CQ_EXCEEDED:
1005 	case IBT_INVALID_PARAM:
1006 	case IBT_HCA_HDL_INVALID:
1007 		return ((struct ib_cq *)-EINVAL);
1008 	case IBT_INSUFF_RESOURCE:
1009 		return ((struct ib_cq *)-ENOMEM);
1010 	default:
1011 		return ((struct ib_cq *)-EIO);
1012 	}
1013 }
1014 
/*
 * ib_destroy_cq - frees the IBTF CQ backing the given ib_cq and then
 * releases the ib_cq wrapper itself.
 *
 * Returns 0 on success, -ENXIO if the device is not open, -EBUSY if
 * the CQ is still in use, -EINVAL for an invalid HCA/CQ handle, and
 * -EIO for any other IBTF failure.
 */
int
ib_destroy_cq(struct ib_cq *cq)
{
	ofs_client_t	*ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
	int		rtn;

	ofs_lock_enter(&ofs_client->lock);
	if (cq->device->reg_state != IB_DEV_OPEN) {
		ofs_lock_exit(&ofs_client->lock);
		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
		    "ib_destroy_cq: cq: 0x%p => invalid device state (%d)",
		    cq, cq->device->reg_state);
		return (-ENXIO);
	}

	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
	    "ib_destroy_cq: cq: 0x%p", cq);

	/*
	 * If IBTL_ASYNC_PENDING is set, the underlying ibt_cq is not
	 * freed at this moment, but stays alive for a while.  There is
	 * then a possibility that this CQ is used even after
	 * ib_destroy_cq() is called.  To distinguish this case from
	 * others, clear the CQ private pointer here so late callbacks
	 * can tell the CQ has been destroyed.  (This comment previously
	 * said "qp" -- a copy-paste from the QP path; this function
	 * operates on a CQ.)
	 */
	ibt_set_cq_private(cq->ibt_cq, NULL);

	rtn = ibt_free_cq(cq->ibt_cq);
	if (rtn == IBT_SUCCESS) {
		ofs_lock_exit(&ofs_client->lock);
		kmem_free(cq, sizeof (struct ib_cq));
		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
		    "ib_destroy_cq: cq: 0x%p, rtn: 0x%x", cq, rtn);
		return (0);
	}
	/* free failed: the CQ is still valid, restore the private pointer */
	ibt_set_cq_private(cq->ibt_cq, cq);
	ofs_lock_exit(&ofs_client->lock);

	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
	    "ib_destroy_cq: cq: 0x%p => ibt_free_cq failed w/ 0x%x", cq, rtn);

	/* map the IBTF status to a Linux-style negative errno */
	switch (rtn) {
	case IBT_CQ_BUSY:
		return (-EBUSY);
	case IBT_HCA_HDL_INVALID:
	case IBT_CQ_HDL_INVALID:
		return (-EINVAL);
	default:
		return (-EIO);
	}
}
1066 
1067 struct ib_qp *
ib_create_qp(struct ib_pd * pd,struct ib_qp_init_attr * qp_init_attr)1068 ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr)
1069 {
1070 	ofs_client_t		*ofs_client = pd->device->clnt_hdl;
1071 	ibt_qp_alloc_attr_t	attrs;
1072 	ibt_chan_sizes_t	sizes;
1073 	ib_qpn_t		qpn;
1074 	ibt_qp_hdl_t		ibt_qp;
1075 	struct ib_qp		*qp;
1076 	int			rtn;
1077 
1078 	/* sanity check */
1079 	if (!(qp_init_attr->send_cq && qp_init_attr->recv_cq)) {
1080 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1081 		    "ib_create_qp: pd: 0x%p => invalid cqs "
1082 		    "(send_cq=0x%p, recv_cq=0x%p)", pd,
1083 		    qp_init_attr->send_cq, qp_init_attr->recv_cq);
1084 		return ((struct ib_qp *)-EINVAL);
1085 	}
1086 
1087 	/* UC, Raw IPv6 and Raw Ethernet are not supported */
1088 	if (qp_init_attr->qp_type == IB_QPT_UC ||
1089 	    qp_init_attr->qp_type == IB_QPT_RAW_IPV6 ||
1090 	    qp_init_attr->qp_type == IB_QPT_RAW_ETY) {
1091 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1092 		    "ib_create_qp: pd: 0x%p => invalid qp_type",
1093 		    pd, qp_init_attr->qp_type);
1094 		return ((struct ib_qp *)-EINVAL);
1095 	}
1096 
1097 	if ((qp = kmem_alloc(sizeof (struct ib_qp), KM_NOSLEEP)) == NULL) {
1098 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1099 		    "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1100 		    "no sufficient memory", pd, qp_init_attr);
1101 		return ((struct ib_qp *)-ENOMEM);
1102 	}
1103 
1104 	ofs_lock_enter(&ofs_client->lock);
1105 	if (pd->device->reg_state != IB_DEV_OPEN) {
1106 		ofs_lock_exit(&ofs_client->lock);
1107 		kmem_free(qp, sizeof (struct ib_qp));
1108 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1109 		    "ib_create_qp: pd: 0x%p, init_attr: 0x%p => "
1110 		    "invalid device state (%d)", pd, qp_init_attr,
1111 		    pd->device->reg_state);
1112 		return ((struct ib_qp *)-ENXIO);
1113 	}
1114 
1115 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1116 	    "ib_create_qp: pd: 0x%p, event_handler: 0x%p, qp_context: 0x%p, "
1117 	    "send_cq: 0x%p, recv_cq: 0x%p, srq: 0x%p, max_send_wr: 0x%x, "
1118 	    "max_recv_wr: 0x%x, max_send_sge: 0x%x, max_recv_sge: 0x%x, "
1119 	    "max_inline_data: 0x%x, sq_sig_type: %d, qp_type: %d, "
1120 	    "port_num: %d",
1121 	    pd, qp_init_attr->event_handler, qp_init_attr->qp_context,
1122 	    qp_init_attr->send_cq, qp_init_attr->recv_cq, qp_init_attr->srq,
1123 	    qp_init_attr->cap.max_send_wr, qp_init_attr->cap.max_recv_wr,
1124 	    qp_init_attr->cap.max_send_sge, qp_init_attr->cap.max_recv_sge,
1125 	    qp_init_attr->cap.max_inline_data, qp_init_attr->sq_sig_type,
1126 	    qp_init_attr->qp_type, qp_init_attr->port_num);
1127 
1128 	attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1129 	if (qp_init_attr->srq) {
1130 		attrs.qp_alloc_flags |= IBT_QP_USES_SRQ;
1131 	}
1132 
1133 	attrs.qp_flags = IBT_ALL_SIGNALED | IBT_FAST_REG_RES_LKEY;
1134 	if (qp_init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) {
1135 		attrs.qp_flags |= IBT_WR_SIGNALED;
1136 	}
1137 
1138 	attrs.qp_scq_hdl = qp_init_attr->send_cq->ibt_cq;
1139 	attrs.qp_rcq_hdl = qp_init_attr->recv_cq->ibt_cq;
1140 	attrs.qp_pd_hdl = pd->ibt_pd;
1141 
1142 	attrs.qp_sizes.cs_sq = qp_init_attr->cap.max_send_wr;
1143 	attrs.qp_sizes.cs_rq = qp_init_attr->cap.max_recv_wr;
1144 	attrs.qp_sizes.cs_sq_sgl = qp_init_attr->cap.max_send_sge;
1145 	attrs.qp_sizes.cs_rq_sgl = qp_init_attr->cap.max_recv_sge;
1146 	attrs.qp_sizes.cs_inline = qp_init_attr->cap.max_inline_data;
1147 
1148 	switch (qp_init_attr->qp_type) {
1149 	case IB_QPT_RC:
1150 		rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_RC_RQP, &attrs,
1151 		    &sizes, &qpn, &ibt_qp);
1152 		break;
1153 	case IB_QPT_UD:
1154 		rtn = ibt_alloc_qp(pd->device->hca_hdl, IBT_UD_RQP, &attrs,
1155 		    &sizes, &qpn, &ibt_qp);
1156 		break;
1157 	case IB_QPT_SMI:
1158 		rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1159 		    qp_init_attr->port_num, IBT_SMI_SQP, &attrs, &sizes,
1160 		    &ibt_qp);
1161 		break;
1162 	case IB_QPT_GSI:
1163 		rtn = ibt_alloc_special_qp(pd->device->hca_hdl,
1164 		    qp_init_attr->port_num, IBT_GSI_SQP, &attrs, &sizes,
1165 		    &ibt_qp);
1166 		break;
1167 	default:
1168 		/* this should never happens */
1169 		ofs_lock_exit(&ofs_client->lock);
1170 		kmem_free(qp, sizeof (struct ib_qp));
1171 		return ((struct ib_qp *)-EINVAL);
1172 	}
1173 	ofs_lock_exit(&ofs_client->lock);
1174 
1175 	if (rtn == IBT_SUCCESS) {
1176 		/* fill in ib_qp_cap w/ the real values */
1177 		qp_init_attr->cap.max_send_wr = sizes.cs_sq;
1178 		qp_init_attr->cap.max_recv_wr = sizes.cs_rq;
1179 		qp_init_attr->cap.max_send_sge = sizes.cs_sq_sgl;
1180 		qp_init_attr->cap.max_recv_sge = sizes.cs_rq_sgl;
1181 		/* max_inline_data is not supported */
1182 		qp_init_attr->cap.max_inline_data = 0;
1183 		/* fill in ib_qp */
1184 		qp->device = pd->device;
1185 		qp->pd = pd;
1186 		qp->send_cq = qp_init_attr->send_cq;
1187 		qp->recv_cq = qp_init_attr->recv_cq;
1188 		qp->srq = qp_init_attr->srq;
1189 		qp->event_handler = qp_init_attr->event_handler;
1190 		qp->qp_context = qp_init_attr->qp_context;
1191 		qp->qp_num = qp_init_attr->qp_type == IB_QPT_SMI ? 0 :
1192 		    qp_init_attr->qp_type == IB_QPT_GSI ? 1 : qpn;
1193 		qp->qp_type = qp_init_attr->qp_type;
1194 		qp->ibt_qp = ibt_qp;
1195 		ibt_set_qp_private(qp->ibt_qp, qp);
1196 		mutex_init(&qp->lock, NULL, MUTEX_DEFAULT, NULL);
1197 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1198 		    "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p, "
1199 		    "rtn: 0x%x", pd->device, pd, qp_init_attr, rtn);
1200 		return (qp);
1201 	}
1202 	kmem_free(qp, sizeof (struct ib_qp));
1203 
1204 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1205 	    "ib_create_qp: device: 0x%p, pd: 0x%x, init_attr: 0x%p => "
1206 	    "ibt_alloc_(special)_qp failed w/ rtn: 0x%x", pd->device, pd,
1207 	    qp_init_attr, rtn);
1208 
1209 	switch (rtn) {
1210 	case IBT_NOT_SUPPORTED:
1211 	case IBT_QP_SRV_TYPE_INVALID:
1212 	case IBT_CQ_HDL_INVALID:
1213 	case IBT_HCA_HDL_INVALID:
1214 	case IBT_INVALID_PARAM:
1215 	case IBT_SRQ_HDL_INVALID:
1216 	case IBT_PD_HDL_INVALID:
1217 	case IBT_HCA_SGL_EXCEEDED:
1218 	case IBT_HCA_WR_EXCEEDED:
1219 		return ((struct ib_qp *)-EINVAL);
1220 	case IBT_INSUFF_RESOURCE:
1221 		return ((struct ib_qp *)-ENOMEM);
1222 	default:
1223 		return ((struct ib_qp *)-EIO);
1224 	}
1225 }
1226 
1227 int
ib_destroy_qp(struct ib_qp * qp)1228 ib_destroy_qp(struct ib_qp *qp)
1229 {
1230 	ofs_client_t	*ofs_client = (ofs_client_t *)qp->device->clnt_hdl;
1231 	int		rtn;
1232 
1233 	ofs_lock_enter(&ofs_client->lock);
1234 	if (qp->device->reg_state != IB_DEV_OPEN) {
1235 		ofs_lock_exit(&ofs_client->lock);
1236 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1237 		    "ib_destroy_qp: qp: 0x%p => invalid device state (%d)",
1238 		    qp, qp->device->reg_state);
1239 		return (-ENXIO);
1240 	}
1241 
1242 	/*
1243 	 * if IBTL_ASYNC_PENDING is set, ibt_qp is not freed
1244 	 * at this moment, but yet alive for a while. Then
1245 	 * there is a possibility that this qp is used even after
1246 	 * ib_destroy_qp() is called. To distinguish this case from
1247 	 * others, clear ibt_qp here.
1248 	 */
1249 	ibt_set_qp_private(qp->ibt_qp, NULL);
1250 
1251 	rtn = ibt_free_qp(qp->ibt_qp);
1252 	if (rtn == IBT_SUCCESS) {
1253 		ofs_lock_exit(&ofs_client->lock);
1254 		kmem_free(qp, sizeof (struct ib_qp));
1255 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1256 		    "ib_destroy_qp: qp: 0x%p, rtn: 0x%x", qp, rtn);
1257 		return (0);
1258 	}
1259 	ibt_set_qp_private(qp->ibt_qp, qp);
1260 	ofs_lock_exit(&ofs_client->lock);
1261 
1262 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1263 	    "ib_destroy_qp: qp: 0x%p => ibt_free_qp failed w/ 0x%x", qp, rtn);
1264 
1265 	switch (rtn) {
1266 	case IBT_CHAN_STATE_INVALID:
1267 	case IBT_HCA_HDL_INVALID:
1268 	case IBT_QP_HDL_INVALID:
1269 		return (-EINVAL);
1270 	default:
1271 		return (-EIO);
1272 	}
1273 }
1274 
1275 /*
1276  * ib_req_notify_cq - Request completion notification on a CQ.
1277  * @cq: The CQ to generate an event for.
1278  * @flags:
1279  *   Must contain exactly one of %IB_CQ_SOLICITED or %IB_CQ_NEXT_COMP
1280  *   to request an event on the next solicited event or next work
1281  *   completion at any type, respectively. %IB_CQ_REPORT_MISSED_EVENTS
1282  *   may also be |ed in to request a hint about missed events, as
1283  *   described below.
1284  *
1285  * Return Value:
1286  *    < 0 means an error occurred while requesting notification
1287  *   == 0 means notification was requested successfully, and if
1288  *        IB_CQ_REPORT_MISSED_EVENTS was passed in, then no events
1289  *        were missed and it is safe to wait for another event.  In
1290  *        this case is it guaranteed that any work completions added
1291  *        to the CQ since the last CQ poll will trigger a completion
1292  *        notification event.
1293  *    > 0 is only returned if IB_CQ_REPORT_MISSED_EVENTS was passed
1294  *        in.  It means that the consumer must poll the CQ again to
1295  *        make sure it is empty to avoid missing an event because of a
1296  *        race between requesting notification and an entry being
1297  *        added to the CQ.  This return value means it is possible
1298  *        (but not guaranteed) that a work completion has been added
1299  *        to the CQ since the last poll without triggering a
1300  *        completion notification event.
1301  *
1302  * Note that IB_CQ_REPORT_MISSED_EVENTS is currently not supported.
1303  */
1304 int
ib_req_notify_cq(struct ib_cq * cq,enum ib_cq_notify_flags flags)1305 ib_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags)
1306 {
1307 	ibt_cq_notify_flags_t	notify_type;
1308 	int			rtn;
1309 	ofs_client_t		*ofs_client = cq->device->clnt_hdl;
1310 
1311 	ofs_lock_enter(&ofs_client->lock);
1312 	if (cq->device->reg_state != IB_DEV_OPEN) {
1313 		ofs_lock_exit(&ofs_client->lock);
1314 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1315 		    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1316 		return (-ENXIO);
1317 	}
1318 
1319 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1320 	    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x", cq, flags);
1321 
1322 	switch (flags & IB_CQ_SOLICITED_MASK) {
1323 	case IB_CQ_SOLICITED:
1324 		notify_type = IBT_NEXT_SOLICITED;
1325 		break;
1326 	case IB_CQ_NEXT_COMP:
1327 		notify_type = IBT_NEXT_COMPLETION;
1328 		break;
1329 	default:
1330 		/* Currently only two flags are supported */
1331 		ofs_lock_exit(&ofs_client->lock);
1332 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1333 		    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => invalid flag",
1334 		    cq, flags);
1335 		return (-EINVAL);
1336 	}
1337 
1338 	rtn = ibt_enable_cq_notify(cq->ibt_cq, notify_type);
1339 	ofs_lock_exit(&ofs_client->lock);
1340 
1341 	if (rtn == IBT_SUCCESS) {
1342 		SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
1343 		    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x rtn: 0x%x",
1344 		    cq, flags, rtn);
1345 		return (0);
1346 	}
1347 
1348 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1349 	    "ib_req_notify_cq: cq: 0x%p, flag: 0x%x => ibt_enable_cq_notify "
1350 	    "failed w/ 0x%x", cq, flags, rtn);
1351 
1352 	switch (rtn) {
1353 	case IBT_HCA_HDL_INVALID:
1354 	case IBT_CQ_HDL_INVALID:
1355 	case IBT_CQ_NOTIFY_TYPE_INVALID:
1356 		return (-EINVAL);
1357 	default:
1358 		return (-EIO);
1359 	}
1360 }
1361 
/*
 * qp_state_table - legal QP state transitions and their attribute masks,
 * indexed as [current state][next state].  An entry with .valid == 0
 * marks the transition illegal.  For a valid transition, req_param[]
 * lists the attributes that MUST be present in the modify mask for each
 * QP type, and opt_param[] the attributes that MAY additionally appear.
 * Consumed by ib_modify_qp_is_ok() below.
 */
static const struct {
	int			valid;
	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETY + 1];
	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETY + 1];
} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {

	/* transitions out of RESET */
	[IB_QPS_RESET] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
	},
	/* transitions out of INIT */
	[IB_QPS_INIT]  = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_INIT]  = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		},
		[IB_QPS_RTR]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN),
				[IB_QPT_RC] = (IB_QP_AV | IB_QP_PATH_MTU |
				    IB_QP_DEST_QPN | IB_QP_RQ_PSN |
				    IB_QP_MAX_DEST_RD_ATOMIC |
				    IB_QP_MIN_RNR_TIMER),
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_RC] = (IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of RTR */
	[IB_QPS_RTR]   = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =   { .valid = 1 },
		[IB_QPS_RTS]   = {
			.valid = 1,
			.req_param = {
				[IB_QPT_UD] = IB_QP_SQ_PSN,
				[IB_QPT_UC] = IB_QP_SQ_PSN,
				[IB_QPT_RC] = (IB_QP_TIMEOUT |
				    IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
				    IB_QP_SQ_PSN | IB_QP_MAX_QP_RD_ATOMIC),
				[IB_QPT_SMI] = IB_QP_SQ_PSN,
				[IB_QPT_GSI] = IB_QP_SQ_PSN,
			},
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of RTS */
	[IB_QPS_RTS] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =  { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH |
				    IB_QP_PATH_MIG_STATE | IB_QP_MIN_RNR_TIMER),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
			}
		},
	},
	/* transitions out of SQD */
	[IB_QPS_SQD] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_CUR_STATE |
				    IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		},
		[IB_QPS_SQD] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_AV | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_PATH_MIG_STATE),
				[IB_QPT_RC] = (IB_QP_PORT | IB_QP_AV |
				    IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
				    IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC |
				    IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_ALT_PATH |
				    IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX |
				    IB_QP_MIN_RNR_TIMER | IB_QP_PATH_MIG_STATE),
				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of SQE */
	[IB_QPS_SQE]  = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] = { .valid = 1 },
		[IB_QPS_RTS] = {
			.valid = 1,
			.opt_param = {
				[IB_QPT_UD] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_UC] = (IB_QP_CUR_STATE |
				    IB_QP_ACCESS_FLAGS),
				[IB_QPT_SMI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
				[IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY),
			}
		}
	},
	/* transitions out of ERR: only RESET (or staying in ERR) */
	[IB_QPS_ERR] = {
		[IB_QPS_RESET] = { .valid = 1 },
		[IB_QPS_ERR] =  { .valid = 1 }
	}
};
1530 
1531 static inline int
ib_modify_qp_is_ok(enum ib_qp_state cur_state,enum ib_qp_state next_state,enum ib_qp_type type,enum ib_qp_attr_mask mask)1532 ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1533     enum ib_qp_type type, enum ib_qp_attr_mask mask)
1534 {
1535 	enum ib_qp_attr_mask req_param, opt_param;
1536 
1537 	if (cur_state  < 0 || cur_state  > IB_QPS_ERR ||
1538 	    next_state < 0 || next_state > IB_QPS_ERR) {
1539 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1540 		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1541 		    "qp_type: %d, attr_mask: 0x%x => invalid state(1)",
1542 		    cur_state, next_state, type, mask);
1543 		return (0);
1544 	}
1545 
1546 	if (mask & IB_QP_CUR_STATE &&
1547 	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1548 	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) {
1549 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1550 		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1551 		    "qp_type: %d, attr_mask: 0x%x => invalid state(2)",
1552 		    cur_state, next_state, type, mask);
1553 		return (0);
1554 	}
1555 
1556 	if (!qp_state_table[cur_state][next_state].valid) {
1557 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1558 		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1559 		    "qp_type: %d, attr_mask: 0x%x => state is not valid",
1560 		    cur_state, next_state, type, mask);
1561 		return (0);
1562 	}
1563 
1564 	req_param = qp_state_table[cur_state][next_state].req_param[type];
1565 	opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1566 
1567 	if ((mask & req_param) != req_param) {
1568 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1569 		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1570 		    "qp_type: %d, attr_mask: 0x%x => "
1571 		    "required param doesn't match. req_param = 0x%x",
1572 		    cur_state, next_state, type, mask, req_param);
1573 		return (0);
1574 	}
1575 
1576 	if (mask & ~(req_param | opt_param | IB_QP_STATE)) {
1577 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1578 		    "ib_modify_qp_is_ok: cur_state: %d, next_state: %d, "
1579 		    "qp_type: %d, attr_mask: 0x%x => "
1580 		    "unsupported options. req_param = 0x%x, opt_param = 0x%x",
1581 		    cur_state, next_state, type, mask, req_param, opt_param);
1582 		return (0);
1583 	}
1584 
1585 	return (1);
1586 }
1587 
/*
 * qp_current_state - return the current QP state from a queried IBTF
 * attribute structure as the OF enum.  The direct cast presumes the
 * IBTF and OF state encodings line up value-for-value -- confirm
 * against the IBT_STATE_* and IB_QPS_* enum definitions.  SQDRAIN has
 * no OF counterpart and must never appear here, hence the ASSERT.
 */
static inline enum ib_qp_state
qp_current_state(ibt_qp_query_attr_t *qp_attr)
{
	ASSERT(qp_attr->qp_info.qp_state != IBT_STATE_SQDRAIN);
	return (enum ib_qp_state)(qp_attr->qp_info.qp_state);
}
1594 
1595 static inline ibt_tran_srv_t
of2ibtf_qp_type(enum ib_qp_type type)1596 of2ibtf_qp_type(enum ib_qp_type type)
1597 {
1598 	switch (type) {
1599 	case IB_QPT_SMI:
1600 	case IB_QPT_GSI:
1601 	case IB_QPT_UD:
1602 		return (IBT_UD_SRV);
1603 	case IB_QPT_RC:
1604 		return (IBT_RC_SRV);
1605 	case IB_QPT_UC:
1606 		return (IBT_UC_SRV);
1607 	case IB_QPT_RAW_IPV6:
1608 		return (IBT_RAWIP_SRV);
1609 	case IB_QPT_RAW_ETY:
1610 	default:
1611 		ASSERT(type == IB_QPT_RAW_ETY);
1612 		return (IBT_RAWETHER_SRV);
1613 	}
1614 }
1615 
1616 static inline void
set_av(struct ib_ah_attr * attr,ibt_cep_path_t * pathp)1617 set_av(struct ib_ah_attr *attr, ibt_cep_path_t *pathp)
1618 {
1619 	ibt_adds_vect_t		*av = &pathp->cep_adds_vect;
1620 
1621 	pathp->cep_hca_port_num = attr->port_num;
1622 	av->av_srate = OF2IBTF_SRATE(attr->static_rate);
1623 	av->av_srvl = attr->sl & 0xF;
1624 	av->av_send_grh = attr->ah_flags & IB_AH_GRH ? 1 : 0;
1625 
1626 	if (av->av_send_grh) {
1627 		av->av_dgid.gid_prefix =
1628 		    attr->grh.dgid.global.subnet_prefix;
1629 		av->av_dgid.gid_guid =
1630 		    attr->grh.dgid.global.interface_id;
1631 		av->av_flow = attr->grh.flow_label & 0xFFFFF;
1632 		av->av_tclass = attr->grh.traffic_class;
1633 		av->av_hop = attr->grh.hop_limit;
1634 		av->av_sgid_ix = attr->grh.sgid_index;
1635 	}
1636 	av->av_dlid = attr->dlid;
1637 	av->av_src_path = attr->src_path_bits;
1638 }
1639 
1640 int
ib_modify_qp(struct ib_qp * qp,struct ib_qp_attr * attr,int attr_mask)1641 ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask)
1642 {
1643 	enum ib_qp_state	cur_state, new_state;
1644 	ibt_hca_attr_t		hattr;
1645 	ibt_qp_query_attr_t	qp_attr;
1646 	ibt_qp_info_t		modify_attr;
1647 	ibt_cep_modify_flags_t	flags;
1648 	int			rtn;
1649 	ofs_client_t		*ofs_client = qp->device->clnt_hdl;
1650 
1651 	ofs_lock_enter(&ofs_client->lock);
1652 	if (qp->device->reg_state != IB_DEV_OPEN) {
1653 		ofs_lock_exit(&ofs_client->lock);
1654 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1655 		    "ib_modify_qp: qp: 0x%p => invalid device state (%d)",
1656 		    qp, qp->device->reg_state);
1657 		return (-ENXIO);
1658 	}
1659 
1660 	rtn = ibt_query_hca(qp->device->hca_hdl, &hattr);
1661 	if (rtn != IBT_SUCCESS) {
1662 		ofs_lock_exit(&ofs_client->lock);
1663 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1664 		    "ib_modify_qp: qp: 0x%p, hca_hdl: 0x%p => "
1665 		    "ibt_query_hca() failed w/ %d",
1666 		    qp, qp->device->hca_hdl, rtn);
1667 		return (-EIO);
1668 	}
1669 
1670 	/* only one thread per qp is allowed during the qp modification */
1671 	mutex_enter(&qp->lock);
1672 
1673 	/* Get the current QP attributes first */
1674 	bzero(&qp_attr, sizeof (ibt_qp_query_attr_t));
1675 	if ((rtn = ibt_query_qp(qp->ibt_qp, &qp_attr)) != IBT_SUCCESS) {
1676 		mutex_exit(&qp->lock);
1677 		ofs_lock_exit(&ofs_client->lock);
1678 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1679 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1680 		    "ibt_query_qp failed w/ 0x%x", qp, attr, attr_mask, rtn);
1681 		return (-EIO);
1682 	}
1683 
1684 	/* Get the current and new state for this QP */
1685 	cur_state = attr_mask & IB_QP_CUR_STATE ?  attr->cur_qp_state :
1686 	    qp_current_state(&qp_attr);
1687 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state :
1688 	    cur_state;
1689 
1690 	/* Sanity check of the current/new states */
1691 	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1692 		/* Linux OF returns 0 in this case */
1693 		mutex_exit(&qp->lock);
1694 		ofs_lock_exit(&ofs_client->lock);
1695 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1696 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1697 		    "invalid state (both of current/new states are RESET)",
1698 		    qp, attr, attr_mask);
1699 		return (0);
1700 	}
1701 
1702 	/*
1703 	 * Check if this modification request is supported with the new
1704 	 * and/or current state.
1705 	 */
1706 	if (!ib_modify_qp_is_ok(cur_state, new_state, qp->qp_type, attr_mask)) {
1707 		mutex_exit(&qp->lock);
1708 		ofs_lock_exit(&ofs_client->lock);
1709 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1710 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1711 		    "invalid arguments",
1712 		    qp, attr, attr_mask);
1713 		return (-EINVAL);
1714 	}
1715 
1716 	/* Sanity checks */
1717 	if (attr_mask & IB_QP_PORT && (attr->port_num == 0 ||
1718 	    attr->port_num > hattr.hca_nports)) {
1719 		mutex_exit(&qp->lock);
1720 		ofs_lock_exit(&ofs_client->lock);
1721 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1722 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1723 		    "invalid attr->port_num(%d), max_nports(%d)",
1724 		    qp, attr, attr_mask, attr->port_num, hattr.hca_nports);
1725 		return (-EINVAL);
1726 	}
1727 
1728 	if (attr_mask & IB_QP_PKEY_INDEX &&
1729 	    attr->pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1730 		mutex_exit(&qp->lock);
1731 		ofs_lock_exit(&ofs_client->lock);
1732 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1733 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1734 		    "invalid attr->pkey_index(%d), max_pkey_index(%d)",
1735 		    qp, attr, attr_mask, attr->pkey_index,
1736 		    hattr.hca_max_port_pkey_tbl_sz);
1737 		return (-EINVAL);
1738 	}
1739 
1740 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1741 	    attr->max_rd_atomic > hattr.hca_max_rdma_out_qp) {
1742 		mutex_exit(&qp->lock);
1743 		ofs_lock_exit(&ofs_client->lock);
1744 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1745 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1746 		    "invalid attr->max_rd_atomic(0x%x), max_rdma_out_qp(0x%x)",
1747 		    qp, attr, attr_mask, attr->max_rd_atomic,
1748 		    hattr.hca_max_rdma_out_qp);
1749 		return (-EINVAL);
1750 	}
1751 
1752 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1753 	    attr->max_dest_rd_atomic > hattr.hca_max_rdma_in_qp) {
1754 		mutex_exit(&qp->lock);
1755 		ofs_lock_exit(&ofs_client->lock);
1756 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1757 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
1758 		    "invalid attr->max_dest_rd_atomic(0x%x), "
1759 		    "max_rdma_in_qp(0x%x)", qp, attr, attr_mask,
1760 		    attr->max_dest_rd_atomic, hattr.hca_max_rdma_in_qp);
1761 		return (-EINVAL);
1762 	}
1763 
1764 	/* copy the current setting */
1765 	modify_attr = qp_attr.qp_info;
1766 
1767 	/*
1768 	 * Since it's already checked if the modification request matches
1769 	 * the new and/or current states, just assign both of states to
1770 	 * modify_attr here. The current state is required if qp_state
1771 	 * is RTR, but it's harmelss otherwise, so it's set always.
1772 	 */
1773 	modify_attr.qp_current_state = OF2IBTF_STATE(cur_state);
1774 	modify_attr.qp_state = OF2IBTF_STATE(new_state);
1775 	modify_attr.qp_trans = of2ibtf_qp_type(qp->qp_type);
1776 
1777 	/* Convert OF modification requests into IBTF ones */
1778 	flags = IBT_CEP_SET_STATE;	/* IBTF needs IBT_CEP_SET_STATE */
1779 	if (cur_state == IB_QPS_RESET &&
1780 	    new_state == IB_QPS_INIT) {
1781 		flags |= IBT_CEP_SET_RESET_INIT;
1782 	} else if (cur_state == IB_QPS_INIT &&
1783 	    new_state == IB_QPS_RTR) {
1784 		flags |= IBT_CEP_SET_INIT_RTR;
1785 	} else if (cur_state == IB_QPS_RTR &&
1786 	    new_state == IB_QPS_RTS) {
1787 		flags |= IBT_CEP_SET_RTR_RTS;
1788 	}
1789 	if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
1790 		flags |= IBT_CEP_SET_SQD_EVENT;
1791 	}
1792 	if (attr_mask & IB_QP_ACCESS_FLAGS) {
1793 		modify_attr.qp_flags &= ~(IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR |
1794 		    IBT_CEP_ATOMIC);
1795 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) {
1796 			flags |= IBT_CEP_SET_RDMA_R;
1797 			modify_attr.qp_flags |= IBT_CEP_RDMA_RD;
1798 		}
1799 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) {
1800 			flags |= IBT_CEP_SET_RDMA_W;
1801 			modify_attr.qp_flags |= IBT_CEP_RDMA_WR;
1802 		}
1803 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
1804 			flags |= IBT_CEP_SET_ATOMIC;
1805 			modify_attr.qp_flags |= IBT_CEP_ATOMIC;
1806 		}
1807 	}
1808 	if (attr_mask & IB_QP_PKEY_INDEX) {
1809 		flags |= IBT_CEP_SET_PKEY_IX;
1810 		switch (qp->qp_type)  {
1811 		case IB_QPT_SMI:
1812 		case IB_QPT_GSI:
1813 		case IB_QPT_UD:
1814 			modify_attr.qp_transport.ud.ud_pkey_ix =
1815 			    attr->pkey_index;
1816 			break;
1817 		case IB_QPT_RC:
1818 			modify_attr.qp_transport.rc.rc_path.cep_pkey_ix =
1819 			    attr->pkey_index;
1820 			break;
1821 		case IB_QPT_UC:
1822 			modify_attr.qp_transport.uc.uc_path.cep_pkey_ix =
1823 			    attr->pkey_index;
1824 			break;
1825 		default:
1826 			/* This should never happen */
1827 			mutex_exit(&qp->lock);
1828 			ofs_lock_exit(&ofs_client->lock);
1829 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1830 			    "ib_modify_qp(IB_QP_PKEY_INDEX): qp: 0x%p, "
1831 			    "attr: 0x%p, attr_mask: 0x%x => "
1832 			    "invalid qp->qp_type(%d)",
1833 			    qp, attr, attr_mask, qp->qp_type);
1834 			return (-EINVAL);
1835 		}
1836 	}
1837 	if (attr_mask & IB_QP_PORT) {
1838 		flags |= IBT_CEP_SET_PORT;
1839 		switch (qp->qp_type) {
1840 		case IB_QPT_SMI:
1841 		case IB_QPT_GSI:
1842 		case IB_QPT_UD:
1843 			modify_attr.qp_transport.ud.ud_port = attr->port_num;
1844 			break;
1845 		case IB_QPT_RC:
1846 			modify_attr.qp_transport.rc.rc_path.cep_hca_port_num =
1847 			    attr->port_num;
1848 			break;
1849 		case IB_QPT_UC:
1850 			modify_attr.qp_transport.uc.uc_path.cep_hca_port_num =
1851 			    attr->port_num;
1852 			break;
1853 		default:
1854 			/* This should never happen */
1855 			mutex_exit(&qp->lock);
1856 			ofs_lock_exit(&ofs_client->lock);
1857 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1858 			    "ib_modify_qp(IB_QP_PORT): qp: 0x%p, "
1859 			    "attr: 0x%p, attr_mask: 0x%x => "
1860 			    "invalid qp->qp_type(%d)",
1861 			    qp, attr, attr_mask, qp->qp_type);
1862 			return (-EINVAL);
1863 		}
1864 	}
1865 	if (attr_mask & IB_QP_QKEY) {
1866 		ASSERT(qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_SMI ||
1867 		    qp->qp_type == IB_QPT_GSI);
1868 		flags |= IBT_CEP_SET_QKEY;
1869 		modify_attr.qp_transport.ud.ud_qkey = attr->qkey;
1870 	}
1871 	if (attr_mask & IB_QP_AV) {
1872 		flags |= IBT_CEP_SET_ADDS_VECT;
1873 		switch (qp->qp_type) {
1874 		case IB_QPT_RC:
1875 			set_av(&attr->ah_attr,
1876 			    &modify_attr.qp_transport.rc.rc_path);
1877 			break;
1878 		case IB_QPT_UC:
1879 			set_av(&attr->ah_attr,
1880 			    &modify_attr.qp_transport.uc.uc_path);
1881 			break;
1882 		case IB_QPT_SMI:
1883 		case IB_QPT_GSI:
1884 		case IB_QPT_UD:
1885 		default:
1886 			/* This should never happen */
1887 			mutex_exit(&qp->lock);
1888 			ofs_lock_exit(&ofs_client->lock);
1889 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1890 			    "ib_modify_qp(IB_QP_AV): qp: 0x%p, "
1891 			    "attr: 0x%p, attr_mask: 0x%x => "
1892 			    "invalid qp->qp_type(%d)",
1893 			    qp, attr, attr_mask, qp->qp_type);
1894 			return (-EINVAL);
1895 		}
1896 	}
1897 	if (attr_mask & IB_QP_PATH_MTU) {
1898 		switch (qp->qp_type) {
1899 		case IB_QPT_RC:
1900 			modify_attr.qp_transport.rc.rc_path_mtu =
1901 			    OF2IBTF_PATH_MTU(attr->path_mtu);
1902 			break;
1903 		case IB_QPT_UC:
1904 			modify_attr.qp_transport.uc.uc_path_mtu =
1905 			    OF2IBTF_PATH_MTU(attr->path_mtu);
1906 			break;
1907 		case IB_QPT_SMI:
1908 		case IB_QPT_GSI:
1909 		case IB_QPT_UD:
1910 		default:
1911 			/* nothing to do */
1912 			break;
1913 		}
1914 	}
1915 	if (attr_mask & IB_QP_TIMEOUT && qp->qp_type == IB_QPT_RC) {
1916 		flags |= IBT_CEP_SET_TIMEOUT;
1917 		modify_attr.qp_transport.rc.rc_path.cep_timeout =
1918 		    attr->timeout;
1919 	}
1920 	if (attr_mask & IB_QP_RETRY_CNT && qp->qp_type == IB_QPT_RC) {
1921 		flags |= IBT_CEP_SET_RETRY;
1922 		modify_attr.qp_transport.rc.rc_retry_cnt =
1923 		    attr->retry_cnt & 0x7;
1924 	}
1925 	if (attr_mask & IB_QP_RNR_RETRY && qp->qp_type == IB_QPT_RC) {
1926 		flags |= IBT_CEP_SET_RNR_NAK_RETRY;
1927 		modify_attr.qp_transport.rc.rc_rnr_retry_cnt =
1928 		    attr->rnr_retry & 0x7;
1929 	}
1930 	if (attr_mask & IB_QP_RQ_PSN) {
1931 		switch (qp->qp_type) {
1932 		case IB_QPT_RC:
1933 			modify_attr.qp_transport.rc.rc_rq_psn =
1934 			    attr->rq_psn & 0xFFFFFF;
1935 			break;
1936 		case IB_QPT_UC:
1937 			modify_attr.qp_transport.uc.uc_rq_psn =
1938 			    attr->rq_psn & 0xFFFFFF;
1939 			break;
1940 		case IB_QPT_SMI:
1941 		case IB_QPT_GSI:
1942 		case IB_QPT_UD:
1943 		default:
1944 			/* nothing to do */
1945 			break;
1946 		}
1947 	}
1948 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
1949 		if (attr->max_rd_atomic) {
1950 			flags |= IBT_CEP_SET_RDMARA_OUT;
1951 			modify_attr.qp_transport.rc.rc_rdma_ra_out =
1952 			    attr->max_rd_atomic;
1953 		}
1954 	}
1955 	if (attr_mask & IB_QP_ALT_PATH) {
1956 		/* Sanity checks */
1957 		if (attr->alt_port_num == 0 ||
1958 		    attr->alt_port_num > hattr.hca_nports) {
1959 			mutex_exit(&qp->lock);
1960 			ofs_lock_exit(&ofs_client->lock);
1961 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1962 			    "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1963 			    "attr_mask: 0x%x => invalid attr->alt_port_num"
1964 			    "(%d), max_nports(%d)",
1965 			    qp, attr, attr_mask, attr->alt_port_num,
1966 			    hattr.hca_nports);
1967 			return (-EINVAL);
1968 		}
1969 		if (attr->alt_pkey_index >= hattr.hca_max_port_pkey_tbl_sz) {
1970 			mutex_exit(&qp->lock);
1971 			ofs_lock_exit(&ofs_client->lock);
1972 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
1973 			    "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
1974 			    "attr_mask: 0x%x => invalid attr->alt_pkey_index"
1975 			    "(%d), max_port_key_index(%d)",
1976 			    qp, attr, attr_mask, attr->alt_pkey_index,
1977 			    hattr.hca_max_port_pkey_tbl_sz);
1978 			return (-EINVAL);
1979 		}
1980 		flags |= IBT_CEP_SET_ALT_PATH;
1981 		switch (qp->qp_type) {
1982 		case IB_QPT_RC:
1983 			modify_attr.qp_transport.rc.rc_alt_path.
1984 			    cep_pkey_ix = attr->alt_pkey_index;
1985 			modify_attr.qp_transport.rc.rc_alt_path.
1986 			    cep_hca_port_num = attr->alt_port_num;
1987 			set_av(&attr->alt_ah_attr,
1988 			    &modify_attr.qp_transport.rc.rc_alt_path);
1989 			modify_attr.qp_transport.rc.rc_alt_path.
1990 			    cep_timeout = attr->alt_timeout;
1991 			break;
1992 		case IB_QPT_UC:
1993 			modify_attr.qp_transport.uc.uc_alt_path.
1994 			    cep_pkey_ix = attr->alt_pkey_index;
1995 			modify_attr.qp_transport.uc.uc_alt_path.
1996 			    cep_hca_port_num = attr->alt_port_num;
1997 			set_av(&attr->alt_ah_attr,
1998 			    &modify_attr.qp_transport.uc.uc_alt_path);
1999 			modify_attr.qp_transport.uc.uc_alt_path.
2000 			    cep_timeout = attr->alt_timeout;
2001 			break;
2002 		case IB_QPT_SMI:
2003 		case IB_QPT_GSI:
2004 		case IB_QPT_UD:
2005 		default:
2006 			/* This should never happen */
2007 			mutex_exit(&qp->lock);
2008 			ofs_lock_exit(&ofs_client->lock);
2009 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2010 			    "ib_modify_qp(IB_QP_ALT_PATH): qp: 0x%p, "
2011 			    "attr: 0x%p, attr_mask: 0x%x => "
2012 			    "invalid qp->qp_type(%d)",
2013 			    qp, attr, attr_mask, qp->qp_type);
2014 			return (-EINVAL);
2015 		}
2016 	}
2017 	if (attr_mask & IB_QP_MIN_RNR_TIMER && qp->qp_type == IB_QPT_RC) {
2018 		flags |= IBT_CEP_SET_MIN_RNR_NAK;
2019 		modify_attr.qp_transport.rc.rc_min_rnr_nak =
2020 		    attr->min_rnr_timer & 0x1F;
2021 	}
2022 	if (attr_mask & IB_QP_SQ_PSN) {
2023 		switch (qp->qp_type)  {
2024 		case IB_QPT_SMI:
2025 		case IB_QPT_GSI:
2026 		case IB_QPT_UD:
2027 			modify_attr.qp_transport.ud.ud_sq_psn =
2028 			    attr->sq_psn;
2029 			break;
2030 		case IB_QPT_RC:
2031 			modify_attr.qp_transport.rc.rc_sq_psn =
2032 			    attr->sq_psn;
2033 			break;
2034 		case IB_QPT_UC:
2035 			modify_attr.qp_transport.uc.uc_sq_psn =
2036 			    attr->sq_psn;
2037 			break;
2038 		default:
2039 			/* This should never happen */
2040 			mutex_exit(&qp->lock);
2041 			ofs_lock_exit(&ofs_client->lock);
2042 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2043 			    "ib_modify_qp(IB_QP_SQ_PSN): qp: 0x%p, "
2044 			    "attr: 0x%p, attr_mask: 0x%x => "
2045 			    "invalid qp->qp_type(%d)",
2046 			    qp, attr, attr_mask, qp->qp_type);
2047 			return (-EINVAL);
2048 		}
2049 	}
2050 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && qp->qp_type == IB_QPT_RC) {
2051 		/* Linux OF sets the value if max_dest_rd_atomic is not zero */
2052 		if (attr->max_dest_rd_atomic) {
2053 			flags |= IBT_CEP_SET_RDMARA_IN;
2054 			modify_attr.qp_transport.rc.rc_rdma_ra_in =
2055 			    attr->max_dest_rd_atomic;
2056 		}
2057 	}
2058 	if (attr_mask & IB_QP_PATH_MIG_STATE) {
2059 		flags |= IBT_CEP_SET_MIG;
2060 		switch (qp->qp_type)  {
2061 		case IB_QPT_RC:
2062 			modify_attr.qp_transport.rc.rc_mig_state =
2063 			    OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2064 			break;
2065 		case IB_QPT_UC:
2066 			modify_attr.qp_transport.uc.uc_mig_state =
2067 			    OF2IBTF_PATH_MIG_STATE(attr->path_mig_state);
2068 			break;
2069 		case IB_QPT_SMI:
2070 		case IB_QPT_GSI:
2071 		case IB_QPT_UD:
2072 		default:
2073 			/* This should never happen */
2074 			mutex_exit(&qp->lock);
2075 			ofs_lock_exit(&ofs_client->lock);
2076 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2077 			    "ib_modify_qp(IB_QP_PATH_MIG_STATE): qp: 0x%p, "
2078 			    "attr: 0x%p, attr_mask: 0x%x => "
2079 			    "invalid qp->qp_type(%d)",
2080 			    qp, attr, attr_mask, qp->qp_type);
2081 			return (-EINVAL);
2082 		}
2083 	}
2084 	if (attr_mask & IB_QP_CAP) {
2085 		/* IB_QP_CAP is not supported */
2086 		mutex_exit(&qp->lock);
2087 		ofs_lock_exit(&ofs_client->lock);
2088 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2089 		    "ib_modify_qp: qp: 0x%p, attr: 0x%p, "
2090 		    "attr_mask: 0x%x => IB_QP_CAP is not supported",
2091 		    qp, attr, attr_mask);
2092 		return (-EINVAL);
2093 	}
2094 	if (attr_mask & IB_QP_DEST_QPN) {
2095 		switch (qp->qp_type)  {
2096 		case IB_QPT_RC:
2097 			modify_attr.qp_transport.rc.rc_dst_qpn =
2098 			    attr->dest_qp_num;
2099 			break;
2100 		case IB_QPT_UC:
2101 			modify_attr.qp_transport.uc.uc_dst_qpn =
2102 			    attr->dest_qp_num;
2103 			break;
2104 		case IB_QPT_SMI:
2105 		case IB_QPT_GSI:
2106 		case IB_QPT_UD:
2107 		default:
2108 			/* This should never happen */
2109 			mutex_exit(&qp->lock);
2110 			ofs_lock_exit(&ofs_client->lock);
2111 			SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2112 			    "ib_modify_qp(IB_QP_DEST_PSN): qp: 0x%p, "
2113 			    "attr: 0x%p, attr_mask: 0x%x => "
2114 			    "invalid qp->qp_type(%d)",
2115 			    qp, attr, attr_mask, qp->qp_type);
2116 			return (-EINVAL);
2117 		}
2118 	}
2119 
2120 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2121 	    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x, "
2122 	    "flags: 0x%x, modify_attr: 0x%p",
2123 	    qp, attr, attr_mask, flags, &modify_attr);
2124 
2125 	/* Modify the QP attributes */
2126 	rtn = ibt_modify_qp(qp->ibt_qp, flags, &modify_attr, NULL);
2127 	if (rtn == IBT_SUCCESS) {
2128 		mutex_exit(&qp->lock);
2129 		ofs_lock_exit(&ofs_client->lock);
2130 		return (0);
2131 	}
2132 	mutex_exit(&qp->lock);
2133 	ofs_lock_exit(&ofs_client->lock);
2134 
2135 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2136 	    "ib_modify_qp: qp: 0x%p, attr: 0x%p, attr_mask: 0x%x => "
2137 	    "ibt_modify_qp failed w/ %d, flags: 0x%x",
2138 	    qp, attr, attr_mask, rtn, flags);
2139 
2140 	switch (rtn) {
2141 	case IBT_HCA_HDL_INVALID:
2142 	case IBT_QP_HDL_INVALID:
2143 	case IBT_QP_SRV_TYPE_INVALID:
2144 	case IBT_QP_STATE_INVALID:
2145 	case IBT_HCA_PORT_INVALID:
2146 	case IBT_PKEY_IX_ILLEGAL:
2147 		return (-EINVAL);
2148 	default:
2149 		return (-EIO);
2150 	}
2151 }
2152 
2153 static inline enum ib_wc_status
ibt2of_wc_status(ibt_wc_status_t status)2154 ibt2of_wc_status(ibt_wc_status_t status)
2155 {
2156 	switch (status) {
2157 	case IBT_WC_LOCAL_LEN_ERR:
2158 		return (IB_WC_LOC_LEN_ERR);
2159 	case IBT_WC_LOCAL_CHAN_OP_ERR:
2160 		return (IB_WC_LOC_QP_OP_ERR);
2161 	case IBT_WC_LOCAL_PROTECT_ERR:
2162 		return (IB_WC_LOC_PROT_ERR);
2163 	case IBT_WC_WR_FLUSHED_ERR:
2164 		return (IB_WC_WR_FLUSH_ERR);
2165 	case IBT_WC_MEM_WIN_BIND_ERR:
2166 		return (IB_WC_MW_BIND_ERR);
2167 	case IBT_WC_BAD_RESPONSE_ERR:
2168 		return (IB_WC_BAD_RESP_ERR);
2169 	case IBT_WC_LOCAL_ACCESS_ERR:
2170 		return (IB_WC_LOC_ACCESS_ERR);
2171 	case IBT_WC_REMOTE_INVALID_REQ_ERR:
2172 		return (IB_WC_REM_INV_REQ_ERR);
2173 	case IBT_WC_REMOTE_ACCESS_ERR:
2174 		return (IB_WC_REM_ACCESS_ERR);
2175 	case IBT_WC_REMOTE_OP_ERR:
2176 		return (IB_WC_REM_OP_ERR);
2177 	case IBT_WC_TRANS_TIMEOUT_ERR:
2178 		return (IB_WC_RETRY_EXC_ERR);
2179 	case IBT_WC_RNR_NAK_TIMEOUT_ERR:
2180 		return (IB_WC_RNR_RETRY_EXC_ERR);
2181 	case IBT_WC_SUCCESS:
2182 	default:
2183 		/* Hermon doesn't support EEC yet */
2184 		ASSERT(status == IBT_WC_SUCCESS);
2185 		return (IB_WC_SUCCESS);
2186 	}
2187 }
2188 
2189 static inline enum ib_wc_opcode
ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)2190 ibt2of_wc_opcode(ibt_wrc_opcode_t wc_type)
2191 {
2192 	switch (wc_type) {
2193 	case IBT_WRC_SEND:
2194 		return (IB_WC_SEND);
2195 	case IBT_WRC_RDMAR:
2196 		return (IB_WC_RDMA_READ);
2197 	case IBT_WRC_RDMAW:
2198 		return (IB_WC_RDMA_WRITE);
2199 	case IBT_WRC_CSWAP:
2200 		return (IB_WC_COMP_SWAP);
2201 	case IBT_WRC_FADD:
2202 		return (IB_WC_FETCH_ADD);
2203 	case IBT_WRC_BIND:
2204 		return (IB_WC_BIND_MW);
2205 	case IBT_WRC_RECV:
2206 		return (IB_WC_RECV);
2207 	case IBT_WRC_RECV_RDMAWI:
2208 	default:
2209 		ASSERT(wc_type == IBT_WRC_RECV_RDMAWI);
2210 		return (IB_WC_RECV_RDMA_WITH_IMM);
2211 	}
2212 }
2213 
2214 static inline int
ibt2of_wc_flags(ibt_wc_flags_t wc_flags)2215 ibt2of_wc_flags(ibt_wc_flags_t wc_flags)
2216 {
2217 	return (wc_flags & ~IBT_WC_CKSUM_OK);
2218 }
2219 
2220 static inline void
set_wc(ibt_wc_t * ibt_wc,struct ib_wc * wc)2221 set_wc(ibt_wc_t *ibt_wc, struct ib_wc *wc)
2222 {
2223 	wc->wr_id = ibt_wc->wc_id;
2224 	wc->status = ibt2of_wc_status(ibt_wc->wc_status);
2225 	/* opcode can be undefined if status is not success */
2226 	if (wc->status == IB_WC_SUCCESS) {
2227 		wc->opcode = ibt2of_wc_opcode(ibt_wc->wc_type);
2228 	}
2229 	wc->vendor_err = 0;			/* not supported */
2230 	wc->byte_len = ibt_wc->wc_bytes_xfer;
2231 	wc->qp = NULL;				/* not supported */
2232 	wc->imm_data = htonl(ibt_wc->wc_immed_data);
2233 	wc->src_qp = ibt_wc->wc_qpn;
2234 	wc->wc_flags = ibt2of_wc_flags(ibt_wc->wc_flags);
2235 	wc->pkey_index = ibt_wc->wc_pkey_ix;
2236 	wc->slid = ibt_wc->wc_slid;
2237 	wc->sl = ibt_wc->wc_sl;
2238 	wc->dlid_path_bits = ibt_wc->wc_path_bits;
2239 	wc->port_num = 0;			/* not supported */
2240 }
2241 
2242 /*
2243  * ib_poll_cq - poll a CQ for completion(s)
2244  * @cq:the CQ being polled
2245  * @num_entries:maximum number of completions to return
2246  * @wc:array of at least @num_entries &struct ib_wc where completions
2247  *   will be returned
2248  *
2249  * Poll a CQ for (possibly multiple) completions.  If the return value
2250  * is < 0, an error occurred.  If the return value is >= 0, it is the
2251  * number of completions returned.  If the return value is
2252  * non-negative and < num_entries, then the CQ was emptied.
2253  *
2254  * Note that three following memebers in struct ib_wc are not supported
2255  * currently, and the values are always either 0 or NULL.
2256  *	u32			vendor_err;
2257  *	struct ib_qp		*qp;
2258  *	u8			port_num;
2259  */
2260 int
ib_poll_cq(struct ib_cq * cq,int num_entries,struct ib_wc * wc)2261 ib_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
2262 {
2263 	ibt_wc_t	ibt_wc;
2264 	int		npolled;
2265 	ibt_status_t	rtn;
2266 	ofs_client_t	*ofs_client = (ofs_client_t *)cq->device->clnt_hdl;
2267 
2268 	ofs_lock_enter(&ofs_client->lock);
2269 	if (cq->device->reg_state != IB_DEV_OPEN) {
2270 		ofs_lock_exit(&ofs_client->lock);
2271 		SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2272 		    "ib_poll_cq: cq: 0x%p => invalid device state (%d)",
2273 		    cq, cq->device->reg_state);
2274 		return (-ENXIO);
2275 	}
2276 
2277 	SOL_OFS_DPRINTF_L3(sol_kverbs_dbg_str,
2278 	    "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p, "
2279 	    "ibt_cq: 0x%p, ibt_wc: 0x%p",
2280 	    cq, num_entries, wc, cq->ibt_cq, &ibt_wc);
2281 
2282 	/* only one thread per cq is allowed during ibt_poll_cq() */
2283 	mutex_enter(&cq->lock);
2284 	for (npolled = 0; npolled < num_entries; ++npolled) {
2285 		bzero(&ibt_wc, sizeof (ibt_wc_t));
2286 		rtn = ibt_poll_cq(cq->ibt_cq, &ibt_wc, 1, NULL);
2287 		if (rtn != IBT_SUCCESS) {
2288 			break;
2289 		}
2290 		/* save this result to struct ib_wc */
2291 		set_wc(&ibt_wc, wc + npolled);
2292 	}
2293 	mutex_exit(&cq->lock);
2294 	ofs_lock_exit(&ofs_client->lock);
2295 
2296 	if (rtn == IBT_SUCCESS || rtn == IBT_CQ_EMPTY) {
2297 		return (npolled);
2298 	}
2299 
2300 	SOL_OFS_DPRINTF_L2(sol_kverbs_dbg_str,
2301 	    "ib_poll_cq: cq: 0x%p, num_entries: %d, wc: 0x%p => "
2302 	    "ibt_poll_cq failed w/ %d, npolled = %d",
2303 	    cq, num_entries, wc, rtn, npolled);
2304 
2305 	switch (rtn) {
2306 	case IBT_HCA_HDL_INVALID:
2307 	case IBT_CQ_HDL_INVALID:
2308 	case IBT_INVALID_PARAM:
2309 		return (-EINVAL);
2310 	default:
2311 		return (-EIO);
2312 	}
2313 }
2314 
2315 ibt_hca_hdl_t
ib_get_ibt_hca_hdl(struct ib_device * device)2316 ib_get_ibt_hca_hdl(struct ib_device *device)
2317 {
2318 	return (device->hca_hdl);
2319 }
2320 
2321 ibt_channel_hdl_t
ib_get_ibt_channel_hdl(struct rdma_cm_id * cm)2322 ib_get_ibt_channel_hdl(struct rdma_cm_id *cm)
2323 {
2324 	return (cm->qp == NULL ? NULL : cm->qp->ibt_qp);
2325 }
2326