xref: /freebsd/contrib/ofed/libibverbs/cmd.c (revision 2cf0c51793da5a2fc03db8990fc2feb3f9aa119f)
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 PathScale, Inc.  All rights reserved.
4  * Copyright (c) 2006 Cisco Systems, Inc.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 
35 #include <config.h>
36 
37 #include <stdio.h>
38 #include <unistd.h>
39 #include <stdlib.h>
40 #include <errno.h>
41 #include <alloca.h>
42 #include <string.h>
43 
44 #include "ibverbs.h"
45 #include <sys/param.h>
46 
47 int ibv_cmd_get_context(struct ibv_context *context, struct ibv_get_context *cmd,
48 			size_t cmd_size, struct ibv_get_context_resp *resp,
49 			size_t resp_size)
50 {
51 	if (abi_ver < IB_USER_VERBS_MIN_ABI_VERSION)
52 		return ENOSYS;
53 
54 	IBV_INIT_CMD_RESP(cmd, cmd_size, GET_CONTEXT, resp, resp_size);
55 
56 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
57 		return errno;
58 
59 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
60 
61 	context->async_fd         = resp->async_fd;
62 	context->num_comp_vectors = resp->num_comp_vectors;
63 
64 	return 0;
65 }
66 
67 static void copy_query_dev_fields(struct ibv_device_attr *device_attr,
68 				  struct ibv_query_device_resp *resp,
69 				  uint64_t *raw_fw_ver)
70 {
71 	*raw_fw_ver				= resp->fw_ver;
72 	device_attr->node_guid			= resp->node_guid;
73 	device_attr->sys_image_guid		= resp->sys_image_guid;
74 	device_attr->max_mr_size		= resp->max_mr_size;
75 	device_attr->page_size_cap		= resp->page_size_cap;
76 	device_attr->vendor_id			= resp->vendor_id;
77 	device_attr->vendor_part_id		= resp->vendor_part_id;
78 	device_attr->hw_ver			= resp->hw_ver;
79 	device_attr->max_qp			= resp->max_qp;
80 	device_attr->max_qp_wr			= resp->max_qp_wr;
81 	device_attr->device_cap_flags		= resp->device_cap_flags;
82 	device_attr->max_sge			= resp->max_sge;
83 	device_attr->max_sge_rd			= resp->max_sge_rd;
84 	device_attr->max_cq			= resp->max_cq;
85 	device_attr->max_cqe			= resp->max_cqe;
86 	device_attr->max_mr			= resp->max_mr;
87 	device_attr->max_pd			= resp->max_pd;
88 	device_attr->max_qp_rd_atom		= resp->max_qp_rd_atom;
89 	device_attr->max_ee_rd_atom		= resp->max_ee_rd_atom;
90 	device_attr->max_res_rd_atom		= resp->max_res_rd_atom;
91 	device_attr->max_qp_init_rd_atom	= resp->max_qp_init_rd_atom;
92 	device_attr->max_ee_init_rd_atom	= resp->max_ee_init_rd_atom;
93 	device_attr->atomic_cap			= resp->atomic_cap;
94 	device_attr->max_ee			= resp->max_ee;
95 	device_attr->max_rdd			= resp->max_rdd;
96 	device_attr->max_mw			= resp->max_mw;
97 	device_attr->max_raw_ipv6_qp		= resp->max_raw_ipv6_qp;
98 	device_attr->max_raw_ethy_qp		= resp->max_raw_ethy_qp;
99 	device_attr->max_mcast_grp		= resp->max_mcast_grp;
100 	device_attr->max_mcast_qp_attach	= resp->max_mcast_qp_attach;
101 	device_attr->max_total_mcast_qp_attach	= resp->max_total_mcast_qp_attach;
102 	device_attr->max_ah			= resp->max_ah;
103 	device_attr->max_fmr			= resp->max_fmr;
104 	device_attr->max_map_per_fmr		= resp->max_map_per_fmr;
105 	device_attr->max_srq			= resp->max_srq;
106 	device_attr->max_srq_wr			= resp->max_srq_wr;
107 	device_attr->max_srq_sge		= resp->max_srq_sge;
108 	device_attr->max_pkeys			= resp->max_pkeys;
109 	device_attr->local_ca_ack_delay		= resp->local_ca_ack_delay;
110 	device_attr->phys_port_cnt		= resp->phys_port_cnt;
111 }
112 
113 int ibv_cmd_query_device(struct ibv_context *context,
114 			 struct ibv_device_attr *device_attr,
115 			 uint64_t *raw_fw_ver,
116 			 struct ibv_query_device *cmd, size_t cmd_size)
117 {
118 	struct ibv_query_device_resp resp;
119 
120 	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_DEVICE, &resp, sizeof resp);
121 
122 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
123 		return errno;
124 
125 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
126 
127 	memset(device_attr->fw_ver, 0, sizeof device_attr->fw_ver);
128 	copy_query_dev_fields(device_attr, &resp, raw_fw_ver);
129 
130 	return 0;
131 }
132 
133 int ibv_cmd_query_device_ex(struct ibv_context *context,
134 			    const struct ibv_query_device_ex_input *input,
135 			    struct ibv_device_attr_ex *attr, size_t attr_size,
136 			    uint64_t *raw_fw_ver,
137 			    struct ibv_query_device_ex *cmd,
138 			    size_t cmd_core_size,
139 			    size_t cmd_size,
140 			    struct ibv_query_device_resp_ex *resp,
141 			    size_t resp_core_size,
142 			    size_t resp_size)
143 {
144 	int err;
145 
146 	if (input && input->comp_mask)
147 		return EINVAL;
148 
149 	if (attr_size < offsetof(struct ibv_device_attr_ex, comp_mask) +
150 			sizeof(attr->comp_mask))
151 		return EINVAL;
152 
153 	if (resp_core_size < offsetof(struct ibv_query_device_resp_ex,
154 				      response_length) +
155 			     sizeof(resp->response_length))
156 		return EINVAL;
157 
158 	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
159 			       QUERY_DEVICE_EX, resp, resp_core_size,
160 			       resp_size);
161 	cmd->comp_mask = 0;
162 	cmd->reserved = 0;
163 	memset(attr->orig_attr.fw_ver, 0, sizeof(attr->orig_attr.fw_ver));
164 	memset(&attr->comp_mask, 0, attr_size - sizeof(attr->orig_attr));
165 	err = write(context->cmd_fd, cmd, cmd_size);
166 	if (err != cmd_size)
167 		return errno;
168 
169 	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
170 	copy_query_dev_fields(&attr->orig_attr, &resp->base, raw_fw_ver);
171 	/* Report back supported comp_mask bits. For now no comp_mask bit is
172 	 * defined */
173 	attr->comp_mask = resp->comp_mask & 0;
174 	if (attr_size >= offsetof(struct ibv_device_attr_ex, odp_caps) +
175 			 sizeof(attr->odp_caps)) {
176 		if (resp->response_length >=
177 		    offsetof(struct ibv_query_device_resp_ex, odp_caps) +
178 		    sizeof(resp->odp_caps)) {
179 			attr->odp_caps.general_caps = resp->odp_caps.general_caps;
180 			attr->odp_caps.per_transport_caps.rc_odp_caps =
181 				resp->odp_caps.per_transport_caps.rc_odp_caps;
182 			attr->odp_caps.per_transport_caps.uc_odp_caps =
183 				resp->odp_caps.per_transport_caps.uc_odp_caps;
184 			attr->odp_caps.per_transport_caps.ud_odp_caps =
185 				resp->odp_caps.per_transport_caps.ud_odp_caps;
186 		}
187 	}
188 
189 	if (attr_size >= offsetof(struct ibv_device_attr_ex,
190 				  completion_timestamp_mask) +
191 			 sizeof(attr->completion_timestamp_mask)) {
192 		if (resp->response_length >=
193 		    offsetof(struct ibv_query_device_resp_ex, timestamp_mask) +
194 		    sizeof(resp->timestamp_mask))
195 			attr->completion_timestamp_mask = resp->timestamp_mask;
196 	}
197 
198 	if (attr_size >= offsetof(struct ibv_device_attr_ex, hca_core_clock) +
199 			 sizeof(attr->hca_core_clock)) {
200 		if (resp->response_length >=
201 		    offsetof(struct ibv_query_device_resp_ex, hca_core_clock) +
202 		    sizeof(resp->hca_core_clock))
203 			attr->hca_core_clock = resp->hca_core_clock;
204 	}
205 
206 	if (attr_size >= offsetof(struct ibv_device_attr_ex, device_cap_flags_ex) +
207 			 sizeof(attr->device_cap_flags_ex)) {
208 		if (resp->response_length >=
209 		    offsetof(struct ibv_query_device_resp_ex, device_cap_flags_ex) +
210 		    sizeof(resp->device_cap_flags_ex))
211 			attr->device_cap_flags_ex = resp->device_cap_flags_ex;
212 	}
213 
214 	if (attr_size >= offsetof(struct ibv_device_attr_ex, rss_caps) +
215 			 sizeof(attr->rss_caps)) {
216 		if (resp->response_length >=
217 		    offsetof(struct ibv_query_device_resp_ex, rss_caps) +
218 		    sizeof(resp->rss_caps)) {
219 			attr->rss_caps.supported_qpts = resp->rss_caps.supported_qpts;
220 			attr->rss_caps.max_rwq_indirection_tables = resp->rss_caps.max_rwq_indirection_tables;
221 			attr->rss_caps.max_rwq_indirection_table_size = resp->rss_caps.max_rwq_indirection_table_size;
222 		}
223 	}
224 
225 	if (attr_size >= offsetof(struct ibv_device_attr_ex, max_wq_type_rq) +
226 			 sizeof(attr->max_wq_type_rq)) {
227 		if (resp->response_length >=
228 		    offsetof(struct ibv_query_device_resp_ex, max_wq_type_rq) +
229 		    sizeof(resp->max_wq_type_rq))
230 			attr->max_wq_type_rq = resp->max_wq_type_rq;
231 	}
232 
233 	if (attr_size >= offsetof(struct ibv_device_attr_ex, raw_packet_caps) +
234 			 sizeof(attr->raw_packet_caps)) {
235 		if (resp->response_length >=
236 		    offsetof(struct ibv_query_device_resp_ex, raw_packet_caps) +
237 		    sizeof(resp->raw_packet_caps))
238 			attr->raw_packet_caps = resp->raw_packet_caps;
239 	}
240 
241 	return 0;
242 }
243 
244 int ibv_cmd_query_port(struct ibv_context *context, uint8_t port_num,
245 		       struct ibv_port_attr *port_attr,
246 		       struct ibv_query_port *cmd, size_t cmd_size)
247 {
248 	struct ibv_query_port_resp resp;
249 
250 	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_PORT, &resp, sizeof resp);
251 	cmd->port_num = port_num;
252 	memset(cmd->reserved, 0, sizeof cmd->reserved);
253 
254 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
255 		return errno;
256 
257 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
258 
259 	port_attr->state      	   = resp.state;
260 	port_attr->max_mtu         = resp.max_mtu;
261 	port_attr->active_mtu      = resp.active_mtu;
262 	port_attr->gid_tbl_len     = resp.gid_tbl_len;
263 	port_attr->port_cap_flags  = resp.port_cap_flags;
264 	port_attr->max_msg_sz      = resp.max_msg_sz;
265 	port_attr->bad_pkey_cntr   = resp.bad_pkey_cntr;
266 	port_attr->qkey_viol_cntr  = resp.qkey_viol_cntr;
267 	port_attr->pkey_tbl_len    = resp.pkey_tbl_len;
268 	port_attr->lid 	      	   = resp.lid;
269 	port_attr->sm_lid 	   = resp.sm_lid;
270 	port_attr->lmc 	      	   = resp.lmc;
271 	port_attr->max_vl_num      = resp.max_vl_num;
272 	port_attr->sm_sl      	   = resp.sm_sl;
273 	port_attr->subnet_timeout  = resp.subnet_timeout;
274 	port_attr->init_type_reply = resp.init_type_reply;
275 	port_attr->active_width    = resp.active_width;
276 	port_attr->active_speed    = resp.active_speed;
277 	port_attr->phys_state      = resp.phys_state;
278 	port_attr->link_layer      = resp.link_layer;
279 
280 	return 0;
281 }
282 
283 int ibv_cmd_alloc_pd(struct ibv_context *context, struct ibv_pd *pd,
284 		     struct ibv_alloc_pd *cmd, size_t cmd_size,
285 		     struct ibv_alloc_pd_resp *resp, size_t resp_size)
286 {
287 	IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_PD, resp, resp_size);
288 
289 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
290 		return errno;
291 
292 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
293 
294 	pd->handle  = resp->pd_handle;
295 	pd->context = context;
296 
297 	return 0;
298 }
299 
300 int ibv_cmd_dealloc_pd(struct ibv_pd *pd)
301 {
302 	struct ibv_dealloc_pd cmd;
303 
304 	IBV_INIT_CMD(&cmd, sizeof cmd, DEALLOC_PD);
305 	cmd.pd_handle = pd->handle;
306 
307 	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
308 		return errno;
309 
310 	return 0;
311 }
312 
313 int ibv_cmd_open_xrcd(struct ibv_context *context, struct verbs_xrcd *xrcd,
314 		      int vxrcd_size,
315 		      struct ibv_xrcd_init_attr *attr,
316 		      struct ibv_open_xrcd *cmd, size_t cmd_size,
317 		      struct ibv_open_xrcd_resp *resp, size_t resp_size)
318 {
319 	IBV_INIT_CMD_RESP(cmd, cmd_size, OPEN_XRCD, resp, resp_size);
320 
321 	if (attr->comp_mask >= IBV_XRCD_INIT_ATTR_RESERVED)
322 		return ENOSYS;
323 
324 	if (!(attr->comp_mask & IBV_XRCD_INIT_ATTR_FD) ||
325 	    !(attr->comp_mask & IBV_XRCD_INIT_ATTR_OFLAGS))
326 		return EINVAL;
327 
328 	cmd->fd = attr->fd;
329 	cmd->oflags = attr->oflags;
330 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
331 		return errno;
332 
333 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
334 
335 	xrcd->xrcd.context = context;
336 	xrcd->comp_mask = 0;
337 	if (vext_field_avail(struct verbs_xrcd, handle, vxrcd_size)) {
338 		xrcd->comp_mask = VERBS_XRCD_HANDLE;
339 		xrcd->handle  = resp->xrcd_handle;
340 	}
341 
342 	return 0;
343 }
344 
345 int ibv_cmd_close_xrcd(struct verbs_xrcd *xrcd)
346 {
347 	struct ibv_close_xrcd cmd;
348 
349 	IBV_INIT_CMD(&cmd, sizeof cmd, CLOSE_XRCD);
350 	cmd.xrcd_handle = xrcd->handle;
351 
352 	if (write(xrcd->xrcd.context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
353 		return errno;
354 
355 	return 0;
356 }
357 
358 int ibv_cmd_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
359 		   uint64_t hca_va, int access,
360 		   struct ibv_mr *mr, struct ibv_reg_mr *cmd,
361 		   size_t cmd_size,
362 		   struct ibv_reg_mr_resp *resp, size_t resp_size)
363 {
364 
365 	IBV_INIT_CMD_RESP(cmd, cmd_size, REG_MR, resp, resp_size);
366 
367 	cmd->start 	  = (uintptr_t) addr;
368 	cmd->length 	  = length;
369 	cmd->hca_va 	  = hca_va;
370 	cmd->pd_handle 	  = pd->handle;
371 	cmd->access_flags = access;
372 
373 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
374 		return errno;
375 
376 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
377 
378 	mr->handle  = resp->mr_handle;
379 	mr->lkey    = resp->lkey;
380 	mr->rkey    = resp->rkey;
381 	mr->context = pd->context;
382 
383 	return 0;
384 }
385 
386 int ibv_cmd_rereg_mr(struct ibv_mr *mr, uint32_t flags, void *addr,
387 		     size_t length, uint64_t hca_va, int access,
388 		     struct ibv_pd *pd, struct ibv_rereg_mr *cmd,
389 		     size_t cmd_sz, struct ibv_rereg_mr_resp *resp,
390 		     size_t resp_sz)
391 {
392 	IBV_INIT_CMD_RESP(cmd, cmd_sz, REREG_MR, resp, resp_sz);
393 
394 	cmd->mr_handle	  = mr->handle;
395 	cmd->flags	  = flags;
396 	cmd->start	  = (uintptr_t)addr;
397 	cmd->length	  = length;
398 	cmd->hca_va	  = hca_va;
399 	cmd->pd_handle	  = (flags & IBV_REREG_MR_CHANGE_PD) ? pd->handle : 0;
400 	cmd->access_flags = access;
401 
402 	if (write(mr->context->cmd_fd, cmd, cmd_sz) != cmd_sz)
403 		return errno;
404 
405 	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_sz);
406 
407 	mr->lkey    = resp->lkey;
408 	mr->rkey    = resp->rkey;
409 	if (flags & IBV_REREG_MR_CHANGE_PD)
410 		mr->context = pd->context;
411 
412 	return 0;
413 }
414 
415 int ibv_cmd_dereg_mr(struct ibv_mr *mr)
416 {
417 	struct ibv_dereg_mr cmd;
418 
419 	IBV_INIT_CMD(&cmd, sizeof cmd, DEREG_MR);
420 	cmd.mr_handle = mr->handle;
421 
422 	if (write(mr->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
423 		return errno;
424 
425 	return 0;
426 }
427 
428 int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type,
429 		     struct ibv_mw *mw, struct ibv_alloc_mw *cmd,
430 		     size_t cmd_size,
431 		     struct ibv_alloc_mw_resp *resp, size_t resp_size)
432 {
433 	IBV_INIT_CMD_RESP(cmd, cmd_size, ALLOC_MW, resp, resp_size);
434 	cmd->pd_handle	= pd->handle;
435 	cmd->mw_type	= type;
436 	memset(cmd->reserved, 0, sizeof(cmd->reserved));
437 
438 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
439 		return errno;
440 
441 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
442 
443 	mw->context = pd->context;
444 	mw->pd      = pd;
445 	mw->rkey    = resp->rkey;
446 	mw->handle  = resp->mw_handle;
447 	mw->type    = type;
448 
449 	return 0;
450 }
451 
452 int ibv_cmd_dealloc_mw(struct ibv_mw *mw,
453 		       struct ibv_dealloc_mw *cmd, size_t cmd_size)
454 {
455 	IBV_INIT_CMD(cmd, cmd_size, DEALLOC_MW);
456 	cmd->mw_handle = mw->handle;
457 	cmd->reserved = 0;
458 
459 	if (write(mw->context->cmd_fd, cmd, cmd_size) != cmd_size)
460 		return errno;
461 
462 	return 0;
463 }
464 
465 int ibv_cmd_create_cq(struct ibv_context *context, int cqe,
466 		      struct ibv_comp_channel *channel,
467 		      int comp_vector, struct ibv_cq *cq,
468 		      struct ibv_create_cq *cmd, size_t cmd_size,
469 		      struct ibv_create_cq_resp *resp, size_t resp_size)
470 {
471 	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_CQ, resp, resp_size);
472 	cmd->user_handle   = (uintptr_t) cq;
473 	cmd->cqe           = cqe;
474 	cmd->comp_vector   = comp_vector;
475 	cmd->comp_channel  = channel ? channel->fd : -1;
476 	cmd->reserved      = 0;
477 
478 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size)
479 		return errno;
480 
481 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
482 
483 	cq->handle  = resp->cq_handle;
484 	cq->cqe     = resp->cqe;
485 	cq->context = context;
486 
487 	return 0;
488 }
489 
490 int ibv_cmd_create_cq_ex(struct ibv_context *context,
491 			 struct ibv_cq_init_attr_ex *cq_attr,
492 			 struct ibv_cq_ex *cq,
493 			 struct ibv_create_cq_ex *cmd,
494 			 size_t cmd_core_size,
495 			 size_t cmd_size,
496 			 struct ibv_create_cq_resp_ex *resp,
497 			 size_t resp_core_size,
498 			 size_t resp_size)
499 {
500 	int err;
501 
502 	memset(cmd, 0, cmd_core_size);
503 	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, CREATE_CQ_EX, resp,
504 			       resp_core_size, resp_size);
505 
506 	if (cq_attr->comp_mask & ~(IBV_CQ_INIT_ATTR_MASK_RESERVED - 1))
507 		return EINVAL;
508 
509 	cmd->user_handle   = (uintptr_t)cq;
510 	cmd->cqe           = cq_attr->cqe;
511 	cmd->comp_vector   = cq_attr->comp_vector;
512 	cmd->comp_channel  = cq_attr->channel ? cq_attr->channel->fd : -1;
513 	cmd->comp_mask = 0;
514 
515 	if (cmd_core_size >= offsetof(struct ibv_create_cq_ex, flags) +
516 	    sizeof(cmd->flags)) {
517 		if ((cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS) &&
518 		    (cq_attr->flags & ~(IBV_CREATE_CQ_ATTR_RESERVED - 1)))
519 			return EOPNOTSUPP;
520 
521 		if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
522 			cmd->flags |= IBV_CREATE_CQ_EX_KERNEL_FLAG_COMPLETION_TIMESTAMP;
523 	}
524 
525 	err = write(context->cmd_fd, cmd, cmd_size);
526 	if (err != cmd_size)
527 		return errno;
528 
529 	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
530 
531 	cq->handle  = resp->base.cq_handle;
532 	cq->cqe     = resp->base.cqe;
533 	cq->context = context;
534 
535 	return 0;
536 }
537 
538 int ibv_cmd_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
539 {
540 	struct ibv_poll_cq       cmd;
541 	struct ibv_poll_cq_resp *resp;
542 	int                      i;
543 	int                      rsize;
544 	int                      ret;
545 
546 	rsize = sizeof *resp + ne * sizeof(struct ibv_kern_wc);
547 	resp  = malloc(rsize);
548 	if (!resp)
549 		return -1;
550 
551 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, POLL_CQ, resp, rsize);
552 	cmd.cq_handle = ibcq->handle;
553 	cmd.ne        = ne;
554 
555 	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd) {
556 		ret = -1;
557 		goto out;
558 	}
559 
560 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, rsize);
561 
562 	for (i = 0; i < resp->count; i++) {
563 		wc[i].wr_id 	     = resp->wc[i].wr_id;
564 		wc[i].status 	     = resp->wc[i].status;
565 		wc[i].opcode 	     = resp->wc[i].opcode;
566 		wc[i].vendor_err     = resp->wc[i].vendor_err;
567 		wc[i].byte_len 	     = resp->wc[i].byte_len;
568 		wc[i].imm_data 	     = resp->wc[i].imm_data;
569 		wc[i].qp_num 	     = resp->wc[i].qp_num;
570 		wc[i].src_qp 	     = resp->wc[i].src_qp;
571 		wc[i].wc_flags 	     = resp->wc[i].wc_flags;
572 		wc[i].pkey_index     = resp->wc[i].pkey_index;
573 		wc[i].slid 	     = resp->wc[i].slid;
574 		wc[i].sl 	     = resp->wc[i].sl;
575 		wc[i].dlid_path_bits = resp->wc[i].dlid_path_bits;
576 	}
577 
578 	ret = resp->count;
579 
580 out:
581 	free(resp);
582 	return ret;
583 }
584 
585 int ibv_cmd_req_notify_cq(struct ibv_cq *ibcq, int solicited_only)
586 {
587 	struct ibv_req_notify_cq cmd;
588 
589 	IBV_INIT_CMD(&cmd, sizeof cmd, REQ_NOTIFY_CQ);
590 	cmd.cq_handle = ibcq->handle;
591 	cmd.solicited = !!solicited_only;
592 
593 	if (write(ibcq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
594 		return errno;
595 
596 	return 0;
597 }
598 
599 int ibv_cmd_resize_cq(struct ibv_cq *cq, int cqe,
600 		      struct ibv_resize_cq *cmd, size_t cmd_size,
601 		      struct ibv_resize_cq_resp *resp, size_t resp_size)
602 {
603 	IBV_INIT_CMD_RESP(cmd, cmd_size, RESIZE_CQ, resp, resp_size);
604 	cmd->cq_handle = cq->handle;
605 	cmd->cqe       = cqe;
606 
607 	if (write(cq->context->cmd_fd, cmd, cmd_size) != cmd_size)
608 		return errno;
609 
610 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
611 
612 	cq->cqe = resp->cqe;
613 
614 	return 0;
615 }
616 
617 int ibv_cmd_destroy_cq(struct ibv_cq *cq)
618 {
619 	struct ibv_destroy_cq      cmd;
620 	struct ibv_destroy_cq_resp resp;
621 
622 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_CQ, &resp, sizeof resp);
623 	cmd.cq_handle = cq->handle;
624 	cmd.reserved  = 0;
625 
626 	if (write(cq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
627 		return errno;
628 
629 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
630 
631 	pthread_mutex_lock(&cq->mutex);
632 	while (cq->comp_events_completed  != resp.comp_events_reported ||
633 	       cq->async_events_completed != resp.async_events_reported)
634 		pthread_cond_wait(&cq->cond, &cq->mutex);
635 	pthread_mutex_unlock(&cq->mutex);
636 
637 	return 0;
638 }
639 
640 int ibv_cmd_create_srq(struct ibv_pd *pd,
641 		       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
642 		       struct ibv_create_srq *cmd, size_t cmd_size,
643 		       struct ibv_create_srq_resp *resp, size_t resp_size)
644 {
645 	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_SRQ, resp, resp_size);
646 	cmd->user_handle = (uintptr_t) srq;
647 	cmd->pd_handle 	 = pd->handle;
648 	cmd->max_wr      = attr->attr.max_wr;
649 	cmd->max_sge     = attr->attr.max_sge;
650 	cmd->srq_limit   = attr->attr.srq_limit;
651 
652 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
653 		return errno;
654 
655 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
656 
657 	srq->handle  = resp->srq_handle;
658 	srq->context = pd->context;
659 
660 	if (abi_ver > 5) {
661 		attr->attr.max_wr = resp->max_wr;
662 		attr->attr.max_sge = resp->max_sge;
663 	} else {
664 		struct ibv_create_srq_resp_v5 *resp_v5 =
665 			(struct ibv_create_srq_resp_v5 *) resp;
666 
667 		memmove((void *) resp + sizeof *resp,
668 			(void *) resp_v5 + sizeof *resp_v5,
669 			resp_size - sizeof *resp);
670 	}
671 
672 	return 0;
673 }
674 
675 int ibv_cmd_create_srq_ex(struct ibv_context *context,
676 			  struct verbs_srq *srq, int vsrq_sz,
677 			  struct ibv_srq_init_attr_ex *attr_ex,
678 			  struct ibv_create_xsrq *cmd, size_t cmd_size,
679 			  struct ibv_create_srq_resp *resp, size_t resp_size)
680 {
681 	struct verbs_xrcd *vxrcd = NULL;
682 	int ret = 0;
683 
684 	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_XSRQ, resp, resp_size);
685 
686 	if (attr_ex->comp_mask >= IBV_SRQ_INIT_ATTR_RESERVED)
687 		return ENOSYS;
688 
689 	if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_PD))
690 		return EINVAL;
691 
692 	cmd->user_handle = (uintptr_t) srq;
693 	cmd->pd_handle   = attr_ex->pd->handle;
694 	cmd->max_wr      = attr_ex->attr.max_wr;
695 	cmd->max_sge     = attr_ex->attr.max_sge;
696 	cmd->srq_limit   = attr_ex->attr.srq_limit;
697 
698 	cmd->srq_type = (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ?
699 			attr_ex->srq_type : IBV_SRQT_BASIC;
700 	if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) {
701 		if (!(attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ))
702 			return EINVAL;
703 
704 		vxrcd = container_of(attr_ex->xrcd, struct verbs_xrcd, xrcd);
705 		cmd->xrcd_handle = vxrcd->handle;
706 		cmd->cq_handle   = attr_ex->cq->handle;
707 	}
708 
709 	ret = pthread_mutex_init(&srq->srq.mutex, NULL);
710 	if (ret)
711 		goto err;
712 	ret = pthread_cond_init(&srq->srq.cond, NULL);
713 	if (ret)
714 		goto err_mutex;
715 
716 	if (write(context->cmd_fd, cmd, cmd_size) != cmd_size) {
717 		ret = errno;
718 		goto err_cond;
719 	}
720 
721 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
722 
723 	srq->srq.handle           = resp->srq_handle;
724 	srq->srq.context          = context;
725 	srq->srq.srq_context      = attr_ex->srq_context;
726 	srq->srq.pd               = attr_ex->pd;
727 	srq->srq.events_completed = 0;
728 
729 	/*
730 	 * check that the last field is available.
731 	 * If it is than all the others exist as well
732 	 */
733 	if (vext_field_avail(struct verbs_srq, srq_num, vsrq_sz)) {
734 		srq->comp_mask = IBV_SRQ_INIT_ATTR_TYPE;
735 		srq->srq_type = (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) ?
736 				attr_ex->srq_type : IBV_SRQT_BASIC;
737 		if (srq->srq_type == IBV_SRQT_XRC) {
738 			srq->comp_mask |= VERBS_SRQ_NUM;
739 			srq->srq_num = resp->srqn;
740 		}
741 		if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_XRCD) {
742 			srq->comp_mask |= VERBS_SRQ_XRCD;
743 			srq->xrcd = vxrcd;
744 		}
745 		if (attr_ex->comp_mask & IBV_SRQ_INIT_ATTR_CQ) {
746 			srq->comp_mask |= VERBS_SRQ_CQ;
747 			srq->cq = attr_ex->cq;
748 		}
749 	}
750 
751 	attr_ex->attr.max_wr = resp->max_wr;
752 	attr_ex->attr.max_sge = resp->max_sge;
753 
754 	return 0;
755 err_cond:
756 	pthread_cond_destroy(&srq->srq.cond);
757 err_mutex:
758 	pthread_mutex_destroy(&srq->srq.mutex);
759 err:
760 	return ret;
761 }
762 
763 
764 static int ibv_cmd_modify_srq_v3(struct ibv_srq *srq,
765 				 struct ibv_srq_attr *srq_attr,
766 				 int srq_attr_mask,
767 				 struct ibv_modify_srq *new_cmd,
768 				 size_t new_cmd_size)
769 {
770 	struct ibv_modify_srq_v3 *cmd;
771 	size_t cmd_size;
772 
773 	cmd_size = sizeof *cmd + new_cmd_size - sizeof *new_cmd;
774 	cmd      = alloca(cmd_size);
775 	memcpy(cmd->driver_data, new_cmd->driver_data, new_cmd_size - sizeof *new_cmd);
776 
777 	IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);
778 
779 	cmd->srq_handle	= srq->handle;
780 	cmd->attr_mask	= srq_attr_mask;
781 	cmd->max_wr	= srq_attr->max_wr;
782 	cmd->srq_limit	= srq_attr->srq_limit;
783 	cmd->max_sge	= 0;
784 	cmd->reserved	= 0;
785 
786 	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
787 		return errno;
788 
789 	return 0;
790 }
791 
792 int ibv_cmd_modify_srq(struct ibv_srq *srq,
793 		       struct ibv_srq_attr *srq_attr,
794 		       int srq_attr_mask,
795 		       struct ibv_modify_srq *cmd, size_t cmd_size)
796 {
797 	if (abi_ver == 3)
798 		return ibv_cmd_modify_srq_v3(srq, srq_attr, srq_attr_mask,
799 					     cmd, cmd_size);
800 
801 	IBV_INIT_CMD(cmd, cmd_size, MODIFY_SRQ);
802 
803 	cmd->srq_handle	= srq->handle;
804 	cmd->attr_mask	= srq_attr_mask;
805 	cmd->max_wr	= srq_attr->max_wr;
806 	cmd->srq_limit	= srq_attr->srq_limit;
807 
808 	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
809 		return errno;
810 
811 	return 0;
812 }
813 
814 int ibv_cmd_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr,
815 		      struct ibv_query_srq *cmd, size_t cmd_size)
816 {
817 	struct ibv_query_srq_resp resp;
818 
819 	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_SRQ, &resp, sizeof resp);
820 	cmd->srq_handle = srq->handle;
821 	cmd->reserved   = 0;
822 
823 	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
824 		return errno;
825 
826 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
827 
828 	srq_attr->max_wr    = resp.max_wr;
829 	srq_attr->max_sge   = resp.max_sge;
830 	srq_attr->srq_limit = resp.srq_limit;
831 
832 	return 0;
833 }
834 
835 int ibv_cmd_destroy_srq(struct ibv_srq *srq)
836 {
837 	struct ibv_destroy_srq      cmd;
838 	struct ibv_destroy_srq_resp resp;
839 
840 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_SRQ, &resp, sizeof resp);
841 	cmd.srq_handle = srq->handle;
842 	cmd.reserved   = 0;
843 
844 	if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
845 		return errno;
846 
847 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
848 
849 	pthread_mutex_lock(&srq->mutex);
850 	while (srq->events_completed != resp.events_reported)
851 		pthread_cond_wait(&srq->cond, &srq->mutex);
852 	pthread_mutex_unlock(&srq->mutex);
853 
854 	pthread_cond_destroy(&srq->cond);
855 	pthread_mutex_destroy(&srq->mutex);
856 
857 	return 0;
858 }
859 
860 static int create_qp_ex_common(struct verbs_qp *qp,
861 			       struct ibv_qp_init_attr_ex *qp_attr,
862 			       struct verbs_xrcd *vxrcd,
863 			       struct ibv_create_qp_common *cmd)
864 {
865 	cmd->user_handle = (uintptr_t)qp;
866 
867 	if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_XRCD) {
868 		vxrcd = container_of(qp_attr->xrcd, struct verbs_xrcd, xrcd);
869 		cmd->pd_handle	= vxrcd->handle;
870 	} else {
871 		if (!(qp_attr->comp_mask & IBV_QP_INIT_ATTR_PD))
872 			return EINVAL;
873 
874 		cmd->pd_handle	= qp_attr->pd->handle;
875 		if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_IND_TABLE) {
876 			if (cmd->max_recv_wr || cmd->max_recv_sge ||
877 			    cmd->recv_cq_handle || qp_attr->srq)
878 				return EINVAL;
879 
880 			/* send_cq is optinal */
881 			if (qp_attr->cap.max_send_wr)
882 				cmd->send_cq_handle = qp_attr->send_cq->handle;
883 		} else {
884 			cmd->send_cq_handle = qp_attr->send_cq->handle;
885 
886 			if (qp_attr->qp_type != IBV_QPT_XRC_SEND) {
887 				cmd->recv_cq_handle = qp_attr->recv_cq->handle;
888 				cmd->srq_handle = qp_attr->srq ? qp_attr->srq->handle :
889 								 0;
890 			}
891 		}
892 	}
893 
894 	cmd->max_send_wr     = qp_attr->cap.max_send_wr;
895 	cmd->max_recv_wr     = qp_attr->cap.max_recv_wr;
896 	cmd->max_send_sge    = qp_attr->cap.max_send_sge;
897 	cmd->max_recv_sge    = qp_attr->cap.max_recv_sge;
898 	cmd->max_inline_data = qp_attr->cap.max_inline_data;
899 	cmd->sq_sig_all	     = qp_attr->sq_sig_all;
900 	cmd->qp_type         = qp_attr->qp_type;
901 	cmd->is_srq	     = !!qp_attr->srq;
902 	cmd->reserved	     = 0;
903 
904 	return 0;
905 }
906 
907 static void create_qp_handle_resp_common_cleanup(struct verbs_qp *qp)
908 {
909 	pthread_cond_destroy(&qp->qp.cond);
910 	pthread_mutex_destroy(&qp->qp.mutex);
911 }
912 
913 static int create_qp_handle_resp_common_init(struct verbs_qp *qp)
914 {
915 	int ret = 0;
916 
917 	ret = pthread_mutex_init(&qp->qp.mutex, NULL);
918 	if (ret)
919 		return ret;
920 	ret = pthread_cond_init(&qp->qp.cond, NULL);
921 	if (ret)
922 		goto err;
923 
924 	return ret;
925 
926 err:
927 	pthread_mutex_destroy(&qp->qp.mutex);
928 
929 	return ret;
930 }
931 
932 static void create_qp_handle_resp_common(struct ibv_context *context,
933 					 struct verbs_qp *qp,
934 					 struct ibv_qp_init_attr_ex *qp_attr,
935 					 struct ibv_create_qp_resp *resp,
936 					 struct verbs_xrcd *vxrcd,
937 					 int vqp_sz)
938 {
939 	if (abi_ver > 3) {
940 		qp_attr->cap.max_recv_sge    = resp->max_recv_sge;
941 		qp_attr->cap.max_send_sge    = resp->max_send_sge;
942 		qp_attr->cap.max_recv_wr     = resp->max_recv_wr;
943 		qp_attr->cap.max_send_wr     = resp->max_send_wr;
944 		qp_attr->cap.max_inline_data = resp->max_inline_data;
945 	}
946 
947 	qp->qp.handle		= resp->qp_handle;
948 	qp->qp.qp_num		= resp->qpn;
949 	qp->qp.context		= context;
950 	qp->qp.qp_context	= qp_attr->qp_context;
951 	qp->qp.pd		= qp_attr->pd;
952 	qp->qp.send_cq		= qp_attr->send_cq;
953 	qp->qp.recv_cq		= qp_attr->recv_cq;
954 	qp->qp.srq		= qp_attr->srq;
955 	qp->qp.qp_type		= qp_attr->qp_type;
956 	qp->qp.state		= IBV_QPS_RESET;
957 	qp->qp.events_completed = 0;
958 
959 	qp->comp_mask = 0;
960 	if (vext_field_avail(struct verbs_qp, xrcd, vqp_sz) &&
961 	    (qp_attr->comp_mask & IBV_QP_INIT_ATTR_XRCD)) {
962 		qp->comp_mask |= VERBS_QP_XRCD;
963 		qp->xrcd = vxrcd;
964 	}
965 }
966 
967 enum {
968 	CREATE_QP_EX2_SUP_CREATE_FLAGS = IBV_QP_CREATE_BLOCK_SELF_MCAST_LB |
969 					 IBV_QP_CREATE_SCATTER_FCS |
970 					 IBV_QP_CREATE_CVLAN_STRIPPING,
971 };
972 
973 int ibv_cmd_create_qp_ex2(struct ibv_context *context,
974 			  struct verbs_qp *qp, int vqp_sz,
975 			  struct ibv_qp_init_attr_ex *qp_attr,
976 			  struct ibv_create_qp_ex *cmd,
977 			  size_t cmd_core_size,
978 			  size_t cmd_size,
979 			  struct ibv_create_qp_resp_ex *resp,
980 			  size_t resp_core_size,
981 			  size_t resp_size)
982 {
983 	struct verbs_xrcd *vxrcd = NULL;
984 	int err;
985 
986 	if (qp_attr->comp_mask >= IBV_QP_INIT_ATTR_RESERVED)
987 		return EINVAL;
988 
989 	if (resp_core_size <
990 	    offsetof(struct ibv_create_qp_resp_ex, response_length) +
991 	    sizeof(resp->response_length))
992 		return EINVAL;
993 
994 	memset(cmd, 0, cmd_core_size);
995 
996 	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, CREATE_QP_EX, resp,
997 			       resp_core_size, resp_size);
998 
999 	err = create_qp_ex_common(qp, qp_attr, vxrcd, &cmd->base);
1000 	if (err)
1001 		return err;
1002 
1003 	if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_CREATE_FLAGS) {
1004 		if (qp_attr->create_flags & ~CREATE_QP_EX2_SUP_CREATE_FLAGS)
1005 			return EINVAL;
1006 		if (cmd_core_size < offsetof(struct ibv_create_qp_ex, create_flags) +
1007 				    sizeof(qp_attr->create_flags))
1008 			return EINVAL;
1009 		cmd->create_flags = qp_attr->create_flags;
1010 	}
1011 
1012 	if (qp_attr->comp_mask & IBV_QP_INIT_ATTR_IND_TABLE) {
1013 		if (cmd_core_size < offsetof(struct ibv_create_qp_ex, ind_tbl_handle) +
1014 				    sizeof(cmd->ind_tbl_handle))
1015 			return EINVAL;
1016 		cmd->ind_tbl_handle = qp_attr->rwq_ind_tbl->ind_tbl_handle;
1017 		cmd->comp_mask = IBV_CREATE_QP_EX_KERNEL_MASK_IND_TABLE;
1018 	}
1019 
1020 	err = create_qp_handle_resp_common_init(qp);
1021 	if (err)
1022 		return err;
1023 
1024 	err = write(context->cmd_fd, cmd, cmd_size);
1025 	if (err != cmd_size) {
1026 		err = errno;
1027 		goto err;
1028 	}
1029 
1030 	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1031 
1032 	create_qp_handle_resp_common(context, qp, qp_attr, &resp->base, vxrcd,
1033 				     vqp_sz);
1034 
1035 	return 0;
1036 
1037 err:
1038 	create_qp_handle_resp_common_cleanup(qp);
1039 
1040 	return err;
1041 }
1042 
1043 int ibv_cmd_create_qp_ex(struct ibv_context *context,
1044 			 struct verbs_qp *qp, int vqp_sz,
1045 			 struct ibv_qp_init_attr_ex *attr_ex,
1046 			 struct ibv_create_qp *cmd, size_t cmd_size,
1047 			 struct ibv_create_qp_resp *resp, size_t resp_size)
1048 {
1049 	struct verbs_xrcd *vxrcd = NULL;
1050 	int err;
1051 
1052 	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);
1053 
1054 	if (attr_ex->comp_mask > (IBV_QP_INIT_ATTR_XRCD | IBV_QP_INIT_ATTR_PD))
1055 		return ENOSYS;
1056 
1057 	err = create_qp_ex_common(qp, attr_ex, vxrcd,
1058 				  (struct ibv_create_qp_common *)&cmd->user_handle);
1059 	if (err)
1060 		return err;
1061 
1062 	err = create_qp_handle_resp_common_init(qp);
1063 	if (err)
1064 		return err;
1065 
1066 	err = write(context->cmd_fd, cmd, cmd_size);
1067 	if (err != cmd_size) {
1068 		err = errno;
1069 		goto err;
1070 	}
1071 
1072 	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1073 
1074 	if (abi_ver == 4) {
1075 		struct ibv_create_qp_resp_v4 *resp_v4 =
1076 			(struct ibv_create_qp_resp_v4 *)resp;
1077 
1078 		memmove((void *)resp + sizeof *resp,
1079 			(void *)resp_v4 + sizeof *resp_v4,
1080 			resp_size - sizeof *resp);
1081 	} else if (abi_ver <= 3) {
1082 		struct ibv_create_qp_resp_v3 *resp_v3 =
1083 			(struct ibv_create_qp_resp_v3 *)resp;
1084 
1085 		memmove((void *)resp + sizeof *resp,
1086 			(void *)resp_v3 + sizeof *resp_v3,
1087 			resp_size - sizeof *resp);
1088 	}
1089 
1090 	create_qp_handle_resp_common(context, qp, attr_ex, resp, vxrcd, vqp_sz);
1091 
1092 	return 0;
1093 
1094 err:
1095 	create_qp_handle_resp_common_cleanup(qp);
1096 
1097 	return err;
1098 }
1099 
1100 int ibv_cmd_create_qp(struct ibv_pd *pd,
1101 		      struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
1102 		      struct ibv_create_qp *cmd, size_t cmd_size,
1103 		      struct ibv_create_qp_resp *resp, size_t resp_size)
1104 {
1105 	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_QP, resp, resp_size);
1106 
1107 	cmd->user_handle     = (uintptr_t) qp;
1108 	cmd->pd_handle       = pd->handle;
1109 	cmd->send_cq_handle  = attr->send_cq->handle;
1110 	cmd->recv_cq_handle  = attr->recv_cq->handle;
1111 	cmd->srq_handle      = attr->srq ? attr->srq->handle : 0;
1112 	cmd->max_send_wr     = attr->cap.max_send_wr;
1113 	cmd->max_recv_wr     = attr->cap.max_recv_wr;
1114 	cmd->max_send_sge    = attr->cap.max_send_sge;
1115 	cmd->max_recv_sge    = attr->cap.max_recv_sge;
1116 	cmd->max_inline_data = attr->cap.max_inline_data;
1117 	cmd->sq_sig_all	     = attr->sq_sig_all;
1118 	cmd->qp_type 	     = attr->qp_type;
1119 	cmd->is_srq 	     = !!attr->srq;
1120 	cmd->reserved	     = 0;
1121 
1122 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
1123 		return errno;
1124 
1125 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1126 
1127 	qp->handle 		  = resp->qp_handle;
1128 	qp->qp_num 		  = resp->qpn;
1129 	qp->context		  = pd->context;
1130 
1131 	if (abi_ver > 3) {
1132 		attr->cap.max_recv_sge    = resp->max_recv_sge;
1133 		attr->cap.max_send_sge    = resp->max_send_sge;
1134 		attr->cap.max_recv_wr     = resp->max_recv_wr;
1135 		attr->cap.max_send_wr     = resp->max_send_wr;
1136 		attr->cap.max_inline_data = resp->max_inline_data;
1137 	}
1138 
1139 	if (abi_ver == 4) {
1140 		struct ibv_create_qp_resp_v4 *resp_v4 =
1141 			(struct ibv_create_qp_resp_v4 *) resp;
1142 
1143 		memmove((void *) resp + sizeof *resp,
1144 			(void *) resp_v4 + sizeof *resp_v4,
1145 			resp_size - sizeof *resp);
1146 	} else if (abi_ver <= 3) {
1147 		struct ibv_create_qp_resp_v3 *resp_v3 =
1148 			(struct ibv_create_qp_resp_v3 *) resp;
1149 
1150 		memmove((void *) resp + sizeof *resp,
1151 			(void *) resp_v3 + sizeof *resp_v3,
1152 			resp_size - sizeof *resp);
1153 	}
1154 
1155 	return 0;
1156 }
1157 
1158 int ibv_cmd_open_qp(struct ibv_context *context, struct verbs_qp *qp,
1159 		    int vqp_sz,
1160 		    struct ibv_qp_open_attr *attr,
1161 		    struct ibv_open_qp *cmd, size_t cmd_size,
1162 		    struct ibv_create_qp_resp *resp, size_t resp_size)
1163 {
1164 	int err = 0;
1165 	struct verbs_xrcd *xrcd;
1166 	IBV_INIT_CMD_RESP(cmd, cmd_size, OPEN_QP, resp, resp_size);
1167 
1168 	if (attr->comp_mask >= IBV_QP_OPEN_ATTR_RESERVED)
1169 		return ENOSYS;
1170 
1171 	if (!(attr->comp_mask & IBV_QP_OPEN_ATTR_XRCD) ||
1172 	    !(attr->comp_mask & IBV_QP_OPEN_ATTR_NUM) ||
1173 	    !(attr->comp_mask & IBV_QP_OPEN_ATTR_TYPE))
1174 		return EINVAL;
1175 
1176 	xrcd = container_of(attr->xrcd, struct verbs_xrcd, xrcd);
1177 	cmd->user_handle = (uintptr_t) qp;
1178 	cmd->pd_handle   = xrcd->handle;
1179 	cmd->qpn         = attr->qp_num;
1180 	cmd->qp_type     = attr->qp_type;
1181 
1182 	err = pthread_mutex_init(&qp->qp.mutex, NULL);
1183 	if (err)
1184 		return err;
1185 	err = pthread_cond_init(&qp->qp.cond, NULL);
1186 	if (err)
1187 		goto err_mutex;
1188 
1189 	err = write(context->cmd_fd, cmd, cmd_size);
1190 	if (err != cmd_size) {
1191 		err = errno;
1192 		goto err_cond;
1193 	}
1194 
1195 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1196 
1197 	qp->qp.handle     = resp->qp_handle;
1198 	qp->qp.context    = context;
1199 	qp->qp.qp_context = attr->qp_context;
1200 	qp->qp.pd	  = NULL;
1201 	qp->qp.send_cq	  = NULL;
1202 	qp->qp.recv_cq    = NULL;
1203 	qp->qp.srq	  = NULL;
1204 	qp->qp.qp_num	  = attr->qp_num;
1205 	qp->qp.qp_type	  = attr->qp_type;
1206 	qp->qp.state	  = IBV_QPS_UNKNOWN;
1207 	qp->qp.events_completed = 0;
1208 	qp->comp_mask = 0;
1209 	if (vext_field_avail(struct verbs_qp, xrcd, vqp_sz)) {
1210 		qp->comp_mask = VERBS_QP_XRCD;
1211 		qp->xrcd	 = xrcd;
1212 	}
1213 
1214 	return 0;
1215 
1216 err_cond:
1217 	pthread_cond_destroy(&qp->qp.cond);
1218 err_mutex:
1219 	pthread_mutex_destroy(&qp->qp.mutex);
1220 
1221 	return err;
1222 }
1223 
1224 int ibv_cmd_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1225 		     int attr_mask,
1226 		     struct ibv_qp_init_attr *init_attr,
1227 		     struct ibv_query_qp *cmd, size_t cmd_size)
1228 {
1229 	struct ibv_query_qp_resp resp;
1230 
1231 	/*
1232 	 * Masks over IBV_QP_DEST_QPN are not supported by
1233 	 * that not extended command.
1234 	 */
1235 	if (attr_mask & ~((IBV_QP_DEST_QPN << 1) - 1))
1236 		return EOPNOTSUPP;
1237 
1238 	IBV_INIT_CMD_RESP(cmd, cmd_size, QUERY_QP, &resp, sizeof resp);
1239 	cmd->qp_handle = qp->handle;
1240 	cmd->attr_mask = attr_mask;
1241 
1242 	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1243 		return errno;
1244 
1245 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1246 
1247 	attr->qkey                          = resp.qkey;
1248 	attr->rq_psn                        = resp.rq_psn;
1249 	attr->sq_psn                        = resp.sq_psn;
1250 	attr->dest_qp_num                   = resp.dest_qp_num;
1251 	attr->qp_access_flags               = resp.qp_access_flags;
1252 	attr->pkey_index                    = resp.pkey_index;
1253 	attr->alt_pkey_index                = resp.alt_pkey_index;
1254 	attr->qp_state                      = resp.qp_state;
1255 	attr->cur_qp_state                  = resp.cur_qp_state;
1256 	attr->path_mtu                      = resp.path_mtu;
1257 	attr->path_mig_state                = resp.path_mig_state;
1258 	attr->sq_draining                   = resp.sq_draining;
1259 	attr->max_rd_atomic                 = resp.max_rd_atomic;
1260 	attr->max_dest_rd_atomic            = resp.max_dest_rd_atomic;
1261 	attr->min_rnr_timer                 = resp.min_rnr_timer;
1262 	attr->port_num                      = resp.port_num;
1263 	attr->timeout                       = resp.timeout;
1264 	attr->retry_cnt                     = resp.retry_cnt;
1265 	attr->rnr_retry                     = resp.rnr_retry;
1266 	attr->alt_port_num                  = resp.alt_port_num;
1267 	attr->alt_timeout                   = resp.alt_timeout;
1268 	attr->cap.max_send_wr               = resp.max_send_wr;
1269 	attr->cap.max_recv_wr               = resp.max_recv_wr;
1270 	attr->cap.max_send_sge              = resp.max_send_sge;
1271 	attr->cap.max_recv_sge              = resp.max_recv_sge;
1272 	attr->cap.max_inline_data           = resp.max_inline_data;
1273 
1274 	memcpy(attr->ah_attr.grh.dgid.raw, resp.dest.dgid, 16);
1275 	attr->ah_attr.grh.flow_label        = resp.dest.flow_label;
1276 	attr->ah_attr.dlid                  = resp.dest.dlid;
1277 	attr->ah_attr.grh.sgid_index        = resp.dest.sgid_index;
1278 	attr->ah_attr.grh.hop_limit         = resp.dest.hop_limit;
1279 	attr->ah_attr.grh.traffic_class     = resp.dest.traffic_class;
1280 	attr->ah_attr.sl                    = resp.dest.sl;
1281 	attr->ah_attr.src_path_bits         = resp.dest.src_path_bits;
1282 	attr->ah_attr.static_rate           = resp.dest.static_rate;
1283 	attr->ah_attr.is_global             = resp.dest.is_global;
1284 	attr->ah_attr.port_num              = resp.dest.port_num;
1285 
1286 	memcpy(attr->alt_ah_attr.grh.dgid.raw, resp.alt_dest.dgid, 16);
1287 	attr->alt_ah_attr.grh.flow_label    = resp.alt_dest.flow_label;
1288 	attr->alt_ah_attr.dlid              = resp.alt_dest.dlid;
1289 	attr->alt_ah_attr.grh.sgid_index    = resp.alt_dest.sgid_index;
1290 	attr->alt_ah_attr.grh.hop_limit     = resp.alt_dest.hop_limit;
1291 	attr->alt_ah_attr.grh.traffic_class = resp.alt_dest.traffic_class;
1292 	attr->alt_ah_attr.sl                = resp.alt_dest.sl;
1293 	attr->alt_ah_attr.src_path_bits     = resp.alt_dest.src_path_bits;
1294 	attr->alt_ah_attr.static_rate       = resp.alt_dest.static_rate;
1295 	attr->alt_ah_attr.is_global         = resp.alt_dest.is_global;
1296 	attr->alt_ah_attr.port_num          = resp.alt_dest.port_num;
1297 
1298 	init_attr->qp_context               = qp->qp_context;
1299 	init_attr->send_cq                  = qp->send_cq;
1300 	init_attr->recv_cq                  = qp->recv_cq;
1301 	init_attr->srq                      = qp->srq;
1302 	init_attr->qp_type                  = qp->qp_type;
1303 	init_attr->cap.max_send_wr          = resp.max_send_wr;
1304 	init_attr->cap.max_recv_wr          = resp.max_recv_wr;
1305 	init_attr->cap.max_send_sge         = resp.max_send_sge;
1306 	init_attr->cap.max_recv_sge         = resp.max_recv_sge;
1307 	init_attr->cap.max_inline_data      = resp.max_inline_data;
1308 	init_attr->sq_sig_all               = resp.sq_sig_all;
1309 
1310 	return 0;
1311 }
1312 
1313 static void copy_modify_qp_fields(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1314 				  int attr_mask,
1315 				  struct ibv_modify_qp_common *cmd)
1316 {
1317 	cmd->qp_handle = qp->handle;
1318 	cmd->attr_mask = attr_mask;
1319 
1320 	if (attr_mask & IBV_QP_STATE)
1321 		cmd->qp_state = attr->qp_state;
1322 	if (attr_mask & IBV_QP_CUR_STATE)
1323 		cmd->cur_qp_state = attr->cur_qp_state;
1324 	if (attr_mask & IBV_QP_EN_SQD_ASYNC_NOTIFY)
1325 		cmd->en_sqd_async_notify = attr->en_sqd_async_notify;
1326 	if (attr_mask & IBV_QP_ACCESS_FLAGS)
1327 		cmd->qp_access_flags = attr->qp_access_flags;
1328 	if (attr_mask & IBV_QP_PKEY_INDEX)
1329 		cmd->pkey_index = attr->pkey_index;
1330 	if (attr_mask & IBV_QP_PORT)
1331 		cmd->port_num = attr->port_num;
1332 	if (attr_mask & IBV_QP_QKEY)
1333 		cmd->qkey = attr->qkey;
1334 
1335 	if (attr_mask & IBV_QP_AV) {
1336 		memcpy(cmd->dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
1337 		cmd->dest.flow_label = attr->ah_attr.grh.flow_label;
1338 		cmd->dest.dlid = attr->ah_attr.dlid;
1339 		cmd->dest.reserved = 0;
1340 		cmd->dest.sgid_index = attr->ah_attr.grh.sgid_index;
1341 		cmd->dest.hop_limit = attr->ah_attr.grh.hop_limit;
1342 		cmd->dest.traffic_class = attr->ah_attr.grh.traffic_class;
1343 		cmd->dest.sl = attr->ah_attr.sl;
1344 		cmd->dest.src_path_bits = attr->ah_attr.src_path_bits;
1345 		cmd->dest.static_rate = attr->ah_attr.static_rate;
1346 		cmd->dest.is_global = attr->ah_attr.is_global;
1347 		cmd->dest.port_num = attr->ah_attr.port_num;
1348 	}
1349 
1350 	if (attr_mask & IBV_QP_PATH_MTU)
1351 		cmd->path_mtu = attr->path_mtu;
1352 	if (attr_mask & IBV_QP_TIMEOUT)
1353 		cmd->timeout = attr->timeout;
1354 	if (attr_mask & IBV_QP_RETRY_CNT)
1355 		cmd->retry_cnt = attr->retry_cnt;
1356 	if (attr_mask & IBV_QP_RNR_RETRY)
1357 		cmd->rnr_retry = attr->rnr_retry;
1358 	if (attr_mask & IBV_QP_RQ_PSN)
1359 		cmd->rq_psn = attr->rq_psn;
1360 	if (attr_mask & IBV_QP_MAX_QP_RD_ATOMIC)
1361 		cmd->max_rd_atomic = attr->max_rd_atomic;
1362 
1363 	if (attr_mask & IBV_QP_ALT_PATH) {
1364 		cmd->alt_pkey_index = attr->alt_pkey_index;
1365 		cmd->alt_port_num = attr->alt_port_num;
1366 		cmd->alt_timeout = attr->alt_timeout;
1367 
1368 		memcpy(cmd->alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
1369 		cmd->alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
1370 		cmd->alt_dest.dlid = attr->alt_ah_attr.dlid;
1371 		cmd->alt_dest.reserved = 0;
1372 		cmd->alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
1373 		cmd->alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
1374 		cmd->alt_dest.traffic_class =
1375 		    attr->alt_ah_attr.grh.traffic_class;
1376 		cmd->alt_dest.sl = attr->alt_ah_attr.sl;
1377 		cmd->alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
1378 		cmd->alt_dest.static_rate = attr->alt_ah_attr.static_rate;
1379 		cmd->alt_dest.is_global = attr->alt_ah_attr.is_global;
1380 		cmd->alt_dest.port_num = attr->alt_ah_attr.port_num;
1381 	}
1382 
1383 	if (attr_mask & IBV_QP_MIN_RNR_TIMER)
1384 		cmd->min_rnr_timer = attr->min_rnr_timer;
1385 	if (attr_mask & IBV_QP_SQ_PSN)
1386 		cmd->sq_psn = attr->sq_psn;
1387 	if (attr_mask & IBV_QP_MAX_DEST_RD_ATOMIC)
1388 		cmd->max_dest_rd_atomic = attr->max_dest_rd_atomic;
1389 	if (attr_mask & IBV_QP_PATH_MIG_STATE)
1390 		cmd->path_mig_state = attr->path_mig_state;
1391 	if (attr_mask & IBV_QP_DEST_QPN)
1392 		cmd->dest_qp_num = attr->dest_qp_num;
1393 
1394 	cmd->reserved[0] = cmd->reserved[1] = 0;
1395 }
1396 
1397 int ibv_cmd_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1398 		      int attr_mask,
1399 		      struct ibv_modify_qp *cmd, size_t cmd_size)
1400 {
1401 	/*
1402 	 * Masks over IBV_QP_DEST_QPN are only supported by
1403 	 * ibv_cmd_modify_qp_ex.
1404 	 */
1405 	if (attr_mask & ~((IBV_QP_DEST_QPN << 1) - 1))
1406 		return EOPNOTSUPP;
1407 
1408 	IBV_INIT_CMD(cmd, cmd_size, MODIFY_QP);
1409 
1410 	copy_modify_qp_fields(qp, attr, attr_mask, &cmd->base);
1411 
1412 	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1413 		return errno;
1414 
1415 	return 0;
1416 }
1417 
1418 int ibv_cmd_modify_qp_ex(struct ibv_qp *qp, struct ibv_qp_attr *attr,
1419 			 int attr_mask, struct ibv_modify_qp_ex *cmd,
1420 			 size_t cmd_core_size, size_t cmd_size,
1421 			 struct ibv_modify_qp_resp_ex *resp,
1422 			 size_t resp_core_size, size_t resp_size)
1423 {
1424 	if (resp_core_size < offsetof(struct ibv_modify_qp_resp_ex,
1425 			     response_length) + sizeof(resp->response_length))
1426 		return EINVAL;
1427 
1428 	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size, MODIFY_QP_EX,
1429 			       resp, resp_core_size, resp_size);
1430 
1431 	copy_modify_qp_fields(qp, attr, attr_mask, &cmd->base);
1432 
1433 	if (attr_mask & IBV_QP_RATE_LIMIT) {
1434 		if (cmd_size >= offsetof(struct ibv_modify_qp_ex, rate_limit) +
1435 		    sizeof(cmd->rate_limit))
1436 			cmd->rate_limit = attr->rate_limit;
1437 		else
1438 			return EINVAL;
1439 	}
1440 
1441 	if (write(qp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1442 		return errno;
1443 
1444 	(void)VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1445 
1446 	return 0;
1447 }
1448 
1449 int ibv_cmd_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
1450 		      struct ibv_send_wr **bad_wr)
1451 {
1452 	struct ibv_post_send     *cmd;
1453 	struct ibv_post_send_resp resp;
1454 	struct ibv_send_wr       *i;
1455 	struct ibv_kern_send_wr  *n, *tmp;
1456 	struct ibv_sge           *s;
1457 	unsigned                  wr_count = 0;
1458 	unsigned                  sge_count = 0;
1459 	int                       cmd_size;
1460 	int                       ret = 0;
1461 
1462 	for (i = wr; i; i = i->next) {
1463 		wr_count++;
1464 		sge_count += i->num_sge;
1465 	}
1466 
1467 	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
1468 	cmd  = alloca(cmd_size);
1469 
1470 	IBV_INIT_CMD_RESP(cmd, cmd_size, POST_SEND, &resp, sizeof resp);
1471 	cmd->qp_handle = ibqp->handle;
1472 	cmd->wr_count  = wr_count;
1473 	cmd->sge_count = sge_count;
1474 	cmd->wqe_size  = sizeof *n;
1475 
1476 	n = (struct ibv_kern_send_wr *) ((void *) cmd + sizeof *cmd);
1477 	s = (struct ibv_sge *) (n + wr_count);
1478 
1479 	tmp = n;
1480 	for (i = wr; i; i = i->next) {
1481 		tmp->wr_id 	= i->wr_id;
1482 		tmp->num_sge 	= i->num_sge;
1483 		tmp->opcode 	= i->opcode;
1484 		tmp->send_flags = i->send_flags;
1485 		tmp->imm_data 	= i->imm_data;
1486 		if (ibqp->qp_type == IBV_QPT_UD) {
1487 			tmp->wr.ud.ah 	       = i->wr.ud.ah->handle;
1488 			tmp->wr.ud.remote_qpn  = i->wr.ud.remote_qpn;
1489 			tmp->wr.ud.remote_qkey = i->wr.ud.remote_qkey;
1490 		} else {
1491 			switch (i->opcode) {
1492 			case IBV_WR_RDMA_WRITE:
1493 			case IBV_WR_RDMA_WRITE_WITH_IMM:
1494 			case IBV_WR_RDMA_READ:
1495 				tmp->wr.rdma.remote_addr =
1496 					i->wr.rdma.remote_addr;
1497 				tmp->wr.rdma.rkey = i->wr.rdma.rkey;
1498 				break;
1499 			case IBV_WR_ATOMIC_CMP_AND_SWP:
1500 			case IBV_WR_ATOMIC_FETCH_AND_ADD:
1501 				tmp->wr.atomic.remote_addr =
1502 					i->wr.atomic.remote_addr;
1503 				tmp->wr.atomic.compare_add =
1504 					i->wr.atomic.compare_add;
1505 				tmp->wr.atomic.swap = i->wr.atomic.swap;
1506 				tmp->wr.atomic.rkey = i->wr.atomic.rkey;
1507 				break;
1508 			default:
1509 				break;
1510 			}
1511 		}
1512 
1513 		if (tmp->num_sge) {
1514 			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
1515 			s += tmp->num_sge;
1516 		}
1517 
1518 		tmp++;
1519 	}
1520 
1521 	resp.bad_wr = 0;
1522 	if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1523 		ret = errno;
1524 
1525 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1526 
1527 	wr_count = resp.bad_wr;
1528 	if (wr_count) {
1529 		i = wr;
1530 		while (--wr_count)
1531 			i = i->next;
1532 		*bad_wr = i;
1533 	} else if (ret)
1534 		*bad_wr = wr;
1535 
1536 	return ret;
1537 }
1538 
1539 int ibv_cmd_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
1540 		      struct ibv_recv_wr **bad_wr)
1541 {
1542 	struct ibv_post_recv     *cmd;
1543 	struct ibv_post_recv_resp resp;
1544 	struct ibv_recv_wr       *i;
1545 	struct ibv_kern_recv_wr  *n, *tmp;
1546 	struct ibv_sge           *s;
1547 	unsigned                  wr_count = 0;
1548 	unsigned                  sge_count = 0;
1549 	int                       cmd_size;
1550 	int                       ret = 0;
1551 
1552 	for (i = wr; i; i = i->next) {
1553 		wr_count++;
1554 		sge_count += i->num_sge;
1555 	}
1556 
1557 	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
1558 	cmd  = alloca(cmd_size);
1559 
1560 	IBV_INIT_CMD_RESP(cmd, cmd_size, POST_RECV, &resp, sizeof resp);
1561 	cmd->qp_handle = ibqp->handle;
1562 	cmd->wr_count  = wr_count;
1563 	cmd->sge_count = sge_count;
1564 	cmd->wqe_size  = sizeof *n;
1565 
1566 	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
1567 	s = (struct ibv_sge *) (n + wr_count);
1568 
1569 	tmp = n;
1570 	for (i = wr; i; i = i->next) {
1571 		tmp->wr_id   = i->wr_id;
1572 		tmp->num_sge = i->num_sge;
1573 
1574 		if (tmp->num_sge) {
1575 			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
1576 			s += tmp->num_sge;
1577 		}
1578 
1579 		tmp++;
1580 	}
1581 
1582 	resp.bad_wr = 0;
1583 	if (write(ibqp->context->cmd_fd, cmd, cmd_size) != cmd_size)
1584 		ret = errno;
1585 
1586 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1587 
1588 	wr_count = resp.bad_wr;
1589 	if (wr_count) {
1590 		i = wr;
1591 		while (--wr_count)
1592 			i = i->next;
1593 		*bad_wr = i;
1594 	} else if (ret)
1595 		*bad_wr = wr;
1596 
1597 	return ret;
1598 }
1599 
1600 int ibv_cmd_post_srq_recv(struct ibv_srq *srq, struct ibv_recv_wr *wr,
1601 		      struct ibv_recv_wr **bad_wr)
1602 {
1603 	struct ibv_post_srq_recv *cmd;
1604 	struct ibv_post_srq_recv_resp resp;
1605 	struct ibv_recv_wr       *i;
1606 	struct ibv_kern_recv_wr  *n, *tmp;
1607 	struct ibv_sge           *s;
1608 	unsigned                  wr_count = 0;
1609 	unsigned                  sge_count = 0;
1610 	int                       cmd_size;
1611 	int                       ret = 0;
1612 
1613 	for (i = wr; i; i = i->next) {
1614 		wr_count++;
1615 		sge_count += i->num_sge;
1616 	}
1617 
1618 	cmd_size = sizeof *cmd + wr_count * sizeof *n + sge_count * sizeof *s;
1619 	cmd  = alloca(cmd_size);
1620 
1621 	IBV_INIT_CMD_RESP(cmd, cmd_size, POST_SRQ_RECV, &resp, sizeof resp);
1622 	cmd->srq_handle = srq->handle;
1623 	cmd->wr_count  = wr_count;
1624 	cmd->sge_count = sge_count;
1625 	cmd->wqe_size  = sizeof *n;
1626 
1627 	n = (struct ibv_kern_recv_wr *) ((void *) cmd + sizeof *cmd);
1628 	s = (struct ibv_sge *) (n + wr_count);
1629 
1630 	tmp = n;
1631 	for (i = wr; i; i = i->next) {
1632 		tmp->wr_id = i->wr_id;
1633 		tmp->num_sge = i->num_sge;
1634 
1635 		if (tmp->num_sge) {
1636 			memcpy(s, i->sg_list, tmp->num_sge * sizeof *s);
1637 			s += tmp->num_sge;
1638 		}
1639 
1640 		tmp++;
1641 	}
1642 
1643 	resp.bad_wr = 0;
1644 	if (write(srq->context->cmd_fd, cmd, cmd_size) != cmd_size)
1645 		ret = errno;
1646 
1647 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1648 
1649 	wr_count = resp.bad_wr;
1650 	if (wr_count) {
1651 		i = wr;
1652 		while (--wr_count)
1653 			i = i->next;
1654 		*bad_wr = i;
1655 	} else if (ret)
1656 		*bad_wr = wr;
1657 
1658 	return ret;
1659 }
1660 
1661 int ibv_cmd_create_ah(struct ibv_pd *pd, struct ibv_ah *ah,
1662 		      struct ibv_ah_attr *attr,
1663 		      struct ibv_create_ah_resp *resp,
1664 		      size_t resp_size)
1665 {
1666 	struct ibv_create_ah      cmd;
1667 
1668 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_AH, resp, resp_size);
1669 	cmd.user_handle            = (uintptr_t) ah;
1670 	cmd.pd_handle              = pd->handle;
1671 	cmd.attr.dlid              = attr->dlid;
1672 	cmd.attr.sl                = attr->sl;
1673 	cmd.attr.src_path_bits     = attr->src_path_bits;
1674 	cmd.attr.static_rate       = attr->static_rate;
1675 	cmd.attr.is_global         = attr->is_global;
1676 	cmd.attr.port_num          = attr->port_num;
1677 	cmd.attr.grh.flow_label    = attr->grh.flow_label;
1678 	cmd.attr.grh.sgid_index    = attr->grh.sgid_index;
1679 	cmd.attr.grh.hop_limit     = attr->grh.hop_limit;
1680 	cmd.attr.grh.traffic_class = attr->grh.traffic_class;
1681 	memcpy(cmd.attr.grh.dgid, attr->grh.dgid.raw, 16);
1682 
1683 	if (write(pd->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1684 		return errno;
1685 
1686 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
1687 
1688 	ah->handle  = resp->handle;
1689 	ah->context = pd->context;
1690 
1691 	return 0;
1692 }
1693 
1694 int ibv_cmd_destroy_ah(struct ibv_ah *ah)
1695 {
1696 	struct ibv_destroy_ah cmd;
1697 
1698 	IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_AH);
1699 	cmd.ah_handle = ah->handle;
1700 
1701 	if (write(ah->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1702 		return errno;
1703 
1704 	return 0;
1705 }
1706 
1707 int ibv_cmd_destroy_qp(struct ibv_qp *qp)
1708 {
1709 	struct ibv_destroy_qp      cmd;
1710 	struct ibv_destroy_qp_resp resp;
1711 
1712 	IBV_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_QP, &resp, sizeof resp);
1713 	cmd.qp_handle = qp->handle;
1714 	cmd.reserved  = 0;
1715 
1716 	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1717 		return errno;
1718 
1719 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
1720 
1721 	pthread_mutex_lock(&qp->mutex);
1722 	while (qp->events_completed != resp.events_reported)
1723 		pthread_cond_wait(&qp->cond, &qp->mutex);
1724 	pthread_mutex_unlock(&qp->mutex);
1725 
1726 	pthread_cond_destroy(&qp->cond);
1727 	pthread_mutex_destroy(&qp->mutex);
1728 
1729 	return 0;
1730 }
1731 
1732 int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1733 {
1734 	struct ibv_attach_mcast cmd;
1735 
1736 	IBV_INIT_CMD(&cmd, sizeof cmd, ATTACH_MCAST);
1737 	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
1738 	cmd.qp_handle = qp->handle;
1739 	cmd.mlid      = lid;
1740 	cmd.reserved  = 0;
1741 
1742 	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1743 		return errno;
1744 
1745 	return 0;
1746 }
1747 
1748 int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid)
1749 {
1750 	struct ibv_detach_mcast cmd;
1751 
1752 	IBV_INIT_CMD(&cmd, sizeof cmd, DETACH_MCAST);
1753 	memcpy(cmd.gid, gid->raw, sizeof cmd.gid);
1754 	cmd.qp_handle = qp->handle;
1755 	cmd.mlid      = lid;
1756 	cmd.reserved  = 0;
1757 
1758 	if (write(qp->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
1759 		return errno;
1760 
1761 	return 0;
1762 }
1763 
/*
 * Return non-zero when the @size bytes starting at @addr are all zero.
 *
 * An empty (or negative-sized) range is reported as all-zero without
 * touching @addr; otherwise addr[0] would be read out of bounds and
 * size - 1 would turn into a huge length for memcmp().
 */
static int buffer_is_zero(char *addr, ssize_t size)
{
	if (size <= 0)
		return 1;

	/*
	 * If the first byte is zero and every byte equals its successor,
	 * then all bytes are zero.
	 */
	return addr[0] == 0 && !memcmp(addr, addr + 1, size - 1);
}
1768 
1769 static int get_filters_size(struct ibv_flow_spec *ib_spec,
1770 			    struct ibv_kern_spec *kern_spec,
1771 			    int *ib_filter_size, int *kern_filter_size,
1772 			    enum ibv_flow_spec_type type)
1773 {
1774 	void *ib_spec_filter_mask;
1775 	int curr_kern_filter_size;
1776 	int min_filter_size;
1777 
1778 	*ib_filter_size = (ib_spec->hdr.size - sizeof(ib_spec->hdr)) / 2;
1779 
1780 	switch (type) {
1781 	case IBV_FLOW_SPEC_IPV4_EXT:
1782 		min_filter_size =
1783 			offsetof(struct ibv_kern_ipv4_ext_filter, flags) +
1784 			sizeof(kern_spec->ipv4_ext.mask.flags);
1785 		curr_kern_filter_size = min_filter_size;
1786 		ib_spec_filter_mask = (void *)&ib_spec->ipv4_ext.val +
1787 			*ib_filter_size;
1788 		break;
1789 	case IBV_FLOW_SPEC_IPV6:
1790 		min_filter_size =
1791 			offsetof(struct ibv_kern_ipv6_filter, hop_limit) +
1792 			sizeof(kern_spec->ipv6.mask.hop_limit);
1793 		curr_kern_filter_size = min_filter_size;
1794 		ib_spec_filter_mask = (void *)&ib_spec->ipv6.val +
1795 			*ib_filter_size;
1796 		break;
1797 	case IBV_FLOW_SPEC_VXLAN_TUNNEL:
1798 		min_filter_size =
1799 			offsetof(struct ibv_kern_tunnel_filter,
1800 				 tunnel_id) +
1801 			sizeof(kern_spec->tunnel.mask.tunnel_id);
1802 		curr_kern_filter_size = min_filter_size;
1803 		ib_spec_filter_mask = (void *)&ib_spec->tunnel.val +
1804 			*ib_filter_size;
1805 		break;
1806 	default:
1807 		return EINVAL;
1808 	}
1809 
1810 	if (*ib_filter_size < min_filter_size)
1811 		return EINVAL;
1812 
1813 	if (*ib_filter_size > curr_kern_filter_size &&
1814 	    !buffer_is_zero(ib_spec_filter_mask + curr_kern_filter_size,
1815 			    *ib_filter_size - curr_kern_filter_size))
1816 		return EOPNOTSUPP;
1817 
1818 	*kern_filter_size = min_t(int, curr_kern_filter_size, *ib_filter_size);
1819 
1820 	return 0;
1821 }
1822 
1823 static int ib_spec_to_kern_spec(struct ibv_flow_spec *ib_spec,
1824 				struct ibv_kern_spec *kern_spec)
1825 {
1826 	int kern_filter_size;
1827 	int ib_filter_size;
1828 	int ret;
1829 
1830 	kern_spec->hdr.type = ib_spec->hdr.type;
1831 
1832 	switch (kern_spec->hdr.type) {
1833 	case IBV_FLOW_SPEC_ETH:
1834 	case IBV_FLOW_SPEC_ETH | IBV_FLOW_SPEC_INNER:
1835 		kern_spec->eth.size = sizeof(struct ibv_kern_spec_eth);
1836 		memcpy(&kern_spec->eth.val, &ib_spec->eth.val,
1837 		       sizeof(struct ibv_flow_eth_filter));
1838 		memcpy(&kern_spec->eth.mask, &ib_spec->eth.mask,
1839 		       sizeof(struct ibv_flow_eth_filter));
1840 		break;
1841 	case IBV_FLOW_SPEC_IPV4:
1842 	case IBV_FLOW_SPEC_IPV4 | IBV_FLOW_SPEC_INNER:
1843 		kern_spec->ipv4.size = sizeof(struct ibv_kern_spec_ipv4);
1844 		memcpy(&kern_spec->ipv4.val, &ib_spec->ipv4.val,
1845 		       sizeof(struct ibv_flow_ipv4_filter));
1846 		memcpy(&kern_spec->ipv4.mask, &ib_spec->ipv4.mask,
1847 		       sizeof(struct ibv_flow_ipv4_filter));
1848 		break;
1849 	case IBV_FLOW_SPEC_IPV4_EXT:
1850 	case IBV_FLOW_SPEC_IPV4_EXT | IBV_FLOW_SPEC_INNER:
1851 		ret = get_filters_size(ib_spec, kern_spec,
1852 				       &ib_filter_size, &kern_filter_size,
1853 				       IBV_FLOW_SPEC_IPV4_EXT);
1854 		if (ret)
1855 			return ret;
1856 
1857 		kern_spec->hdr.type = IBV_FLOW_SPEC_IPV4 |
1858 				     (IBV_FLOW_SPEC_INNER & ib_spec->hdr.type);
1859 		kern_spec->ipv4_ext.size = sizeof(struct
1860 						  ibv_kern_spec_ipv4_ext);
1861 		memcpy(&kern_spec->ipv4_ext.val, &ib_spec->ipv4_ext.val,
1862 		       kern_filter_size);
1863 		memcpy(&kern_spec->ipv4_ext.mask, (void *)&ib_spec->ipv4_ext.val
1864 		       + ib_filter_size, kern_filter_size);
1865 		break;
1866 	case IBV_FLOW_SPEC_IPV6:
1867 	case IBV_FLOW_SPEC_IPV6 | IBV_FLOW_SPEC_INNER:
1868 		ret = get_filters_size(ib_spec, kern_spec,
1869 				       &ib_filter_size, &kern_filter_size,
1870 				       IBV_FLOW_SPEC_IPV6);
1871 		if (ret)
1872 			return ret;
1873 
1874 		kern_spec->ipv6.size = sizeof(struct ibv_kern_spec_ipv6);
1875 		memcpy(&kern_spec->ipv6.val, &ib_spec->ipv6.val,
1876 		       kern_filter_size);
1877 		memcpy(&kern_spec->ipv6.mask, (void *)&ib_spec->ipv6.val
1878 		       + ib_filter_size, kern_filter_size);
1879 		break;
1880 	case IBV_FLOW_SPEC_TCP:
1881 	case IBV_FLOW_SPEC_UDP:
1882 	case IBV_FLOW_SPEC_TCP | IBV_FLOW_SPEC_INNER:
1883 	case IBV_FLOW_SPEC_UDP | IBV_FLOW_SPEC_INNER:
1884 		kern_spec->tcp_udp.size = sizeof(struct ibv_kern_spec_tcp_udp);
1885 		memcpy(&kern_spec->tcp_udp.val, &ib_spec->tcp_udp.val,
1886 		       sizeof(struct ibv_flow_ipv4_filter));
1887 		memcpy(&kern_spec->tcp_udp.mask, &ib_spec->tcp_udp.mask,
1888 		       sizeof(struct ibv_flow_tcp_udp_filter));
1889 		break;
1890 	case IBV_FLOW_SPEC_VXLAN_TUNNEL:
1891 		ret = get_filters_size(ib_spec, kern_spec,
1892 				       &ib_filter_size, &kern_filter_size,
1893 				       IBV_FLOW_SPEC_VXLAN_TUNNEL);
1894 		if (ret)
1895 			return ret;
1896 
1897 		kern_spec->tunnel.size = sizeof(struct ibv_kern_spec_tunnel);
1898 		memcpy(&kern_spec->tunnel.val, &ib_spec->tunnel.val,
1899 		       kern_filter_size);
1900 		memcpy(&kern_spec->tunnel.mask, (void *)&ib_spec->tunnel.val
1901 		       + ib_filter_size, kern_filter_size);
1902 		break;
1903 	case IBV_FLOW_SPEC_ACTION_TAG:
1904 		kern_spec->flow_tag.size =
1905 			sizeof(struct ibv_kern_spec_action_tag);
1906 		kern_spec->flow_tag.tag_id = ib_spec->flow_tag.tag_id;
1907 		break;
1908 	case IBV_FLOW_SPEC_ACTION_DROP:
1909 		kern_spec->drop.size = sizeof(struct ibv_kern_spec_action_drop);
1910 		break;
1911 	default:
1912 		return EINVAL;
1913 	}
1914 	return 0;
1915 }
1916 
1917 struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp,
1918 				     struct ibv_flow_attr *flow_attr)
1919 {
1920 	struct ibv_create_flow *cmd;
1921 	struct ibv_create_flow_resp resp;
1922 	struct ibv_flow *flow_id;
1923 	size_t cmd_size;
1924 	size_t written_size;
1925 	int i, err;
1926 	void *kern_spec;
1927 	void *ib_spec;
1928 
1929 	cmd_size = sizeof(*cmd) + (flow_attr->num_of_specs *
1930 				  sizeof(struct ibv_kern_spec));
1931 	cmd = alloca(cmd_size);
1932 	flow_id = malloc(sizeof(*flow_id));
1933 	if (!flow_id)
1934 		return NULL;
1935 	memset(cmd, 0, cmd_size);
1936 
1937 	cmd->qp_handle = qp->handle;
1938 
1939 	cmd->flow_attr.type = flow_attr->type;
1940 	cmd->flow_attr.priority = flow_attr->priority;
1941 	cmd->flow_attr.num_of_specs = flow_attr->num_of_specs;
1942 	cmd->flow_attr.port = flow_attr->port;
1943 	cmd->flow_attr.flags = flow_attr->flags;
1944 
1945 	kern_spec = cmd + 1;
1946 	ib_spec = flow_attr + 1;
1947 	for (i = 0; i < flow_attr->num_of_specs; i++) {
1948 		err = ib_spec_to_kern_spec(ib_spec, kern_spec);
1949 		if (err) {
1950 			errno = err;
1951 			goto err;
1952 		}
1953 		cmd->flow_attr.size +=
1954 			((struct ibv_kern_spec *)kern_spec)->hdr.size;
1955 		kern_spec += ((struct ibv_kern_spec *)kern_spec)->hdr.size;
1956 		ib_spec += ((struct ibv_flow_spec *)ib_spec)->hdr.size;
1957 	}
1958 
1959 	written_size = sizeof(*cmd) + cmd->flow_attr.size;
1960 	IBV_INIT_CMD_RESP_EX_VCMD(cmd, written_size, written_size, CREATE_FLOW,
1961 				  &resp, sizeof(resp));
1962 	if (write(qp->context->cmd_fd, cmd, written_size) != written_size)
1963 		goto err;
1964 
1965 	(void) VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp));
1966 
1967 	flow_id->context = qp->context;
1968 	flow_id->handle = resp.flow_handle;
1969 	return flow_id;
1970 err:
1971 	free(flow_id);
1972 	return NULL;
1973 }
1974 
1975 int ibv_cmd_destroy_flow(struct ibv_flow *flow_id)
1976 {
1977 	struct ibv_destroy_flow cmd;
1978 	int ret = 0;
1979 
1980 	memset(&cmd, 0, sizeof(cmd));
1981 	IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_FLOW);
1982 	cmd.flow_handle = flow_id->handle;
1983 
1984 	if (write(flow_id->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
1985 		ret = errno;
1986 	free(flow_id);
1987 	return ret;
1988 }
1989 
1990 int ibv_cmd_create_wq(struct ibv_context *context,
1991 		      struct ibv_wq_init_attr *wq_init_attr,
1992 		      struct ibv_wq *wq,
1993 		      struct ibv_create_wq *cmd,
1994 		      size_t cmd_core_size,
1995 		      size_t cmd_size,
1996 		      struct ibv_create_wq_resp *resp,
1997 		      size_t resp_core_size,
1998 		      size_t resp_size)
1999 {
2000 	int err;
2001 
2002 	if (wq_init_attr->comp_mask >= IBV_WQ_INIT_ATTR_RESERVED)
2003 		return EINVAL;
2004 
2005 	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
2006 			       CREATE_WQ, resp,
2007 			       resp_core_size, resp_size);
2008 
2009 	cmd->user_handle   = (uintptr_t)wq;
2010 	cmd->pd_handle           = wq_init_attr->pd->handle;
2011 	cmd->cq_handle   = wq_init_attr->cq->handle;
2012 	cmd->wq_type = wq_init_attr->wq_type;
2013 	cmd->max_sge = wq_init_attr->max_sge;
2014 	cmd->max_wr = wq_init_attr->max_wr;
2015 	cmd->comp_mask = 0;
2016 
2017 	if (cmd_core_size >= offsetof(struct ibv_create_wq, create_flags) +
2018 	    sizeof(cmd->create_flags)) {
2019 		if (wq_init_attr->comp_mask & IBV_WQ_INIT_ATTR_FLAGS) {
2020 			if (wq_init_attr->create_flags & ~(IBV_WQ_FLAGS_RESERVED - 1))
2021 				return EOPNOTSUPP;
2022 			cmd->create_flags = wq_init_attr->create_flags;
2023 		}
2024 	}
2025 
2026 	err = write(context->cmd_fd, cmd, cmd_size);
2027 	if (err != cmd_size)
2028 		return errno;
2029 
2030 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
2031 
2032 	if (resp->response_length < resp_core_size)
2033 		return EINVAL;
2034 
2035 	wq->handle  = resp->wq_handle;
2036 	wq_init_attr->max_wr = resp->max_wr;
2037 	wq_init_attr->max_sge = resp->max_sge;
2038 	wq->wq_num = resp->wqn;
2039 	wq->context = context;
2040 	wq->cq = wq_init_attr->cq;
2041 	wq->pd = wq_init_attr->pd;
2042 	wq->wq_type = wq_init_attr->wq_type;
2043 
2044 	return 0;
2045 }
2046 
2047 int ibv_cmd_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr,
2048 		      struct ibv_modify_wq *cmd, size_t cmd_core_size,
2049 		      size_t cmd_size)
2050 {
2051 	if (attr->attr_mask >= IBV_WQ_ATTR_RESERVED)
2052 		return EINVAL;
2053 
2054 	memset(cmd, 0, cmd_core_size);
2055 	IBV_INIT_CMD_EX(cmd, cmd_size, MODIFY_WQ);
2056 
2057 	cmd->curr_wq_state = attr->curr_wq_state;
2058 	cmd->wq_state = attr->wq_state;
2059 	if (cmd_core_size >= offsetof(struct ibv_modify_wq, flags_mask) +
2060 	    sizeof(cmd->flags_mask)) {
2061 		if (attr->attr_mask & IBV_WQ_ATTR_FLAGS) {
2062 			if (attr->flags_mask & ~(IBV_WQ_FLAGS_RESERVED - 1))
2063 				return EOPNOTSUPP;
2064 			cmd->flags = attr->flags;
2065 			cmd->flags_mask = attr->flags_mask;
2066 		}
2067 	}
2068 	cmd->wq_handle = wq->handle;
2069 	cmd->attr_mask = attr->attr_mask;
2070 
2071 	if (write(wq->context->cmd_fd, cmd, cmd_size) != cmd_size)
2072 		return errno;
2073 
2074 	if (attr->attr_mask & IBV_WQ_ATTR_STATE)
2075 		wq->state = attr->wq_state;
2076 
2077 	return 0;
2078 }
2079 
2080 int ibv_cmd_destroy_wq(struct ibv_wq *wq)
2081 {
2082 	struct ibv_destroy_wq cmd;
2083 	struct ibv_destroy_wq_resp resp;
2084 	int ret = 0;
2085 
2086 	memset(&cmd, 0, sizeof(cmd));
2087 	memset(&resp, 0, sizeof(resp));
2088 
2089 	IBV_INIT_CMD_RESP_EX(&cmd, sizeof(cmd), DESTROY_WQ, &resp, sizeof(resp));
2090 	cmd.wq_handle = wq->handle;
2091 
2092 	if (write(wq->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
2093 		return errno;
2094 
2095 	if (resp.response_length < sizeof(resp))
2096 		return EINVAL;
2097 
2098 	pthread_mutex_lock(&wq->mutex);
2099 	while (wq->events_completed != resp.events_reported)
2100 		pthread_cond_wait(&wq->cond, &wq->mutex);
2101 	pthread_mutex_unlock(&wq->mutex);
2102 
2103 	return ret;
2104 }
2105 
2106 int ibv_cmd_create_rwq_ind_table(struct ibv_context *context,
2107 				 struct ibv_rwq_ind_table_init_attr *init_attr,
2108 				 struct ibv_rwq_ind_table *rwq_ind_table,
2109 				 struct ibv_create_rwq_ind_table *cmd,
2110 				 size_t cmd_core_size,
2111 				 size_t cmd_size,
2112 				 struct ibv_create_rwq_ind_table_resp *resp,
2113 				 size_t resp_core_size,
2114 				 size_t resp_size)
2115 {
2116 	int err, i;
2117 	uint32_t required_tbl_size, alloc_tbl_size;
2118 	uint32_t *tbl_start;
2119 	int num_tbl_entries;
2120 
2121 	if (init_attr->comp_mask >= IBV_CREATE_IND_TABLE_RESERVED)
2122 		return EINVAL;
2123 
2124 	alloc_tbl_size = cmd_core_size - sizeof(*cmd);
2125 	num_tbl_entries = 1 << init_attr->log_ind_tbl_size;
2126 
2127 	/* Data must be u64 aligned */
2128 	required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ?
2129 			sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t));
2130 
2131 	if (alloc_tbl_size < required_tbl_size)
2132 		return EINVAL;
2133 
2134 	tbl_start = (uint32_t *)((uint8_t *)cmd + sizeof(*cmd));
2135 	for (i = 0; i < num_tbl_entries; i++)
2136 		tbl_start[i] = init_attr->ind_tbl[i]->handle;
2137 
2138 	IBV_INIT_CMD_RESP_EX_V(cmd, cmd_core_size, cmd_size,
2139 			       CREATE_RWQ_IND_TBL, resp,
2140 			       resp_core_size, resp_size);
2141 	cmd->log_ind_tbl_size = init_attr->log_ind_tbl_size;
2142 	cmd->comp_mask = 0;
2143 
2144 	err = write(context->cmd_fd, cmd, cmd_size);
2145 	if (err != cmd_size)
2146 		return errno;
2147 
2148 	(void) VALGRIND_MAKE_MEM_DEFINED(resp, resp_size);
2149 
2150 	if (resp->response_length < resp_core_size)
2151 		return EINVAL;
2152 
2153 	rwq_ind_table->ind_tbl_handle = resp->ind_tbl_handle;
2154 	rwq_ind_table->ind_tbl_num = resp->ind_tbl_num;
2155 	rwq_ind_table->context = context;
2156 	return 0;
2157 }
2158 
2159 int ibv_cmd_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table)
2160 {
2161 	struct ibv_destroy_rwq_ind_table cmd;
2162 	int ret = 0;
2163 
2164 	memset(&cmd, 0, sizeof(cmd));
2165 	IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_RWQ_IND_TBL);
2166 	cmd.ind_tbl_handle = rwq_ind_table->ind_tbl_handle;
2167 
2168 	if (write(rwq_ind_table->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd))
2169 		ret = errno;
2170 
2171 	return ret;
2172 }
2173