xref: /freebsd/sys/dev/mlx5/mlx5_ib/mlx5_ib_devx.c (revision 1f1e2261e341e6ca6862f82261066ef1705f0a7a)
1 /*-
2  * Copyright (c) 2018-2020, Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "opt_rss.h"
29 #include "opt_ratelimit.h"
30 
31 #include <rdma/ib_user_verbs.h>
32 #include <rdma/ib_verbs.h>
33 #include <rdma/uverbs_types.h>
34 #include <rdma/uverbs_ioctl.h>
35 #include <rdma/mlx5_user_ioctl_cmds.h>
36 #include <rdma/mlx5_user_ioctl_verbs.h>
37 #include <rdma/ib_umem.h>
38 #include <rdma/uverbs_std_types.h>
39 #include <dev/mlx5/driver.h>
40 #include <dev/mlx5/fs.h>
41 #include <dev/mlx5/mlx5_ib/mlx5_ib.h>
42 
43 #include <sys/priv.h>
44 
45 #include <linux/xarray.h>
46 #include <linux/rculist.h>
47 #include <linux/srcu.h>
48 #include <linux/file.h>
49 #include <linux/poll.h>
50 #include <linux/wait.h>
51 
52 #define UVERBS_MODULE_NAME mlx5_ib
53 #include <rdma/uverbs_named_ioctl.h>
54 
/*
 * Forward declaration only; the definition is not part of this section of
 * the file.
 */
static void dispatch_event_fd(struct list_head *fd_list, const void *data);
56 
/*
 * Bit flags describing special kinds of DEVX objects.
 * NOTE(review): presumably stored in devx_obj->flags; the users are outside
 * this section.  Bit 0 is intentionally left unused by the values below.
 */
enum devx_obj_flags {
	DEVX_OBJ_FLAGS_DCT = (1 << 1),
	DEVX_OBJ_FLAGS_CQ = (1 << 2),
};
61 
62 struct devx_async_data {
63 	struct mlx5_ib_dev *mdev;
64 	struct list_head list;
65 	struct devx_async_cmd_event_file *ev_file;
66 	struct mlx5_async_work cb_work;
67 	u16 cmd_out_len;
68 	/* must be last field in this structure */
69 	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
70 };
71 
72 struct devx_async_event_data {
73 	struct list_head list; /* headed in ev_file->event_list */
74 	struct mlx5_ib_uapi_devx_async_event_hdr hdr;
75 };
76 
77 /* first level XA value data structure */
78 struct devx_event {
79 	struct xarray object_ids; /* second XA level, Key = object id */
80 	struct list_head unaffiliated_list;
81 };
82 
83 /* second level XA value data structure */
84 struct devx_obj_event {
85 	struct rcu_head rcu;
86 	struct list_head obj_sub_list;
87 };
88 
89 struct devx_event_subscription {
90 	struct list_head file_list; /* headed in ev_file->
91 				     * subscribed_events_list
92 				     */
93 	struct list_head xa_list; /* headed in devx_event->unaffiliated_list or
94 				   * devx_obj_event->obj_sub_list
95 				   */
96 	struct list_head obj_list; /* headed in devx_object */
97 	struct list_head event_list; /* headed in ev_file->event_list or in
98 				      * temp list via subscription
99 				      */
100 
101 	u8 is_cleaned:1;
102 	u32 xa_key_level1;
103 	u32 xa_key_level2;
104 	struct rcu_head	rcu;
105 	u64 cookie;
106 	struct devx_async_event_file *ev_file;
107 	struct fd eventfd;
108 };
109 
110 struct devx_async_event_file {
111 	struct ib_uobject uobj;
112 	/* Head of events that are subscribed to this FD */
113 	struct list_head subscribed_events_list;
114 	spinlock_t lock;
115 	wait_queue_head_t poll_wait;
116 	struct list_head event_list;
117 	struct mlx5_ib_dev *dev;
118 	u8 omit_data:1;
119 	u8 is_overflow_err:1;
120 	u8 is_destroyed:1;
121 };
122 
123 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
124 struct devx_obj {
125 	struct mlx5_ib_dev	*ib_dev;
126 	u64			obj_id;
127 	u32			dinlen; /* destroy inbox length */
128 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
129 	u32			flags;
130 	union {
131 		struct mlx5_ib_devx_mr	devx_mr;
132 		struct mlx5_core_dct	core_dct;
133 		struct mlx5_core_cq	core_cq;
134 		u32			flow_counter_bulk_size;
135 	};
136 	struct list_head event_sub; /* holds devx_event_subscription entries */
137 };
138 
139 struct devx_umem {
140 	struct mlx5_core_dev		*mdev;
141 	struct ib_umem			*umem;
142 	u32				page_offset;
143 	int				page_shift;
144 	int				ncont;
145 	u32				dinlen;
146 	u32				dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
147 };
148 
149 struct devx_umem_reg_cmd {
150 	void				*in;
151 	u32				inlen;
152 	u32				out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
153 };
154 
/*
 * Resolve the mlx5 user context for an ioctl attribute bundle.
 *
 * The value from ib_uverbs_get_ucontext() is converted unconditionally;
 * callers in this file test the result with IS_ERR().
 */
static struct mlx5_ib_ucontext *
devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
	struct ib_ucontext *ibuctx = ib_uverbs_get_ucontext(attrs);

	return to_mucontext(ibuctx);
}
160 
161 int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
162 {
163 	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
164 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
165 	void *uctx;
166 	int err;
167 	u16 uid;
168 	u32 cap = 0;
169 
170 	/* 0 means not supported */
171 	if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
172 		return -EINVAL;
173 
174 	uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
175 	if (is_user && priv_check(curthread, PRIV_NET_RAW) == 0 &&
176 	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
177 		cap |= MLX5_UCTX_CAP_RAW_TX;
178 	if (is_user && priv_check(curthread, PRIV_DRIVER) == 0 &&
179 	    (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
180 	     MLX5_UCTX_CAP_INTERNAL_DEV_RES))
181 		cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
182 
183 	MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
184 	MLX5_SET(uctx, uctx, cap, cap);
185 
186 	err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
187 	if (err)
188 		return err;
189 
190 	uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
191 	return uid;
192 }
193 
194 void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
195 {
196 	u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
197 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
198 
199 	MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
200 	MLX5_SET(destroy_uctx_in, in, uid, uid);
201 
202 	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
203 }
204 
205 bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
206 {
207 	struct devx_obj *devx_obj = obj;
208 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
209 
210 	switch (opcode) {
211 	case MLX5_CMD_OP_DESTROY_TIR:
212 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
213 		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
214 				    obj_id);
215 		return true;
216 
217 	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
218 		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
219 		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
220 				    table_id);
221 		return true;
222 	default:
223 		return false;
224 	}
225 }
226 
227 bool mlx5_ib_devx_is_flow_counter(void *obj, u32 offset, u32 *counter_id)
228 {
229 	struct devx_obj *devx_obj = obj;
230 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
231 
232 	if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
233 
234 		if (offset && offset >= devx_obj->flow_counter_bulk_size)
235 			return false;
236 
237 		*counter_id = MLX5_GET(dealloc_flow_counter_in,
238 				       devx_obj->dinbox,
239 				       flow_counter_id);
240 		*counter_id += offset;
241 		return true;
242 	}
243 
244 	return false;
245 }
246 
247 static bool is_legacy_unaffiliated_event_num(u16 event_num)
248 {
249 	switch (event_num) {
250 	case MLX5_EVENT_TYPE_PORT_CHANGE:
251 		return true;
252 	default:
253 		return false;
254 	}
255 }
256 
257 static bool is_legacy_obj_event_num(u16 event_num)
258 {
259 	switch (event_num) {
260 	case MLX5_EVENT_TYPE_PATH_MIG:
261 	case MLX5_EVENT_TYPE_COMM_EST:
262 	case MLX5_EVENT_TYPE_SQ_DRAINED:
263 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
264 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
265 	case MLX5_EVENT_TYPE_CQ_ERROR:
266 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
267 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
268 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
269 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
270 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
271 	case MLX5_EVENT_TYPE_DCT_DRAINED:
272 	case MLX5_EVENT_TYPE_COMP:
273 	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
274 	case MLX5_EVENT_TYPE_XRQ_ERROR:
275 		return true;
276 	default:
277 		return false;
278 	}
279 }
280 
281 static u16 get_legacy_obj_type(u16 opcode)
282 {
283 	switch (opcode) {
284 	case MLX5_CMD_OP_CREATE_RQ:
285 		return MLX5_EVENT_QUEUE_TYPE_RQ;
286 	case MLX5_CMD_OP_CREATE_QP:
287 		return MLX5_EVENT_QUEUE_TYPE_QP;
288 	case MLX5_CMD_OP_CREATE_SQ:
289 		return MLX5_EVENT_QUEUE_TYPE_SQ;
290 	case MLX5_CMD_OP_CREATE_DCT:
291 		return MLX5_EVENT_QUEUE_TYPE_DCT;
292 	default:
293 		return 0;
294 	}
295 }
296 
297 static u16 get_dec_obj_type(struct devx_obj *obj, u16 event_num)
298 {
299 	u16 opcode;
300 
301 	opcode = (obj->obj_id >> 32) & 0xffff;
302 
303 	if (is_legacy_obj_event_num(event_num))
304 		return get_legacy_obj_type(opcode);
305 
306 	switch (opcode) {
307 	case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
308 		return (obj->obj_id >> 48);
309 	case MLX5_CMD_OP_CREATE_RQ:
310 		return MLX5_OBJ_TYPE_RQ;
311 	case MLX5_CMD_OP_CREATE_QP:
312 		return MLX5_OBJ_TYPE_QP;
313 	case MLX5_CMD_OP_CREATE_SQ:
314 		return MLX5_OBJ_TYPE_SQ;
315 	case MLX5_CMD_OP_CREATE_DCT:
316 		return MLX5_OBJ_TYPE_DCT;
317 	case MLX5_CMD_OP_CREATE_TIR:
318 		return MLX5_OBJ_TYPE_TIR;
319 	case MLX5_CMD_OP_CREATE_TIS:
320 		return MLX5_OBJ_TYPE_TIS;
321 	case MLX5_CMD_OP_CREATE_PSV:
322 		return MLX5_OBJ_TYPE_PSV;
323 	case MLX5_OBJ_TYPE_MKEY:
324 		return MLX5_OBJ_TYPE_MKEY;
325 	case MLX5_CMD_OP_CREATE_RMP:
326 		return MLX5_OBJ_TYPE_RMP;
327 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
328 		return MLX5_OBJ_TYPE_XRC_SRQ;
329 	case MLX5_CMD_OP_CREATE_XRQ:
330 		return MLX5_OBJ_TYPE_XRQ;
331 	case MLX5_CMD_OP_CREATE_RQT:
332 		return MLX5_OBJ_TYPE_RQT;
333 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
334 		return MLX5_OBJ_TYPE_FLOW_COUNTER;
335 	case MLX5_CMD_OP_CREATE_CQ:
336 		return MLX5_OBJ_TYPE_CQ;
337 	default:
338 		return 0;
339 	}
340 }
341 
342 static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
343 {
344 	switch (event_type) {
345 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
346 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
347 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
348 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
349 	case MLX5_EVENT_TYPE_PATH_MIG:
350 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
351 	case MLX5_EVENT_TYPE_COMM_EST:
352 	case MLX5_EVENT_TYPE_SQ_DRAINED:
353 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
354 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
355 		return eqe->data.qp_srq.type;
356 	case MLX5_EVENT_TYPE_CQ_ERROR:
357 	case MLX5_EVENT_TYPE_XRQ_ERROR:
358 		return 0;
359 	case MLX5_EVENT_TYPE_DCT_DRAINED:
360 	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
361 		return MLX5_EVENT_QUEUE_TYPE_DCT;
362 	default:
363 		return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
364 	}
365 }
366 
367 static u32 get_dec_obj_id(u64 obj_id)
368 {
369 	return (obj_id & 0xffffffff);
370 }
371 
372 /*
373  * As the obj_id in the firmware is not globally unique the object type
374  * must be considered upon checking for a valid object id.
375  * For that the opcode of the creator command is encoded as part of the obj_id.
376  */
377 static u64 get_enc_obj_id(u32 opcode, u32 obj_id)
378 {
379 	return ((u64)opcode << 32) | obj_id;
380 }
381 
382 static u64 devx_get_obj_id(const void *in)
383 {
384 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
385 	u64 obj_id;
386 
387 	switch (opcode) {
388 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJ:
389 	case MLX5_CMD_OP_QUERY_GENERAL_OBJ:
390 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_GENERAL_OBJ |
391 					MLX5_GET(general_obj_in_cmd_hdr, in,
392 						 obj_type) << 16,
393 					MLX5_GET(general_obj_in_cmd_hdr, in,
394 						 obj_id));
395 		break;
396 	case MLX5_CMD_OP_QUERY_MKEY:
397 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_MKEY,
398 					MLX5_GET(query_mkey_in, in,
399 						 mkey_index));
400 		break;
401 	case MLX5_CMD_OP_QUERY_CQ:
402 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
403 					MLX5_GET(query_cq_in, in, cqn));
404 		break;
405 	case MLX5_CMD_OP_MODIFY_CQ:
406 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
407 					MLX5_GET(modify_cq_in, in, cqn));
408 		break;
409 	case MLX5_CMD_OP_QUERY_SQ:
410 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
411 					MLX5_GET(query_sq_in, in, sqn));
412 		break;
413 	case MLX5_CMD_OP_MODIFY_SQ:
414 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
415 					MLX5_GET(modify_sq_in, in, sqn));
416 		break;
417 	case MLX5_CMD_OP_QUERY_RQ:
418 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
419 					MLX5_GET(query_rq_in, in, rqn));
420 		break;
421 	case MLX5_CMD_OP_MODIFY_RQ:
422 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
423 					MLX5_GET(modify_rq_in, in, rqn));
424 		break;
425 	case MLX5_CMD_OP_QUERY_RMP:
426 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
427 					MLX5_GET(query_rmp_in, in, rmpn));
428 		break;
429 	case MLX5_CMD_OP_MODIFY_RMP:
430 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RMP,
431 					MLX5_GET(modify_rmp_in, in, rmpn));
432 		break;
433 	case MLX5_CMD_OP_QUERY_RQT:
434 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
435 					MLX5_GET(query_rqt_in, in, rqtn));
436 		break;
437 	case MLX5_CMD_OP_MODIFY_RQT:
438 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
439 					MLX5_GET(modify_rqt_in, in, rqtn));
440 		break;
441 	case MLX5_CMD_OP_QUERY_TIR:
442 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
443 					MLX5_GET(query_tir_in, in, tirn));
444 		break;
445 	case MLX5_CMD_OP_MODIFY_TIR:
446 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
447 					MLX5_GET(modify_tir_in, in, tirn));
448 		break;
449 	case MLX5_CMD_OP_QUERY_TIS:
450 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
451 					MLX5_GET(query_tis_in, in, tisn));
452 		break;
453 	case MLX5_CMD_OP_MODIFY_TIS:
454 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
455 					MLX5_GET(modify_tis_in, in, tisn));
456 		break;
457 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
458 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
459 					MLX5_GET(query_flow_table_in, in,
460 						 table_id));
461 		break;
462 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
463 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_TABLE,
464 					MLX5_GET(modify_flow_table_in, in,
465 						 table_id));
466 		break;
467 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
468 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_FLOW_GROUP,
469 					MLX5_GET(query_flow_group_in, in,
470 						 group_id));
471 		break;
472 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
473 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
474 					MLX5_GET(query_fte_in, in,
475 						 flow_index));
476 		break;
477 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
478 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY,
479 					MLX5_GET(set_fte_in, in, flow_index));
480 		break;
481 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
482 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_Q_COUNTER,
483 					MLX5_GET(query_q_counter_in, in,
484 						 counter_set_id));
485 		break;
486 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
487 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_FLOW_COUNTER,
488 					MLX5_GET(query_flow_counter_in, in,
489 						 flow_counter_id));
490 		break;
491 	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
492 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT,
493 					MLX5_GET(general_obj_in_cmd_hdr, in,
494 						 obj_id));
495 		break;
496 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
497 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
498 					MLX5_GET(query_scheduling_element_in,
499 						 in, scheduling_element_id));
500 		break;
501 	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
502 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT,
503 					MLX5_GET(modify_scheduling_element_in,
504 						 in, scheduling_element_id));
505 		break;
506 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
507 		obj_id = get_enc_obj_id(MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT,
508 					MLX5_GET(add_vxlan_udp_dport_in, in,
509 						 vxlan_udp_port));
510 		break;
511 	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
512 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
513 					MLX5_GET(query_l2_table_entry_in, in,
514 						 table_index));
515 		break;
516 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
517 		obj_id = get_enc_obj_id(MLX5_CMD_OP_SET_L2_TABLE_ENTRY,
518 					MLX5_GET(set_l2_table_entry_in, in,
519 						 table_index));
520 		break;
521 	case MLX5_CMD_OP_QUERY_QP:
522 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
523 					MLX5_GET(query_qp_in, in, qpn));
524 		break;
525 	case MLX5_CMD_OP_RST2INIT_QP:
526 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
527 					MLX5_GET(rst2init_qp_in, in, qpn));
528 		break;
529 	case MLX5_CMD_OP_INIT2RTR_QP:
530 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
531 					MLX5_GET(init2rtr_qp_in, in, qpn));
532 		break;
533 	case MLX5_CMD_OP_RTR2RTS_QP:
534 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
535 					MLX5_GET(rtr2rts_qp_in, in, qpn));
536 		break;
537 	case MLX5_CMD_OP_RTS2RTS_QP:
538 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
539 					MLX5_GET(rts2rts_qp_in, in, qpn));
540 		break;
541 	case MLX5_CMD_OP_SQERR2RTS_QP:
542 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
543 					MLX5_GET(sqerr2rts_qp_in, in, qpn));
544 		break;
545 	case MLX5_CMD_OP_2ERR_QP:
546 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
547 					MLX5_GET(qp_2err_in, in, qpn));
548 		break;
549 	case MLX5_CMD_OP_2RST_QP:
550 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
551 					MLX5_GET(qp_2rst_in, in, qpn));
552 		break;
553 	case MLX5_CMD_OP_QUERY_DCT:
554 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
555 					MLX5_GET(query_dct_in, in, dctn));
556 		break;
557 	case MLX5_CMD_OP_QUERY_XRQ:
558 	case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
559 	case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
560 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
561 					MLX5_GET(query_xrq_in, in, xrqn));
562 		break;
563 	case MLX5_CMD_OP_QUERY_XRC_SRQ:
564 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
565 					MLX5_GET(query_xrc_srq_in, in,
566 						 xrc_srqn));
567 		break;
568 	case MLX5_CMD_OP_ARM_XRC_SRQ:
569 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRC_SRQ,
570 					MLX5_GET(arm_xrc_srq_in, in, xrc_srqn));
571 		break;
572 	case MLX5_CMD_OP_QUERY_SRQ:
573 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SRQ,
574 					MLX5_GET(query_srq_in, in, srqn));
575 		break;
576 	case MLX5_CMD_OP_ARM_RQ:
577 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
578 					MLX5_GET(arm_rq_in, in, srq_number));
579 		break;
580 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
581 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
582 					MLX5_GET(drain_dct_in, in, dctn));
583 		break;
584 	case MLX5_CMD_OP_ARM_XRQ:
585 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
586 	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
587 	case MLX5_CMD_OP_MODIFY_XRQ:
588 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
589 					MLX5_GET(arm_xrq_in, in, xrqn));
590 		break;
591 	case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
592 		obj_id = get_enc_obj_id
593 				(MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
594 				 MLX5_GET(query_packet_reformat_context_in,
595 					  in, packet_reformat_id));
596 		break;
597 	default:
598 		obj_id = 0;
599 	}
600 
601 	return obj_id;
602 }
603 
604 static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
605 				 struct ib_uobject *uobj, const void *in)
606 {
607 	struct mlx5_ib_dev *dev = mlx5_udata_to_mdev(&attrs->driver_udata);
608 	u64 obj_id = devx_get_obj_id(in);
609 
610 	if (!obj_id)
611 		return false;
612 
613 	switch (uobj_get_object_id(uobj)) {
614 	case UVERBS_OBJECT_CQ:
615 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
616 				      to_mcq(uobj->object)->mcq.cqn) ==
617 				      obj_id;
618 
619 	case UVERBS_OBJECT_SRQ:
620 	{
621 		struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
622 		u16 opcode;
623 
624 		switch (srq->common.res) {
625 		case MLX5_RES_XSRQ:
626 			opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
627 			break;
628 		case MLX5_RES_XRQ:
629 			opcode = MLX5_CMD_OP_CREATE_XRQ;
630 			break;
631 		default:
632 			if (!dev->mdev->issi)
633 				opcode = MLX5_CMD_OP_CREATE_SRQ;
634 			else
635 				opcode = MLX5_CMD_OP_CREATE_RMP;
636 		}
637 
638 		return get_enc_obj_id(opcode,
639 				      to_msrq(uobj->object)->msrq.srqn) ==
640 				      obj_id;
641 	}
642 
643 	case UVERBS_OBJECT_QP:
644 	{
645 		struct mlx5_ib_qp *qp = to_mqp(uobj->object);
646 		enum ib_qp_type	qp_type = qp->ibqp.qp_type;
647 
648 		if (qp_type == IB_QPT_RAW_PACKET ||
649 		    (qp->flags & MLX5_IB_QP_UNDERLAY)) {
650 			struct mlx5_ib_raw_packet_qp *raw_packet_qp =
651 							 &qp->raw_packet_qp;
652 			struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
653 			struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
654 
655 			return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
656 					       rq->base.mqp.qpn) == obj_id ||
657 				get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
658 					       sq->base.mqp.qpn) == obj_id ||
659 				get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
660 					       rq->tirn) == obj_id ||
661 				get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
662 					       sq->tisn) == obj_id);
663 		}
664 
665 		if (qp_type == MLX5_IB_QPT_DCT)
666 			return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
667 					      qp->dct.mdct.dctn) == obj_id;
668 
669 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
670 				      qp->ibqp.qp_num) == obj_id;
671 	}
672 
673 	case UVERBS_OBJECT_WQ:
674 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
675 				      to_mrwq(uobj->object)->core_qp.qpn) ==
676 				      obj_id;
677 
678 	case UVERBS_OBJECT_RWQ_IND_TBL:
679 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
680 				      to_mrwq_ind_table(uobj->object)->rqtn) ==
681 				      obj_id;
682 
683 	case MLX5_IB_OBJECT_DEVX_OBJ:
684 		return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
685 
686 	default:
687 		return false;
688 	}
689 }
690 
691 static void devx_set_umem_valid(const void *in)
692 {
693 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
694 
695 	switch (opcode) {
696 	case MLX5_CMD_OP_CREATE_MKEY:
697 		MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
698 		break;
699 	case MLX5_CMD_OP_CREATE_CQ:
700 	{
701 		void *cqc;
702 
703 		MLX5_SET(create_cq_in, in, cq_umem_valid, 1);
704 		cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
705 		MLX5_SET(cqc, cqc, dbr_umem_valid, 1);
706 		break;
707 	}
708 	case MLX5_CMD_OP_CREATE_QP:
709 	{
710 		void *qpc;
711 
712 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
713 		MLX5_SET(qpc, qpc, dbr_umem_valid, 1);
714 		MLX5_SET(create_qp_in, in, wq_umem_valid, 1);
715 		break;
716 	}
717 
718 	case MLX5_CMD_OP_CREATE_RQ:
719 	{
720 		void *rqc, *wq;
721 
722 		rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
723 		wq  = MLX5_ADDR_OF(rqc, rqc, wq);
724 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
725 		MLX5_SET(wq, wq, wq_umem_valid, 1);
726 		break;
727 	}
728 
729 	case MLX5_CMD_OP_CREATE_SQ:
730 	{
731 		void *sqc, *wq;
732 
733 		sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
734 		wq = MLX5_ADDR_OF(sqc, sqc, wq);
735 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
736 		MLX5_SET(wq, wq, wq_umem_valid, 1);
737 		break;
738 	}
739 
740 	case MLX5_CMD_OP_MODIFY_CQ:
741 		MLX5_SET(modify_cq_in, in, cq_umem_valid, 1);
742 		break;
743 
744 	case MLX5_CMD_OP_CREATE_RMP:
745 	{
746 		void *rmpc, *wq;
747 
748 		rmpc = MLX5_ADDR_OF(create_rmp_in, in, ctx);
749 		wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
750 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
751 		MLX5_SET(wq, wq, wq_umem_valid, 1);
752 		break;
753 	}
754 
755 	case MLX5_CMD_OP_CREATE_XRQ:
756 	{
757 		void *xrqc, *wq;
758 
759 		xrqc = MLX5_ADDR_OF(create_xrq_in, in, xrq_context);
760 		wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
761 		MLX5_SET(wq, wq, dbr_umem_valid, 1);
762 		MLX5_SET(wq, wq, wq_umem_valid, 1);
763 		break;
764 	}
765 
766 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
767 	{
768 		void *xrc_srqc;
769 
770 		MLX5_SET(create_xrc_srq_in, in, xrc_srq_umem_valid, 1);
771 		xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, in,
772 					xrc_srq_context_entry);
773 		MLX5_SET(xrc_srqc, xrc_srqc, dbr_umem_valid, 1);
774 		break;
775 	}
776 
777 	default:
778 		return;
779 	}
780 }
781 
782 static bool devx_is_obj_create_cmd(const void *in, u16 *opcode)
783 {
784 	*opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
785 
786 	switch (*opcode) {
787 	case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
788 	case MLX5_CMD_OP_CREATE_MKEY:
789 	case MLX5_CMD_OP_CREATE_CQ:
790 	case MLX5_CMD_OP_ALLOC_PD:
791 	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
792 	case MLX5_CMD_OP_CREATE_RMP:
793 	case MLX5_CMD_OP_CREATE_SQ:
794 	case MLX5_CMD_OP_CREATE_RQ:
795 	case MLX5_CMD_OP_CREATE_RQT:
796 	case MLX5_CMD_OP_CREATE_TIR:
797 	case MLX5_CMD_OP_CREATE_TIS:
798 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
799 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
800 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
801 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
802 	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
803 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
804 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
805 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
806 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
807 	case MLX5_CMD_OP_CREATE_QP:
808 	case MLX5_CMD_OP_CREATE_SRQ:
809 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
810 	case MLX5_CMD_OP_CREATE_DCT:
811 	case MLX5_CMD_OP_CREATE_XRQ:
812 	case MLX5_CMD_OP_ATTACH_TO_MCG:
813 	case MLX5_CMD_OP_ALLOC_XRCD:
814 		return true;
815 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
816 	{
817 		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
818 		if (op_mod == 0)
819 			return true;
820 		return false;
821 	}
822 	case MLX5_CMD_OP_CREATE_PSV:
823 	{
824 		u8 num_psv = MLX5_GET(create_psv_in, in, num_psv);
825 
826 		if (num_psv == 1)
827 			return true;
828 		return false;
829 	}
830 	default:
831 		return false;
832 	}
833 }
834 
835 static bool devx_is_obj_modify_cmd(const void *in)
836 {
837 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
838 
839 	switch (opcode) {
840 	case MLX5_CMD_OP_MODIFY_GENERAL_OBJ:
841 	case MLX5_CMD_OP_MODIFY_CQ:
842 	case MLX5_CMD_OP_MODIFY_RMP:
843 	case MLX5_CMD_OP_MODIFY_SQ:
844 	case MLX5_CMD_OP_MODIFY_RQ:
845 	case MLX5_CMD_OP_MODIFY_RQT:
846 	case MLX5_CMD_OP_MODIFY_TIR:
847 	case MLX5_CMD_OP_MODIFY_TIS:
848 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
849 	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
850 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
851 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
852 	case MLX5_CMD_OP_RST2INIT_QP:
853 	case MLX5_CMD_OP_INIT2RTR_QP:
854 	case MLX5_CMD_OP_RTR2RTS_QP:
855 	case MLX5_CMD_OP_RTS2RTS_QP:
856 	case MLX5_CMD_OP_SQERR2RTS_QP:
857 	case MLX5_CMD_OP_2ERR_QP:
858 	case MLX5_CMD_OP_2RST_QP:
859 	case MLX5_CMD_OP_ARM_XRC_SRQ:
860 	case MLX5_CMD_OP_ARM_RQ:
861 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
862 	case MLX5_CMD_OP_ARM_XRQ:
863 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
864 	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
865 	case MLX5_CMD_OP_MODIFY_XRQ:
866 		return true;
867 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
868 	{
869 		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
870 
871 		if (op_mod == 1)
872 			return true;
873 		return false;
874 	}
875 	default:
876 		return false;
877 	}
878 }
879 
880 static bool devx_is_obj_query_cmd(const void *in)
881 {
882 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
883 
884 	switch (opcode) {
885 	case MLX5_CMD_OP_QUERY_GENERAL_OBJ:
886 	case MLX5_CMD_OP_QUERY_MKEY:
887 	case MLX5_CMD_OP_QUERY_CQ:
888 	case MLX5_CMD_OP_QUERY_RMP:
889 	case MLX5_CMD_OP_QUERY_SQ:
890 	case MLX5_CMD_OP_QUERY_RQ:
891 	case MLX5_CMD_OP_QUERY_RQT:
892 	case MLX5_CMD_OP_QUERY_TIR:
893 	case MLX5_CMD_OP_QUERY_TIS:
894 	case MLX5_CMD_OP_QUERY_Q_COUNTER:
895 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
896 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
897 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
898 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
899 	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
900 	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
901 	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
902 	case MLX5_CMD_OP_QUERY_QP:
903 	case MLX5_CMD_OP_QUERY_SRQ:
904 	case MLX5_CMD_OP_QUERY_XRC_SRQ:
905 	case MLX5_CMD_OP_QUERY_DCT:
906 	case MLX5_CMD_OP_QUERY_XRQ:
907 	case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
908 	case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
909 	case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
910 		return true;
911 	default:
912 		return false;
913 	}
914 }
915 
916 static bool devx_is_whitelist_cmd(void *in)
917 {
918 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
919 
920 	switch (opcode) {
921 	case MLX5_CMD_OP_QUERY_HCA_CAP:
922 	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
923 	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
924 		return true;
925 	default:
926 		return false;
927 	}
928 }
929 
930 static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
931 {
932 	if (devx_is_whitelist_cmd(cmd_in)) {
933 		if (c->devx_uid)
934 			return c->devx_uid;
935 
936 		return -EOPNOTSUPP;
937 	}
938 
939 	if (!c->devx_uid)
940 		return -EINVAL;
941 
942 	return c->devx_uid;
943 }
944 
945 static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev)
946 {
947 	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
948 
949 	/* Pass all cmds for vhca_tunnel as general, tracking is done in FW */
950 	if ((MLX5_CAP_GEN_64(dev->mdev, vhca_tunnel_commands) &&
951 	     MLX5_GET(general_obj_in_cmd_hdr, in, vhca_tunnel_id)) ||
952 	    (opcode >= MLX5_CMD_OP_GENERAL_START &&
953 	     opcode < MLX5_CMD_OP_GENERAL_END))
954 		return true;
955 
956 	switch (opcode) {
957 	case MLX5_CMD_OP_QUERY_HCA_CAP:
958 	case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
959 	case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT:
960 	case MLX5_CMD_OP_QUERY_VPORT_STATE:
961 	case MLX5_CMD_OP_QUERY_ADAPTER:
962 	case MLX5_CMD_OP_QUERY_ISSI:
963 	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
964 	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
965 	case MLX5_CMD_OP_QUERY_VNIC_ENV:
966 	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
967 	case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
968 	case MLX5_CMD_OP_NOP:
969 	case MLX5_CMD_OP_QUERY_CONG_STATUS:
970 	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
971 	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
972 	case MLX5_CMD_OP_QUERY_LAG:
973 		return true;
974 	default:
975 		return false;
976 	}
977 }
978 
979 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
980 	struct uverbs_attr_bundle *attrs)
981 {
982 	struct mlx5_ib_ucontext *c;
983 	struct mlx5_ib_dev *dev;
984 	int user_vector;
985 	int dev_eqn;
986 	unsigned int irqn;
987 	int err;
988 
989 	if (uverbs_copy_from(&user_vector, attrs,
990 			     MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
991 		return -EFAULT;
992 
993 	c = devx_ufile2uctx(attrs);
994 	if (IS_ERR(c))
995 		return PTR_ERR(c);
996 	dev = to_mdev(c->ibucontext.device);
997 
998 	err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
999 	if (err < 0)
1000 		return err;
1001 
1002 	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
1003 			   &dev_eqn, sizeof(dev_eqn)))
1004 		return -EFAULT;
1005 
1006 	return 0;
1007 }
1008 
1009 /*
1010  * Security note:
1011  * The hardware protection mechanism works like this: Each device object that
1012  * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
1013  * the device specification manual) upon its creation. Then upon doorbell,
1014  * hardware fetches the object context for which the doorbell was rung, and
1015  * validates that the UAR through which the DB was rung matches the UAR ID
1016  * of the object.
1017  * If they do not match, the doorbell is silently ignored by the hardware.
1018  * Of course, the user cannot ring a doorbell on a UAR that was not mapped
1019  * to it. Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ
1020  * command mailboxes (except tagging them with UID), we expose to the user
1021  * its UAR ID, so it can embed it in these objects in the expected
1022  * specification format. So the only thing the user can do is hurt itself by
1023  * creating a QP/SQ/CQ with a UAR ID other than its own, and then in this
1024  * case other users may ring a doorbell on its objects.
1025  * The consequence of that will be that another user can schedule a QP/SQ
1026  * of the buggy user for execution (just insert it to the hardware schedule
1027  * queue or arm its CQ for event generation), no further harm is expected.
1028  */
1029 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
1030 	struct uverbs_attr_bundle *attrs)
1031 {
1032 	struct mlx5_ib_ucontext *c;
1033 	struct mlx5_ib_dev *dev;
1034 	u32 user_idx;
1035 	s32 dev_idx;
1036 
1037 	c = devx_ufile2uctx(attrs);
1038 	if (IS_ERR(c))
1039 		return PTR_ERR(c);
1040 	dev = to_mdev(c->ibucontext.device);
1041 
1042 	if (uverbs_copy_from(&user_idx, attrs,
1043 			     MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
1044 		return -EFAULT;
1045 
1046 	dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
1047 	if (dev_idx < 0)
1048 		return dev_idx;
1049 
1050 	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
1051 			   &dev_idx, sizeof(dev_idx)))
1052 		return -EFAULT;
1053 
1054 	return 0;
1055 }
1056 
1057 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
1058 	struct uverbs_attr_bundle *attrs)
1059 {
1060 	struct mlx5_ib_ucontext *c;
1061 	struct mlx5_ib_dev *dev;
1062 	void *cmd_in = uverbs_attr_get_alloced_ptr(
1063 		attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
1064 	int cmd_out_len = uverbs_attr_get_len(attrs,
1065 					MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
1066 	void *cmd_out;
1067 	int err;
1068 	int uid;
1069 
1070 	c = devx_ufile2uctx(attrs);
1071 	if (IS_ERR(c))
1072 		return PTR_ERR(c);
1073 	dev = to_mdev(c->ibucontext.device);
1074 
1075 	uid = devx_get_uid(c, cmd_in);
1076 	if (uid < 0)
1077 		return uid;
1078 
1079 	/* Only white list of some general HCA commands are allowed for this method. */
1080 	if (!devx_is_general_cmd(cmd_in, dev))
1081 		return -EINVAL;
1082 
1083 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1084 	if (IS_ERR(cmd_out))
1085 		return PTR_ERR(cmd_out);
1086 
1087 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1088 	err = mlx5_cmd_exec(dev->mdev, cmd_in,
1089 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
1090 			    cmd_out, cmd_out_len);
1091 	if (err)
1092 		return err;
1093 
1094 	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
1095 			      cmd_out_len);
1096 }
1097 
1098 static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
1099 				       u32 *dinlen,
1100 				       u32 *obj_id)
1101 {
1102 	u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
1103 	u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
1104 
1105 	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1106 	*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
1107 
1108 	MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
1109 	MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
1110 
1111 	switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
1112 	case MLX5_CMD_OP_CREATE_GENERAL_OBJ:
1113 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJ);
1114 		MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
1115 		break;
1116 
1117 	case MLX5_CMD_OP_CREATE_UMEM:
1118 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1119 			 MLX5_CMD_OP_DESTROY_UMEM);
1120 		break;
1121 	case MLX5_CMD_OP_CREATE_MKEY:
1122 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
1123 		break;
1124 	case MLX5_CMD_OP_CREATE_CQ:
1125 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
1126 		break;
1127 	case MLX5_CMD_OP_ALLOC_PD:
1128 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
1129 		break;
1130 	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
1131 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1132 			 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
1133 		break;
1134 	case MLX5_CMD_OP_CREATE_RMP:
1135 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
1136 		break;
1137 	case MLX5_CMD_OP_CREATE_SQ:
1138 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
1139 		break;
1140 	case MLX5_CMD_OP_CREATE_RQ:
1141 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
1142 		break;
1143 	case MLX5_CMD_OP_CREATE_RQT:
1144 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
1145 		break;
1146 	case MLX5_CMD_OP_CREATE_TIR:
1147 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
1148 		break;
1149 	case MLX5_CMD_OP_CREATE_TIS:
1150 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
1151 		break;
1152 	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
1153 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1154 			 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
1155 		break;
1156 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
1157 		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
1158 		*obj_id = MLX5_GET(create_flow_table_out, out, table_id);
1159 		MLX5_SET(destroy_flow_table_in, din, other_vport,
1160 			 MLX5_GET(create_flow_table_in,  in, other_vport));
1161 		MLX5_SET(destroy_flow_table_in, din, vport_number,
1162 			 MLX5_GET(create_flow_table_in,  in, vport_number));
1163 		MLX5_SET(destroy_flow_table_in, din, table_type,
1164 			 MLX5_GET(create_flow_table_in,  in, table_type));
1165 		MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
1166 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1167 			 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
1168 		break;
1169 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
1170 		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
1171 		*obj_id = MLX5_GET(create_flow_group_out, out, group_id);
1172 		MLX5_SET(destroy_flow_group_in, din, other_vport,
1173 			 MLX5_GET(create_flow_group_in, in, other_vport));
1174 		MLX5_SET(destroy_flow_group_in, din, vport_number,
1175 			 MLX5_GET(create_flow_group_in, in, vport_number));
1176 		MLX5_SET(destroy_flow_group_in, din, table_type,
1177 			 MLX5_GET(create_flow_group_in, in, table_type));
1178 		MLX5_SET(destroy_flow_group_in, din, table_id,
1179 			 MLX5_GET(create_flow_group_in, in, table_id));
1180 		MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
1181 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1182 			 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
1183 		break;
1184 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
1185 		*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
1186 		*obj_id = MLX5_GET(set_fte_in, in, flow_index);
1187 		MLX5_SET(delete_fte_in, din, other_vport,
1188 			 MLX5_GET(set_fte_in,  in, other_vport));
1189 		MLX5_SET(delete_fte_in, din, vport_number,
1190 			 MLX5_GET(set_fte_in, in, vport_number));
1191 		MLX5_SET(delete_fte_in, din, table_type,
1192 			 MLX5_GET(set_fte_in, in, table_type));
1193 		MLX5_SET(delete_fte_in, din, table_id,
1194 			 MLX5_GET(set_fte_in, in, table_id));
1195 		MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
1196 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1197 			 MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
1198 		break;
1199 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
1200 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1201 			 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
1202 		break;
1203 	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
1204 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1205 			 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
1206 		break;
1207 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
1208 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1209 			 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
1210 		break;
1211 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
1212 		*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
1213 		*obj_id = MLX5_GET(create_scheduling_element_out, out,
1214 				   scheduling_element_id);
1215 		MLX5_SET(destroy_scheduling_element_in, din,
1216 			 scheduling_hierarchy,
1217 			 MLX5_GET(create_scheduling_element_in, in,
1218 				  scheduling_hierarchy));
1219 		MLX5_SET(destroy_scheduling_element_in, din,
1220 			 scheduling_element_id, *obj_id);
1221 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1222 			 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
1223 		break;
1224 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
1225 		*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
1226 		*obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
1227 		MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
1228 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1229 			 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
1230 		break;
1231 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
1232 		*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
1233 		*obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
1234 		MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
1235 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1236 			 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
1237 		break;
1238 	case MLX5_CMD_OP_CREATE_QP:
1239 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
1240 		break;
1241 	case MLX5_CMD_OP_CREATE_SRQ:
1242 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
1243 		break;
1244 	case MLX5_CMD_OP_CREATE_XRC_SRQ:
1245 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1246 			 MLX5_CMD_OP_DESTROY_XRC_SRQ);
1247 		break;
1248 	case MLX5_CMD_OP_CREATE_DCT:
1249 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
1250 		break;
1251 	case MLX5_CMD_OP_CREATE_XRQ:
1252 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
1253 		break;
1254 	case MLX5_CMD_OP_ATTACH_TO_MCG:
1255 		*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
1256 		MLX5_SET(detach_from_mcg_in, din, qpn,
1257 			 MLX5_GET(attach_to_mcg_in, in, qpn));
1258 		memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
1259 		       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
1260 		       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
1261 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
1262 		break;
1263 	case MLX5_CMD_OP_ALLOC_XRCD:
1264 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
1265 		break;
1266 	case MLX5_CMD_OP_CREATE_PSV:
1267 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
1268 			 MLX5_CMD_OP_DESTROY_PSV);
1269 		MLX5_SET(destroy_psv_in, din, psvn,
1270 			 MLX5_GET(create_psv_out, out, psv0_index));
1271 		break;
1272 	default:
1273 		/* The entry must match to one of the devx_is_obj_create_cmd */
1274 		WARN_ON(true);
1275 		break;
1276 	}
1277 }
1278 
1279 static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
1280 				   struct devx_obj *obj,
1281 				   void *in, int in_len)
1282 {
1283 	int min_len = MLX5_BYTE_OFF(create_mkey_in, memory_key_mkey_entry) +
1284 			MLX5_FLD_SZ_BYTES(create_mkey_in,
1285 			memory_key_mkey_entry);
1286 	void *mkc;
1287 	u8 access_mode;
1288 
1289 	if (in_len < min_len)
1290 		return -EINVAL;
1291 
1292 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1293 
1294 	access_mode = MLX5_GET(mkc, mkc, access_mode);
1295 	access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
1296 
1297 	if (access_mode == MLX5_ACCESS_MODE_KLM ||
1298 		access_mode == MLX5_ACCESS_MODE_KSM) {
1299 		return 0;
1300 	}
1301 
1302 	MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
1303 	return 0;
1304 }
1305 
1306 static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
1307 				      struct devx_event_subscription *sub)
1308 {
1309 	struct devx_event *event;
1310 	struct devx_obj_event *xa_val_level2;
1311 
1312 	if (sub->is_cleaned)
1313 		return;
1314 
1315 	sub->is_cleaned = 1;
1316 	list_del_rcu(&sub->xa_list);
1317 
1318 	if (list_empty(&sub->obj_list))
1319 		return;
1320 
1321 	list_del_rcu(&sub->obj_list);
1322 	/* check whether key level 1 for this obj_sub_list is empty */
1323 	event = xa_load(&dev->devx_event_table.event_xa,
1324 			sub->xa_key_level1);
1325 	WARN_ON(!event);
1326 
1327 	xa_val_level2 = xa_load(&event->object_ids, sub->xa_key_level2);
1328 	if (list_empty(&xa_val_level2->obj_sub_list)) {
1329 		xa_erase(&event->object_ids,
1330 			 sub->xa_key_level2);
1331 		kfree_rcu(xa_val_level2, rcu);
1332 	}
1333 }
1334 
1335 static int devx_obj_cleanup(struct ib_uobject *uobject,
1336 			    enum rdma_remove_reason why,
1337 			    struct uverbs_attr_bundle *attrs)
1338 {
1339 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1340 	struct mlx5_devx_event_table *devx_event_table;
1341 	struct devx_obj *obj = uobject->object;
1342 	struct devx_event_subscription *sub_entry, *tmp;
1343 	struct mlx5_ib_dev *dev;
1344 	int ret;
1345 
1346 	dev = mlx5_udata_to_mdev(&attrs->driver_udata);
1347 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1348 		ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
1349 	else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1350 		ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1351 	else
1352 		ret = mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox,
1353 				    obj->dinlen, out, sizeof(out));
1354 	if (ib_is_destroy_retryable(ret, why, uobject))
1355 		return ret;
1356 
1357 	devx_event_table = &dev->devx_event_table;
1358 
1359 	mutex_lock(&devx_event_table->event_xa_lock);
1360 	list_for_each_entry_safe(sub_entry, tmp, &obj->event_sub, obj_list)
1361 		devx_cleanup_subscription(dev, sub_entry);
1362 	mutex_unlock(&devx_event_table->event_xa_lock);
1363 
1364 	kfree(obj);
1365 	return ret;
1366 }
1367 
1368 static void devx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
1369 {
1370 	struct devx_obj *obj = container_of(mcq, struct devx_obj, core_cq);
1371 	struct mlx5_devx_event_table *table;
1372 	struct devx_event *event;
1373 	struct devx_obj_event *obj_event;
1374 	u32 obj_id = mcq->cqn;
1375 
1376 	table = &obj->ib_dev->devx_event_table;
1377 	rcu_read_lock();
1378 	event = xa_load(&table->event_xa, MLX5_EVENT_TYPE_COMP);
1379 	if (!event)
1380 		goto out;
1381 
1382 	obj_event = xa_load(&event->object_ids, obj_id);
1383 	if (!obj_event)
1384 		goto out;
1385 
1386 	dispatch_event_fd(&obj_event->obj_sub_list, eqe);
1387 out:
1388 	rcu_read_unlock();
1389 }
1390 
1391 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
1392 	struct uverbs_attr_bundle *attrs)
1393 {
1394 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1395 	int cmd_out_len =  uverbs_attr_get_len(attrs,
1396 					MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
1397 	int cmd_in_len = uverbs_attr_get_len(attrs,
1398 					MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
1399 	void *cmd_out;
1400 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1401 		attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
1402 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1403 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1404 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1405 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
1406 	struct devx_obj *obj;
1407 	u16 obj_type = 0;
1408 	int err;
1409 	int uid;
1410 	u32 obj_id;
1411 	u16 opcode;
1412 
1413 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1414 		return -EINVAL;
1415 
1416 	uid = devx_get_uid(c, cmd_in);
1417 	if (uid < 0)
1418 		return uid;
1419 
1420 	if (!devx_is_obj_create_cmd(cmd_in, &opcode))
1421 		return -EINVAL;
1422 
1423 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1424 	if (IS_ERR(cmd_out))
1425 		return PTR_ERR(cmd_out);
1426 
1427 	obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
1428 	if (!obj)
1429 		return -ENOMEM;
1430 
1431 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1432 	if (opcode == MLX5_CMD_OP_CREATE_MKEY) {
1433 		err = devx_handle_mkey_create(dev, obj, cmd_in, cmd_in_len);
1434 		if (err)
1435 			goto obj_free;
1436 	} else {
1437 		devx_set_umem_valid(cmd_in);
1438 	}
1439 
1440 	if (opcode == MLX5_CMD_OP_CREATE_DCT) {
1441 		obj->flags |= DEVX_OBJ_FLAGS_DCT;
1442 		err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
1443 					   cmd_in, cmd_in_len,
1444 					   cmd_out, cmd_out_len);
1445 	} else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
1446 		obj->flags |= DEVX_OBJ_FLAGS_CQ;
1447 		obj->core_cq.comp = devx_cq_comp;
1448 		err = mlx5_core_create_cq(dev->mdev, &obj->core_cq,
1449 					  cmd_in, cmd_in_len, cmd_out,
1450 					  cmd_out_len);
1451 	} else {
1452 		err = mlx5_cmd_exec(dev->mdev, cmd_in,
1453 				    cmd_in_len,
1454 				    cmd_out, cmd_out_len);
1455 	}
1456 
1457 	if (err)
1458 		goto obj_free;
1459 
1460 	if (opcode == MLX5_CMD_OP_ALLOC_FLOW_COUNTER) {
1461 		u8 bulk = MLX5_GET(alloc_flow_counter_in,
1462 				   cmd_in,
1463 				   flow_counter_bulk);
1464 		obj->flow_counter_bulk_size = 128UL * bulk;
1465 	}
1466 
1467 	uobj->object = obj;
1468 	INIT_LIST_HEAD(&obj->event_sub);
1469 	obj->ib_dev = dev;
1470 	devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen,
1471 				   &obj_id);
1472 	WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
1473 
1474 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
1475 	if (err)
1476 		goto obj_destroy;
1477 
1478 	if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJ)
1479 		obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
1480 	obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
1481 
1482 	return 0;
1483 
1484 obj_destroy:
1485 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
1486 		mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
1487 	else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
1488 		mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
1489 	else
1490 		mlx5_cmd_exec(obj->ib_dev->mdev, obj->dinbox, obj->dinlen, out,
1491 			      sizeof(out));
1492 obj_free:
1493 	kfree(obj);
1494 	return err;
1495 }
1496 
1497 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
1498 	struct uverbs_attr_bundle *attrs)
1499 {
1500 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
1501 	int cmd_out_len = uverbs_attr_get_len(attrs,
1502 					MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
1503 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1504 							  MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
1505 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1506 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1507 	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1508 	void *cmd_out;
1509 	int err;
1510 	int uid;
1511 
1512 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1513 		return -EINVAL;
1514 
1515 	uid = devx_get_uid(c, cmd_in);
1516 	if (uid < 0)
1517 		return uid;
1518 
1519 	if (!devx_is_obj_modify_cmd(cmd_in))
1520 		return -EINVAL;
1521 
1522 	if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1523 		return -EINVAL;
1524 
1525 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1526 	if (IS_ERR(cmd_out))
1527 		return PTR_ERR(cmd_out);
1528 
1529 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1530 	devx_set_umem_valid(cmd_in);
1531 
1532 	err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1533 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
1534 			    cmd_out, cmd_out_len);
1535 	if (err)
1536 		return err;
1537 
1538 	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
1539 			      cmd_out, cmd_out_len);
1540 }
1541 
1542 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
1543 	struct uverbs_attr_bundle *attrs)
1544 {
1545 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
1546 	int cmd_out_len = uverbs_attr_get_len(attrs,
1547 					      MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
1548 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
1549 							  MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
1550 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1551 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1552 	void *cmd_out;
1553 	int err;
1554 	int uid;
1555 	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1556 
1557 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1558 		return -EINVAL;
1559 
1560 	uid = devx_get_uid(c, cmd_in);
1561 	if (uid < 0)
1562 		return uid;
1563 
1564 	if (!devx_is_obj_query_cmd(cmd_in))
1565 		return -EINVAL;
1566 
1567 	if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1568 		return -EINVAL;
1569 
1570 	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
1571 	if (IS_ERR(cmd_out))
1572 		return PTR_ERR(cmd_out);
1573 
1574 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1575 	err = mlx5_cmd_exec(mdev->mdev, cmd_in,
1576 			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
1577 			    cmd_out, cmd_out_len);
1578 	if (err)
1579 		return err;
1580 
1581 	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
1582 			      cmd_out, cmd_out_len);
1583 }
1584 
/*
 * Queue of completed asynchronous commands waiting to be consumed.
 * Every field is given its initial value by devx_init_event_queue().
 */
1585 struct devx_async_event_queue {
	/* Taken (irqsave) by devx_query_callback() while it appends to event_list. */
1586 	spinlock_t		lock;
	/* Woken by devx_query_callback() after an entry has been queued. */
1587 	wait_queue_head_t	poll_wait;
	/* Completed struct devx_async_data entries, linked through their list member. */
1588 	struct list_head	event_list;
	/*
	 * Sum of the output lengths of outstanding async queries; the async
	 * query handler refuses new work above MAX_ASYNC_BYTES_IN_USE.
	 */
1589 	atomic_t		bytes_in_use;
	/* Cleared at init; presumably set on teardown - TODO confirm against the fd release path. */
1590 	u8			is_destroyed:1;
1591 };
1592 
/*
 * Per file-descriptor state for asynchronous DEVX commands.  Handlers
 * recover it from the fd uobject with container_of().
 */
1593 struct devx_async_cmd_event_file {
	/* Embedded uobject used as the container_of() anchor. */
1594 	struct ib_uobject		uobj;
	/* Results queued by devx_query_callback(). */
1595 	struct devx_async_event_queue	ev_queue;
	/* Set up with mlx5_cmd_init_async_ctx() and passed to mlx5_cmd_exec_cb(). */
1596 	struct mlx5_async_ctx		async_ctx;
1597 };
1598 
1599 static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
1600 {
1601 	spin_lock_init(&ev_queue->lock);
1602 	INIT_LIST_HEAD(&ev_queue->event_list);
1603 	init_waitqueue_head(&ev_queue->poll_wait);
1604 	atomic_set(&ev_queue->bytes_in_use, 0);
1605 	ev_queue->is_destroyed = 0;
1606 }
1607 
1608 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
1609 	struct uverbs_attr_bundle *attrs)
1610 {
1611 	struct devx_async_cmd_event_file *ev_file;
1612 
1613 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1614 		attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
1615 	struct mlx5_ib_dev *mdev = mlx5_udata_to_mdev(&attrs->driver_udata);
1616 
1617 	ev_file = container_of(uobj, struct devx_async_cmd_event_file,
1618 			       uobj);
1619 	devx_init_event_queue(&ev_file->ev_queue);
1620 	mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
1621 	return 0;
1622 }
1623 
1624 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC)(
1625 	struct uverbs_attr_bundle *attrs)
1626 {
1627 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1628 		attrs, MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE);
1629 	struct devx_async_event_file *ev_file;
1630 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1631 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1632 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1633 	u32 flags;
1634 	int err;
1635 
1636 	err = uverbs_get_flags32(&flags, attrs,
1637 		MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
1638 		MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA);
1639 
1640 	if (err)
1641 		return err;
1642 
1643 	ev_file = container_of(uobj, struct devx_async_event_file,
1644 			       uobj);
1645 	spin_lock_init(&ev_file->lock);
1646 	INIT_LIST_HEAD(&ev_file->event_list);
1647 	init_waitqueue_head(&ev_file->poll_wait);
1648 	if (flags & MLX5_IB_UAPI_DEVX_CR_EV_CH_FLAGS_OMIT_DATA)
1649 		ev_file->omit_data = 1;
1650 	INIT_LIST_HEAD(&ev_file->subscribed_events_list);
1651 	ev_file->dev = dev;
1652 	get_device(&dev->ib_dev.dev);
1653 	return 0;
1654 }
1655 
1656 static void devx_query_callback(int status, struct mlx5_async_work *context)
1657 {
1658 	struct devx_async_data *async_data =
1659 		container_of(context, struct devx_async_data, cb_work);
1660 	struct devx_async_cmd_event_file *ev_file = async_data->ev_file;
1661 	struct devx_async_event_queue *ev_queue = &ev_file->ev_queue;
1662 	unsigned long flags;
1663 
1664 	/*
1665 	 * Note that if the struct devx_async_cmd_event_file uobj begins to be
1666 	 * destroyed it will block at mlx5_cmd_cleanup_async_ctx() until this
1667 	 * routine returns, ensuring that it always remains valid here.
1668 	 */
1669 	spin_lock_irqsave(&ev_queue->lock, flags);
1670 	list_add_tail(&async_data->list, &ev_queue->event_list);
1671 	spin_unlock_irqrestore(&ev_queue->lock, flags);
1672 
1673 	wake_up_interruptible(&ev_queue->poll_wait);
1674 }
1675 
1676 #define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
1677 
1678 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
1679 	struct uverbs_attr_bundle *attrs)
1680 {
1681 	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
1682 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
1683 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
1684 				attrs,
1685 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
1686 	u16 cmd_out_len;
1687 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1688 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1689 	struct ib_uobject *fd_uobj;
1690 	int err;
1691 	int uid;
1692 	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
1693 	struct devx_async_cmd_event_file *ev_file;
1694 	struct devx_async_data *async_data;
1695 
1696 	if (MLX5_GET(general_obj_in_cmd_hdr, cmd_in, vhca_tunnel_id))
1697 		return -EINVAL;
1698 
1699 	uid = devx_get_uid(c, cmd_in);
1700 	if (uid < 0)
1701 		return uid;
1702 
1703 	if (!devx_is_obj_query_cmd(cmd_in))
1704 		return -EINVAL;
1705 
1706 	err = uverbs_get_const(&cmd_out_len, attrs,
1707 			       MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
1708 	if (err)
1709 		return err;
1710 
1711 	if (!devx_is_valid_obj_id(attrs, uobj, cmd_in))
1712 		return -EINVAL;
1713 
1714 	fd_uobj = uverbs_attr_get_uobject(attrs,
1715 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
1716 	if (IS_ERR(fd_uobj))
1717 		return PTR_ERR(fd_uobj);
1718 
1719 	ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
1720 			       uobj);
1721 
1722 	if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
1723 			MAX_ASYNC_BYTES_IN_USE) {
1724 		atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1725 		return -EAGAIN;
1726 	}
1727 
1728 	async_data = kvzalloc(struct_size(async_data, hdr.out_data,
1729 					  cmd_out_len), GFP_KERNEL);
1730 	if (!async_data) {
1731 		err = -ENOMEM;
1732 		goto sub_bytes;
1733 	}
1734 
1735 	err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
1736 			       MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
1737 	if (err)
1738 		goto free_async;
1739 
1740 	async_data->cmd_out_len = cmd_out_len;
1741 	async_data->mdev = mdev;
1742 	async_data->ev_file = ev_file;
1743 
1744 	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
1745 	err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
1746 		    uverbs_attr_get_len(attrs,
1747 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
1748 		    async_data->hdr.out_data,
1749 		    async_data->cmd_out_len,
1750 		    devx_query_callback, &async_data->cb_work);
1751 
1752 	if (err)
1753 		goto free_async;
1754 
1755 	return 0;
1756 
1757 free_async:
1758 	kvfree(async_data);
1759 sub_bytes:
1760 	atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
1761 	return err;
1762 }
1763 
1764 static void
1765 subscribe_event_xa_dealloc(struct mlx5_devx_event_table *devx_event_table,
1766 			   u32 key_level1,
1767 			   bool is_level2,
1768 			   u32 key_level2)
1769 {
1770 	struct devx_event *event;
1771 	struct devx_obj_event *xa_val_level2;
1772 
1773 	/* Level 1 is valid for future use, no need to free */
1774 	if (!is_level2)
1775 		return;
1776 
1777 	event = xa_load(&devx_event_table->event_xa, key_level1);
1778 	WARN_ON(!event);
1779 
1780 	xa_val_level2 = xa_load(&event->object_ids,
1781 				key_level2);
1782 	if (list_empty(&xa_val_level2->obj_sub_list)) {
1783 		xa_erase(&event->object_ids,
1784 			 key_level2);
1785 		kfree_rcu(xa_val_level2, rcu);
1786 	}
1787 }
1788 
1789 static int
1790 subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
1791 			 u32 key_level1,
1792 			 bool is_level2,
1793 			 u32 key_level2)
1794 {
1795 	struct devx_obj_event *obj_event;
1796 	struct devx_event *event;
1797 	int err;
1798 
1799 	event = xa_load(&devx_event_table->event_xa, key_level1);
1800 	if (!event) {
1801 		event = kzalloc(sizeof(*event), GFP_KERNEL);
1802 		if (!event)
1803 			return -ENOMEM;
1804 
1805 		INIT_LIST_HEAD(&event->unaffiliated_list);
1806 		xa_init_flags(&event->object_ids, 0);
1807 
1808 		err = xa_insert(&devx_event_table->event_xa,
1809 				key_level1,
1810 				event,
1811 				GFP_KERNEL);
1812 		if (err) {
1813 			kfree(event);
1814 			return err;
1815 		}
1816 	}
1817 
1818 	if (!is_level2)
1819 		return 0;
1820 
1821 	obj_event = xa_load(&event->object_ids, key_level2);
1822 	if (!obj_event) {
1823 		obj_event = kzalloc(sizeof(*obj_event), GFP_KERNEL);
1824 		if (!obj_event)
1825 			/* Level1 is valid for future use, no need to free */
1826 			return -ENOMEM;
1827 
1828 		err = xa_insert(&event->object_ids,
1829 				key_level2,
1830 				obj_event,
1831 				GFP_KERNEL);
1832 		if (err)
1833 			return err;
1834 		INIT_LIST_HEAD(&obj_event->obj_sub_list);
1835 	}
1836 
1837 	return 0;
1838 }
1839 
1840 static bool is_valid_events_legacy(int num_events, u16 *event_type_num_list,
1841 				   struct devx_obj *obj)
1842 {
1843 	int i;
1844 
1845 	for (i = 0; i < num_events; i++) {
1846 		if (obj) {
1847 			if (!is_legacy_obj_event_num(event_type_num_list[i]))
1848 				return false;
1849 		} else if (!is_legacy_unaffiliated_event_num(
1850 				event_type_num_list[i])) {
1851 			return false;
1852 		}
1853 	}
1854 
1855 	return true;
1856 }
1857 
1858 #define MAX_SUPP_EVENT_NUM 255
1859 static bool is_valid_events(struct mlx5_core_dev *dev,
1860 			    int num_events, u16 *event_type_num_list,
1861 			    struct devx_obj *obj)
1862 {
1863 	__be64 *aff_events;
1864 	__be64 *unaff_events;
1865 	int mask_entry;
1866 	int mask_bit;
1867 	int i;
1868 
1869 	if (MLX5_CAP_GEN(dev, event_cap)) {
1870 		aff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
1871 						user_affiliated_events);
1872 		unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
1873 						  user_unaffiliated_events);
1874 	} else {
1875 		return is_valid_events_legacy(num_events, event_type_num_list,
1876 					      obj);
1877 	}
1878 
1879 	for (i = 0; i < num_events; i++) {
1880 		if (event_type_num_list[i] > MAX_SUPP_EVENT_NUM)
1881 			return false;
1882 
1883 		mask_entry = event_type_num_list[i] / 64;
1884 		mask_bit = event_type_num_list[i] % 64;
1885 
1886 		if (obj) {
1887 			/* CQ completion */
1888 			if (event_type_num_list[i] == 0)
1889 				continue;
1890 
1891 			if (!(be64_to_cpu(aff_events[mask_entry]) &
1892 					(1ull << mask_bit)))
1893 				return false;
1894 
1895 			continue;
1896 		}
1897 
1898 		if (!(be64_to_cpu(unaff_events[mask_entry]) &
1899 				(1ull << mask_bit)))
1900 			return false;
1901 	}
1902 
1903 	return true;
1904 }
1905 
1906 #define MAX_NUM_EVENTS 16
1907 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
1908 	struct uverbs_attr_bundle *attrs)
1909 {
1910 	struct ib_uobject *devx_uobj = uverbs_attr_get_uobject(
1911 				attrs,
1912 				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE);
1913 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
1914 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
1915 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
1916 	struct ib_uobject *fd_uobj;
1917 	struct devx_obj *obj = NULL;
1918 	struct devx_async_event_file *ev_file;
1919 	struct mlx5_devx_event_table *devx_event_table = &dev->devx_event_table;
1920 	u16 *event_type_num_list;
1921 	struct devx_event_subscription *event_sub, *tmp_sub;
1922 	struct list_head sub_list;
1923 	int redirect_fd;
1924 	bool use_eventfd = false;
1925 	int num_events;
1926 	int num_alloc_xa_entries = 0;
1927 	u16 obj_type = 0;
1928 	u64 cookie = 0;
1929 	u32 obj_id = 0;
1930 	int err;
1931 	int i;
1932 
1933 	if (!c->devx_uid)
1934 		return -EINVAL;
1935 
1936 	if (!IS_ERR(devx_uobj)) {
1937 		obj = (struct devx_obj *)devx_uobj->object;
1938 		if (obj)
1939 			obj_id = get_dec_obj_id(obj->obj_id);
1940 	}
1941 
1942 	fd_uobj = uverbs_attr_get_uobject(attrs,
1943 				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE);
1944 	if (IS_ERR(fd_uobj))
1945 		return PTR_ERR(fd_uobj);
1946 
1947 	ev_file = container_of(fd_uobj, struct devx_async_event_file,
1948 			       uobj);
1949 
1950 	if (uverbs_attr_is_valid(attrs,
1951 				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM)) {
1952 		err = uverbs_copy_from(&redirect_fd, attrs,
1953 			       MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM);
1954 		if (err)
1955 			return err;
1956 
1957 		use_eventfd = true;
1958 	}
1959 
1960 	if (uverbs_attr_is_valid(attrs,
1961 				 MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE)) {
1962 		if (use_eventfd)
1963 			return -EINVAL;
1964 
1965 		err = uverbs_copy_from(&cookie, attrs,
1966 				MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE);
1967 		if (err)
1968 			return err;
1969 	}
1970 
1971 	num_events = uverbs_attr_ptr_get_array_size(
1972 		attrs, MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
1973 		sizeof(u16));
1974 
1975 	if (num_events < 0)
1976 		return num_events;
1977 
1978 	if (num_events > MAX_NUM_EVENTS)
1979 		return -EINVAL;
1980 
1981 	event_type_num_list = uverbs_attr_get_alloced_ptr(attrs,
1982 			MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST);
1983 
1984 	if (!is_valid_events(dev->mdev, num_events, event_type_num_list, obj))
1985 		return -EINVAL;
1986 
1987 	INIT_LIST_HEAD(&sub_list);
1988 
1989 	/* Protect from concurrent subscriptions to same XA entries to allow
1990 	 * both to succeed
1991 	 */
1992 	mutex_lock(&devx_event_table->event_xa_lock);
1993 	for (i = 0; i < num_events; i++) {
1994 		u32 key_level1;
1995 
1996 		if (obj)
1997 			obj_type = get_dec_obj_type(obj,
1998 						    event_type_num_list[i]);
1999 		key_level1 = event_type_num_list[i] | obj_type << 16;
2000 
2001 		err = subscribe_event_xa_alloc(devx_event_table,
2002 					       key_level1,
2003 					       obj,
2004 					       obj_id);
2005 		if (err)
2006 			goto err;
2007 
2008 		num_alloc_xa_entries++;
2009 		event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
2010 		if (!event_sub)
2011 			goto err;
2012 
2013 		list_add_tail(&event_sub->event_list, &sub_list);
2014 		uverbs_uobject_get(&ev_file->uobj);
2015 		if (use_eventfd) {
2016 			event_sub->eventfd =
2017 				fdget(redirect_fd);
2018 
2019 			if (event_sub->eventfd.file == NULL) {
2020 				err = -EBADF;
2021 				goto err;
2022 			}
2023 		}
2024 
2025 		event_sub->cookie = cookie;
2026 		event_sub->ev_file = ev_file;
2027 		/* May be needed upon cleanup the devx object/subscription */
2028 		event_sub->xa_key_level1 = key_level1;
2029 		event_sub->xa_key_level2 = obj_id;
2030 		INIT_LIST_HEAD(&event_sub->obj_list);
2031 	}
2032 
2033 	/* Once all the allocations and the XA data insertions were done we
2034 	 * can go ahead and add all the subscriptions to the relevant lists
2035 	 * without concern of a failure.
2036 	 */
2037 	list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2038 		struct devx_event *event;
2039 		struct devx_obj_event *obj_event;
2040 
2041 		list_del_init(&event_sub->event_list);
2042 
2043 		spin_lock_irq(&ev_file->lock);
2044 		list_add_tail_rcu(&event_sub->file_list,
2045 				  &ev_file->subscribed_events_list);
2046 		spin_unlock_irq(&ev_file->lock);
2047 
2048 		event = xa_load(&devx_event_table->event_xa,
2049 				event_sub->xa_key_level1);
2050 		WARN_ON(!event);
2051 
2052 		if (!obj) {
2053 			list_add_tail_rcu(&event_sub->xa_list,
2054 					  &event->unaffiliated_list);
2055 			continue;
2056 		}
2057 
2058 		obj_event = xa_load(&event->object_ids, obj_id);
2059 		WARN_ON(!obj_event);
2060 		list_add_tail_rcu(&event_sub->xa_list,
2061 				  &obj_event->obj_sub_list);
2062 		list_add_tail_rcu(&event_sub->obj_list,
2063 				  &obj->event_sub);
2064 	}
2065 
2066 	mutex_unlock(&devx_event_table->event_xa_lock);
2067 	return 0;
2068 
2069 err:
2070 	list_for_each_entry_safe(event_sub, tmp_sub, &sub_list, event_list) {
2071 		list_del(&event_sub->event_list);
2072 
2073 		subscribe_event_xa_dealloc(devx_event_table,
2074 					   event_sub->xa_key_level1,
2075 					   obj,
2076 					   obj_id);
2077 
2078 		if (event_sub->eventfd.file)
2079 			fdput(event_sub->eventfd);
2080 		uverbs_uobject_put(&event_sub->ev_file->uobj);
2081 		kfree(event_sub);
2082 	}
2083 
2084 	mutex_unlock(&devx_event_table->event_xa_lock);
2085 	return err;
2086 }
2087 
2088 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
2089 			 struct uverbs_attr_bundle *attrs,
2090 			 struct devx_umem *obj)
2091 {
2092 	u64 addr;
2093 	size_t size;
2094 	u32 access;
2095 	int npages;
2096 	int err;
2097 	u32 page_mask;
2098 
2099 	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
2100 	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
2101 		return -EFAULT;
2102 
2103 	err = uverbs_get_flags32(&access, attrs,
2104 				 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2105 				 IB_ACCESS_LOCAL_WRITE |
2106 				 IB_ACCESS_REMOTE_WRITE |
2107 				 IB_ACCESS_REMOTE_READ);
2108 	if (err)
2109 		return err;
2110 
2111 	err = ib_check_mr_access(access);
2112 	if (err)
2113 		return err;
2114 
2115 	obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
2116 	if (IS_ERR(obj->umem))
2117 		return PTR_ERR(obj->umem);
2118 
2119 	mlx5_ib_cont_pages(obj->umem, obj->umem->address,
2120 			   MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
2121 			   &obj->page_shift, &obj->ncont, NULL);
2122 
2123 	if (!npages) {
2124 		ib_umem_release(obj->umem);
2125 		return -EINVAL;
2126 	}
2127 
2128 	page_mask = (1 << obj->page_shift) - 1;
2129 	obj->page_offset = obj->umem->address & page_mask;
2130 
2131 	return 0;
2132 }
2133 
2134 static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
2135 				   struct devx_umem *obj,
2136 				   struct devx_umem_reg_cmd *cmd)
2137 {
2138 	cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
2139 		    (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
2140 	cmd->in = uverbs_zalloc(attrs, cmd->inlen);
2141 	return PTR_ERR_OR_ZERO(cmd->in);
2142 }
2143 
2144 static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
2145 				    struct devx_umem *obj,
2146 				    struct devx_umem_reg_cmd *cmd)
2147 {
2148 	void *umem;
2149 	__be64 *mtt;
2150 
2151 	umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
2152 	mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
2153 
2154 	MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
2155 	MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
2156 	MLX5_SET(umem, umem, log_page_size, obj->page_shift -
2157 					    MLX5_ADAPTER_PAGE_SHIFT);
2158 	MLX5_SET(umem, umem, page_offset, obj->page_offset);
2159 	mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
2160 			     (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
2161 			     MLX5_IB_MTT_READ);
2162 }
2163 
2164 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
2165 	struct uverbs_attr_bundle *attrs)
2166 {
2167 	struct devx_umem_reg_cmd cmd;
2168 	struct devx_umem *obj;
2169 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
2170 		attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
2171 	u32 obj_id;
2172 	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
2173 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2174 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
2175 	int err;
2176 
2177 	if (!c->devx_uid)
2178 		return -EINVAL;
2179 
2180 	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
2181 	if (!obj)
2182 		return -ENOMEM;
2183 
2184 	err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
2185 	if (err)
2186 		goto err_obj_free;
2187 
2188 	err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
2189 	if (err)
2190 		goto err_umem_release;
2191 
2192 	devx_umem_reg_cmd_build(dev, obj, &cmd);
2193 
2194 	MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
2195 	err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
2196 			    sizeof(cmd.out));
2197 	if (err)
2198 		goto err_umem_release;
2199 
2200 	obj->mdev = dev->mdev;
2201 	uobj->object = obj;
2202 	devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
2203 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
2204 	if (err)
2205 		goto err_umem_destroy;
2206 
2207 	return 0;
2208 
2209 err_umem_destroy:
2210 	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
2211 err_umem_release:
2212 	ib_umem_release(obj->umem);
2213 err_obj_free:
2214 	kfree(obj);
2215 	return err;
2216 }
2217 
2218 static int devx_umem_cleanup(struct ib_uobject *uobject,
2219 			     enum rdma_remove_reason why,
2220 			     struct uverbs_attr_bundle *attrs)
2221 {
2222 	struct devx_umem *obj = uobject->object;
2223 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
2224 	int err;
2225 
2226 	err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
2227 	if (ib_is_destroy_retryable(err, why, uobject))
2228 		return err;
2229 
2230 	ib_umem_release(obj->umem);
2231 	kfree(obj);
2232 	return 0;
2233 }
2234 
2235 static bool is_unaffiliated_event(struct mlx5_core_dev *dev,
2236 				  unsigned long event_type)
2237 {
2238 	__be64 *unaff_events;
2239 	int mask_entry;
2240 	int mask_bit;
2241 
2242 	if (!MLX5_CAP_GEN(dev, event_cap))
2243 		return is_legacy_unaffiliated_event_num(event_type);
2244 
2245 	unaff_events = (__be64 *)MLX5_CAP_DEV_EVENT(dev,
2246 					  user_unaffiliated_events);
2247 	WARN_ON(event_type > MAX_SUPP_EVENT_NUM);
2248 
2249 	mask_entry = event_type / 64;
2250 	mask_bit = event_type % 64;
2251 
2252 	if (!(be64_to_cpu(unaff_events[mask_entry]) & (1ull << mask_bit)))
2253 		return false;
2254 
2255 	return true;
2256 }
2257 
2258 static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
2259 {
2260 	struct mlx5_eqe *eqe = data;
2261 	u32 obj_id = 0;
2262 
2263 	switch (event_type) {
2264 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
2265 	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
2266 	case MLX5_EVENT_TYPE_PATH_MIG:
2267 	case MLX5_EVENT_TYPE_COMM_EST:
2268 	case MLX5_EVENT_TYPE_SQ_DRAINED:
2269 	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
2270 	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
2271 	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
2272 	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
2273 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
2274 		obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
2275 		break;
2276 	case MLX5_EVENT_TYPE_XRQ_ERROR:
2277 		obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
2278 		break;
2279 	case MLX5_EVENT_TYPE_DCT_DRAINED:
2280 	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
2281 		obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
2282 		break;
2283 	case MLX5_EVENT_TYPE_CQ_ERROR:
2284 		obj_id = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
2285 		break;
2286 	default:
2287 		obj_id = MLX5_GET(affiliated_event_header, &eqe->data, obj_id);
2288 		break;
2289 	}
2290 
2291 	return obj_id;
2292 }
2293 
2294 static int deliver_event(struct devx_event_subscription *event_sub,
2295 			 const void *data)
2296 {
2297 	struct devx_async_event_file *ev_file;
2298 	struct devx_async_event_data *event_data;
2299 	unsigned long flags;
2300 
2301 	ev_file = event_sub->ev_file;
2302 
2303 	if (ev_file->omit_data) {
2304 		spin_lock_irqsave(&ev_file->lock, flags);
2305 		if (!list_empty(&event_sub->event_list) ||
2306 		    ev_file->is_destroyed) {
2307 			spin_unlock_irqrestore(&ev_file->lock, flags);
2308 			return 0;
2309 		}
2310 
2311 		list_add_tail(&event_sub->event_list, &ev_file->event_list);
2312 		spin_unlock_irqrestore(&ev_file->lock, flags);
2313 		wake_up_interruptible(&ev_file->poll_wait);
2314 		return 0;
2315 	}
2316 
2317 	event_data = kzalloc(sizeof(*event_data) + sizeof(struct mlx5_eqe),
2318 			     GFP_ATOMIC);
2319 	if (!event_data) {
2320 		spin_lock_irqsave(&ev_file->lock, flags);
2321 		ev_file->is_overflow_err = 1;
2322 		spin_unlock_irqrestore(&ev_file->lock, flags);
2323 		return -ENOMEM;
2324 	}
2325 
2326 	event_data->hdr.cookie = event_sub->cookie;
2327 	memcpy(event_data->hdr.out_data, data, sizeof(struct mlx5_eqe));
2328 
2329 	spin_lock_irqsave(&ev_file->lock, flags);
2330 	if (!ev_file->is_destroyed)
2331 		list_add_tail(&event_data->list, &ev_file->event_list);
2332 	else
2333 		kfree(event_data);
2334 	spin_unlock_irqrestore(&ev_file->lock, flags);
2335 	wake_up_interruptible(&ev_file->poll_wait);
2336 
2337 	return 0;
2338 }
2339 
2340 static void dispatch_event_fd(struct list_head *fd_list,
2341 			      const void *data)
2342 {
2343 	struct devx_event_subscription *item;
2344 
2345 	list_for_each_entry_rcu(item, fd_list, xa_list) {
2346 		if (item->eventfd.file != NULL)
2347 			linux_poll_wakeup(item->eventfd.file);
2348 		else
2349 			deliver_event(item, data);
2350 	}
2351 }
2352 
2353 static bool mlx5_devx_event_notifier(struct mlx5_core_dev *mdev,
2354 				     uint8_t event_type, void *data)
2355 {
2356 	struct mlx5_ib_dev *dev;
2357 	struct mlx5_devx_event_table *table;
2358 	struct devx_event *event;
2359 	struct devx_obj_event *obj_event;
2360 	u16 obj_type = 0;
2361 	bool is_unaffiliated;
2362 	u32 obj_id;
2363 
2364 	/* Explicit filtering to kernel events which may occur frequently */
2365 	if (event_type == MLX5_EVENT_TYPE_CMD ||
2366 	    event_type == MLX5_EVENT_TYPE_PAGE_REQUEST)
2367 		return true;
2368 
2369 	dev = mdev->priv.eq_table.dev;
2370 	table = &dev->devx_event_table;
2371 	is_unaffiliated = is_unaffiliated_event(dev->mdev, event_type);
2372 
2373 	if (!is_unaffiliated)
2374 		obj_type = get_event_obj_type(event_type, data);
2375 
2376 	rcu_read_lock();
2377 	event = xa_load(&table->event_xa, event_type | (obj_type << 16));
2378 	if (!event) {
2379 		rcu_read_unlock();
2380 		return false;
2381 	}
2382 
2383 	if (is_unaffiliated) {
2384 		dispatch_event_fd(&event->unaffiliated_list, data);
2385 		rcu_read_unlock();
2386 		return true;
2387 	}
2388 
2389 	obj_id = devx_get_obj_id_from_event(event_type, data);
2390 	obj_event = xa_load(&event->object_ids, obj_id);
2391 	if (!obj_event) {
2392 		rcu_read_unlock();
2393 		return false;
2394 	}
2395 
2396 	dispatch_event_fd(&obj_event->obj_sub_list, data);
2397 
2398 	rcu_read_unlock();
2399 	return true;
2400 }
2401 
2402 void mlx5_ib_devx_init_event_table(struct mlx5_ib_dev *dev)
2403 {
2404 	struct mlx5_devx_event_table *table = &dev->devx_event_table;
2405 
2406 	xa_init_flags(&table->event_xa, 0);
2407 	mutex_init(&table->event_xa_lock);
2408 	dev->mdev->priv.eq_table.dev = dev;
2409 	dev->mdev->priv.eq_table.cb = mlx5_devx_event_notifier;
2410 }
2411 
2412 void mlx5_ib_devx_cleanup_event_table(struct mlx5_ib_dev *dev)
2413 {
2414 	struct mlx5_devx_event_table *table = &dev->devx_event_table;
2415 	struct devx_event_subscription *sub, *tmp;
2416 	struct devx_event *event;
2417 	void *entry;
2418 	unsigned long id;
2419 
2420 	dev->mdev->priv.eq_table.cb = NULL;
2421 	dev->mdev->priv.eq_table.dev = NULL;
2422 	mutex_lock(&dev->devx_event_table.event_xa_lock);
2423 	xa_for_each(&table->event_xa, id, entry) {
2424 		event = entry;
2425 		list_for_each_entry_safe(sub, tmp, &event->unaffiliated_list,
2426 					 xa_list)
2427 			devx_cleanup_subscription(dev, sub);
2428 		kfree(entry);
2429 	}
2430 	mutex_unlock(&dev->devx_event_table.event_xa_lock);
2431 	xa_destroy(&table->event_xa);
2432 }
2433 
2434 static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
2435 					 size_t count, loff_t *pos)
2436 {
2437 	struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2438 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2439 	struct devx_async_data *event;
2440 	int ret = 0;
2441 	size_t eventsz;
2442 
2443 	spin_lock_irq(&ev_queue->lock);
2444 
2445 	while (list_empty(&ev_queue->event_list)) {
2446 		spin_unlock_irq(&ev_queue->lock);
2447 
2448 		if (filp->f_flags & O_NONBLOCK)
2449 			return -EAGAIN;
2450 
2451 		if (wait_event_interruptible(
2452 			    ev_queue->poll_wait,
2453 			    (!list_empty(&ev_queue->event_list) ||
2454 			     ev_queue->is_destroyed))) {
2455 			return -ERESTARTSYS;
2456 		}
2457 
2458 		spin_lock_irq(&ev_queue->lock);
2459 		if (ev_queue->is_destroyed) {
2460 			spin_unlock_irq(&ev_queue->lock);
2461 			return -EIO;
2462 		}
2463 	}
2464 
2465 	event = list_entry(ev_queue->event_list.next,
2466 			   struct devx_async_data, list);
2467 	eventsz = event->cmd_out_len +
2468 			sizeof(struct mlx5_ib_uapi_devx_async_cmd_hdr);
2469 
2470 	if (eventsz > count) {
2471 		spin_unlock_irq(&ev_queue->lock);
2472 		return -ENOSPC;
2473 	}
2474 
2475 	list_del(ev_queue->event_list.next);
2476 	spin_unlock_irq(&ev_queue->lock);
2477 
2478 	if (copy_to_user(buf, &event->hdr, eventsz))
2479 		ret = -EFAULT;
2480 	else
2481 		ret = eventsz;
2482 
2483 	atomic_sub(event->cmd_out_len, &ev_queue->bytes_in_use);
2484 	kvfree(event);
2485 	return ret;
2486 }
2487 
2488 static __poll_t devx_async_cmd_event_poll(struct file *filp,
2489 					  struct poll_table_struct *wait)
2490 {
2491 	struct devx_async_cmd_event_file *comp_ev_file = filp->private_data;
2492 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2493 	__poll_t pollflags = 0;
2494 
2495 	poll_wait(filp, &ev_queue->poll_wait, wait);
2496 
2497 	spin_lock_irq(&ev_queue->lock);
2498 	if (ev_queue->is_destroyed)
2499 		pollflags = POLLIN | POLLRDNORM | POLLHUP;
2500 	else if (!list_empty(&ev_queue->event_list))
2501 		pollflags = POLLIN | POLLRDNORM;
2502 	spin_unlock_irq(&ev_queue->lock);
2503 
2504 	return pollflags;
2505 }
2506 
2507 static const struct file_operations devx_async_cmd_event_fops = {
2508 	.owner	 = THIS_MODULE,
2509 	.read	 = devx_async_cmd_event_read,
2510 	.poll    = devx_async_cmd_event_poll,
2511 	.release = uverbs_uobject_fd_release,
2512 	.llseek	 = no_llseek,
2513 };
2514 
2515 static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
2516 				     size_t count, loff_t *pos)
2517 {
2518 	struct devx_async_event_file *ev_file = filp->private_data;
2519 	struct devx_event_subscription *event_sub;
2520 	struct devx_async_event_data *uninitialized_var(event);
2521 	int ret = 0;
2522 	size_t eventsz;
2523 	bool omit_data;
2524 	void *event_data;
2525 
2526 	omit_data = ev_file->omit_data;
2527 
2528 	spin_lock_irq(&ev_file->lock);
2529 
2530 	if (ev_file->is_overflow_err) {
2531 		ev_file->is_overflow_err = 0;
2532 		spin_unlock_irq(&ev_file->lock);
2533 		return -EOVERFLOW;
2534 	}
2535 
2536 
2537 	while (list_empty(&ev_file->event_list)) {
2538 		spin_unlock_irq(&ev_file->lock);
2539 
2540 		if (filp->f_flags & O_NONBLOCK)
2541 			return -EAGAIN;
2542 
2543 		if (wait_event_interruptible(ev_file->poll_wait,
2544 			    (!list_empty(&ev_file->event_list) ||
2545 			     ev_file->is_destroyed))) {
2546 			return -ERESTARTSYS;
2547 		}
2548 
2549 		spin_lock_irq(&ev_file->lock);
2550 		if (ev_file->is_destroyed) {
2551 			spin_unlock_irq(&ev_file->lock);
2552 			return -EIO;
2553 		}
2554 	}
2555 
2556 	if (omit_data) {
2557 		event_sub = list_first_entry(&ev_file->event_list,
2558 					struct devx_event_subscription,
2559 					event_list);
2560 		eventsz = sizeof(event_sub->cookie);
2561 		event_data = &event_sub->cookie;
2562 	} else {
2563 		event = list_first_entry(&ev_file->event_list,
2564 				      struct devx_async_event_data, list);
2565 		eventsz = sizeof(struct mlx5_eqe) +
2566 			sizeof(struct mlx5_ib_uapi_devx_async_event_hdr);
2567 		event_data = &event->hdr;
2568 	}
2569 
2570 	if (eventsz > count) {
2571 		spin_unlock_irq(&ev_file->lock);
2572 		return -EINVAL;
2573 	}
2574 
2575 	if (omit_data)
2576 		list_del_init(&event_sub->event_list);
2577 	else
2578 		list_del(&event->list);
2579 
2580 	spin_unlock_irq(&ev_file->lock);
2581 
2582 	if (copy_to_user(buf, event_data, eventsz))
2583 		/* This points to an application issue, not a kernel concern */
2584 		ret = -EFAULT;
2585 	else
2586 		ret = eventsz;
2587 
2588 	if (!omit_data)
2589 		kfree(event);
2590 	return ret;
2591 }
2592 
2593 static __poll_t devx_async_event_poll(struct file *filp,
2594 				      struct poll_table_struct *wait)
2595 {
2596 	struct devx_async_event_file *ev_file = filp->private_data;
2597 	__poll_t pollflags = 0;
2598 
2599 	poll_wait(filp, &ev_file->poll_wait, wait);
2600 
2601 	spin_lock_irq(&ev_file->lock);
2602 	if (ev_file->is_destroyed)
2603 		pollflags = POLLIN | POLLRDNORM | POLLHUP;
2604 	else if (!list_empty(&ev_file->event_list))
2605 		pollflags = POLLIN | POLLRDNORM;
2606 	spin_unlock_irq(&ev_file->lock);
2607 
2608 	return pollflags;
2609 }
2610 
2611 static void devx_free_subscription(struct rcu_head *rcu)
2612 {
2613 	struct devx_event_subscription *event_sub =
2614 		container_of(rcu, struct devx_event_subscription, rcu);
2615 
2616 	if (event_sub->eventfd.file)
2617 		fdput(event_sub->eventfd);
2618 	uverbs_uobject_put(&event_sub->ev_file->uobj);
2619 	kfree(event_sub);
2620 }
2621 
2622 static const struct file_operations devx_async_event_fops = {
2623 	.owner	 = THIS_MODULE,
2624 	.read	 = devx_async_event_read,
2625 	.poll    = devx_async_event_poll,
2626 	.release = uverbs_uobject_fd_release,
2627 	.llseek	 = no_llseek,
2628 };
2629 
2630 static int devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
2631 					     enum rdma_remove_reason why)
2632 {
2633 	struct devx_async_cmd_event_file *comp_ev_file =
2634 		container_of(uobj, struct devx_async_cmd_event_file,
2635 			     uobj);
2636 	struct devx_async_event_queue *ev_queue = &comp_ev_file->ev_queue;
2637 	struct devx_async_data *entry, *tmp;
2638 
2639 	spin_lock_irq(&ev_queue->lock);
2640 	ev_queue->is_destroyed = 1;
2641 	spin_unlock_irq(&ev_queue->lock);
2642 	wake_up_interruptible(&ev_queue->poll_wait);
2643 
2644 	mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
2645 
2646 	spin_lock_irq(&comp_ev_file->ev_queue.lock);
2647 	list_for_each_entry_safe(entry, tmp,
2648 				 &comp_ev_file->ev_queue.event_list, list) {
2649 		list_del(&entry->list);
2650 		kvfree(entry);
2651 	}
2652 	spin_unlock_irq(&comp_ev_file->ev_queue.lock);
2653 	return 0;
2654 };
2655 
2656 static int devx_async_event_destroy_uobj(struct ib_uobject *uobj,
2657 					 enum rdma_remove_reason why)
2658 {
2659 	struct devx_async_event_file *ev_file =
2660 		container_of(uobj, struct devx_async_event_file,
2661 			     uobj);
2662 	struct devx_event_subscription *event_sub, *event_sub_tmp;
2663 	struct mlx5_ib_dev *dev = ev_file->dev;
2664 
2665 	spin_lock_irq(&ev_file->lock);
2666 	ev_file->is_destroyed = 1;
2667 
2668 	/* free the pending events allocation */
2669 	if (ev_file->omit_data) {
2670 		struct devx_event_subscription *event_sub, *tmp;
2671 
2672 		list_for_each_entry_safe(event_sub, tmp, &ev_file->event_list,
2673 					 event_list)
2674 			list_del_init(&event_sub->event_list);
2675 
2676 	} else {
2677 		struct devx_async_event_data *entry, *tmp;
2678 
2679 		list_for_each_entry_safe(entry, tmp, &ev_file->event_list,
2680 					 list) {
2681 			list_del(&entry->list);
2682 			kfree(entry);
2683 		}
2684 	}
2685 
2686 	spin_unlock_irq(&ev_file->lock);
2687 	wake_up_interruptible(&ev_file->poll_wait);
2688 
2689 	mutex_lock(&dev->devx_event_table.event_xa_lock);
2690 	/* delete the subscriptions which are related to this FD */
2691 	list_for_each_entry_safe(event_sub, event_sub_tmp,
2692 				 &ev_file->subscribed_events_list, file_list) {
2693 		devx_cleanup_subscription(dev, event_sub);
2694 		list_del_rcu(&event_sub->file_list);
2695 		/* subscription may not be used by the read API any more */
2696 		call_rcu(&event_sub->rcu, devx_free_subscription);
2697 	}
2698 	mutex_unlock(&dev->devx_event_table.event_xa_lock);
2699 
2700 	put_device(&dev->ib_dev.dev);
2701 	return 0;
2702 };
2703 
2704 DECLARE_UVERBS_NAMED_METHOD(
2705 	MLX5_IB_METHOD_DEVX_UMEM_REG,
2706 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
2707 			MLX5_IB_OBJECT_DEVX_UMEM,
2708 			UVERBS_ACCESS_NEW,
2709 			UA_MANDATORY),
2710 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
2711 			   UVERBS_ATTR_TYPE(u64),
2712 			   UA_MANDATORY),
2713 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
2714 			   UVERBS_ATTR_TYPE(u64),
2715 			   UA_MANDATORY),
2716 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
2717 			     enum ib_access_flags),
2718 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
2719 			    UVERBS_ATTR_TYPE(u32),
2720 			    UA_MANDATORY));
2721 
2722 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2723 	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
2724 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
2725 			MLX5_IB_OBJECT_DEVX_UMEM,
2726 			UVERBS_ACCESS_DESTROY,
2727 			UA_MANDATORY));
2728 
2729 DECLARE_UVERBS_NAMED_METHOD(
2730 	MLX5_IB_METHOD_DEVX_QUERY_EQN,
2731 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
2732 			   UVERBS_ATTR_TYPE(u32),
2733 			   UA_MANDATORY),
2734 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
2735 			    UVERBS_ATTR_TYPE(u32),
2736 			    UA_MANDATORY));
2737 
2738 DECLARE_UVERBS_NAMED_METHOD(
2739 	MLX5_IB_METHOD_DEVX_QUERY_UAR,
2740 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
2741 			   UVERBS_ATTR_TYPE(u32),
2742 			   UA_MANDATORY),
2743 	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
2744 			    UVERBS_ATTR_TYPE(u32),
2745 			    UA_MANDATORY));
2746 
2747 DECLARE_UVERBS_NAMED_METHOD(
2748 	MLX5_IB_METHOD_DEVX_OTHER,
2749 	UVERBS_ATTR_PTR_IN(
2750 		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
2751 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2752 		UA_MANDATORY,
2753 		UA_ALLOC_AND_COPY),
2754 	UVERBS_ATTR_PTR_OUT(
2755 		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
2756 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2757 		UA_MANDATORY));
2758 
2759 DECLARE_UVERBS_NAMED_METHOD(
2760 	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
2761 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
2762 			MLX5_IB_OBJECT_DEVX_OBJ,
2763 			UVERBS_ACCESS_NEW,
2764 			UA_MANDATORY),
2765 	UVERBS_ATTR_PTR_IN(
2766 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
2767 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2768 		UA_MANDATORY,
2769 		UA_ALLOC_AND_COPY),
2770 	UVERBS_ATTR_PTR_OUT(
2771 		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
2772 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2773 		UA_MANDATORY));
2774 
2775 DECLARE_UVERBS_NAMED_METHOD_DESTROY(
2776 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
2777 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
2778 			MLX5_IB_OBJECT_DEVX_OBJ,
2779 			UVERBS_ACCESS_DESTROY,
2780 			UA_MANDATORY));
2781 
2782 DECLARE_UVERBS_NAMED_METHOD(
2783 	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
2784 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
2785 			UVERBS_IDR_ANY_OBJECT,
2786 			UVERBS_ACCESS_WRITE,
2787 			UA_MANDATORY),
2788 	UVERBS_ATTR_PTR_IN(
2789 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
2790 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2791 		UA_MANDATORY,
2792 		UA_ALLOC_AND_COPY),
2793 	UVERBS_ATTR_PTR_OUT(
2794 		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
2795 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2796 		UA_MANDATORY));
2797 
2798 DECLARE_UVERBS_NAMED_METHOD(
2799 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
2800 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
2801 			UVERBS_IDR_ANY_OBJECT,
2802 			UVERBS_ACCESS_READ,
2803 			UA_MANDATORY),
2804 	UVERBS_ATTR_PTR_IN(
2805 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
2806 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2807 		UA_MANDATORY,
2808 		UA_ALLOC_AND_COPY),
2809 	UVERBS_ATTR_PTR_OUT(
2810 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
2811 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
2812 		UA_MANDATORY));
2813 
2814 DECLARE_UVERBS_NAMED_METHOD(
2815 	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
2816 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
2817 			UVERBS_IDR_ANY_OBJECT,
2818 			UVERBS_ACCESS_READ,
2819 			UA_MANDATORY),
2820 	UVERBS_ATTR_PTR_IN(
2821 		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
2822 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
2823 		UA_MANDATORY,
2824 		UA_ALLOC_AND_COPY),
2825 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
2826 		u16, UA_MANDATORY),
2827 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
2828 		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2829 		UVERBS_ACCESS_READ,
2830 		UA_MANDATORY),
2831 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
2832 		UVERBS_ATTR_TYPE(u64),
2833 		UA_MANDATORY));
2834 
2835 DECLARE_UVERBS_NAMED_METHOD(
2836 	MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT,
2837 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_HANDLE,
2838 		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2839 		UVERBS_ACCESS_READ,
2840 		UA_MANDATORY),
2841 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_OBJ_HANDLE,
2842 		MLX5_IB_OBJECT_DEVX_OBJ,
2843 		UVERBS_ACCESS_READ,
2844 		UA_OPTIONAL),
2845 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_TYPE_NUM_LIST,
2846 		UVERBS_ATTR_MIN_SIZE(sizeof(u16)),
2847 		UA_MANDATORY,
2848 		UA_ALLOC_AND_COPY),
2849 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_COOKIE,
2850 		UVERBS_ATTR_TYPE(u64),
2851 		UA_OPTIONAL),
2852 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_SUBSCRIBE_EVENT_FD_NUM,
2853 		UVERBS_ATTR_TYPE(u32),
2854 		UA_OPTIONAL));
2855 
2856 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
2857 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
2858 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
2859 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN),
2860 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT));
2861 
2862 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
2863 			    UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
2864 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
2865 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
2866 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
2867 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
2868 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
2869 
2870 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
2871 			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
2872 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
2873 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
2874 
2875 
2876 DECLARE_UVERBS_NAMED_METHOD(
2877 	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
2878 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
2879 			MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2880 			UVERBS_ACCESS_NEW,
2881 			UA_MANDATORY));
2882 
2883 DECLARE_UVERBS_NAMED_OBJECT(
2884 	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2885 	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
2886 			     devx_async_cmd_event_destroy_uobj,
2887 			     &devx_async_cmd_event_fops, "[devx_async_cmd]",
2888 			     FMODE_READ),
2889 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
2890 
2891 DECLARE_UVERBS_NAMED_METHOD(
2892 	MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC,
2893 	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_HANDLE,
2894 			MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2895 			UVERBS_ACCESS_NEW,
2896 			UA_MANDATORY),
2897 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_ASYNC_EVENT_FD_ALLOC_FLAGS,
2898 			enum mlx5_ib_uapi_devx_create_event_channel_flags,
2899 			UA_MANDATORY));
2900 
2901 DECLARE_UVERBS_NAMED_OBJECT(
2902 	MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2903 	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_event_file),
2904 			     devx_async_event_destroy_uobj,
2905 			     &devx_async_event_fops, "[devx_async_event]",
2906 			     FMODE_READ),
2907 	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_EVENT_FD_ALLOC));
2908 
2909 static bool devx_is_supported(struct ib_device *device)
2910 {
2911 	struct mlx5_ib_dev *dev = to_mdev(device);
2912 
2913 	return MLX5_CAP_GEN(dev->mdev, log_max_uctx);
2914 }
2915 
2916 const struct uapi_definition mlx5_ib_devx_defs[] = {
2917 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2918 		MLX5_IB_OBJECT_DEVX,
2919 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2920 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2921 		MLX5_IB_OBJECT_DEVX_OBJ,
2922 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2923 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2924 		MLX5_IB_OBJECT_DEVX_UMEM,
2925 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2926 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2927 		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
2928 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2929 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
2930 		MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD,
2931 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
2932 	{},
2933 };
2934