xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
/* Every virtio-net and generic virtio feature bit this file knows by name.
 * print_features() warns about any feature bit that falls outside this mask.
 */
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
/* All virtio device status bits this file knows by name. print_status()
 * warns about any status bit that falls outside this mask.
 */
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
/* Evaluates to 1 when bit _feature is set in (_mvdev)->actual_features, else 0. */
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
/* NOTE(review): presumably a sentinel meaning "no VLAN tag" (0x1000 does not
 * fit in a 12-bit VLAN id); its users are outside this part of the file.
 */
49 #define MLX5V_UNTAGGED 0x1000
50 
51 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
52  * 5.1.6.5.5 "Device operation in multiqueue mode":
53  *
54  * Multiqueue is disabled by default.
55  * The driver enables multiqueue by sending a command using class
56  * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
57  * operation, as follows: ...
58  *
59  * One queue pair is two virtqueues, hence the default count of 2.
60  */
59 #define MLX5V_DEFAULT_VQ_COUNT 2
60 
/* NOTE(review): by its name, the default number of entries per virtqueue;
 * the code that applies it is outside this part of the file.
 */
61 #define MLX5V_DEFAULT_VQ_SIZE 256
62 
/* Backing storage of a completion queue; filled in by cq_frag_buf_alloc(). */
63 struct mlx5_vdpa_cq_buf {
	/* fragment buffer control block, set up with mlx5_init_fbc() */
64 	struct mlx5_frag_buf_ctrl fbc;
	/* the fragmented buffer that holds the CQEs */
65 	struct mlx5_frag_buf frag_buf;
	/* set to MLX5_VDPA_CQE_SIZE */
66 	int cqe_size;
	/* number of CQEs the buffer was allocated for */
67 	int nent;
68 };
69 
/* Completion queue used for one virtqueue's notification channel. */
70 struct mlx5_vdpa_cq {
	/* mlx5 core CQ object; mlx5_vdpa_cq_comp() recovers the virtqueue from it */
71 	struct mlx5_core_cq mcq;
72 	struct mlx5_vdpa_cq_buf buf;
	/* doorbell record: db.db is used for set_ci, db.db + 1 for arm */
73 	struct mlx5_db db;
	/* number of CQEs (set to num_ent in cq_create()); used as a mask in get_sw_cqe() */
74 	int cqe;
75 };
76 
/* One of the three driver-provided memory regions a virtqueue object needs. */
77 struct mlx5_vdpa_umem {
78 	struct mlx5_frag_buf_ctrl fbc;
	/* backing memory, allocated by umem_frag_buf_alloc() */
79 	struct mlx5_frag_buf frag_buf;
	/* size in bytes, computed by set_umem_size() */
80 	int size;
	/* umem object id returned by the CREATE_UMEM command */
81 	u32 id;
82 };
83 
/* One end of the QP pair that implements the notification channel. */
84 struct mlx5_vdpa_qp {
	/* core QP; qpn and uid are filled in by qp_create() */
85 	struct mlx5_core_qp mqp;
	/* receive queue buffer; only allocated when !fw */
86 	struct mlx5_frag_buf frag_buf;
	/* doorbell record; only allocated when !fw */
87 	struct mlx5_db db;
	/* running count of posted receive entries, advanced by rx_post() */
88 	u16 head;
	/* true for the firmware-side QP, false for the driver-side QP */
89 	bool fw;
90 };
91 
/* NOTE(review): by its name and fields, a saved copy of the per-virtqueue
 * state (addresses, indices, MSI mapping, ready flag) to be restored later;
 * the code that saves and restores it is outside this part of the file.
 */
92 struct mlx5_vq_restore_info {
93 	u32 num_ent;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u16 avail_index;
98 	u16 used_index;
99 	struct msi_map map;
100 	bool ready;
101 	bool restore;
102 };
103 
/* Driver state for a single virtqueue. */
104 struct mlx5_vdpa_virtqueue {
105 	bool ready;
	/* ring addresses programmed into the virtqueue object by create_virtqueue() */
106 	u64 desc_addr;
107 	u64 device_addr;
108 	u64 driver_addr;
	/* queue size in entries; also sizes the CQ, the RQ and the umems */
109 	u32 num_ent;
110 
111 	/* Resources for implementing the notification channel from the device
112 	 * to the driver. fwqp is the firmware end of an RC connection; the
113 	 * other end is vqqp used by the driver. cq is where completions are
114 	 * reported.
115 	 */
116 	struct mlx5_vdpa_cq cq;
117 	struct mlx5_vdpa_qp fwqp;
118 	struct mlx5_vdpa_qp vqqp;
119 
120 	/* umem resources are required for the virtqueue operation. Their use
121 	 * is internal and they must be provided by the driver.
122 	 */
123 	struct mlx5_vdpa_umem umem1;
124 	struct mlx5_vdpa_umem umem2;
125 	struct mlx5_vdpa_umem umem3;
126 
127 	u32 counter_set_id;
128 	bool initialized;
	/* virtqueue index; odd indices are TX queues (see vq_is_tx()) */
129 	int index;
	/* object id returned by firmware when the virtqueue object is created */
130 	u32 virtq_id;
131 	struct mlx5_vdpa_net *ndev;
132 	u16 avail_idx;
133 	u16 used_idx;
	/* last known firmware object state; INIT right after creation */
134 	int fw_state;
135 
	/* MLX5_VIRTQ_MODIFY_MASK_* bits accumulated for a later modify command */
136 	u64 modified_fields;
137 
	/* memory regions this virtqueue holds a reference on */
138 	struct mlx5_vdpa_mr *vq_mr;
139 	struct mlx5_vdpa_mr *desc_mr;
140 
	/* when map.virq is set the queue is created in MSI-X event mode */
141 	struct msi_map map;
142 
143 	/* keep last in the struct */
144 	struct mlx5_vq_restore_info ri;
145 };
146 
147 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
148 {
149 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
150 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
151 			return idx < 2;
152 		else
153 			return idx < 3;
154 	}
155 
156 	return idx <= mvdev->max_idx;
157 }
158 
/* Forward declarations for helpers defined later in the file. */
159 static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
160 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
161 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
162 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
163 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
164 
/* When false, print_status() and print_features() stop after their
 * invalid-bit warning and skip the verbose per-bit dump.
 */
165 static bool mlx5_vdpa_debug;
166 
/* Log the name of _feature if that bit is set. Relies on locals named
 * 'features' and 'mvdev' being in scope at the point of use.
 */
167 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
168 	do {                                                                                       \
169 		if (features & BIT_ULL(_feature))                                                  \
170 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
171 	} while (0)
172 
/* Log the name of _status if any of its bits is set. Relies on locals named
 * 'status' and 'mvdev' being in scope at the point of use.
 */
173 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
174 	do {                                                                                       \
175 		if (status & (_status))                                                            \
176 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
177 	} while (0)
178 
179 /* TODO: cross-endian support */
180 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
181 {
182 	return virtio_legacy_is_little_endian() ||
183 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
184 }
185 
186 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
187 {
188 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
189 }
190 
191 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
192 {
193 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
194 }
195 
196 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
197 {
198 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
199 		return 2;
200 
201 	return mvdev->max_vqs;
202 }
203 
204 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
205 {
206 	return idx == ctrl_vq_idx(mvdev);
207 }
208 
209 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
210 {
211 	if (status & ~VALID_STATUS_MASK)
212 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
213 			       status & ~VALID_STATUS_MASK);
214 
215 	if (!mlx5_vdpa_debug)
216 		return;
217 
218 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
219 	if (set && !status) {
220 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
221 		return;
222 	}
223 
224 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
225 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
226 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
227 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
228 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
229 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
230 }
231 
232 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
233 {
234 	if (features & ~VALID_FEATURES_MASK)
235 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
236 			       features & ~VALID_FEATURES_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
242 	if (!features)
243 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
244 
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
279 }
280 
281 static int create_tis(struct mlx5_vdpa_net *ndev)
282 {
283 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
284 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
285 	void *tisc;
286 	int err;
287 
288 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
289 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
290 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
291 	if (err)
292 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
293 
294 	return err;
295 }
296 
297 static void destroy_tis(struct mlx5_vdpa_net *ndev)
298 {
299 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
300 }
301 
/* Size in bytes of one completion queue entry, and its log2 (the stride
 * handed to mlx5_init_fbc() in cq_frag_buf_alloc()).
 */
302 #define MLX5_VDPA_CQE_SIZE 64
303 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
304 
305 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
306 {
307 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
308 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
309 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
310 	int err;
311 
312 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
313 				       ndev->mvdev.mdev->priv.numa_node);
314 	if (err)
315 		return err;
316 
317 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
318 
319 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
320 	buf->nent = nent;
321 
322 	return 0;
323 }
324 
325 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
326 {
327 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
328 
329 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
330 					ndev->mvdev.mdev->priv.numa_node);
331 }
332 
333 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
334 {
335 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
336 }
337 
338 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
339 {
340 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
341 }
342 
343 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
344 {
345 	struct mlx5_cqe64 *cqe64;
346 	void *cqe;
347 	int i;
348 
349 	for (i = 0; i < buf->nent; i++) {
350 		cqe = get_cqe(vcq, i);
351 		cqe64 = cqe;
352 		cqe64->op_own = MLX5_CQE_INVALID << 4;
353 	}
354 }
355 
356 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
357 {
358 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
359 
360 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
361 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
362 		return cqe64;
363 
364 	return NULL;
365 }
366 
367 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
368 {
369 	vqp->head += n;
370 	vqp->db.db[0] = cpu_to_be32(vqp->head);
371 }
372 
373 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
374 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
375 {
376 	struct mlx5_vdpa_qp *vqp;
377 	__be64 *pas;
378 	void *qpc;
379 
380 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
381 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
382 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
383 	if (vqp->fw) {
384 		/* Firmware QP is allocated by the driver for the firmware's
385 		 * use so we can skip part of the params as they will be chosen by firmware
386 		 */
387 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
388 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
389 		MLX5_SET(qpc, qpc, no_sq, 1);
390 		return;
391 	}
392 
393 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
394 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
395 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
396 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
397 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
398 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
399 	MLX5_SET(qpc, qpc, no_sq, 1);
400 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
401 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
402 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
403 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
404 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
405 }
406 
407 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
408 {
409 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
410 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
411 					ndev->mvdev.mdev->priv.numa_node);
412 }
413 
414 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
415 {
416 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
417 }
418 
419 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
420 		     struct mlx5_vdpa_qp *vqp)
421 {
422 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
423 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
424 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
425 	void *qpc;
426 	void *in;
427 	int err;
428 
429 	if (!vqp->fw) {
430 		vqp = &mvq->vqqp;
431 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
432 		if (err)
433 			return err;
434 
435 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
436 		if (err)
437 			goto err_db;
438 		inlen += vqp->frag_buf.npages * sizeof(__be64);
439 	}
440 
441 	in = kzalloc(inlen, GFP_KERNEL);
442 	if (!in) {
443 		err = -ENOMEM;
444 		goto err_kzalloc;
445 	}
446 
447 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
448 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
449 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
450 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
451 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
452 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
453 	if (!vqp->fw)
454 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
455 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
456 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
457 	kfree(in);
458 	if (err)
459 		goto err_kzalloc;
460 
461 	vqp->mqp.uid = ndev->mvdev.res.uid;
462 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
463 
464 	if (!vqp->fw)
465 		rx_post(vqp, mvq->num_ent);
466 
467 	return 0;
468 
469 err_kzalloc:
470 	if (!vqp->fw)
471 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 err_db:
473 	if (!vqp->fw)
474 		rq_buf_free(ndev, vqp);
475 
476 	return err;
477 }
478 
479 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
480 {
481 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
482 
483 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
484 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
485 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
486 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
487 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
488 	if (!vqp->fw) {
489 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
490 		rq_buf_free(ndev, vqp);
491 	}
492 }
493 
494 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
495 {
496 	return get_sw_cqe(cq, cq->mcq.cons_index);
497 }
498 
499 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
500 {
501 	struct mlx5_cqe64 *cqe64;
502 
503 	cqe64 = next_cqe_sw(vcq);
504 	if (!cqe64)
505 		return -EAGAIN;
506 
507 	vcq->mcq.cons_index++;
508 	return 0;
509 }
510 
511 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
512 {
513 	struct mlx5_vdpa_net *ndev = mvq->ndev;
514 	struct vdpa_callback *event_cb;
515 
516 	event_cb = &ndev->event_cbs[mvq->index];
517 	mlx5_cq_set_ci(&mvq->cq.mcq);
518 
519 	/* make sure CQ cosumer update is visible to the hardware before updating
520 	 * RX doorbell record.
521 	 */
522 	dma_wmb();
523 	rx_post(&mvq->vqqp, num);
524 	if (event_cb->callback)
525 		event_cb->callback(event_cb->private);
526 }
527 
528 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
529 {
530 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
531 	struct mlx5_vdpa_net *ndev = mvq->ndev;
532 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
533 	int num = 0;
534 
535 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
536 		num++;
537 		if (num > mvq->num_ent / 2) {
538 			/* If completions keep coming while we poll, we want to
539 			 * let the hardware know that we consumed them by
540 			 * updating the doorbell record.  We also let vdpa core
541 			 * know about this so it passes it on the virtio driver
542 			 * on the guest.
543 			 */
544 			mlx5_vdpa_handle_completions(mvq, num);
545 			num = 0;
546 		}
547 	}
548 
549 	if (num)
550 		mlx5_vdpa_handle_completions(mvq, num);
551 
552 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
553 }
554 
555 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
556 {
557 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
558 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
559 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
560 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
561 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
562 	__be64 *pas;
563 	int inlen;
564 	void *cqc;
565 	void *in;
566 	int err;
567 	int eqn;
568 
569 	err = mlx5_db_alloc(mdev, &vcq->db);
570 	if (err)
571 		return err;
572 
573 	vcq->mcq.set_ci_db = vcq->db.db;
574 	vcq->mcq.arm_db = vcq->db.db + 1;
575 	vcq->mcq.cqe_sz = 64;
576 
577 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
578 	if (err)
579 		goto err_db;
580 
581 	cq_frag_buf_init(vcq, &vcq->buf);
582 
583 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
584 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
585 	in = kzalloc(inlen, GFP_KERNEL);
586 	if (!in) {
587 		err = -ENOMEM;
588 		goto err_vzalloc;
589 	}
590 
591 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
592 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
593 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
594 
595 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
596 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
597 
598 	/* Use vector 0 by default. Consider adding code to choose least used
599 	 * vector.
600 	 */
601 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
602 	if (err)
603 		goto err_vec;
604 
605 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
606 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
607 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
608 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
609 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
610 
611 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
612 	if (err)
613 		goto err_vec;
614 
615 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
616 	vcq->cqe = num_ent;
617 	vcq->mcq.set_ci_db = vcq->db.db;
618 	vcq->mcq.arm_db = vcq->db.db + 1;
619 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
620 	kfree(in);
621 	return 0;
622 
623 err_vec:
624 	kfree(in);
625 err_vzalloc:
626 	cq_frag_buf_free(ndev, &vcq->buf);
627 err_db:
628 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
629 	return err;
630 }
631 
632 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
633 {
634 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
635 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
636 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
637 
638 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
639 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
640 		return;
641 	}
642 	cq_frag_buf_free(ndev, &vcq->buf);
643 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
644 }
645 
646 static int read_umem_params(struct mlx5_vdpa_net *ndev)
647 {
648 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
649 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
650 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
651 	int out_size;
652 	void *caps;
653 	void *out;
654 	int err;
655 
656 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
657 	out = kzalloc(out_size, GFP_KERNEL);
658 	if (!out)
659 		return -ENOMEM;
660 
661 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
662 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
663 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
664 	if (err) {
665 		mlx5_vdpa_warn(&ndev->mvdev,
666 			"Failed reading vdpa umem capabilities with err %d\n", err);
667 		goto out;
668 	}
669 
670 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
671 
672 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
673 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
674 
675 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
676 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
677 
678 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
679 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
680 
681 out:
682 	kfree(out);
683 	return 0;
684 }
685 
686 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
687 			  struct mlx5_vdpa_umem **umemp)
688 {
689 	u32 p_a;
690 	u32 p_b;
691 
692 	switch (num) {
693 	case 1:
694 		p_a = ndev->umem_1_buffer_param_a;
695 		p_b = ndev->umem_1_buffer_param_b;
696 		*umemp = &mvq->umem1;
697 		break;
698 	case 2:
699 		p_a = ndev->umem_2_buffer_param_a;
700 		p_b = ndev->umem_2_buffer_param_b;
701 		*umemp = &mvq->umem2;
702 		break;
703 	case 3:
704 		p_a = ndev->umem_3_buffer_param_a;
705 		p_b = ndev->umem_3_buffer_param_b;
706 		*umemp = &mvq->umem3;
707 		break;
708 	}
709 
710 	(*umemp)->size = p_a * mvq->num_ent + p_b;
711 }
712 
713 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
714 {
715 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
716 }
717 
718 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
719 {
720 	int inlen;
721 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
722 	void *um;
723 	void *in;
724 	int err;
725 	__be64 *pas;
726 	struct mlx5_vdpa_umem *umem;
727 
728 	set_umem_size(ndev, mvq, num, &umem);
729 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
730 	if (err)
731 		return err;
732 
733 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
734 
735 	in = kzalloc(inlen, GFP_KERNEL);
736 	if (!in) {
737 		err = -ENOMEM;
738 		goto err_in;
739 	}
740 
741 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
742 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
743 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
744 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
745 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
746 
747 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
748 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
749 
750 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
751 	if (err) {
752 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
753 		goto err_cmd;
754 	}
755 
756 	kfree(in);
757 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
758 
759 	return 0;
760 
761 err_cmd:
762 	kfree(in);
763 err_in:
764 	umem_frag_buf_free(ndev, umem);
765 	return err;
766 }
767 
768 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
769 {
770 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
771 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
772 	struct mlx5_vdpa_umem *umem;
773 
774 	switch (num) {
775 	case 1:
776 		umem = &mvq->umem1;
777 		break;
778 	case 2:
779 		umem = &mvq->umem2;
780 		break;
781 	case 3:
782 		umem = &mvq->umem3;
783 		break;
784 	}
785 
786 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
787 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
788 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
789 		return;
790 
791 	umem_frag_buf_free(ndev, umem);
792 }
793 
/* Create umems 1..3 for @mvq. If any creation fails, the umems created so far
 * are destroyed in reverse order and the error is returned.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (!err)
			continue;

		/* Unwind: umem 'num' itself was not created. */
		while (--num > 0)
			umem_destroy(ndev, mvq, num);

		return err;
	}

	return 0;
}
812 
/* Destroy the three umems of @mvq, from umem 3 down to umem 1. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num > 0) {
		umem_destroy(ndev, mvq, num);
		num--;
	}
}
820 
821 static int get_queue_type(struct mlx5_vdpa_net *ndev)
822 {
823 	u32 type_mask;
824 
825 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
826 
827 	/* prefer split queue */
828 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
829 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
830 
831 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
832 
833 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
834 }
835 
836 static bool vq_is_tx(u16 idx)
837 {
838 	return idx % 2;
839 }
840 
/* Bit positions of virtio-net features inside the device's queue feature
 * mask. get_features() builds that mask and create_virtqueue() writes it as
 * queue_feature_bit_mask_12_3 and queue_feature_bit_mask_2_0.
 * NOTE(review): MLX5_VIRTIO_NET_F_GUEST_CSUM is defined here but not used by
 * get_features() - confirm whether that omission is intentional.
 */
841 enum {
842 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
843 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
844 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
845 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
846 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
847 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
848 	MLX5_VIRTIO_NET_F_CSUM = 10,
849 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
850 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
851 };
852 
853 static u16 get_features(u64 features)
854 {
855 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
856 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
857 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
858 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
859 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
860 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
861 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
862 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
863 }
864 
865 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
866 {
867 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
868 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
869 }
870 
871 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
872 {
873 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
874 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
875 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
876 }
877 
/* Create the firmware virtio-net queue object for @mvq.
 *
 * The three umems are created first, then a CREATE_GENERAL_OBJECT command is
 * built and executed. When @filled is true the object is created with the
 * negotiated features, the ring addresses, the saved indices and the current
 * memory keys, and references are taken on the memory regions used. When
 * @filled is false the object is created from the device's own feature set
 * without addresses, and the mkey fields are only flagged in
 * mvq->modified_fields.
 *
 * Returns 0 on success. On failure the umems are destroyed and the error
 * code is returned.
 */
878 static int create_virtqueue(struct mlx5_vdpa_net *ndev,
879 			    struct mlx5_vdpa_virtqueue *mvq,
880 			    bool filled)
881 {
882 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
883 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
884 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
885 	struct mlx5_vdpa_mr *vq_mr;
886 	struct mlx5_vdpa_mr *vq_desc_mr;
887 	u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
888 	void *obj_context;
889 	u16 mlx_features;
890 	void *cmd_hdr;
891 	void *vq_ctx;
892 	void *in;
893 	int err;
894 
895 	err = umems_create(ndev, mvq);
896 	if (err)
897 		return err;
898 
899 	in = kzalloc(inlen, GFP_KERNEL);
900 	if (!in) {
901 		err = -ENOMEM;
902 		goto err_alloc;
903 	}
904 
905 	mlx_features = get_features(features);
906 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
907 
908 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
909 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
910 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
911 
912 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	/* The feature mask is split across two fields: bits 12..3 and bits 2..0. */
913 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
914 		 mlx_features >> 3);
915 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
916 		 mlx_features & 7);
917 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
918 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
919 
	/* Only TX queues (odd index) are bound to the TIS. */
920 	if (vq_is_tx(mvq->index))
921 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
922 
	/* Notify through MSI-X when a vector is mapped, else through the firmware QP. */
923 	if (mvq->map.virq) {
924 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
925 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
926 	} else {
927 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
928 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
929 	}
930 
931 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
932 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
933 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
934 		 !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
935 
936 	if (filled) {
937 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
938 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
939 
940 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
941 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
942 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
943 
944 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
945 		if (vq_mr)
946 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
947 
948 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
949 		if (vq_desc_mr &&
950 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
951 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
952 	} else {
953 		/* If there is no mr update, make sure that the existing ones are set
954 		 * modify to ready.
955 		 */
956 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
957 		if (vq_mr)
958 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
959 
960 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
961 		if (vq_desc_mr)
962 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
963 	}
964 
965 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
966 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
967 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
968 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
969 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
970 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
971 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
972 	if (counters_supported(&ndev->mvdev))
973 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
974 
975 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
976 	if (err)
977 		goto err_cmd;
978 
979 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
980 	kfree(in);
981 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
982 
	/* Take references on the memory regions whose keys were programmed above. */
983 	if (filled) {
984 		mlx5_vdpa_get_mr(mvdev, vq_mr);
985 		mvq->vq_mr = vq_mr;
986 
987 		if (vq_desc_mr &&
988 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
989 			mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
990 			mvq->desc_mr = vq_desc_mr;
991 		}
992 	}
993 
994 	return 0;
995 
996 err_cmd:
997 	kfree(in);
998 err_alloc:
999 	umems_destroy(ndev, mvq);
1000 	return err;
1001 }
1002 
1003 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1004 {
1005 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
1006 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
1007 
1008 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
1009 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1010 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
1011 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
1012 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
1013 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1014 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
1015 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
1016 		return;
1017 	}
1018 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
1019 	umems_destroy(ndev, mvq);
1020 
1021 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
1022 	mvq->vq_mr = NULL;
1023 
1024 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
1025 	mvq->desc_mr = NULL;
1026 }
1027 
1028 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1029 {
1030 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
1031 }
1032 
1033 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1034 {
1035 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1036 }
1037 
1038 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1039 			int *outlen, u32 qpn, u32 rqpn)
1040 {
1041 	void *qpc;
1042 	void *pp;
1043 
1044 	switch (cmd) {
1045 	case MLX5_CMD_OP_2RST_QP:
1046 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1047 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1048 		*in = kzalloc(*inlen, GFP_KERNEL);
1049 		*out = kzalloc(*outlen, GFP_KERNEL);
1050 		if (!*in || !*out)
1051 			goto outerr;
1052 
1053 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1054 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1055 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1056 		break;
1057 	case MLX5_CMD_OP_RST2INIT_QP:
1058 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1059 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1060 		*in = kzalloc(*inlen, GFP_KERNEL);
1061 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1062 		if (!*in || !*out)
1063 			goto outerr;
1064 
1065 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1066 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1067 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1068 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1069 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1070 		MLX5_SET(qpc, qpc, rwe, 1);
1071 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1072 		MLX5_SET(ads, pp, vhca_port_num, 1);
1073 		break;
1074 	case MLX5_CMD_OP_INIT2RTR_QP:
1075 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1076 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1077 		*in = kzalloc(*inlen, GFP_KERNEL);
1078 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1079 		if (!*in || !*out)
1080 			goto outerr;
1081 
1082 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1083 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1084 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1085 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1086 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1087 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1088 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1089 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1090 		MLX5_SET(ads, pp, fl, 1);
1091 		break;
1092 	case MLX5_CMD_OP_RTR2RTS_QP:
1093 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1094 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1095 		*in = kzalloc(*inlen, GFP_KERNEL);
1096 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1097 		if (!*in || !*out)
1098 			goto outerr;
1099 
1100 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1101 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1102 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1103 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1104 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1105 		MLX5_SET(ads, pp, ack_timeout, 14);
1106 		MLX5_SET(qpc, qpc, retry_count, 7);
1107 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1108 		break;
1109 	default:
1110 		goto outerr_nullify;
1111 	}
1112 
1113 	return;
1114 
1115 outerr:
1116 	kfree(*in);
1117 	kfree(*out);
1118 outerr_nullify:
1119 	*in = NULL;
1120 	*out = NULL;
1121 }
1122 
/* Release a pair of command mailboxes obtained from alloc_inout(). */
static void free_inout(void *in, void *out)
{
	kfree(out);
	kfree(in);
}
1128 
1129 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1130  * firmware. The fw argument indicates whether the subjected QP is the one used
1131  * by firmware.
1132  */
1133 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1134 {
1135 	int outlen;
1136 	int inlen;
1137 	void *out;
1138 	void *in;
1139 	int err;
1140 
1141 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1142 	if (!in || !out)
1143 		return -ENOMEM;
1144 
1145 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1146 	free_inout(in, out);
1147 	return err;
1148 }
1149 
1150 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1151 {
1152 	int err;
1153 
1154 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1155 	if (err)
1156 		return err;
1157 
1158 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1159 	if (err)
1160 		return err;
1161 
1162 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1163 	if (err)
1164 		return err;
1165 
1166 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1167 	if (err)
1168 		return err;
1169 
1170 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1171 	if (err)
1172 		return err;
1173 
1174 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1175 	if (err)
1176 		return err;
1177 
1178 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1179 }
1180 
1181 struct mlx5_virtq_attr {
1182 	u8 state;
1183 	u16 available_index;
1184 	u16 used_index;
1185 };
1186 
1187 struct mlx5_virtqueue_query_mem {
1188 	u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
1189 	u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
1190 };
1191 
1192 struct mlx5_virtqueue_modify_mem {
1193 	u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
1194 	u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
1195 };
1196 
1197 static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1198 				     struct mlx5_vdpa_virtqueue *mvq,
1199 				     struct mlx5_virtqueue_query_mem *cmd)
1200 {
1201 	void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1202 
1203 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1204 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1205 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1206 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1207 }
1208 
1209 static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
1210 				struct mlx5_virtqueue_query_mem *cmd,
1211 				struct mlx5_virtq_attr *attr)
1212 {
1213 	void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
1214 
1215 	memset(attr, 0, sizeof(*attr));
1216 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1217 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1218 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1219 }
1220 
1221 static int query_virtqueues(struct mlx5_vdpa_net *ndev,
1222 			    int start_vq,
1223 			    int num_vqs,
1224 			    struct mlx5_virtq_attr *attrs)
1225 {
1226 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1227 	struct mlx5_virtqueue_query_mem *cmd_mem;
1228 	struct mlx5_vdpa_async_cmd *cmds;
1229 	int err = 0;
1230 
1231 	WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n",
1232 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1233 
1234 	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
1235 	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
1236 	if (!cmds || !cmd_mem) {
1237 		err = -ENOMEM;
1238 		goto done;
1239 	}
1240 
1241 	for (int i = 0; i < num_vqs; i++) {
1242 		cmds[i].in = &cmd_mem[i].in;
1243 		cmds[i].inlen = sizeof(cmd_mem[i].in);
1244 		cmds[i].out = &cmd_mem[i].out;
1245 		cmds[i].outlen = sizeof(cmd_mem[i].out);
1246 		fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
1247 	}
1248 
1249 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1250 	if (err) {
1251 		mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n",
1252 			      start_vq, start_vq + num_vqs, err);
1253 		goto done;
1254 	}
1255 
1256 	for (int i = 0; i < num_vqs; i++) {
1257 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1258 		int vq_idx = start_vq + i;
1259 
1260 		if (cmd->err) {
1261 			mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, err);
1262 			if (!err)
1263 				err = cmd->err;
1264 			continue;
1265 		}
1266 
1267 		query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]);
1268 	}
1269 
1270 done:
1271 	kvfree(cmd_mem);
1272 	kvfree(cmds);
1273 	return err;
1274 }
1275 
1276 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1277 {
1278 	return ndev->mvdev.vdev.config->resume;
1279 }
1280 
1281 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1282 {
1283 	switch (oldstate) {
1284 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1285 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1286 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1287 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1288 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1289 		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1290 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1291 	default:
1292 		return false;
1293 	}
1294 }
1295 
1296 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1297 {
1298 	/* Only state is always modifiable */
1299 	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1300 		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1301 		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1302 
1303 	return true;
1304 }
1305 
1306 static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1307 				      struct mlx5_vdpa_virtqueue *mvq,
1308 				      int state,
1309 				      struct mlx5_virtqueue_modify_mem *cmd)
1310 {
1311 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1312 	struct mlx5_vdpa_mr *desc_mr = NULL;
1313 	struct mlx5_vdpa_mr *vq_mr = NULL;
1314 	void *obj_context;
1315 	void *cmd_hdr;
1316 	void *vq_ctx;
1317 
1318 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1319 
1320 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1321 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1322 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1323 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1324 
1325 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context);
1326 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1327 
1328 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1329 		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1330 
1331 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1332 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1333 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1334 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1335 	}
1336 
1337 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1338 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1339 
1340 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1341 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1342 
1343 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
1344 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
1345 			!!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
1346 
1347 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
1348 		u16 mlx_features = get_features(ndev->mvdev.actual_features);
1349 
1350 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
1351 			 mlx_features >> 3);
1352 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
1353 			 mlx_features & 7);
1354 	}
1355 
1356 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1357 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1358 
1359 		if (vq_mr)
1360 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1361 		else
1362 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1363 	}
1364 
1365 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1366 		desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1367 
1368 		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1369 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1370 		else
1371 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1372 	}
1373 
1374 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1375 }
1376 
1377 static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev,
1378 				 struct mlx5_vdpa_virtqueue *mvq,
1379 				 int state)
1380 {
1381 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1382 
1383 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1384 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP];
1385 		struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid];
1386 
1387 		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1388 		mlx5_vdpa_get_mr(mvdev, vq_mr);
1389 		mvq->vq_mr = vq_mr;
1390 	}
1391 
1392 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1393 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP];
1394 		struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid];
1395 
1396 		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1397 		mlx5_vdpa_get_mr(mvdev, desc_mr);
1398 		mvq->desc_mr = desc_mr;
1399 	}
1400 
1401 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1402 		mvq->fw_state = state;
1403 
1404 	mvq->modified_fields = 0;
1405 }
1406 
1407 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1408 {
1409 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1410 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1411 	void *cmd_hdr;
1412 	int err;
1413 
1414 	if (!counters_supported(&ndev->mvdev))
1415 		return 0;
1416 
1417 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1418 
1419 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1420 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1421 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1422 
1423 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1424 	if (err)
1425 		return err;
1426 
1427 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1428 
1429 	return 0;
1430 }
1431 
1432 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1433 {
1434 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1435 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1436 
1437 	if (!counters_supported(&ndev->mvdev))
1438 		return;
1439 
1440 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1441 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1442 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1443 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1444 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1445 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1446 }
1447 
1448 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1449 {
1450 	struct vdpa_callback *cb = priv;
1451 
1452 	if (cb->callback)
1453 		return cb->callback(cb->private);
1454 
1455 	return IRQ_HANDLED;
1456 }
1457 
1458 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1459 			 struct mlx5_vdpa_virtqueue *mvq)
1460 {
1461 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1462 	struct mlx5_vdpa_irq_pool_entry *ent;
1463 	int err;
1464 	int i;
1465 
1466 	for (i = 0; i < irqp->num_ent; i++) {
1467 		ent = &irqp->entries[i];
1468 		if (!ent->used) {
1469 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1470 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1471 			ent->dev_id = &ndev->event_cbs[mvq->index];
1472 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1473 					  ent->name, ent->dev_id);
1474 			if (err)
1475 				return;
1476 
1477 			ent->used = true;
1478 			mvq->map = ent->map;
1479 			return;
1480 		}
1481 	}
1482 }
1483 
1484 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1485 			   struct mlx5_vdpa_virtqueue *mvq)
1486 {
1487 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1488 	int i;
1489 
1490 	for (i = 0; i < irqp->num_ent; i++)
1491 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1492 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1493 			irqp->entries[i].used = false;
1494 			return;
1495 		}
1496 }
1497 
1498 static int setup_vq(struct mlx5_vdpa_net *ndev,
1499 		    struct mlx5_vdpa_virtqueue *mvq,
1500 		    bool filled)
1501 {
1502 	u16 idx = mvq->index;
1503 	int err;
1504 
1505 	if (mvq->initialized)
1506 		return 0;
1507 
1508 	err = cq_create(ndev, idx, mvq->num_ent);
1509 	if (err)
1510 		return err;
1511 
1512 	err = qp_create(ndev, mvq, &mvq->fwqp);
1513 	if (err)
1514 		goto err_fwqp;
1515 
1516 	err = qp_create(ndev, mvq, &mvq->vqqp);
1517 	if (err)
1518 		goto err_vqqp;
1519 
1520 	err = connect_qps(ndev, mvq);
1521 	if (err)
1522 		goto err_connect;
1523 
1524 	err = counter_set_alloc(ndev, mvq);
1525 	if (err)
1526 		goto err_connect;
1527 
1528 	alloc_vector(ndev, mvq);
1529 	err = create_virtqueue(ndev, mvq, filled);
1530 	if (err)
1531 		goto err_vq;
1532 
1533 	mvq->initialized = true;
1534 
1535 	if (mvq->ready) {
1536 		err = resume_vq(ndev, mvq);
1537 		if (err)
1538 			goto err_modify;
1539 	}
1540 
1541 	return 0;
1542 
1543 err_modify:
1544 	destroy_virtqueue(ndev, mvq);
1545 err_vq:
1546 	dealloc_vector(ndev, mvq);
1547 	counter_set_dealloc(ndev, mvq);
1548 err_connect:
1549 	qp_destroy(ndev, &mvq->vqqp);
1550 err_vqqp:
1551 	qp_destroy(ndev, &mvq->fwqp);
1552 err_fwqp:
1553 	cq_destroy(ndev, idx);
1554 	return err;
1555 }
1556 
1557 static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state)
1558 {
1559 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1560 	struct mlx5_virtqueue_modify_mem *cmd_mem;
1561 	struct mlx5_vdpa_async_cmd *cmds;
1562 	int err = 0;
1563 
1564 	WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n",
1565 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1566 
1567 	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
1568 	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
1569 	if (!cmds || !cmd_mem) {
1570 		err = -ENOMEM;
1571 		goto done;
1572 	}
1573 
1574 	for (int i = 0; i < num_vqs; i++) {
1575 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1576 		struct mlx5_vdpa_virtqueue *mvq;
1577 		int vq_idx = start_vq + i;
1578 
1579 		mvq = &ndev->vqs[vq_idx];
1580 
1581 		if (!modifiable_virtqueue_fields(mvq)) {
1582 			err = -EINVAL;
1583 			goto done;
1584 		}
1585 
1586 		if (mvq->fw_state != state) {
1587 			if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1588 				err = -EINVAL;
1589 				goto done;
1590 			}
1591 
1592 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1593 		}
1594 
1595 		cmd->in = &cmd_mem[i].in;
1596 		cmd->inlen = sizeof(cmd_mem[i].in);
1597 		cmd->out = &cmd_mem[i].out;
1598 		cmd->outlen = sizeof(cmd_mem[i].out);
1599 		fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]);
1600 	}
1601 
1602 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1603 	if (err) {
1604 		mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n",
1605 			      start_vq, start_vq + num_vqs);
1606 		goto done;
1607 	}
1608 
1609 	for (int i = 0; i < num_vqs; i++) {
1610 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1611 		struct mlx5_vdpa_virtqueue *mvq;
1612 		int vq_idx = start_vq + i;
1613 
1614 		mvq = &ndev->vqs[vq_idx];
1615 
1616 		if (cmd->err) {
1617 			mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n",
1618 				      vq_idx, mvq->fw_state, state, err);
1619 			if (!err)
1620 				err = cmd->err;
1621 			continue;
1622 		}
1623 
1624 		modify_virtqueue_end(ndev, mvq, state);
1625 	}
1626 
1627 done:
1628 	kvfree(cmd_mem);
1629 	kvfree(cmds);
1630 	return err;
1631 }
1632 
1633 static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1634 {
1635 	struct mlx5_vdpa_virtqueue *mvq;
1636 	struct mlx5_virtq_attr *attrs;
1637 	int vq_idx, i;
1638 	int err;
1639 
1640 	if (start_vq >= ndev->cur_num_vqs)
1641 		return -EINVAL;
1642 
1643 	mvq = &ndev->vqs[start_vq];
1644 	if (!mvq->initialized)
1645 		return 0;
1646 
1647 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1648 		return 0;
1649 
1650 	err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
1651 	if (err)
1652 		return err;
1653 
1654 	attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL);
1655 	if (!attrs)
1656 		return -ENOMEM;
1657 
1658 	err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
1659 	if (err)
1660 		goto done;
1661 
1662 	for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
1663 		mvq = &ndev->vqs[vq_idx];
1664 		mvq->avail_idx = attrs[i].available_index;
1665 		mvq->used_idx = attrs[i].used_index;
1666 	}
1667 
1668 done:
1669 	kfree(attrs);
1670 	return err;
1671 }
1672 
1673 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1674 {
1675 	return suspend_vqs(ndev, mvq->index, 1);
1676 }
1677 
1678 static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1679 {
1680 	struct mlx5_vdpa_virtqueue *mvq;
1681 	int err;
1682 
1683 	if (start_vq >= ndev->mvdev.max_vqs)
1684 		return -EINVAL;
1685 
1686 	mvq = &ndev->vqs[start_vq];
1687 	if (!mvq->initialized)
1688 		return 0;
1689 
1690 	if (mvq->index >= ndev->cur_num_vqs)
1691 		return 0;
1692 
1693 	switch (mvq->fw_state) {
1694 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1695 		/* Due to a FW quirk we need to modify the VQ fields first then change state.
1696 		 * This should be fixed soon. After that, a single command can be used.
1697 		 */
1698 		err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state);
1699 		if (err)
1700 			return err;
1701 		break;
1702 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1703 		if (!is_resumable(ndev)) {
1704 			mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
1705 			return -EINVAL;
1706 		}
1707 		break;
1708 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1709 		return 0;
1710 	default:
1711 		mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n",
1712 			       mvq->index, mvq->fw_state);
1713 		return -EINVAL;
1714 	}
1715 
1716 	return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1717 }
1718 
1719 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1720 {
1721 	return resume_vqs(ndev, mvq->index, 1);
1722 }
1723 
1724 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1725 {
1726 	if (!mvq->initialized)
1727 		return;
1728 
1729 	suspend_vq(ndev, mvq);
1730 	mvq->modified_fields = 0;
1731 	destroy_virtqueue(ndev, mvq);
1732 	dealloc_vector(ndev, mvq);
1733 	counter_set_dealloc(ndev, mvq);
1734 	qp_destroy(ndev, &mvq->vqqp);
1735 	qp_destroy(ndev, &mvq->fwqp);
1736 	cq_destroy(ndev, mvq->index);
1737 	mvq->initialized = false;
1738 }
1739 
1740 static int create_rqt(struct mlx5_vdpa_net *ndev)
1741 {
1742 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1743 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1744 	__be32 *list;
1745 	void *rqtc;
1746 	int inlen;
1747 	void *in;
1748 	int i, j;
1749 	int err;
1750 
1751 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1752 	in = kzalloc(inlen, GFP_KERNEL);
1753 	if (!in)
1754 		return -ENOMEM;
1755 
1756 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1757 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1758 
1759 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1760 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1761 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1762 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1763 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1764 
1765 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1766 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1767 	kfree(in);
1768 	if (err)
1769 		return err;
1770 
1771 	return 0;
1772 }
1773 
/* Value written to the bitmask field of the modify_rqt command. */
#define MLX5_MODIFY_RQT_NUM_RQS BIT_ULL(0)
1775 
1776 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1777 {
1778 	int act_sz = roundup_pow_of_two(num / 2);
1779 	__be32 *list;
1780 	void *rqtc;
1781 	int inlen;
1782 	void *in;
1783 	int i, j;
1784 	int err;
1785 
1786 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1787 	in = kzalloc(inlen, GFP_KERNEL);
1788 	if (!in)
1789 		return -ENOMEM;
1790 
1791 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1792 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1793 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1794 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1795 
1796 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1797 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1798 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1799 
1800 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1801 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1802 	kfree(in);
1803 	if (err)
1804 		return err;
1805 
1806 	return 0;
1807 }
1808 
1809 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1810 {
1811 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1812 }
1813 
1814 static int create_tir(struct mlx5_vdpa_net *ndev)
1815 {
1816 #define HASH_IP_L4PORTS                                                                            \
1817 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1818 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1819 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1820 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1821 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1822 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1823 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1824 	void *rss_key;
1825 	void *outer;
1826 	void *tirc;
1827 	void *in;
1828 	int err;
1829 
1830 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1831 	if (!in)
1832 		return -ENOMEM;
1833 
1834 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1835 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1836 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1837 
1838 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1839 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1840 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1841 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1842 
1843 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1844 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1845 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1846 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1847 
1848 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1849 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1850 
1851 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1852 	kfree(in);
1853 	if (err)
1854 		return err;
1855 
1856 	mlx5_vdpa_add_tirn(ndev);
1857 	return err;
1858 }
1859 
1860 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1861 {
1862 	mlx5_vdpa_remove_tirn(ndev);
1863 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1864 }
1865 
/* NOTE(review): these two limits are not referenced in this part of the file;
 * presumably they bound the steering flow table - confirm at the point of use.
 */
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

/* Number of flow destinations per MAC/VLAN steering rule. With steering debug
 * enabled a second slot is needed for the flow counter destination.
 */
#ifdef CONFIG_MLX5_VDPA_STEERING_DEBUG
#define NUM_DESTS 2
#else
#define NUM_DESTS 1
#endif
1874 
/* Prepare flow counters for the steering rules of @node.
 *
 * With CONFIG_MLX5_VDPA_STEERING_DEBUG: create one flow counter for unicast
 * and one for multicast, mark dests[1] as a counter destination and add the
 * COUNT action to @flow_act. If the multicast counter cannot be created the
 * unicast one is destroyed again. Returns 0 or the error encoded in the
 * pointer returned by mlx5_fc_create().
 *
 * Without it: nothing is done and 0 is returned.
 */
static int add_steering_counters(struct mlx5_vdpa_net *ndev,
				 struct macvlan_node *node,
				 struct mlx5_flow_act *flow_act,
				 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;

	node->ucast_counter.counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(node->ucast_counter.counter))
		return PTR_ERR(node->ucast_counter.counter);

	node->mcast_counter.counter = mlx5_fc_create(mdev, false);
	if (IS_ERR(node->mcast_counter.counter)) {
		int err = PTR_ERR(node->mcast_counter.counter);

		mlx5_fc_destroy(mdev, node->ucast_counter.counter);
		return err;
	}

	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	return 0;
#else
	return 0;
#endif
}
1904 
/*
 * Destroy the multicast and then the unicast flow counter of @node.
 * Compiles to an empty function without CONFIG_MLX5_VDPA_STEERING_DEBUG.
 */
static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
				     struct macvlan_node *node)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;

	mlx5_fc_destroy(mvdev->mdev, node->mcast_counter.counter);
	mlx5_fc_destroy(mvdev->mdev, node->ucast_counter.counter);
#endif
}
1913 
1914 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1915 					struct macvlan_node *node)
1916 {
1917 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1918 	struct mlx5_flow_act flow_act = {};
1919 	struct mlx5_flow_spec *spec;
1920 	void *headers_c;
1921 	void *headers_v;
1922 	u8 *dmac_c;
1923 	u8 *dmac_v;
1924 	int err;
1925 	u16 vid;
1926 
1927 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1928 	if (!spec)
1929 		return -ENOMEM;
1930 
1931 	vid = key2vid(node->macvlan);
1932 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1933 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1934 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1935 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1936 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1937 	eth_broadcast_addr(dmac_c);
1938 	ether_addr_copy(dmac_v, mac);
1939 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1940 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1941 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1942 	}
1943 	if (node->tagged) {
1944 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1945 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1946 	}
1947 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1948 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1949 	dests[0].tir_num = ndev->res.tirn;
1950 	err = add_steering_counters(ndev, node, &flow_act, dests);
1951 	if (err)
1952 		goto out_free;
1953 
1954 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1955 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1956 #endif
1957 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1958 	if (IS_ERR(node->ucast_rule)) {
1959 		err = PTR_ERR(node->ucast_rule);
1960 		goto err_ucast;
1961 	}
1962 
1963 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1964 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1965 #endif
1966 
1967 	memset(dmac_c, 0, ETH_ALEN);
1968 	memset(dmac_v, 0, ETH_ALEN);
1969 	dmac_c[0] = 1;
1970 	dmac_v[0] = 1;
1971 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1972 	if (IS_ERR(node->mcast_rule)) {
1973 		err = PTR_ERR(node->mcast_rule);
1974 		goto err_mcast;
1975 	}
1976 	kvfree(spec);
1977 	mlx5_vdpa_add_rx_counters(ndev, node);
1978 	return 0;
1979 
1980 err_mcast:
1981 	mlx5_del_flow_rules(node->ucast_rule);
1982 err_ucast:
1983 	remove_steering_counters(ndev, node);
1984 out_free:
1985 	kvfree(spec);
1986 	return err;
1987 }
1988 
1989 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1990 					 struct macvlan_node *node)
1991 {
1992 	mlx5_vdpa_remove_rx_counters(ndev, node);
1993 	mlx5_del_flow_rules(node->ucast_rule);
1994 	mlx5_del_flow_rules(node->mcast_rule);
1995 }
1996 
1997 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1998 {
1999 	u64 val;
2000 
2001 	if (!tagged)
2002 		vlan = MLX5V_UNTAGGED;
2003 
2004 	val = (u64)vlan << 48 |
2005 	      (u64)mac[0] << 40 |
2006 	      (u64)mac[1] << 32 |
2007 	      (u64)mac[2] << 24 |
2008 	      (u64)mac[3] << 16 |
2009 	      (u64)mac[4] << 8 |
2010 	      (u64)mac[5];
2011 
2012 	return val;
2013 }
2014 
2015 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
2016 {
2017 	struct macvlan_node *pos;
2018 	u32 idx;
2019 
2020 	idx = hash_64(value, 8); // tbd 8
2021 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
2022 		if (pos->macvlan == value)
2023 			return pos;
2024 	}
2025 	return NULL;
2026 }
2027 
2028 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
2029 {
2030 	struct macvlan_node *ptr;
2031 	u64 val;
2032 	u32 idx;
2033 	int err;
2034 
2035 	val = search_val(mac, vid, tagged);
2036 	if (mac_vlan_lookup(ndev, val))
2037 		return -EEXIST;
2038 
2039 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
2040 	if (!ptr)
2041 		return -ENOMEM;
2042 
2043 	ptr->tagged = tagged;
2044 	ptr->macvlan = val;
2045 	ptr->ndev = ndev;
2046 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
2047 	if (err)
2048 		goto err_add;
2049 
2050 	idx = hash_64(val, 8);
2051 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
2052 	return 0;
2053 
2054 err_add:
2055 	kfree(ptr);
2056 	return err;
2057 }
2058 
2059 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
2060 {
2061 	struct macvlan_node *ptr;
2062 
2063 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
2064 	if (!ptr)
2065 		return;
2066 
2067 	hlist_del(&ptr->hlist);
2068 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
2069 	remove_steering_counters(ndev, ptr);
2070 	kfree(ptr);
2071 }
2072 
2073 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
2074 {
2075 	struct macvlan_node *pos;
2076 	struct hlist_node *n;
2077 	int i;
2078 
2079 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
2080 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
2081 			hlist_del(&pos->hlist);
2082 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
2083 			remove_steering_counters(ndev, pos);
2084 			kfree(pos);
2085 		}
2086 	}
2087 }
2088 
2089 static int setup_steering(struct mlx5_vdpa_net *ndev)
2090 {
2091 	struct mlx5_flow_table_attr ft_attr = {};
2092 	struct mlx5_flow_namespace *ns;
2093 	int err;
2094 
2095 	ft_attr.max_fte = MAX_STEERING_ENT;
2096 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
2097 
2098 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
2099 	if (!ns) {
2100 		mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n");
2101 		return -EOPNOTSUPP;
2102 	}
2103 
2104 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
2105 	if (IS_ERR(ndev->rxft)) {
2106 		mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n");
2107 		return PTR_ERR(ndev->rxft);
2108 	}
2109 	mlx5_vdpa_add_rx_flow_table(ndev);
2110 
2111 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
2112 	if (err)
2113 		goto err_add;
2114 
2115 	return 0;
2116 
2117 err_add:
2118 	mlx5_vdpa_remove_rx_flow_table(ndev);
2119 	mlx5_destroy_flow_table(ndev->rxft);
2120 	return err;
2121 }
2122 
2123 static void teardown_steering(struct mlx5_vdpa_net *ndev)
2124 {
2125 	clear_mac_vlan_table(ndev);
2126 	mlx5_vdpa_remove_rx_flow_table(ndev);
2127 	mlx5_destroy_flow_table(ndev->rxft);
2128 }
2129 
2130 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2131 {
2132 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2133 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2134 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2135 	struct mlx5_core_dev *pfmdev;
2136 	size_t read;
2137 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
2138 
2139 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2140 	switch (cmd) {
2141 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
2142 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
2143 		if (read != ETH_ALEN)
2144 			break;
2145 
2146 		if (!memcmp(ndev->config.mac, mac, 6)) {
2147 			status = VIRTIO_NET_OK;
2148 			break;
2149 		}
2150 
2151 		if (is_zero_ether_addr(mac))
2152 			break;
2153 
2154 		if (!is_zero_ether_addr(ndev->config.mac)) {
2155 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2156 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
2157 					       ndev->config.mac);
2158 				break;
2159 			}
2160 		}
2161 
2162 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
2163 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2164 				       mac);
2165 			break;
2166 		}
2167 
2168 		/* backup the original mac address so that if failed to add the forward rules
2169 		 * we could restore it
2170 		 */
2171 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
2172 
2173 		memcpy(ndev->config.mac, mac, ETH_ALEN);
2174 
2175 		/* Need recreate the flow table entry, so that the packet could forward back
2176 		 */
2177 		mac_vlan_del(ndev, mac_back, 0, false);
2178 
2179 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2180 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2181 
2182 			/* Although it hardly run here, we still need double check */
2183 			if (is_zero_ether_addr(mac_back)) {
2184 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2185 				break;
2186 			}
2187 
2188 			/* Try to restore original mac address to MFPS table, and try to restore
2189 			 * the forward rule entry.
2190 			 */
2191 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2192 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2193 					       ndev->config.mac);
2194 			}
2195 
2196 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2197 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2198 					       mac_back);
2199 			}
2200 
2201 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2202 
2203 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2204 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2205 
2206 			break;
2207 		}
2208 
2209 		status = VIRTIO_NET_OK;
2210 		break;
2211 
2212 	default:
2213 		break;
2214 	}
2215 
2216 	return status;
2217 }
2218 
2219 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2220 {
2221 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2222 	int cur_vqs = ndev->cur_num_vqs;
2223 	int new_vqs = newqps * 2;
2224 	int err;
2225 	int i;
2226 
2227 	if (cur_vqs > new_vqs) {
2228 		err = modify_rqt(ndev, new_vqs);
2229 		if (err)
2230 			return err;
2231 
2232 		if (is_resumable(ndev)) {
2233 			suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
2234 		} else {
2235 			for (i = new_vqs; i < cur_vqs; i++)
2236 				teardown_vq(ndev, &ndev->vqs[i]);
2237 		}
2238 
2239 		ndev->cur_num_vqs = new_vqs;
2240 	} else {
2241 		ndev->cur_num_vqs = new_vqs;
2242 
2243 		for (i = cur_vqs; i < new_vqs; i++) {
2244 			err = setup_vq(ndev, &ndev->vqs[i], false);
2245 			if (err)
2246 				goto clean_added;
2247 		}
2248 
2249 		err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
2250 		if (err)
2251 			goto clean_added;
2252 
2253 		err = modify_rqt(ndev, new_vqs);
2254 		if (err)
2255 			goto clean_added;
2256 	}
2257 	return 0;
2258 
2259 clean_added:
2260 	for (--i; i >= cur_vqs; --i)
2261 		teardown_vq(ndev, &ndev->vqs[i]);
2262 
2263 	ndev->cur_num_vqs = cur_vqs;
2264 
2265 	return err;
2266 }
2267 
2268 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2269 {
2270 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2271 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2272 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2273 	struct virtio_net_ctrl_mq mq;
2274 	size_t read;
2275 	u16 newqps;
2276 
2277 	switch (cmd) {
2278 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2279 		/* This mq feature check aligns with pre-existing userspace
2280 		 * implementation.
2281 		 *
2282 		 * Without it, an untrusted driver could fake a multiqueue config
2283 		 * request down to a non-mq device that may cause kernel to
2284 		 * panic due to uninitialized resources for extra vqs. Even with
2285 		 * a well behaving guest driver, it is not expected to allow
2286 		 * changing the number of vqs on a non-mq device.
2287 		 */
2288 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2289 			break;
2290 
2291 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2292 		if (read != sizeof(mq))
2293 			break;
2294 
2295 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2296 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2297 		    newqps > ndev->rqt_size)
2298 			break;
2299 
2300 		if (ndev->cur_num_vqs == 2 * newqps) {
2301 			status = VIRTIO_NET_OK;
2302 			break;
2303 		}
2304 
2305 		if (!change_num_qps(mvdev, newqps))
2306 			status = VIRTIO_NET_OK;
2307 
2308 		break;
2309 	default:
2310 		break;
2311 	}
2312 
2313 	return status;
2314 }
2315 
2316 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2317 {
2318 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2319 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2320 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2321 	__virtio16 vlan;
2322 	size_t read;
2323 	u16 id;
2324 
2325 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2326 		return status;
2327 
2328 	switch (cmd) {
2329 	case VIRTIO_NET_CTRL_VLAN_ADD:
2330 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2331 		if (read != sizeof(vlan))
2332 			break;
2333 
2334 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2335 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2336 			break;
2337 
2338 		status = VIRTIO_NET_OK;
2339 		break;
2340 	case VIRTIO_NET_CTRL_VLAN_DEL:
2341 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2342 		if (read != sizeof(vlan))
2343 			break;
2344 
2345 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2346 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2347 		status = VIRTIO_NET_OK;
2348 		break;
2349 	default:
2350 		break;
2351 	}
2352 
2353 	return status;
2354 }
2355 
2356 static void mlx5_cvq_kick_handler(struct work_struct *work)
2357 {
2358 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2359 	struct virtio_net_ctrl_hdr ctrl;
2360 	struct mlx5_vdpa_wq_ent *wqent;
2361 	struct mlx5_vdpa_dev *mvdev;
2362 	struct mlx5_control_vq *cvq;
2363 	struct mlx5_vdpa_net *ndev;
2364 	size_t read, write;
2365 	int err;
2366 
2367 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2368 	mvdev = wqent->mvdev;
2369 	ndev = to_mlx5_vdpa_ndev(mvdev);
2370 	cvq = &mvdev->cvq;
2371 
2372 	down_write(&ndev->reslock);
2373 
2374 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2375 		goto out;
2376 
2377 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2378 		goto out;
2379 
2380 	if (!cvq->ready)
2381 		goto out;
2382 
2383 	while (true) {
2384 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2385 					   GFP_ATOMIC);
2386 		if (err <= 0)
2387 			break;
2388 
2389 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2390 		if (read != sizeof(ctrl))
2391 			break;
2392 
2393 		cvq->received_desc++;
2394 		switch (ctrl.class) {
2395 		case VIRTIO_NET_CTRL_MAC:
2396 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2397 			break;
2398 		case VIRTIO_NET_CTRL_MQ:
2399 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2400 			break;
2401 		case VIRTIO_NET_CTRL_VLAN:
2402 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2403 			break;
2404 		default:
2405 			break;
2406 		}
2407 
2408 		/* Make sure data is written before advancing index */
2409 		smp_wmb();
2410 
2411 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2412 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2413 		vringh_kiov_cleanup(&cvq->riov);
2414 		vringh_kiov_cleanup(&cvq->wiov);
2415 
2416 		if (vringh_need_notify_iotlb(&cvq->vring))
2417 			vringh_notify(&cvq->vring);
2418 
2419 		cvq->completed_desc++;
2420 		queue_work(mvdev->wq, &wqent->work);
2421 		break;
2422 	}
2423 
2424 out:
2425 	up_write(&ndev->reslock);
2426 }
2427 
2428 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2429 {
2430 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2431 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2432 	struct mlx5_vdpa_virtqueue *mvq;
2433 
2434 	if (!is_index_valid(mvdev, idx))
2435 		return;
2436 
2437 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2438 		if (!mvdev->wq || !mvdev->cvq.ready)
2439 			return;
2440 
2441 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2442 		return;
2443 	}
2444 
2445 	mvq = &ndev->vqs[idx];
2446 	if (unlikely(!mvq->ready))
2447 		return;
2448 
2449 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2450 }
2451 
2452 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2453 				    u64 driver_area, u64 device_area)
2454 {
2455 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2456 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2457 	struct mlx5_vdpa_virtqueue *mvq;
2458 
2459 	if (!is_index_valid(mvdev, idx))
2460 		return -EINVAL;
2461 
2462 	if (is_ctrl_vq_idx(mvdev, idx)) {
2463 		mvdev->cvq.desc_addr = desc_area;
2464 		mvdev->cvq.device_addr = device_area;
2465 		mvdev->cvq.driver_addr = driver_area;
2466 		return 0;
2467 	}
2468 
2469 	mvq = &ndev->vqs[idx];
2470 	mvq->desc_addr = desc_area;
2471 	mvq->device_addr = device_area;
2472 	mvq->driver_addr = driver_area;
2473 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2474 	return 0;
2475 }
2476 
2477 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2478 {
2479 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2480 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2481 	struct mlx5_vdpa_virtqueue *mvq;
2482 
2483 	if (!is_index_valid(mvdev, idx))
2484 		return;
2485 
2486         if (is_ctrl_vq_idx(mvdev, idx)) {
2487                 struct mlx5_control_vq *cvq = &mvdev->cvq;
2488 
2489                 cvq->vring.vring.num = num;
2490                 return;
2491         }
2492 
2493 	mvq = &ndev->vqs[idx];
2494 	ndev->needs_teardown = num != mvq->num_ent;
2495 	mvq->num_ent = num;
2496 }
2497 
2498 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2499 {
2500 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2501 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2502 
2503 	ndev->event_cbs[idx] = *cb;
2504 	if (is_ctrl_vq_idx(mvdev, idx))
2505 		mvdev->cvq.event_cb = *cb;
2506 }
2507 
2508 static void mlx5_cvq_notify(struct vringh *vring)
2509 {
2510 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2511 
2512 	if (!cvq->event_cb.callback)
2513 		return;
2514 
2515 	cvq->event_cb.callback(cvq->event_cb.private);
2516 }
2517 
2518 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2519 {
2520 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2521 
2522 	cvq->ready = ready;
2523 	if (!ready)
2524 		return;
2525 
2526 	cvq->vring.notify = mlx5_cvq_notify;
2527 }
2528 
2529 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2530 {
2531 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2532 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2533 	struct mlx5_vdpa_virtqueue *mvq;
2534 
2535 	if (!mvdev->actual_features)
2536 		return;
2537 
2538 	if (!is_index_valid(mvdev, idx))
2539 		return;
2540 
2541 	if (is_ctrl_vq_idx(mvdev, idx)) {
2542 		set_cvq_ready(mvdev, ready);
2543 		return;
2544 	}
2545 
2546 	mvq = &ndev->vqs[idx];
2547 	if (!ready) {
2548 		suspend_vq(ndev, mvq);
2549 	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
2550 		if (resume_vq(ndev, mvq))
2551 			ready = false;
2552 	}
2553 
2554 	mvq->ready = ready;
2555 }
2556 
2557 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2558 {
2559 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2560 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2561 
2562 	if (!is_index_valid(mvdev, idx))
2563 		return false;
2564 
2565 	if (is_ctrl_vq_idx(mvdev, idx))
2566 		return mvdev->cvq.ready;
2567 
2568 	return ndev->vqs[idx].ready;
2569 }
2570 
2571 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2572 				  const struct vdpa_vq_state *state)
2573 {
2574 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2575 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2576 	struct mlx5_vdpa_virtqueue *mvq;
2577 
2578 	if (!is_index_valid(mvdev, idx))
2579 		return -EINVAL;
2580 
2581 	if (is_ctrl_vq_idx(mvdev, idx)) {
2582 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2583 		return 0;
2584 	}
2585 
2586 	mvq = &ndev->vqs[idx];
2587 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2588 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2589 		return -EINVAL;
2590 	}
2591 
2592 	mvq->used_idx = state->split.avail_index;
2593 	mvq->avail_idx = state->split.avail_index;
2594 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2595 				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2596 	return 0;
2597 }
2598 
2599 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2600 {
2601 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2602 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2603 	struct mlx5_vdpa_virtqueue *mvq;
2604 	struct mlx5_virtq_attr attr;
2605 	int err;
2606 
2607 	if (!is_index_valid(mvdev, idx))
2608 		return -EINVAL;
2609 
2610 	if (is_ctrl_vq_idx(mvdev, idx)) {
2611 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2612 		return 0;
2613 	}
2614 
2615 	mvq = &ndev->vqs[idx];
2616 	/* If the virtq object was destroyed, use the value saved at
2617 	 * the last minute of suspend_vq. This caters for userspace
2618 	 * that cares about emulating the index after vq is stopped.
2619 	 */
2620 	if (!mvq->initialized) {
2621 		/* Firmware returns a wrong value for the available index.
2622 		 * Since both values should be identical, we take the value of
2623 		 * used_idx which is reported correctly.
2624 		 */
2625 		state->split.avail_index = mvq->used_idx;
2626 		return 0;
2627 	}
2628 
2629 	err = query_virtqueues(ndev, mvq->index, 1, &attr);
2630 	if (err) {
2631 		mlx5_vdpa_err(mvdev, "failed to query virtqueue\n");
2632 		return err;
2633 	}
2634 	state->split.avail_index = attr.used_index;
2635 	return 0;
2636 }
2637 
2638 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2639 {
2640 	return PAGE_SIZE;
2641 }
2642 
2643 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2644 {
2645 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2646 
2647 	if (is_ctrl_vq_idx(mvdev, idx))
2648 		return MLX5_VDPA_CVQ_GROUP;
2649 
2650 	return MLX5_VDPA_DATAVQ_GROUP;
2651 }
2652 
2653 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2654 {
2655 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2656 
2657 	if (is_ctrl_vq_idx(mvdev, idx))
2658 		return MLX5_VDPA_CVQ_GROUP;
2659 
2660 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2661 }
2662 
2663 static u64 mlx_to_vritio_features(u16 dev_features)
2664 {
2665 	u64 result = 0;
2666 
2667 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2668 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2669 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2670 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2671 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2672 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2673 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2674 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2675 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2676 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2677 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2678 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2679 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2680 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2681 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2682 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2683 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2684 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2685 
2686 	return result;
2687 }
2688 
2689 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2690 {
2691 	u64 mlx_vdpa_features = 0;
2692 	u16 dev_features;
2693 
2694 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2695 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2696 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2697 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2698 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2699 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2700 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2701 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2702 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2703 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2704 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2705 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2706 
2707 	return mlx_vdpa_features;
2708 }
2709 
2710 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2711 {
2712 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2713 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2714 
2715 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2716 	return ndev->mvdev.mlx_features;
2717 }
2718 
2719 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2720 {
2721 	/* Minimum features to expect */
2722 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2723 		return -EOPNOTSUPP;
2724 
2725 	/* Double check features combination sent down by the driver.
2726 	 * Fail invalid features due to absence of the depended feature.
2727 	 *
2728 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2729 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2730 	 * By failing the invalid features sent down by untrusted drivers,
2731 	 * we're assured the assumption made upon is_index_valid() and
2732 	 * is_ctrl_vq_idx() will not be compromised.
2733 	 */
2734 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2735             BIT_ULL(VIRTIO_NET_F_MQ))
2736 		return -EINVAL;
2737 
2738 	return 0;
2739 }
2740 
2741 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2742 {
2743 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2744 	int err;
2745 	int i;
2746 
2747 	for (i = 0; i < mvdev->max_vqs; i++) {
2748 		err = setup_vq(ndev, &ndev->vqs[i], filled);
2749 		if (err)
2750 			goto err_vq;
2751 	}
2752 
2753 	return 0;
2754 
2755 err_vq:
2756 	for (--i; i >= 0; i--)
2757 		teardown_vq(ndev, &ndev->vqs[i]);
2758 
2759 	return err;
2760 }
2761 
2762 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2763 {
2764 	int i;
2765 
2766 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2767 		teardown_vq(ndev, &ndev->vqs[i]);
2768 }
2769 
2770 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2771 {
2772 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2773 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2774 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2775 			mvdev->max_idx = mvdev->max_vqs;
2776 		} else {
2777 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
2778 			 * CVQ gets index 2
2779 			 */
2780 			mvdev->max_idx = 2;
2781 		}
2782 	} else {
2783 		/* Two data virtqueues only: one for rx and one for tx */
2784 		mvdev->max_idx = 1;
2785 	}
2786 }
2787 
2788 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2789 {
2790 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2791 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2792 	int err;
2793 
2794 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2795 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2796 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2797 	if (vport)
2798 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2799 
2800 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2801 	if (err)
2802 		return 0;
2803 
2804 	return MLX5_GET(query_vport_state_out, out, state);
2805 }
2806 
2807 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2808 {
2809 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2810 	    VPORT_STATE_UP)
2811 		return true;
2812 
2813 	return false;
2814 }
2815 
2816 static void update_carrier(struct work_struct *work)
2817 {
2818 	struct mlx5_vdpa_wq_ent *wqent;
2819 	struct mlx5_vdpa_dev *mvdev;
2820 	struct mlx5_vdpa_net *ndev;
2821 
2822 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2823 	mvdev = wqent->mvdev;
2824 	ndev = to_mlx5_vdpa_ndev(mvdev);
2825 	if (get_link_state(mvdev))
2826 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2827 	else
2828 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2829 
2830 	if (ndev->config_cb.callback)
2831 		ndev->config_cb.callback(ndev->config_cb.private);
2832 
2833 	kfree(wqent);
2834 }
2835 
2836 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2837 {
2838 	struct mlx5_vdpa_wq_ent *wqent;
2839 
2840 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2841 	if (!wqent)
2842 		return -ENOMEM;
2843 
2844 	wqent->mvdev = &ndev->mvdev;
2845 	INIT_WORK(&wqent->work, update_carrier);
2846 	queue_work(ndev->mvdev.wq, &wqent->work);
2847 	return 0;
2848 }
2849 
2850 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2851 {
2852 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2853 	struct mlx5_eqe *eqe = param;
2854 	int ret = NOTIFY_DONE;
2855 
2856 	if (ndev->mvdev.suspended)
2857 		return NOTIFY_DONE;
2858 
2859 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2860 		switch (eqe->sub_type) {
2861 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2862 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2863 			if (queue_link_work(ndev))
2864 				return NOTIFY_DONE;
2865 
2866 			ret = NOTIFY_OK;
2867 			break;
2868 		default:
2869 			return NOTIFY_DONE;
2870 		}
2871 		return ret;
2872 	}
2873 	return ret;
2874 }
2875 
2876 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2877 {
2878 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2879 		return;
2880 
2881 	ndev->nb.notifier_call = event_handler;
2882 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2883 	ndev->nb_registered = true;
2884 	queue_link_work(ndev);
2885 }
2886 
2887 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2888 {
2889 	if (!ndev->nb_registered)
2890 		return;
2891 
2892 	ndev->nb_registered = false;
2893 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2894 	if (ndev->mvdev.wq)
2895 		flush_workqueue(ndev->mvdev.wq);
2896 }
2897 
2898 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2899 {
2900 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2901 }
2902 
2903 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2904 {
2905 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2906 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2907 	u64 old_features = mvdev->actual_features;
2908 	u64 diff_features;
2909 	int err;
2910 
2911 	print_features(mvdev, features, true);
2912 
2913 	err = verify_driver_features(mvdev, features);
2914 	if (err)
2915 		return err;
2916 
2917 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2918 
2919 	/* Interested in changes of vq features only. */
2920 	if (get_features(old_features) != get_features(mvdev->actual_features)) {
2921 		for (int i = 0; i < mvdev->max_vqs; ++i) {
2922 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2923 
2924 			mvq->modified_fields |= (
2925 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION |
2926 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES
2927 			);
2928 		}
2929 	}
2930 
2931 	/* When below features diverge from initial device features, VQs need a full teardown. */
2932 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \
2933 			     BIT_ULL(VIRTIO_NET_F_CSUM) | \
2934 			     BIT_ULL(VIRTIO_F_VERSION_1))
2935 
2936 	diff_features = mvdev->mlx_features ^ mvdev->actual_features;
2937 	ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK);
2938 
2939 	update_cvq_info(mvdev);
2940 	return err;
2941 }
2942 
2943 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2944 {
2945 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2946 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2947 
2948 	ndev->config_cb = *cb;
2949 }
2950 
2951 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2952 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2953 {
2954 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2955 }
2956 
2957 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2958 {
2959 	return VIRTIO_ID_NET;
2960 }
2961 
2962 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2963 {
2964 	return PCI_VENDOR_ID_MELLANOX;
2965 }
2966 
2967 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2968 {
2969 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2970 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2971 
2972 	print_status(mvdev, ndev->mvdev.status, false);
2973 	return ndev->mvdev.status;
2974 }
2975 
2976 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2977 {
2978 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2979 	struct mlx5_virtq_attr attr = {};
2980 	int err;
2981 
2982 	if (mvq->initialized) {
2983 		err = query_virtqueues(ndev, mvq->index, 1, &attr);
2984 		if (err)
2985 			return err;
2986 	}
2987 
2988 	ri->avail_index = attr.available_index;
2989 	ri->used_index = attr.used_index;
2990 	ri->ready = mvq->ready;
2991 	ri->num_ent = mvq->num_ent;
2992 	ri->desc_addr = mvq->desc_addr;
2993 	ri->device_addr = mvq->device_addr;
2994 	ri->driver_addr = mvq->driver_addr;
2995 	ri->map = mvq->map;
2996 	ri->restore = true;
2997 	return 0;
2998 }
2999 
3000 static int save_channels_info(struct mlx5_vdpa_net *ndev)
3001 {
3002 	int i;
3003 
3004 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3005 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
3006 		save_channel_info(ndev, &ndev->vqs[i]);
3007 	}
3008 	return 0;
3009 }
3010 
3011 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
3012 {
3013 	int i;
3014 
3015 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
3016 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3017 }
3018 
3019 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
3020 {
3021 	struct mlx5_vdpa_virtqueue *mvq;
3022 	struct mlx5_vq_restore_info *ri;
3023 	int i;
3024 
3025 	mlx5_clear_vqs(ndev);
3026 	mvqs_set_defaults(ndev);
3027 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3028 		mvq = &ndev->vqs[i];
3029 		ri = &mvq->ri;
3030 		if (!ri->restore)
3031 			continue;
3032 
3033 		mvq->avail_idx = ri->avail_index;
3034 		mvq->used_idx = ri->used_index;
3035 		mvq->ready = ri->ready;
3036 		mvq->num_ent = ri->num_ent;
3037 		mvq->desc_addr = ri->desc_addr;
3038 		mvq->device_addr = ri->device_addr;
3039 		mvq->driver_addr = ri->driver_addr;
3040 		mvq->map = ri->map;
3041 	}
3042 }
3043 
/*
 * Replace the memory region used for address space @asid with @new_mr.
 *
 * The data virtqueues are suspended first. When is_resumable() reports
 * false, the VQ state is saved and the VQ resources are torn down before the
 * MR is swapped, and restored/recreated afterwards; otherwise the VQs are
 * only resumed. In both cases every VQ is flagged so that both mkey fields
 * are updated.
 *
 * If the device is not in DRIVER_OK or is suspended, the function stops after
 * installing the MR and flagging the VQs; nothing is restored or resumed.
 *
 * Returns 0 on success, or the error from save_channels_info() or
 * setup_vq_resources().
 */
static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
				struct mlx5_vdpa_mr *new_mr,
				unsigned int asid)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	bool teardown = !is_resumable(ndev);
	int err;

	/* NOTE(review): the result of suspend_vqs() is not checked here. */
	suspend_vqs(ndev, 0, ndev->cur_num_vqs);
	if (teardown) {
		/* Snapshot VQ state before the resources go away. */
		err = save_channels_info(ndev);
		if (err)
			return err;

		teardown_vq_resources(ndev);
	}

	mlx5_vdpa_update_mr(mvdev, new_mr, asid);

	/* Both mkeys have to be refreshed on every VQ after the MR swap. */
	for (int i = 0; i < mvdev->max_vqs; i++)
		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;

	/* Device not running (or suspended): leave the VQs as they are. */
	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
		return 0;

	if (teardown) {
		/* Recreate the VQs from the snapshot taken above. */
		restore_channels_info(ndev);
		err = setup_vq_resources(ndev, true);
		if (err)
			return err;
	}

	/* NOTE(review): the result of resume_vqs() is not checked here. */
	resume_vqs(ndev, 0, ndev->cur_num_vqs);

	return 0;
}
3081 
/*
 * Create the VQ-related resources of the device, in this order: debugfs
 * entries, umem parameters, virtqueues, RQT, TIR and steering. On failure,
 * whatever was created so far is undone in reverse order.
 *
 * @filled is passed through unchanged to setup_virtqueues().
 *
 * Returns 0 on success and also when the resources already exist (a warning
 * is logged in that case); otherwise the error of the failing step.
 *
 * reslock must be held for this function
 */
static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	int err;

	WARN_ON(!rwsem_is_locked(&ndev->reslock));

	/* Already set up: warn, but report success. */
	if (ndev->setup) {
		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
		err = 0;
		goto out;
	}
	mlx5_vdpa_add_debugfs(ndev);

	err = read_umem_params(ndev);
	if (err)
		goto err_setup;

	err = setup_virtqueues(mvdev, filled);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
		goto err_setup;
	}

	err = create_rqt(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_rqt\n");
		goto err_rqt;
	}

	err = create_tir(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "create_tir\n");
		goto err_tir;
	}

	err = setup_steering(ndev);
	if (err) {
		mlx5_vdpa_warn(mvdev, "setup_steering\n");
		goto err_fwd;
	}
	/* Only a fully successful run marks the device as set up. */
	ndev->setup = true;

	return 0;

	/* Unwind ladder: each label undoes the step created before the failing one. */
err_fwd:
	destroy_tir(ndev);
err_tir:
	destroy_rqt(ndev);
err_rqt:
	teardown_virtqueues(ndev);
err_setup:
	mlx5_vdpa_remove_debugfs(ndev);
out:
	return err;
}
3139 
3140 /* reslock must be held for this function */
3141 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev)
3142 {
3143 
3144 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3145 
3146 	if (!ndev->setup)
3147 		return;
3148 
3149 	mlx5_vdpa_remove_debugfs(ndev);
3150 	teardown_steering(ndev);
3151 	destroy_tir(ndev);
3152 	destroy_rqt(ndev);
3153 	teardown_virtqueues(ndev);
3154 	ndev->setup = false;
3155 	ndev->needs_teardown = false;
3156 }
3157 
3158 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
3159 {
3160 	struct mlx5_control_vq *cvq = &mvdev->cvq;
3161 	int err = 0;
3162 
3163 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
3164 		u16 idx = cvq->vring.last_avail_idx;
3165 
3166 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
3167 					cvq->vring.vring.num, false,
3168 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
3169 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
3170 					(struct vring_used *)(uintptr_t)cvq->device_addr);
3171 
3172 		if (!err)
3173 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
3174 	}
3175 	return err;
3176 }
3177 
/*
 * Apply a new virtio status byte to the device, under the reslock write lock.
 *
 * Only a change of the DRIVER_OK bit triggers work:
 *  - DRIVER_OK being set: the control VQ vring is initialized, the link
 *    notifier is registered, a pending teardown (needs_teardown) is carried
 *    out, and then the VQs are either resumed (resources still set up) or
 *    created from scratch.
 *  - DRIVER_OK being cleared: unexpected; a warning is logged and the stored
 *    status is left unchanged.
 *
 * On failure while setting DRIVER_OK the memory regions are cleaned and
 * VIRTIO_CONFIG_S_FAILED is OR-ed into the stored status; the requested
 * @status is not stored.
 */
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	int err;

	print_status(mvdev, status, true);

	down_write(&ndev->reslock);

	/* Act only when the DRIVER_OK bit differs from the stored status. */
	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
			err = setup_cvq_vring(mvdev);
			if (err) {
				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
				goto err_setup;
			}
			register_link_notifier(ndev);

			/* Negotiated features require recreating the VQs. */
			if (ndev->needs_teardown)
				teardown_vq_resources(ndev);

			if (ndev->setup) {
				/* Resources already exist: just resume the VQs. */
				err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
				if (err) {
					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
					goto err_driver;
				}
			} else {
				err = setup_vq_resources(ndev, true);
				if (err) {
					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
					goto err_driver;
				}
			}
		} else {
			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
			goto err_clear;
		}
	}

	ndev->mvdev.status = status;
	up_write(&ndev->reslock);
	return;

err_driver:
	/* The notifier was registered above; undo that first. */
	unregister_link_notifier(ndev);
err_setup:
	mlx5_vdpa_clean_mrs(&ndev->mvdev);
	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
err_clear:
	up_write(&ndev->reslock);
}
3231 
3232 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3233 {
3234 	int i;
3235 
3236 	/* default mapping all groups are mapped to asid 0 */
3237 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3238 		mvdev->mres.group2asid[i] = 0;
3239 }
3240 
3241 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev)
3242 {
3243 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3244 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0];
3245 
3246 	if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
3247 		return true;
3248 
3249 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
3250 		return true;
3251 
3252 	return mvq->modified_fields & (
3253 		MLX5_VIRTQ_MODIFY_MASK_STATE |
3254 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS |
3255 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
3256 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX
3257 	);
3258 }
3259 
/*
 * Reset the device to its initial state, under the reslock write lock.
 *
 * The link notifier is unregistered, and when needs_vqs_reset() says so the
 * VQ resources are torn down, the VQs get their defaults and the resources
 * are recreated at the end (with filled == false). Status, suspend flag,
 * number of VQs, control-VQ bookkeeping, event callbacks, negotiated features
 * and the group-to-ASID map are all reset, and the config generation is
 * bumped.
 *
 * With VDPA_RESET_F_CLEAN_MAP in @flags the memory regions are cleaned as
 * well and, if the umem_uid_0 capability is present, the DMA MR is recreated.
 *
 * Always returns 0.
 */
static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	bool vq_reset;

	print_status(mvdev, 0, true);
	mlx5_vdpa_info(mvdev, "performing device reset\n");

	down_write(&ndev->reslock);
	unregister_link_notifier(ndev);
	/* Must be evaluated before status and VQ state are cleared below. */
	vq_reset = needs_vqs_reset(mvdev);
	if (vq_reset) {
		teardown_vq_resources(ndev);
		mvqs_set_defaults(ndev);
	}

	if (flags & VDPA_RESET_F_CLEAN_MAP)
		mlx5_vdpa_clean_mrs(&ndev->mvdev);
	ndev->mvdev.status = 0;
	ndev->mvdev.suspended = false;
	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
	ndev->mvdev.cvq.ready = false;
	ndev->mvdev.cvq.received_desc = 0;
	ndev->mvdev.cvq.completed_desc = 0;
	/* max_vqs + 1 entries: one callback slot per data VQ plus one more. */
	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
	ndev->mvdev.actual_features = 0;
	init_group_to_asid_map(mvdev);
	++mvdev->generation;

	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		/* A failure here is only logged; the reset still succeeds. */
		if (mlx5_vdpa_create_dma_mr(mvdev))
			mlx5_vdpa_err(mvdev, "create MR failed\n");
	}
	/* NOTE(review): the result of setup_vq_resources() is ignored here. */
	if (vq_reset)
		setup_vq_resources(ndev, false);
	up_write(&ndev->reslock);

	return 0;
}
3301 
3302 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3303 {
3304 	return mlx5_vdpa_compat_reset(vdev, 0);
3305 }
3306 
3307 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3308 {
3309 	return sizeof(struct virtio_net_config);
3310 }
3311 
3312 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3313 				 unsigned int len)
3314 {
3315 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3316 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3317 
3318 	if (offset + len <= sizeof(struct virtio_net_config))
3319 		memcpy(buf, (u8 *)&ndev->config + offset, len);
3320 }
3321 
/* Writing the config space is not supported: the request is silently ignored. */
static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
				 unsigned int len)
{
}
3327 
3328 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3329 {
3330 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3331 
3332 	return mvdev->generation;
3333 }
3334 
3335 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3336 			unsigned int asid)
3337 {
3338 	struct mlx5_vdpa_mr *new_mr;
3339 	int err;
3340 
3341 	if (asid >= MLX5_VDPA_NUM_AS)
3342 		return -EINVAL;
3343 
3344 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3345 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3346 		if (IS_ERR(new_mr)) {
3347 			err = PTR_ERR(new_mr);
3348 			mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err);
3349 			return err;
3350 		}
3351 	} else {
3352 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3353 		new_mr = NULL;
3354 	}
3355 
3356 	if (!mvdev->mres.mr[asid]) {
3357 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3358 	} else {
3359 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3360 		if (err) {
3361 			mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err);
3362 			goto out_err;
3363 		}
3364 	}
3365 
3366 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3367 
3368 out_err:
3369 	mlx5_vdpa_put_mr(mvdev, new_mr);
3370 	return err;
3371 }
3372 
3373 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3374 			     struct vhost_iotlb *iotlb)
3375 {
3376 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3377 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3378 	int err = -EINVAL;
3379 
3380 	down_write(&ndev->reslock);
3381 	err = set_map_data(mvdev, iotlb, asid);
3382 	up_write(&ndev->reslock);
3383 	return err;
3384 }
3385 
3386 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3387 {
3388 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3389 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3390 	int err;
3391 
3392 	down_write(&ndev->reslock);
3393 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3394 	up_write(&ndev->reslock);
3395 	return err;
3396 }
3397 
3398 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3399 {
3400 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3401 
3402 	if (is_ctrl_vq_idx(mvdev, idx))
3403 		return &vdev->dev;
3404 
3405 	return mvdev->vdev.dma_dev;
3406 }
3407 
3408 static void free_irqs(struct mlx5_vdpa_net *ndev)
3409 {
3410 	struct mlx5_vdpa_irq_pool_entry *ent;
3411 	int i;
3412 
3413 	if (!msix_mode_supported(&ndev->mvdev))
3414 		return;
3415 
3416 	if (!ndev->irqp.entries)
3417 		return;
3418 
3419 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3420 		ent = ndev->irqp.entries + i;
3421 		if (ent->map.virq)
3422 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3423 	}
3424 	kfree(ndev->irqp.entries);
3425 }
3426 
3427 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3428 {
3429 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3430 	struct mlx5_core_dev *pfmdev;
3431 	struct mlx5_vdpa_net *ndev;
3432 
3433 	ndev = to_mlx5_vdpa_ndev(mvdev);
3434 
3435 	free_fixed_resources(ndev);
3436 	mlx5_vdpa_clean_mrs(mvdev);
3437 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3438 	mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
3439 
3440 	if (!is_zero_ether_addr(ndev->config.mac)) {
3441 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3442 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3443 	}
3444 	mlx5_vdpa_free_resources(&ndev->mvdev);
3445 	free_irqs(ndev);
3446 	kfree(ndev->event_cbs);
3447 	kfree(ndev->vqs);
3448 }
3449 
3450 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3451 {
3452 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3453 	struct vdpa_notification_area ret = {};
3454 	struct mlx5_vdpa_net *ndev;
3455 	phys_addr_t addr;
3456 
3457 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3458 		return ret;
3459 
3460 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
3461 	 * notification to avoid the risk of mapping pages that contain BAR of more
3462 	 * than one SF
3463 	 */
3464 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3465 		return ret;
3466 
3467 	ndev = to_mlx5_vdpa_ndev(mvdev);
3468 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3469 	ret.addr = addr;
3470 	ret.size = PAGE_SIZE;
3471 	return ret;
3472 }
3473 
3474 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3475 {
3476 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3477 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3478 	struct mlx5_vdpa_virtqueue *mvq;
3479 
3480 	if (!is_index_valid(mvdev, idx))
3481 		return -EINVAL;
3482 
3483 	if (is_ctrl_vq_idx(mvdev, idx))
3484 		return -EOPNOTSUPP;
3485 
3486 	mvq = &ndev->vqs[idx];
3487 	if (!mvq->map.virq)
3488 		return -EOPNOTSUPP;
3489 
3490 	return mvq->map.virq;
3491 }
3492 
3493 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3494 {
3495 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3496 
3497 	return mvdev->actual_features;
3498 }
3499 
3500 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3501 			     u64 *received_desc, u64 *completed_desc)
3502 {
3503 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3504 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3505 	void *cmd_hdr;
3506 	void *ctx;
3507 	int err;
3508 
3509 	if (!counters_supported(&ndev->mvdev))
3510 		return -EOPNOTSUPP;
3511 
3512 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3513 		return -EAGAIN;
3514 
3515 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3516 
3517 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3518 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3519 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3520 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3521 
3522 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3523 	if (err)
3524 		return err;
3525 
3526 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3527 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3528 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3529 	return 0;
3530 }
3531 
3532 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3533 					 struct sk_buff *msg,
3534 					 struct netlink_ext_ack *extack)
3535 {
3536 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3537 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3538 	struct mlx5_vdpa_virtqueue *mvq;
3539 	struct mlx5_control_vq *cvq;
3540 	u64 received_desc;
3541 	u64 completed_desc;
3542 	int err = 0;
3543 
3544 	down_read(&ndev->reslock);
3545 	if (!is_index_valid(mvdev, idx)) {
3546 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3547 		err = -EINVAL;
3548 		goto out_err;
3549 	}
3550 
3551 	if (idx == ctrl_vq_idx(mvdev)) {
3552 		cvq = &mvdev->cvq;
3553 		received_desc = cvq->received_desc;
3554 		completed_desc = cvq->completed_desc;
3555 		goto out;
3556 	}
3557 
3558 	mvq = &ndev->vqs[idx];
3559 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3560 	if (err) {
3561 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3562 		goto out_err;
3563 	}
3564 
3565 out:
3566 	err = -EMSGSIZE;
3567 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3568 		goto out_err;
3569 
3570 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3571 			      VDPA_ATTR_PAD))
3572 		goto out_err;
3573 
3574 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3575 		goto out_err;
3576 
3577 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3578 			      VDPA_ATTR_PAD))
3579 		goto out_err;
3580 
3581 	err = 0;
3582 out_err:
3583 	up_read(&ndev->reslock);
3584 	return err;
3585 }
3586 
3587 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3588 {
3589 	struct mlx5_control_vq *cvq;
3590 
3591 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3592 		return;
3593 
3594 	cvq = &mvdev->cvq;
3595 	cvq->ready = false;
3596 }
3597 
3598 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3599 {
3600 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3601 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3602 	int err;
3603 
3604 	mlx5_vdpa_info(mvdev, "suspending device\n");
3605 
3606 	down_write(&ndev->reslock);
3607 	err = suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3608 	mlx5_vdpa_cvq_suspend(mvdev);
3609 	mvdev->suspended = true;
3610 	up_write(&ndev->reslock);
3611 
3612 	return err;
3613 }
3614 
3615 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3616 {
3617 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3618 	struct mlx5_vdpa_net *ndev;
3619 	int err;
3620 
3621 	ndev = to_mlx5_vdpa_ndev(mvdev);
3622 
3623 	mlx5_vdpa_info(mvdev, "resuming device\n");
3624 
3625 	down_write(&ndev->reslock);
3626 	mvdev->suspended = false;
3627 	err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3628 	queue_link_work(ndev);
3629 	up_write(&ndev->reslock);
3630 
3631 	return err;
3632 }
3633 
3634 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3635 			       unsigned int asid)
3636 {
3637 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3638 	int err = 0;
3639 
3640 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3641 		return -EINVAL;
3642 
3643 	mvdev->mres.group2asid[group] = asid;
3644 
3645 	mutex_lock(&mvdev->mres.lock);
3646 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid])
3647 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid);
3648 	mutex_unlock(&mvdev->mres.lock);
3649 
3650 	return err;
3651 }
3652 
/*
 * Template table of vdpa config operations implemented by this driver.
 *
 * NOTE(review): mlx5_vdpa_dev_add() passes mgtdev->vdpa_ops, not this table,
 * to vdpa_alloc_device(); presumably that per-management-device copy is
 * initialized from this template with unsupported ops cleared (see the
 * "Op disabled if not supported" remarks below) - confirm in the probe code.
 */
static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_vq_group = mlx5_vdpa_get_vq_group,
	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
	.get_device_features = mlx5_vdpa_get_device_features,
	.get_backend_features = mlx5_vdpa_get_backend_features,
	.set_driver_features = mlx5_vdpa_set_driver_features,
	.get_driver_features = mlx5_vdpa_get_driver_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	/* .reset is the compat reset with no flags. */
	.reset = mlx5_vdpa_reset,
	.compat_reset = mlx5_vdpa_compat_reset,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	/* Config writes are accepted but ignored. */
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.reset_map = mlx5_vdpa_reset_map,
	.set_group_asid = mlx5_set_group_asid,
	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
	.free = mlx5_vdpa_free,
	.suspend = mlx5_vdpa_suspend,
	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
};
3692 
3693 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3694 {
3695 	u16 hw_mtu;
3696 	int err;
3697 
3698 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3699 	if (err)
3700 		return err;
3701 
3702 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3703 	return 0;
3704 }
3705 
3706 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev)
3707 {
3708 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3709 	int err;
3710 
3711 	if (res->valid) {
3712 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3713 		return -EEXIST;
3714 	}
3715 
3716 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3717 	if (err)
3718 		return err;
3719 
3720 	err = create_tis(ndev);
3721 	if (err)
3722 		goto err_tis;
3723 
3724 	res->valid = true;
3725 
3726 	return 0;
3727 
3728 err_tis:
3729 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3730 	return err;
3731 }
3732 
3733 static void free_fixed_resources(struct mlx5_vdpa_net *ndev)
3734 {
3735 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3736 
3737 	if (!res->valid)
3738 		return;
3739 
3740 	destroy_tis(ndev);
3741 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3742 	res->valid = false;
3743 }
3744 
3745 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev)
3746 {
3747 	struct mlx5_vdpa_virtqueue *mvq;
3748 	int i;
3749 
3750 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3751 		mvq = &ndev->vqs[i];
3752 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3753 		mvq->index = i;
3754 		mvq->ndev = ndev;
3755 		mvq->fwqp.fw = true;
3756 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3757 		mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE;
3758 	}
3759 }
3760 
/* Driver-private wrapper around a vdpa management device. */
struct mlx5_vdpa_mgmtdev {
	/* Embedded core object; dev_add/dev_del recover the wrapper via container_of(). */
	struct vdpa_mgmt_dev mgtdev;
	/* mlx5 auxiliary device; dev_add takes the mlx5 core device from madev->mdev. */
	struct mlx5_adev *madev;
	/* The one net device created through dev_add, NULL when none exists. */
	struct mlx5_vdpa_net *ndev;
	/* Config ops table handed to vdpa_alloc_device() by dev_add. */
	struct vdpa_config_ops vdpa_ops;
};
3767 
3768 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3769 {
3770 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3771 	void *in;
3772 	int err;
3773 
3774 	in = kvzalloc(inlen, GFP_KERNEL);
3775 	if (!in)
3776 		return -ENOMEM;
3777 
3778 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3779 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3780 		 mtu + MLX5V_ETH_HARD_MTU);
3781 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3782 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3783 
3784 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3785 
3786 	kvfree(in);
3787 	return err;
3788 }
3789 
3790 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3791 {
3792 	struct mlx5_vdpa_irq_pool_entry *ent;
3793 	int i;
3794 
3795 	if (!msix_mode_supported(&ndev->mvdev))
3796 		return;
3797 
3798 	if (!ndev->mvdev.mdev->pdev)
3799 		return;
3800 
3801 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3802 	if (!ndev->irqp.entries)
3803 		return;
3804 
3805 
3806 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3807 		ent = ndev->irqp.entries + i;
3808 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3809 			 dev_name(&ndev->mvdev.vdev.dev), i);
3810 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3811 		if (!ent->map.virq)
3812 			return;
3813 
3814 		ndev->irqp.num_ent++;
3815 	}
3816 }
3817 
3818 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3819 			     const struct vdpa_dev_set_config *add_config)
3820 {
3821 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3822 	struct virtio_net_config *config;
3823 	struct mlx5_core_dev *pfmdev;
3824 	struct mlx5_vdpa_dev *mvdev;
3825 	struct mlx5_vdpa_net *ndev;
3826 	struct mlx5_core_dev *mdev;
3827 	u64 device_features;
3828 	u32 max_vqs;
3829 	u16 mtu;
3830 	int err;
3831 
3832 	if (mgtdev->ndev)
3833 		return -ENOSPC;
3834 
3835 	mdev = mgtdev->madev->mdev;
3836 	device_features = mgtdev->mgtdev.supported_features;
3837 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3838 		if (add_config->device_features & ~device_features) {
3839 			dev_warn(mdev->device,
3840 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3841 				 add_config->device_features, device_features);
3842 			return -EINVAL;
3843 		}
3844 		device_features &= add_config->device_features;
3845 	} else {
3846 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3847 	}
3848 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3849 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3850 		dev_warn(mdev->device,
3851 			 "Must provision minimum features 0x%llx for this device",
3852 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3853 		return -EOPNOTSUPP;
3854 	}
3855 
3856 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3857 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3858 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3859 		return -EOPNOTSUPP;
3860 	}
3861 
3862 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3863 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3864 	if (max_vqs < 2) {
3865 		dev_warn(mdev->device,
3866 			 "%d virtqueues are supported. At least 2 are required\n",
3867 			 max_vqs);
3868 		return -EAGAIN;
3869 	}
3870 
3871 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3872 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3873 			return -EINVAL;
3874 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3875 	} else {
3876 		max_vqs = 2;
3877 	}
3878 
3879 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3880 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3881 	if (IS_ERR(ndev))
3882 		return PTR_ERR(ndev);
3883 
3884 	ndev->mvdev.max_vqs = max_vqs;
3885 	mvdev = &ndev->mvdev;
3886 	mvdev->mdev = mdev;
3887 
3888 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3889 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3890 	if (!ndev->vqs || !ndev->event_cbs) {
3891 		err = -ENOMEM;
3892 		goto err_alloc;
3893 	}
3894 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3895 
3896 	mvqs_set_defaults(ndev);
3897 	allocate_irqs(ndev);
3898 	init_rwsem(&ndev->reslock);
3899 	config = &ndev->config;
3900 
3901 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3902 		err = config_func_mtu(mdev, add_config->net.mtu);
3903 		if (err)
3904 			goto err_alloc;
3905 	}
3906 
3907 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3908 		err = query_mtu(mdev, &mtu);
3909 		if (err)
3910 			goto err_alloc;
3911 
3912 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3913 	}
3914 
3915 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3916 		if (get_link_state(mvdev))
3917 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3918 		else
3919 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3920 	}
3921 
3922 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3923 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3924 	/* No bother setting mac address in config if not going to provision _F_MAC */
3925 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3926 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3927 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3928 		if (err)
3929 			goto err_alloc;
3930 	}
3931 
3932 	if (!is_zero_ether_addr(config->mac)) {
3933 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3934 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3935 		if (err)
3936 			goto err_alloc;
3937 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3938 		/*
3939 		 * We used to clear _F_MAC feature bit if seeing
3940 		 * zero mac address when device features are not
3941 		 * specifically provisioned. Keep the behaviour
3942 		 * so old scripts do not break.
3943 		 */
3944 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3945 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3946 		/* Don't provision zero mac address for _F_MAC */
3947 		mlx5_vdpa_warn(&ndev->mvdev,
3948 			       "No mac address provisioned?\n");
3949 		err = -EINVAL;
3950 		goto err_alloc;
3951 	}
3952 
3953 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
3954 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3955 		ndev->rqt_size = max_vqs / 2;
3956 	} else {
3957 		ndev->rqt_size = 1;
3958 	}
3959 
3960 	mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
3961 
3962 	ndev->mvdev.mlx_features = device_features;
3963 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3964 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3965 	if (err)
3966 		goto err_mpfs;
3967 
3968 	err = mlx5_vdpa_init_mr_resources(mvdev);
3969 	if (err)
3970 		goto err_res;
3971 
3972 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3973 		err = mlx5_vdpa_create_dma_mr(mvdev);
3974 		if (err)
3975 			goto err_mr_res;
3976 	}
3977 
3978 	err = alloc_fixed_resources(ndev);
3979 	if (err)
3980 		goto err_mr;
3981 
3982 	ndev->cvq_ent.mvdev = mvdev;
3983 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3984 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3985 	if (!mvdev->wq) {
3986 		err = -ENOMEM;
3987 		goto err_res2;
3988 	}
3989 
3990 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3991 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3992 	if (err)
3993 		goto err_reg;
3994 
3995 	mgtdev->ndev = ndev;
3996 
3997 	/* For virtio-vdpa, the device was set up during device register. */
3998 	if (ndev->setup)
3999 		return 0;
4000 
4001 	down_write(&ndev->reslock);
4002 	err = setup_vq_resources(ndev, false);
4003 	up_write(&ndev->reslock);
4004 	if (err)
4005 		goto err_setup_vq_res;
4006 
4007 	return 0;
4008 
4009 err_setup_vq_res:
4010 	_vdpa_unregister_device(&mvdev->vdev);
4011 err_reg:
4012 	destroy_workqueue(mvdev->wq);
4013 err_res2:
4014 	free_fixed_resources(ndev);
4015 err_mr:
4016 	mlx5_vdpa_clean_mrs(mvdev);
4017 err_mr_res:
4018 	mlx5_vdpa_destroy_mr_resources(mvdev);
4019 err_res:
4020 	mlx5_vdpa_free_resources(&ndev->mvdev);
4021 err_mpfs:
4022 	if (!is_zero_ether_addr(config->mac))
4023 		mlx5_mpfs_del_mac(pfmdev, config->mac);
4024 err_alloc:
4025 	put_device(&mvdev->vdev.dev);
4026 	return err;
4027 }
4028 
4029 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
4030 {
4031 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
4032 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
4033 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
4034 	struct workqueue_struct *wq;
4035 
4036 	unregister_link_notifier(ndev);
4037 	_vdpa_unregister_device(dev);
4038 
4039 	down_write(&ndev->reslock);
4040 	teardown_vq_resources(ndev);
4041 	up_write(&ndev->reslock);
4042 
4043 	wq = mvdev->wq;
4044 	mvdev->wq = NULL;
4045 	destroy_workqueue(wq);
4046 	mgtdev->ndev = NULL;
4047 }
4048 
4049 static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
4050 			      const struct vdpa_dev_set_config *add_config)
4051 {
4052 	struct virtio_net_config *config;
4053 	struct mlx5_core_dev *pfmdev;
4054 	struct mlx5_vdpa_dev *mvdev;
4055 	struct mlx5_vdpa_net *ndev;
4056 	struct mlx5_core_dev *mdev;
4057 	int err = -EOPNOTSUPP;
4058 
4059 	mvdev = to_mvdev(dev);
4060 	ndev = to_mlx5_vdpa_ndev(mvdev);
4061 	mdev = mvdev->mdev;
4062 	config = &ndev->config;
4063 
4064 	down_write(&ndev->reslock);
4065 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
4066 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
4067 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
4068 		if (!err)
4069 			ether_addr_copy(config->mac, add_config->net.mac);
4070 	}
4071 
4072 	up_write(&ndev->reslock);
4073 	return err;
4074 }
4075 
4076 static const struct vdpa_mgmtdev_ops mdev_ops = {
4077 	.dev_add = mlx5_vdpa_dev_add,
4078 	.dev_del = mlx5_vdpa_dev_del,
4079 	.dev_set_attr = mlx5_vdpa_set_attr,
4080 };
4081 
4082 static struct virtio_device_id id_table[] = {
4083 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
4084 	{ 0 },
4085 };
4086 
4087 static int mlx5v_probe(struct auxiliary_device *adev,
4088 		       const struct auxiliary_device_id *id)
4089 
4090 {
4091 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
4092 	struct mlx5_core_dev *mdev = madev->mdev;
4093 	struct mlx5_vdpa_mgmtdev *mgtdev;
4094 	int err;
4095 
4096 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
4097 	if (!mgtdev)
4098 		return -ENOMEM;
4099 
4100 	mgtdev->mgtdev.ops = &mdev_ops;
4101 	mgtdev->mgtdev.device = mdev->device;
4102 	mgtdev->mgtdev.id_table = id_table;
4103 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
4104 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
4105 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
4106 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
4107 	mgtdev->mgtdev.max_supported_vqs =
4108 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
4109 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
4110 	mgtdev->madev = madev;
4111 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
4112 
4113 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
4114 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
4115 
4116 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
4117 		mgtdev->vdpa_ops.resume = NULL;
4118 
4119 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
4120 	if (err)
4121 		goto reg_err;
4122 
4123 	auxiliary_set_drvdata(adev, mgtdev);
4124 
4125 	return 0;
4126 
4127 reg_err:
4128 	kfree(mgtdev);
4129 	return err;
4130 }
4131 
4132 static void mlx5v_remove(struct auxiliary_device *adev)
4133 {
4134 	struct mlx5_vdpa_mgmtdev *mgtdev;
4135 
4136 	mgtdev = auxiliary_get_drvdata(adev);
4137 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
4138 	kfree(mgtdev);
4139 }
4140 
4141 static const struct auxiliary_device_id mlx5v_id_table[] = {
4142 	{ .name = MLX5_ADEV_NAME ".vnet", },
4143 	{},
4144 };
4145 
4146 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
4147 
4148 static struct auxiliary_driver mlx5v_driver = {
4149 	.name = "vnet",
4150 	.probe = mlx5v_probe,
4151 	.remove = mlx5v_remove,
4152 	.id_table = mlx5v_id_table,
4153 };
4154 
4155 module_auxiliary_driver(mlx5v_driver);
4156