xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 4eca0ef49af9b2b0c52ef2b58e045ab34629796b)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 struct mlx5_vdpa_cq_buf {
52 	struct mlx5_frag_buf_ctrl fbc;
53 	struct mlx5_frag_buf frag_buf;
54 	int cqe_size;
55 	int nent;
56 };
57 
58 struct mlx5_vdpa_cq {
59 	struct mlx5_core_cq mcq;
60 	struct mlx5_vdpa_cq_buf buf;
61 	struct mlx5_db db;
62 	int cqe;
63 };
64 
65 struct mlx5_vdpa_umem {
66 	struct mlx5_frag_buf_ctrl fbc;
67 	struct mlx5_frag_buf frag_buf;
68 	int size;
69 	u32 id;
70 };
71 
72 struct mlx5_vdpa_qp {
73 	struct mlx5_core_qp mqp;
74 	struct mlx5_frag_buf frag_buf;
75 	struct mlx5_db db;
76 	u16 head;
77 	bool fw;
78 };
79 
80 struct mlx5_vq_restore_info {
81 	u32 num_ent;
82 	u64 desc_addr;
83 	u64 device_addr;
84 	u64 driver_addr;
85 	u16 avail_index;
86 	u16 used_index;
87 	struct msi_map map;
88 	bool ready;
89 	bool restore;
90 };
91 
92 struct mlx5_vdpa_virtqueue {
93 	bool ready;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u32 num_ent;
98 
99 	/* Resources for implementing the notification channel from the device
100 	 * to the driver. fwqp is the firmware end of an RC connection; the
101 	 * other end is vqqp used by the driver. cq is where completions are
102 	 * reported.
103 	 */
104 	struct mlx5_vdpa_cq cq;
105 	struct mlx5_vdpa_qp fwqp;
106 	struct mlx5_vdpa_qp vqqp;
107 
108 	/* umem resources are required for the virtqueue operation. Their use
109 	 * is internal and they must be provided by the driver.
110 	 */
111 	struct mlx5_vdpa_umem umem1;
112 	struct mlx5_vdpa_umem umem2;
113 	struct mlx5_vdpa_umem umem3;
114 
115 	u32 counter_set_id;
116 	bool initialized;
117 	int index;
118 	u32 virtq_id;
119 	struct mlx5_vdpa_net *ndev;
120 	u16 avail_idx;
121 	u16 used_idx;
122 	int fw_state;
123 	struct msi_map map;
124 
125 	/* keep last in the struct */
126 	struct mlx5_vq_restore_info ri;
127 };
128 
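/* A virtqueue index is valid only within the range implied by the negotiated
 * features: without VIRTIO_NET_F_MQ there is a single RX/TX pair (indices 0
 * and 1) plus index 2 for the control VQ if VIRTIO_NET_F_CTRL_VQ was
 * negotiated; with MQ any index up to mvdev->max_idx is accepted.
 */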
129 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
130 {
131 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
132 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
133 			return idx < 2;
134 		else
135 			return idx < 3;
136 	}
137 
138 	return idx <= mvdev->max_idx;
139 }
140 
141 static void free_resources(struct mlx5_vdpa_net *ndev);
142 static void init_mvqs(struct mlx5_vdpa_net *ndev);
143 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
144 static void teardown_driver(struct mlx5_vdpa_net *ndev);
145 
146 static bool mlx5_vdpa_debug;
147 
148 #define MLX5_CVQ_MAX_ENT 16
149 
150 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
151 	do {                                                                                       \
152 		if (features & BIT_ULL(_feature))                                                  \
153 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
154 	} while (0)
155 
156 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
157 	do {                                                                                       \
158 		if (status & (_status))                                                            \
159 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
160 	} while (0)
161 
162 /* TODO: cross-endian support */
163 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
164 {
165 	return virtio_legacy_is_little_endian() ||
166 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
167 }
168 
169 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
170 {
171 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
172 }
173 
174 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
175 {
176 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
177 }
178 
179 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
180 {
181 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
182 		return 2;
183 
184 	return mvdev->max_vqs;
185 }
186 
187 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
188 {
189 	return idx == ctrl_vq_idx(mvdev);
190 }
191 
192 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
193 {
194 	if (status & ~VALID_STATUS_MASK)
195 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
196 			       status & ~VALID_STATUS_MASK);
197 
198 	if (!mlx5_vdpa_debug)
199 		return;
200 
201 	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
202 	if (set && !status) {
203 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
204 		return;
205 	}
206 
207 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
208 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
209 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
210 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
211 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
212 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
213 }
214 
215 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
216 {
217 	if (features & ~VALID_FEATURES_MASK)
218 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
219 			       features & ~VALID_FEATURES_MASK);
220 
221 	if (!mlx5_vdpa_debug)
222 		return;
223 
224 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
225 	if (!features)
226 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
227 
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
262 }
263 
264 static int create_tis(struct mlx5_vdpa_net *ndev)
265 {
266 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
267 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
268 	void *tisc;
269 	int err;
270 
271 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
272 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
273 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
274 	if (err)
275 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
276 
277 	return err;
278 }
279 
280 static void destroy_tis(struct mlx5_vdpa_net *ndev)
281 {
282 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
283 }
284 
285 #define MLX5_VDPA_CQE_SIZE 64
286 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
287 
288 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
289 {
290 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
291 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
292 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
293 	int err;
294 
295 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
296 				       ndev->mvdev.mdev->priv.numa_node);
297 	if (err)
298 		return err;
299 
300 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
301 
302 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
303 	buf->nent = nent;
304 
305 	return 0;
306 }
307 
308 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
309 {
310 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
311 
312 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
313 					ndev->mvdev.mdev->priv.numa_node);
314 }
315 
316 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
317 {
318 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
319 }
320 
321 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
322 {
323 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
324 }
325 
326 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
327 {
328 	struct mlx5_cqe64 *cqe64;
329 	void *cqe;
330 	int i;
331 
332 	for (i = 0; i < buf->nent; i++) {
333 		cqe = get_cqe(vcq, i);
334 		cqe64 = cqe;
335 		cqe64->op_own = MLX5_CQE_INVALID << 4;
336 	}
337 }
338 
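/* Return the CQE at consumer index n if it is owned by software, i.e. its
 * opcode is valid and its ownership bit matches the current pass over the
 * CQ ring; otherwise return NULL.
 */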
339 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
340 {
341 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
342 
343 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
344 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
345 		return cqe64;
346 
347 	return NULL;
348 }
349 
350 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
351 {
352 	vqp->head += n;
353 	vqp->db.db[0] = cpu_to_be32(vqp->head);
354 }
355 
356 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
357 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
358 {
359 	struct mlx5_vdpa_qp *vqp;
360 	__be64 *pas;
361 	void *qpc;
362 
363 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
364 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
365 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
366 	if (vqp->fw) {
367 		/* The firmware QP is allocated by the driver for the firmware's use,
368 		 * so we can skip some of the params as they will be chosen by firmware.
369 		 */
370 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
371 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
372 		MLX5_SET(qpc, qpc, no_sq, 1);
373 		return;
374 	}
375 
376 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
377 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
378 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
379 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
380 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
381 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
382 	MLX5_SET(qpc, qpc, no_sq, 1);
383 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
384 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
385 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
386 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
387 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
388 }
389 
390 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
391 {
392 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
393 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
394 					ndev->mvdev.mdev->priv.numa_node);
395 }
396 
397 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
398 {
399 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
400 }
401 
402 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
403 		     struct mlx5_vdpa_qp *vqp)
404 {
405 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
406 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
407 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
408 	void *qpc;
409 	void *in;
410 	int err;
411 
412 	if (!vqp->fw) {
413 		vqp = &mvq->vqqp;
414 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
415 		if (err)
416 			return err;
417 
418 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
419 		if (err)
420 			goto err_db;
421 		inlen += vqp->frag_buf.npages * sizeof(__be64);
422 	}
423 
424 	in = kzalloc(inlen, GFP_KERNEL);
425 	if (!in) {
426 		err = -ENOMEM;
427 		goto err_kzalloc;
428 	}
429 
430 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
431 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
432 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
433 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
434 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
435 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
436 	if (!vqp->fw)
437 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
438 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
439 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
440 	kfree(in);
441 	if (err)
442 		goto err_kzalloc;
443 
444 	vqp->mqp.uid = ndev->mvdev.res.uid;
445 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
446 
447 	if (!vqp->fw)
448 		rx_post(vqp, mvq->num_ent);
449 
450 	return 0;
451 
452 err_kzalloc:
453 	if (!vqp->fw)
454 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
455 err_db:
456 	if (!vqp->fw)
457 		rq_buf_free(ndev, vqp);
458 
459 	return err;
460 }
461 
462 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
463 {
464 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
465 
466 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
467 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
468 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
469 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
470 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
471 	if (!vqp->fw) {
472 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
473 		rq_buf_free(ndev, vqp);
474 	}
475 }
476 
477 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
478 {
479 	return get_sw_cqe(cq, cq->mcq.cons_index);
480 }
481 
482 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
483 {
484 	struct mlx5_cqe64 *cqe64;
485 
486 	cqe64 = next_cqe_sw(vcq);
487 	if (!cqe64)
488 		return -EAGAIN;
489 
490 	vcq->mcq.cons_index++;
491 	return 0;
492 }
493 
494 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
495 {
496 	struct mlx5_vdpa_net *ndev = mvq->ndev;
497 	struct vdpa_callback *event_cb;
498 
499 	event_cb = &ndev->event_cbs[mvq->index];
500 	mlx5_cq_set_ci(&mvq->cq.mcq);
501 
502 	/* Make sure the CQ consumer index update is visible to the hardware
503 	 * before updating the RX doorbell record.
504 	 */
505 	dma_wmb();
506 	rx_post(&mvq->vqqp, num);
507 	if (event_cb->callback)
508 		event_cb->callback(event_cb->private);
509 }
510 
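/* CQ completion handler: drain all software-owned CQEs, updating the
 * doorbell records and notifying the vdpa core whenever more than half a
 * ring's worth of completions has accumulated, then re-arm the CQ.
 */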
511 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
512 {
513 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
514 	struct mlx5_vdpa_net *ndev = mvq->ndev;
515 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
516 	int num = 0;
517 
518 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
519 		num++;
520 		if (num > mvq->num_ent / 2) {
521 			/* If completions keep coming while we poll, we want to
522 			 * let the hardware know that we consumed them by
523 			 * updating the doorbell record. We also let the vdpa core
524 			 * know about this so it passes it on to the virtio driver
525 			 * in the guest.
526 			 */
527 			mlx5_vdpa_handle_completions(mvq, num);
528 			num = 0;
529 		}
530 	}
531 
532 	if (num)
533 		mlx5_vdpa_handle_completions(mvq, num);
534 
535 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
536 }
537 
538 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
539 {
540 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
541 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
542 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
543 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
544 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
545 	__be64 *pas;
546 	int inlen;
547 	void *cqc;
548 	void *in;
549 	int err;
550 	int eqn;
551 
552 	err = mlx5_db_alloc(mdev, &vcq->db);
553 	if (err)
554 		return err;
555 
556 	vcq->mcq.set_ci_db = vcq->db.db;
557 	vcq->mcq.arm_db = vcq->db.db + 1;
558 	vcq->mcq.cqe_sz = 64;
559 
560 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
561 	if (err)
562 		goto err_db;
563 
564 	cq_frag_buf_init(vcq, &vcq->buf);
565 
566 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
567 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
568 	in = kzalloc(inlen, GFP_KERNEL);
569 	if (!in) {
570 		err = -ENOMEM;
571 		goto err_vzalloc;
572 	}
573 
574 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
575 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
576 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
577 
578 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
579 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
580 
581 	/* Use vector 0 by default. Consider adding code to choose the least
582 	 * used vector.
583 	 */
584 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
585 	if (err)
586 		goto err_vec;
587 
588 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
589 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
590 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
591 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
592 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
593 
594 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
595 	if (err)
596 		goto err_vec;
597 
598 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
599 	vcq->cqe = num_ent;
600 	vcq->mcq.set_ci_db = vcq->db.db;
601 	vcq->mcq.arm_db = vcq->db.db + 1;
602 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
603 	kfree(in);
604 	return 0;
605 
606 err_vec:
607 	kfree(in);
608 err_vzalloc:
609 	cq_frag_buf_free(ndev, &vcq->buf);
610 err_db:
611 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
612 	return err;
613 }
614 
615 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
616 {
617 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
618 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
619 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
620 
621 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
622 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
623 		return;
624 	}
625 	cq_frag_buf_free(ndev, &vcq->buf);
626 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
627 }
628 
629 static int read_umem_params(struct mlx5_vdpa_net *ndev)
630 {
631 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
632 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
633 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
634 	int out_size;
635 	void *caps;
636 	void *out;
637 	int err;
638 
639 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
640 	out = kzalloc(out_size, GFP_KERNEL);
641 	if (!out)
642 		return -ENOMEM;
643 
644 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
645 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
646 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
647 	if (err) {
648 		mlx5_vdpa_warn(&ndev->mvdev,
649 			"Failed reading vdpa umem capabilities with err %d\n", err);
650 		goto out;
651 	}
652 
653 	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
654 
655 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
656 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
657 
658 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
659 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
660 
661 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
662 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
663 
664 out:
665 	kfree(out);
666 	return err;
667 }
668 
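/* The required size of umem buffer <num> is derived from the parameters the
 * device reported in read_umem_params(): size = param_a * queue_size +
 * param_b.
 */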
669 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
670 			  struct mlx5_vdpa_umem **umemp)
671 {
672 	u32 p_a;
673 	u32 p_b;
674 
675 	switch (num) {
676 	case 1:
677 		p_a = ndev->umem_1_buffer_param_a;
678 		p_b = ndev->umem_1_buffer_param_b;
679 		*umemp = &mvq->umem1;
680 		break;
681 	case 2:
682 		p_a = ndev->umem_2_buffer_param_a;
683 		p_b = ndev->umem_2_buffer_param_b;
684 		*umemp = &mvq->umem2;
685 		break;
686 	case 3:
687 		p_a = ndev->umem_3_buffer_param_a;
688 		p_b = ndev->umem_3_buffer_param_b;
689 		*umemp = &mvq->umem3;
690 		break;
691 	}
692 
693 	(*umemp)->size = p_a * mvq->num_ent + p_b;
694 }
695 
696 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
697 {
698 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
699 }
700 
701 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
702 {
703 	int inlen;
704 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
705 	void *um;
706 	void *in;
707 	int err;
708 	__be64 *pas;
709 	struct mlx5_vdpa_umem *umem;
710 
711 	set_umem_size(ndev, mvq, num, &umem);
712 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
713 	if (err)
714 		return err;
715 
716 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
717 
718 	in = kzalloc(inlen, GFP_KERNEL);
719 	if (!in) {
720 		err = -ENOMEM;
721 		goto err_in;
722 	}
723 
724 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
725 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
726 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
727 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
728 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
729 
730 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
731 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
732 
733 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
734 	if (err) {
735 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
736 		goto err_cmd;
737 	}
738 
739 	kfree(in);
740 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
741 
742 	return 0;
743 
744 err_cmd:
745 	kfree(in);
746 err_in:
747 	umem_frag_buf_free(ndev, umem);
748 	return err;
749 }
750 
751 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
752 {
753 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
754 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
755 	struct mlx5_vdpa_umem *umem;
756 
757 	switch (num) {
758 	case 1:
759 		umem = &mvq->umem1;
760 		break;
761 	case 2:
762 		umem = &mvq->umem2;
763 		break;
764 	case 3:
765 		umem = &mvq->umem3;
766 		break;
767 	}
768 
769 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
770 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
771 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
772 		return;
773 
774 	umem_frag_buf_free(ndev, umem);
775 }
776 
777 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
778 {
779 	int num;
780 	int err;
781 
782 	for (num = 1; num <= 3; num++) {
783 		err = create_umem(ndev, mvq, num);
784 		if (err)
785 			goto err_umem;
786 	}
787 	return 0;
788 
789 err_umem:
790 	for (num--; num > 0; num--)
791 		umem_destroy(ndev, mvq, num);
792 
793 	return err;
794 }
795 
796 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
797 {
798 	int num;
799 
800 	for (num = 3; num > 0; num--)
801 		umem_destroy(ndev, mvq, num);
802 }
803 
804 static int get_queue_type(struct mlx5_vdpa_net *ndev)
805 {
806 	u32 type_mask;
807 
808 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
809 
810 	/* prefer split queue */
811 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
812 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
813 
814 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
815 
816 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
817 }
818 
819 static bool vq_is_tx(u16 idx)
820 {
821 	return idx % 2;
822 }
823 
824 enum {
825 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
826 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
827 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
828 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
829 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
830 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
831 	MLX5_VIRTIO_NET_F_CSUM = 10,
832 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
833 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
834 };
835 
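/* Translate the negotiated virtio-net offload feature bits into the device's
 * bit positions; create_virtqueue() splits the result between the
 * queue_feature_bit_mask_12_3 and queue_feature_bit_mask_2_0 fields.
 */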
836 static u16 get_features(u64 features)
837 {
838 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
839 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
840 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
841 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
842 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
843 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
844 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
845 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
846 }
847 
848 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
849 {
850 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
851 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
852 }
853 
854 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
855 {
856 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
857 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
858 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
859 }
860 
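/* Create the VIRTIO_NET_Q object for one virtqueue, programming the ring
 * addresses, memory keys, umems, event mode (QP based or MSI-X) and, if
 * supported, the counter set.
 */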
861 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
862 {
863 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
864 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
865 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
866 	struct mlx5_vdpa_mr *vq_mr;
867 	struct mlx5_vdpa_mr *vq_desc_mr;
868 	void *obj_context;
869 	u16 mlx_features;
870 	void *cmd_hdr;
871 	void *vq_ctx;
872 	void *in;
873 	int err;
874 
875 	err = umems_create(ndev, mvq);
876 	if (err)
877 		return err;
878 
879 	in = kzalloc(inlen, GFP_KERNEL);
880 	if (!in) {
881 		err = -ENOMEM;
882 		goto err_alloc;
883 	}
884 
885 	mlx_features = get_features(ndev->mvdev.actual_features);
886 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
887 
888 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
889 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
890 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
891 
892 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
893 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
894 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
895 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
896 		 mlx_features >> 3);
897 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
898 		 mlx_features & 7);
899 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
900 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
901 
902 	if (vq_is_tx(mvq->index))
903 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
904 
905 	if (mvq->map.virq) {
906 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
907 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
908 	} else {
909 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
910 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
911 	}
912 
913 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
914 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
915 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
916 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
917 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
918 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
919 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
920 	vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
921 	if (vq_mr)
922 		MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
923 
924 	vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
925 	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
926 		MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
927 
928 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
929 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
930 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
931 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
932 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
933 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
934 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
935 	if (counters_supported(&ndev->mvdev))
936 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
937 
938 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
939 	if (err)
940 		goto err_cmd;
941 
942 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
943 	kfree(in);
944 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
945 
946 	return 0;
947 
948 err_cmd:
949 	kfree(in);
950 err_alloc:
951 	umems_destroy(ndev, mvq);
952 	return err;
953 }
954 
955 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
956 {
957 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
958 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
959 
960 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
961 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
962 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
963 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
964 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
965 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
966 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
967 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
968 		return;
969 	}
970 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
971 	umems_destroy(ndev, mvq);
972 }
973 
974 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
975 {
976 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
977 }
978 
979 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
980 {
981 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
982 }
983 
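/* Allocate and fill the input/output mailboxes for a QP state-transition
 * command. Only the transitions used by connect_qps() are handled; on
 * allocation failure or an unknown command, *in and *out are set to NULL so
 * the caller can detect the error.
 */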
984 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
985 			int *outlen, u32 qpn, u32 rqpn)
986 {
987 	void *qpc;
988 	void *pp;
989 
990 	switch (cmd) {
991 	case MLX5_CMD_OP_2RST_QP:
992 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
993 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
994 		*in = kzalloc(*inlen, GFP_KERNEL);
995 		*out = kzalloc(*outlen, GFP_KERNEL);
996 		if (!*in || !*out)
997 			goto outerr;
998 
999 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1000 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1001 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1002 		break;
1003 	case MLX5_CMD_OP_RST2INIT_QP:
1004 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1005 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1006 		*in = kzalloc(*inlen, GFP_KERNEL);
1007 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1008 		if (!*in || !*out)
1009 			goto outerr;
1010 
1011 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1012 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1013 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1014 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1015 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1016 		MLX5_SET(qpc, qpc, rwe, 1);
1017 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1018 		MLX5_SET(ads, pp, vhca_port_num, 1);
1019 		break;
1020 	case MLX5_CMD_OP_INIT2RTR_QP:
1021 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1022 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1023 		*in = kzalloc(*inlen, GFP_KERNEL);
1024 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1025 		if (!*in || !*out)
1026 			goto outerr;
1027 
1028 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1029 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1030 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1031 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1032 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1033 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1034 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1035 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1036 		MLX5_SET(ads, pp, fl, 1);
1037 		break;
1038 	case MLX5_CMD_OP_RTR2RTS_QP:
1039 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1040 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1041 		*in = kzalloc(*inlen, GFP_KERNEL);
1042 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1043 		if (!*in || !*out)
1044 			goto outerr;
1045 
1046 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1047 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1048 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1049 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1050 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1051 		MLX5_SET(ads, pp, ack_timeout, 14);
1052 		MLX5_SET(qpc, qpc, retry_count, 7);
1053 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1054 		break;
1055 	default:
1056 		goto outerr_nullify;
1057 	}
1058 
1059 	return;
1060 
1061 outerr:
1062 	kfree(*in);
1063 	kfree(*out);
1064 outerr_nullify:
1065 	*in = NULL;
1066 	*out = NULL;
1067 }
1068 
1069 static void free_inout(void *in, void *out)
1070 {
1071 	kfree(in);
1072 	kfree(out);
1073 }
1074 
1075 /* Two QPs are used by each virtqueue: one by the driver and one by the
1076  * firmware. The fw argument indicates whether the QP being modified is the
1077  * one used by the firmware.
1078  */
1079 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1080 {
1081 	int outlen;
1082 	int inlen;
1083 	void *out;
1084 	void *in;
1085 	int err;
1086 
1087 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1088 	if (!in || !out)
1089 		return -ENOMEM;
1090 
1091 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1092 	free_inout(in, out);
1093 	return err;
1094 }
1095 
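/* Walk the firmware QP and the driver QP through the RC state machine
 * (RST -> INIT -> RTR -> RTS) to form the loopback connection that serves as
 * the virtqueue's completion notification channel. Only the firmware QP is
 * moved all the way to RTS; the driver QP has no send queue and stays in RTR.
 */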
1096 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1097 {
1098 	int err;
1099 
1100 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1101 	if (err)
1102 		return err;
1103 
1104 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1105 	if (err)
1106 		return err;
1107 
1108 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1109 	if (err)
1110 		return err;
1111 
1112 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1113 	if (err)
1114 		return err;
1115 
1116 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1117 	if (err)
1118 		return err;
1119 
1120 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1121 	if (err)
1122 		return err;
1123 
1124 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1125 }
1126 
1127 struct mlx5_virtq_attr {
1128 	u8 state;
1129 	u16 available_index;
1130 	u16 used_index;
1131 };
1132 
1133 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1134 			   struct mlx5_virtq_attr *attr)
1135 {
1136 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1137 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1138 	void *out;
1139 	void *obj_context;
1140 	void *cmd_hdr;
1141 	int err;
1142 
1143 	out = kzalloc(outlen, GFP_KERNEL);
1144 	if (!out)
1145 		return -ENOMEM;
1146 
1147 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1148 
1149 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1150 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1151 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1152 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1153 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1154 	if (err)
1155 		goto err_cmd;
1156 
1157 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1158 	memset(attr, 0, sizeof(*attr));
1159 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1160 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1161 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1162 	kfree(out);
1163 	return 0;
1164 
1165 err_cmd:
1166 	kfree(out);
1167 	return err;
1168 }
1169 
1170 static bool is_valid_state_change(int oldstate, int newstate)
1171 {
1172 	switch (oldstate) {
1173 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1174 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1175 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1176 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1177 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1178 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1179 	default:
1180 		return false;
1181 	}
1182 }
1183 
1184 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1185 {
1186 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1187 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1188 	void *obj_context;
1189 	void *cmd_hdr;
1190 	void *in;
1191 	int err;
1192 
1193 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1194 		return 0;
1195 
1196 	if (!is_valid_state_change(mvq->fw_state, state))
1197 		return -EINVAL;
1198 
1199 	in = kzalloc(inlen, GFP_KERNEL);
1200 	if (!in)
1201 		return -ENOMEM;
1202 
1203 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1204 
1205 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1206 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1207 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1208 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1209 
1210 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1211 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1212 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1213 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1214 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1215 	kfree(in);
1216 	if (!err)
1217 		mvq->fw_state = state;
1218 
1219 	return err;
1220 }
1221 
1222 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1223 {
1224 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1225 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1226 	void *cmd_hdr;
1227 	int err;
1228 
1229 	if (!counters_supported(&ndev->mvdev))
1230 		return 0;
1231 
1232 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1233 
1234 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1235 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1236 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1237 
1238 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1239 	if (err)
1240 		return err;
1241 
1242 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1243 
1244 	return 0;
1245 }
1246 
1247 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1248 {
1249 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1250 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1251 
1252 	if (!counters_supported(&ndev->mvdev))
1253 		return;
1254 
1255 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1256 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1257 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1258 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1259 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1260 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1261 }
1262 
1263 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1264 {
1265 	struct vdpa_callback *cb = priv;
1266 
1267 	if (cb->callback)
1268 		return cb->callback(cb->private);
1269 
1270 	return IRQ_HANDLED;
1271 }
1272 
1273 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1274 			 struct mlx5_vdpa_virtqueue *mvq)
1275 {
1276 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1277 	struct mlx5_vdpa_irq_pool_entry *ent;
1278 	int err;
1279 	int i;
1280 
1281 	for (i = 0; i < irqp->num_ent; i++) {
1282 		ent = &irqp->entries[i];
1283 		if (!ent->used) {
1284 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1285 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1286 			ent->dev_id = &ndev->event_cbs[mvq->index];
1287 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1288 					  ent->name, ent->dev_id);
1289 			if (err)
1290 				return;
1291 
1292 			ent->used = true;
1293 			mvq->map = ent->map;
1294 			return;
1295 		}
1296 	}
1297 }
1298 
1299 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1300 			   struct mlx5_vdpa_virtqueue *mvq)
1301 {
1302 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1303 	int i;
1304 
1305 	for (i = 0; i < irqp->num_ent; i++)
1306 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1307 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1308 			irqp->entries[i].used = false;
1309 			return;
1310 		}
1311 }
1312 
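/* Create all hardware resources backing one virtqueue: the completion queue,
 * the firmware/driver QP pair used for notifications, the counter set, an
 * MSI-X vector if one is available, and finally the virtqueue object itself,
 * optionally moving it to the RDY state.
 */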
1313 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1314 {
1315 	u16 idx = mvq->index;
1316 	int err;
1317 
1318 	if (!mvq->num_ent)
1319 		return 0;
1320 
1321 	if (mvq->initialized)
1322 		return 0;
1323 
1324 	err = cq_create(ndev, idx, mvq->num_ent);
1325 	if (err)
1326 		return err;
1327 
1328 	err = qp_create(ndev, mvq, &mvq->fwqp);
1329 	if (err)
1330 		goto err_fwqp;
1331 
1332 	err = qp_create(ndev, mvq, &mvq->vqqp);
1333 	if (err)
1334 		goto err_vqqp;
1335 
1336 	err = connect_qps(ndev, mvq);
1337 	if (err)
1338 		goto err_connect;
1339 
1340 	err = counter_set_alloc(ndev, mvq);
1341 	if (err)
1342 		goto err_connect;
1343 
1344 	alloc_vector(ndev, mvq);
1345 	err = create_virtqueue(ndev, mvq);
1346 	if (err)
1347 		goto err_vq;
1348 
1349 	if (mvq->ready) {
1350 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1351 		if (err) {
1352 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1353 				       idx, err);
1354 			goto err_modify;
1355 		}
1356 	}
1357 
1358 	mvq->initialized = true;
1359 	return 0;
1360 
1361 err_modify:
1362 	destroy_virtqueue(ndev, mvq);
1363 err_vq:
1364 	dealloc_vector(ndev, mvq);
1365 	counter_set_dealloc(ndev, mvq);
1366 err_connect:
1367 	qp_destroy(ndev, &mvq->vqqp);
1368 err_vqqp:
1369 	qp_destroy(ndev, &mvq->fwqp);
1370 err_fwqp:
1371 	cq_destroy(ndev, idx);
1372 	return err;
1373 }
1374 
1375 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1376 {
1377 	struct mlx5_virtq_attr attr;
1378 
1379 	if (!mvq->initialized)
1380 		return;
1381 
1382 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1383 		return;
1384 
1385 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1386 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1387 
1388 	if (query_virtqueue(ndev, mvq, &attr)) {
1389 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1390 		return;
1391 	}
1392 	mvq->avail_idx = attr.available_index;
1393 	mvq->used_idx = attr.used_index;
1394 }
1395 
1396 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1397 {
1398 	int i;
1399 
1400 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1401 		suspend_vq(ndev, &ndev->vqs[i]);
1402 }
1403 
1404 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1405 {
1406 	if (!mvq->initialized)
1407 		return;
1408 
1409 	suspend_vq(ndev, mvq);
1410 	destroy_virtqueue(ndev, mvq);
1411 	dealloc_vector(ndev, mvq);
1412 	counter_set_dealloc(ndev, mvq);
1413 	qp_destroy(ndev, &mvq->vqqp);
1414 	qp_destroy(ndev, &mvq->fwqp);
1415 	cq_destroy(ndev, mvq->index);
1416 	mvq->initialized = false;
1417 }
1418 
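/* The RQT lists the receive virtqueues (the even-indexed ones) that RSS may
 * spread traffic over. Its maximum size is fixed at creation time, while
 * rqt_actual_size is updated by modify_rqt() whenever the number of active
 * queue pairs changes.
 */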
1419 static int create_rqt(struct mlx5_vdpa_net *ndev)
1420 {
1421 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1422 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1423 	__be32 *list;
1424 	void *rqtc;
1425 	int inlen;
1426 	void *in;
1427 	int i, j;
1428 	int err;
1429 
1430 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1431 	in = kzalloc(inlen, GFP_KERNEL);
1432 	if (!in)
1433 		return -ENOMEM;
1434 
1435 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1436 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1437 
1438 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1439 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1440 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1441 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1442 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1443 
1444 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1445 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1446 	kfree(in);
1447 	if (err)
1448 		return err;
1449 
1450 	return 0;
1451 }
1452 
1453 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1454 
1455 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1456 {
1457 	int act_sz = roundup_pow_of_two(num / 2);
1458 	__be32 *list;
1459 	void *rqtc;
1460 	int inlen;
1461 	void *in;
1462 	int i, j;
1463 	int err;
1464 
1465 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1466 	in = kzalloc(inlen, GFP_KERNEL);
1467 	if (!in)
1468 		return -ENOMEM;
1469 
1470 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1471 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1472 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1473 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1474 
1475 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1476 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1477 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1478 
1479 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1480 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1481 	kfree(in);
1482 	if (err)
1483 		return err;
1484 
1485 	return 0;
1486 }
1487 
1488 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1489 {
1490 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1491 }
1492 
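/* Create a TIR that distributes received traffic over the RQT using a
 * symmetric Toeplitz hash of the outer IPv4 addresses and TCP ports.
 */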
1493 static int create_tir(struct mlx5_vdpa_net *ndev)
1494 {
1495 #define HASH_IP_L4PORTS                                                                            \
1496 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1497 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1498 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1499 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1500 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1501 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1502 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1503 	void *rss_key;
1504 	void *outer;
1505 	void *tirc;
1506 	void *in;
1507 	int err;
1508 
1509 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1510 	if (!in)
1511 		return -ENOMEM;
1512 
1513 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1514 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1515 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1516 
1517 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1518 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1519 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1520 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1521 
1522 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1523 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1524 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1525 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1526 
1527 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1528 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1529 
1530 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1531 	kfree(in);
1532 	if (err)
1533 		return err;
1534 
1535 	mlx5_vdpa_add_tirn(ndev);
1536 	return err;
1537 }
1538 
1539 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1540 {
1541 	mlx5_vdpa_remove_tirn(ndev);
1542 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1543 }
1544 
1545 #define MAX_STEERING_ENT 0x8000
1546 #define MAX_STEERING_GROUPS 2
1547 
1548 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1549 #define NUM_DESTS 2
1550 #else
1551 #define NUM_DESTS 1
1552 #endif
1553 
1554 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1555 				 struct macvlan_node *node,
1556 				 struct mlx5_flow_act *flow_act,
1557 				 struct mlx5_flow_destination *dests)
1558 {
1559 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1560 	int err;
1561 
1562 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1563 	if (IS_ERR(node->ucast_counter.counter))
1564 		return PTR_ERR(node->ucast_counter.counter);
1565 
1566 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1567 	if (IS_ERR(node->mcast_counter.counter)) {
1568 		err = PTR_ERR(node->mcast_counter.counter);
1569 		goto err_mcast_counter;
1570 	}
1571 
1572 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1573 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1574 	return 0;
1575 
1576 err_mcast_counter:
1577 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1578 	return err;
1579 #else
1580 	return 0;
1581 #endif
1582 }
1583 
1584 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1585 				     struct macvlan_node *node)
1586 {
1587 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1588 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1589 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1590 #endif
1591 }
1592 
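/* Install two steering rules for a MAC/VLAN pair: a unicast rule matching the
 * exact destination MAC and a multicast rule matching the multicast address
 * bit, both forwarding to the TIR (and, when CONFIG_MLX5_VDPA_STEERING_DEBUG
 * is set, counting hits in per-rule flow counters).
 */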
1593 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1594 					struct macvlan_node *node)
1595 {
1596 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1597 	struct mlx5_flow_act flow_act = {};
1598 	struct mlx5_flow_spec *spec;
1599 	void *headers_c;
1600 	void *headers_v;
1601 	u8 *dmac_c;
1602 	u8 *dmac_v;
1603 	int err;
1604 	u16 vid;
1605 
1606 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1607 	if (!spec)
1608 		return -ENOMEM;
1609 
1610 	vid = key2vid(node->macvlan);
1611 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1612 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1613 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1614 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1615 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1616 	eth_broadcast_addr(dmac_c);
1617 	ether_addr_copy(dmac_v, mac);
1618 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1619 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1620 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1621 	}
1622 	if (node->tagged) {
1623 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1624 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1625 	}
1626 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1627 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1628 	dests[0].tir_num = ndev->res.tirn;
1629 	err = add_steering_counters(ndev, node, &flow_act, dests);
1630 	if (err)
1631 		goto out_free;
1632 
1633 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1634 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1635 #endif
1636 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1637 	if (IS_ERR(node->ucast_rule)) {
1638 		err = PTR_ERR(node->ucast_rule);
1639 		goto err_ucast;
1640 	}
1641 
1642 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1643 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1644 #endif
1645 
1646 	memset(dmac_c, 0, ETH_ALEN);
1647 	memset(dmac_v, 0, ETH_ALEN);
1648 	dmac_c[0] = 1;
1649 	dmac_v[0] = 1;
1650 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1651 	if (IS_ERR(node->mcast_rule)) {
1652 		err = PTR_ERR(node->mcast_rule);
1653 		goto err_mcast;
1654 	}
1655 	kvfree(spec);
1656 	mlx5_vdpa_add_rx_counters(ndev, node);
1657 	return 0;
1658 
1659 err_mcast:
1660 	mlx5_del_flow_rules(node->ucast_rule);
1661 err_ucast:
1662 	remove_steering_counters(ndev, node);
1663 out_free:
1664 	kvfree(spec);
1665 	return err;
1666 }
1667 
1668 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1669 					 struct macvlan_node *node)
1670 {
1671 	mlx5_vdpa_remove_rx_counters(ndev, node);
1672 	mlx5_del_flow_rules(node->ucast_rule);
1673 	mlx5_del_flow_rules(node->mcast_rule);
1674 }
1675 
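/* Pack a MAC/VLAN pair into the 64-bit key used by the macvlan hash table:
 * the VLAN ID (or MLX5V_UNTAGGED) goes in bits 48-63 and the 48-bit MAC
 * address occupies the low bits.
 */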
1676 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1677 {
1678 	u64 val;
1679 
1680 	if (!tagged)
1681 		vlan = MLX5V_UNTAGGED;
1682 
1683 	val = (u64)vlan << 48 |
1684 	      (u64)mac[0] << 40 |
1685 	      (u64)mac[1] << 32 |
1686 	      (u64)mac[2] << 24 |
1687 	      (u64)mac[3] << 16 |
1688 	      (u64)mac[4] << 8 |
1689 	      (u64)mac[5];
1690 
1691 	return val;
1692 }
1693 
1694 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1695 {
1696 	struct macvlan_node *pos;
1697 	u32 idx;
1698 
	idx = hash_64(value, 8); /* 8 bits -> MLX5V_MACVLAN_SIZE buckets */
1700 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1701 		if (pos->macvlan == value)
1702 			return pos;
1703 	}
1704 	return NULL;
1705 }
1706 
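/* Add a MAC/VLAN entry: allocate a macvlan_node, install its steering rules
 * and link it into the macvlan hash table. Returns -EEXIST if an identical
 * entry is already present.
 */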
1707 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1708 {
1709 	struct macvlan_node *ptr;
1710 	u64 val;
1711 	u32 idx;
1712 	int err;
1713 
1714 	val = search_val(mac, vid, tagged);
1715 	if (mac_vlan_lookup(ndev, val))
1716 		return -EEXIST;
1717 
1718 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1719 	if (!ptr)
1720 		return -ENOMEM;
1721 
1722 	ptr->tagged = tagged;
1723 	ptr->macvlan = val;
1724 	ptr->ndev = ndev;
1725 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1726 	if (err)
1727 		goto err_add;
1728 
1729 	idx = hash_64(val, 8);
1730 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1731 	return 0;
1732 
1733 err_add:
1734 	kfree(ptr);
1735 	return err;
1736 }
1737 
1738 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1739 {
1740 	struct macvlan_node *ptr;
1741 
1742 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1743 	if (!ptr)
1744 		return;
1745 
1746 	hlist_del(&ptr->hlist);
1747 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1748 	remove_steering_counters(ndev, ptr);
1749 	kfree(ptr);
1750 }
1751 
1752 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1753 {
1754 	struct macvlan_node *pos;
1755 	struct hlist_node *n;
1756 	int i;
1757 
1758 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1759 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1760 			hlist_del(&pos->hlist);
1761 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1762 			remove_steering_counters(ndev, pos);
1763 			kfree(pos);
1764 		}
1765 	}
1766 }
1767 
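/* Create the RX flow table in the bypass namespace and install the initial
 * untagged rule for the currently configured MAC address.
 */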
1768 static int setup_steering(struct mlx5_vdpa_net *ndev)
1769 {
1770 	struct mlx5_flow_table_attr ft_attr = {};
1771 	struct mlx5_flow_namespace *ns;
1772 	int err;
1773 
1774 	ft_attr.max_fte = MAX_STEERING_ENT;
1775 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1776 
1777 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1778 	if (!ns) {
1779 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1780 		return -EOPNOTSUPP;
1781 	}
1782 
1783 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1784 	if (IS_ERR(ndev->rxft)) {
1785 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1786 		return PTR_ERR(ndev->rxft);
1787 	}
1788 	mlx5_vdpa_add_rx_flow_table(ndev);
1789 
1790 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1791 	if (err)
1792 		goto err_add;
1793 
1794 	return 0;
1795 
1796 err_add:
1797 	mlx5_vdpa_remove_rx_flow_table(ndev);
1798 	mlx5_destroy_flow_table(ndev->rxft);
1799 	return err;
1800 }
1801 
1802 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1803 {
1804 	clear_mac_vlan_table(ndev);
1805 	mlx5_vdpa_remove_rx_flow_table(ndev);
1806 	mlx5_destroy_flow_table(ndev->rxft);
1807 }
1808 
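/* Handle VIRTIO_NET_CTRL_MAC commands read from the control VQ. For
 * VIRTIO_NET_CTRL_MAC_ADDR_SET, the new MAC is pulled from the CVQ, the MPFS
 * entry is replaced and the RX forward rules are recreated; on failure, the
 * original MAC and rules are restored on a best effort basis.
 */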
1809 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1810 {
1811 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1812 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1813 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1814 	struct mlx5_core_dev *pfmdev;
1815 	size_t read;
1816 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1817 
1818 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1819 	switch (cmd) {
1820 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1821 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1822 		if (read != ETH_ALEN)
1823 			break;
1824 
		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1826 			status = VIRTIO_NET_OK;
1827 			break;
1828 		}
1829 
1830 		if (is_zero_ether_addr(mac))
1831 			break;
1832 
1833 		if (!is_zero_ether_addr(ndev->config.mac)) {
1834 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1835 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1836 					       ndev->config.mac);
1837 				break;
1838 			}
1839 		}
1840 
1841 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1842 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1843 				       mac);
1844 			break;
1845 		}
1846 
		/* Back up the original MAC address so that it can be restored
		 * if adding the forward rules fails.
		 */
1850 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1851 
1852 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1853 
		/* Recreate the flow table entry so that packets destined to the
		 * new MAC address are forwarded again.
		 */
1856 		mac_vlan_del(ndev, mac_back, 0, false);
1857 
1858 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1859 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1860 
			/* This path is unlikely, but double check anyway */
1862 			if (is_zero_ether_addr(mac_back)) {
1863 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1864 				break;
1865 			}
1866 
			/* Try to restore the original MAC address to the MPFS
			 * table and to restore the forward rule entry.
			 */
1870 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1871 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1872 					       ndev->config.mac);
1873 			}
1874 
1875 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1876 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1877 					       mac_back);
1878 			}
1879 
1880 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1881 
1882 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1883 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1884 
1885 			break;
1886 		}
1887 
1888 		status = VIRTIO_NET_OK;
1889 		break;
1890 
1891 	default:
1892 		break;
1893 	}
1894 
1895 	return status;
1896 }
1897 
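/* Change the number of active queue pairs to newqps. When shrinking, the RQT
 * is modified first and the excess virtqueues are torn down afterwards; when
 * growing, the new virtqueues are set up first and the RQT is modified last,
 * tearing down any newly added virtqueues on failure.
 */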
1898 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1899 {
1900 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1901 	int cur_qps = ndev->cur_num_vqs / 2;
1902 	int err;
1903 	int i;
1904 
1905 	if (cur_qps > newqps) {
1906 		err = modify_rqt(ndev, 2 * newqps);
1907 		if (err)
1908 			return err;
1909 
1910 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1911 			teardown_vq(ndev, &ndev->vqs[i]);
1912 
1913 		ndev->cur_num_vqs = 2 * newqps;
1914 	} else {
1915 		ndev->cur_num_vqs = 2 * newqps;
1916 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1917 			err = setup_vq(ndev, &ndev->vqs[i]);
1918 			if (err)
1919 				goto clean_added;
1920 		}
1921 		err = modify_rqt(ndev, 2 * newqps);
1922 		if (err)
1923 			goto clean_added;
1924 	}
1925 	return 0;
1926 
1927 clean_added:
1928 	for (--i; i >= 2 * cur_qps; --i)
1929 		teardown_vq(ndev, &ndev->vqs[i]);
1930 
1931 	ndev->cur_num_vqs = 2 * cur_qps;
1932 
1933 	return err;
1934 }
1935 
1936 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1937 {
1938 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1939 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1940 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1941 	struct virtio_net_ctrl_mq mq;
1942 	size_t read;
1943 	u16 newqps;
1944 
1945 	switch (cmd) {
1946 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
		/* This mq feature check aligns with the pre-existing userspace
		 * implementation.
		 *
		 * Without it, an untrusted driver could fake a multiqueue config
		 * request down to a non-mq device, which may cause the kernel to
		 * panic due to uninitialized resources for the extra vqs. Even
		 * with a well-behaving guest driver, changing the number of vqs
		 * on a non-mq device is not expected to be allowed.
		 */
1956 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1957 			break;
1958 
1959 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1960 		if (read != sizeof(mq))
1961 			break;
1962 
1963 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1964 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1965 		    newqps > ndev->rqt_size)
1966 			break;
1967 
1968 		if (ndev->cur_num_vqs == 2 * newqps) {
1969 			status = VIRTIO_NET_OK;
1970 			break;
1971 		}
1972 
1973 		if (!change_num_qps(mvdev, newqps))
1974 			status = VIRTIO_NET_OK;
1975 
1976 		break;
1977 	default:
1978 		break;
1979 	}
1980 
1981 	return status;
1982 }
1983 
1984 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1985 {
1986 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1987 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1988 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1989 	__virtio16 vlan;
1990 	size_t read;
1991 	u16 id;
1992 
1993 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1994 		return status;
1995 
1996 	switch (cmd) {
1997 	case VIRTIO_NET_CTRL_VLAN_ADD:
1998 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1999 		if (read != sizeof(vlan))
2000 			break;
2001 
2002 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2003 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2004 			break;
2005 
2006 		status = VIRTIO_NET_OK;
2007 		break;
2008 	case VIRTIO_NET_CTRL_VLAN_DEL:
2009 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2010 		if (read != sizeof(vlan))
2011 			break;
2012 
2013 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2014 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2015 		status = VIRTIO_NET_OK;
2016 		break;
2017 	default:
2018 		break;
2019 	}
2020 
2021 	return status;
2022 }
2023 
2024 static void mlx5_cvq_kick_handler(struct work_struct *work)
2025 {
2026 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2027 	struct virtio_net_ctrl_hdr ctrl;
2028 	struct mlx5_vdpa_wq_ent *wqent;
2029 	struct mlx5_vdpa_dev *mvdev;
2030 	struct mlx5_control_vq *cvq;
2031 	struct mlx5_vdpa_net *ndev;
2032 	size_t read, write;
2033 	int err;
2034 
2035 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2036 	mvdev = wqent->mvdev;
2037 	ndev = to_mlx5_vdpa_ndev(mvdev);
2038 	cvq = &mvdev->cvq;
2039 
2040 	down_write(&ndev->reslock);
2041 
2042 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2043 		goto out;
2044 
2045 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2046 		goto out;
2047 
2048 	if (!cvq->ready)
2049 		goto out;
2050 
2051 	while (true) {
2052 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2053 					   GFP_ATOMIC);
2054 		if (err <= 0)
2055 			break;
2056 
2057 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2058 		if (read != sizeof(ctrl))
2059 			break;
2060 
2061 		cvq->received_desc++;
2062 		switch (ctrl.class) {
2063 		case VIRTIO_NET_CTRL_MAC:
2064 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2065 			break;
2066 		case VIRTIO_NET_CTRL_MQ:
2067 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2068 			break;
2069 		case VIRTIO_NET_CTRL_VLAN:
2070 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2071 			break;
2072 		default:
2073 			break;
2074 		}
2075 
2076 		/* Make sure data is written before advancing index */
2077 		smp_wmb();
2078 
2079 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2080 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2081 		vringh_kiov_cleanup(&cvq->riov);
2082 		vringh_kiov_cleanup(&cvq->wiov);
2083 
2084 		if (vringh_need_notify_iotlb(&cvq->vring))
2085 			vringh_notify(&cvq->vring);
2086 
2087 		cvq->completed_desc++;
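		/* Process at most one control command per work invocation:
		 * re-queue the work to handle any remaining descriptors and
		 * exit the loop.
		 */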
2088 		queue_work(mvdev->wq, &wqent->work);
2089 		break;
2090 	}
2091 
2092 out:
2093 	up_write(&ndev->reslock);
2094 }
2095 
2096 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2097 {
2098 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2099 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2100 	struct mlx5_vdpa_virtqueue *mvq;
2101 
2102 	if (!is_index_valid(mvdev, idx))
2103 		return;
2104 
2105 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2106 		if (!mvdev->wq || !mvdev->cvq.ready)
2107 			return;
2108 
2109 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2110 		return;
2111 	}
2112 
2113 	mvq = &ndev->vqs[idx];
2114 	if (unlikely(!mvq->ready))
2115 		return;
2116 
2117 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2118 }
2119 
2120 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2121 				    u64 driver_area, u64 device_area)
2122 {
2123 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2124 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2125 	struct mlx5_vdpa_virtqueue *mvq;
2126 
2127 	if (!is_index_valid(mvdev, idx))
2128 		return -EINVAL;
2129 
2130 	if (is_ctrl_vq_idx(mvdev, idx)) {
2131 		mvdev->cvq.desc_addr = desc_area;
2132 		mvdev->cvq.device_addr = device_area;
2133 		mvdev->cvq.driver_addr = driver_area;
2134 		return 0;
2135 	}
2136 
2137 	mvq = &ndev->vqs[idx];
2138 	mvq->desc_addr = desc_area;
2139 	mvq->device_addr = device_area;
2140 	mvq->driver_addr = driver_area;
2141 	return 0;
2142 }
2143 
2144 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2145 {
2146 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2147 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2148 	struct mlx5_vdpa_virtqueue *mvq;
2149 
2150 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2151 		return;
2152 
2153 	mvq = &ndev->vqs[idx];
2154 	mvq->num_ent = num;
2155 }
2156 
2157 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2158 {
2159 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2160 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2161 
2162 	ndev->event_cbs[idx] = *cb;
2163 	if (is_ctrl_vq_idx(mvdev, idx))
2164 		mvdev->cvq.event_cb = *cb;
2165 }
2166 
2167 static void mlx5_cvq_notify(struct vringh *vring)
2168 {
2169 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2170 
2171 	if (!cvq->event_cb.callback)
2172 		return;
2173 
2174 	cvq->event_cb.callback(cvq->event_cb.private);
2175 }
2176 
2177 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2178 {
2179 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2180 
2181 	cvq->ready = ready;
2182 	if (!ready)
2183 		return;
2184 
2185 	cvq->vring.notify = mlx5_cvq_notify;
2186 }
2187 
2188 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2189 {
2190 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2191 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2192 	struct mlx5_vdpa_virtqueue *mvq;
2193 	int err;
2194 
2195 	if (!mvdev->actual_features)
2196 		return;
2197 
2198 	if (!is_index_valid(mvdev, idx))
2199 		return;
2200 
2201 	if (is_ctrl_vq_idx(mvdev, idx)) {
2202 		set_cvq_ready(mvdev, ready);
2203 		return;
2204 	}
2205 
2206 	mvq = &ndev->vqs[idx];
2207 	if (!ready) {
2208 		suspend_vq(ndev, mvq);
2209 	} else {
2210 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2211 		if (err) {
2212 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2213 			ready = false;
2214 		}
2215 	}
2216 
2217 
2218 	mvq->ready = ready;
2219 }
2220 
2221 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2222 {
2223 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2224 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2225 
2226 	if (!is_index_valid(mvdev, idx))
2227 		return false;
2228 
2229 	if (is_ctrl_vq_idx(mvdev, idx))
2230 		return mvdev->cvq.ready;
2231 
2232 	return ndev->vqs[idx].ready;
2233 }
2234 
2235 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2236 				  const struct vdpa_vq_state *state)
2237 {
2238 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2239 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2240 	struct mlx5_vdpa_virtqueue *mvq;
2241 
2242 	if (!is_index_valid(mvdev, idx))
2243 		return -EINVAL;
2244 
2245 	if (is_ctrl_vq_idx(mvdev, idx)) {
2246 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2247 		return 0;
2248 	}
2249 
2250 	mvq = &ndev->vqs[idx];
2251 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2252 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2253 		return -EINVAL;
2254 	}
2255 
2256 	mvq->used_idx = state->split.avail_index;
2257 	mvq->avail_idx = state->split.avail_index;
2258 	return 0;
2259 }
2260 
2261 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2262 {
2263 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2264 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2265 	struct mlx5_vdpa_virtqueue *mvq;
2266 	struct mlx5_virtq_attr attr;
2267 	int err;
2268 
2269 	if (!is_index_valid(mvdev, idx))
2270 		return -EINVAL;
2271 
2272 	if (is_ctrl_vq_idx(mvdev, idx)) {
2273 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2274 		return 0;
2275 	}
2276 
2277 	mvq = &ndev->vqs[idx];
2278 	/* If the virtq object was destroyed, use the value saved at
2279 	 * the last minute of suspend_vq. This caters for userspace
2280 	 * that cares about emulating the index after vq is stopped.
2281 	 */
2282 	if (!mvq->initialized) {
2283 		/* Firmware returns a wrong value for the available index.
2284 		 * Since both values should be identical, we take the value of
2285 		 * used_idx which is reported correctly.
2286 		 */
2287 		state->split.avail_index = mvq->used_idx;
2288 		return 0;
2289 	}
2290 
2291 	err = query_virtqueue(ndev, mvq, &attr);
2292 	if (err) {
2293 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2294 		return err;
2295 	}
2296 	state->split.avail_index = attr.used_index;
2297 	return 0;
2298 }
2299 
2300 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2301 {
2302 	return PAGE_SIZE;
2303 }
2304 
2305 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2306 {
2307 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2308 
2309 	if (is_ctrl_vq_idx(mvdev, idx))
2310 		return MLX5_VDPA_CVQ_GROUP;
2311 
2312 	return MLX5_VDPA_DATAVQ_GROUP;
2313 }
2314 
2315 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2316 {
2317 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2318 
2319 	if (is_ctrl_vq_idx(mvdev, idx))
2320 		return MLX5_VDPA_CVQ_GROUP;
2321 
2322 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2323 }
2324 
static u64 mlx_to_virtio_features(u16 dev_features)
2326 {
2327 	u64 result = 0;
2328 
2329 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2330 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2331 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2332 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2333 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2334 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2335 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2336 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2337 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2338 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2339 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2340 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2341 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2342 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2343 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2344 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2345 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2346 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2347 
2348 	return result;
2349 }
2350 
2351 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2352 {
2353 	u64 mlx_vdpa_features = 0;
2354 	u16 dev_features;
2355 
2356 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2358 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2359 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2360 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2361 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2362 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2363 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2364 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2365 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2366 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2367 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2368 
2369 	return mlx_vdpa_features;
2370 }
2371 
2372 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2373 {
2374 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2375 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2376 
2377 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2378 	return ndev->mvdev.mlx_features;
2379 }
2380 
2381 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2382 {
2383 	/* Minimum features to expect */
2384 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2385 		return -EOPNOTSUPP;
2386 
	/* Double check the feature combination sent down by the driver.
	 * Fail feature sets that lack a feature they depend on.
	 *
	 * Per the VIRTIO v1.1 specification, section 5.1.3.1 "Feature bit
	 * requirements": "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
	 * By failing invalid feature sets sent down by untrusted drivers,
	 * we ensure the assumptions made by is_index_valid() and
	 * is_ctrl_vq_idx() are not compromised.
	 */
2396 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2397             BIT_ULL(VIRTIO_NET_F_MQ))
2398 		return -EINVAL;
2399 
2400 	return 0;
2401 }
2402 
2403 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2404 {
2405 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2406 	int err;
2407 	int i;
2408 
2409 	for (i = 0; i < mvdev->max_vqs; i++) {
2410 		err = setup_vq(ndev, &ndev->vqs[i]);
2411 		if (err)
2412 			goto err_vq;
2413 	}
2414 
2415 	return 0;
2416 
2417 err_vq:
2418 	for (--i; i >= 0; i--)
2419 		teardown_vq(ndev, &ndev->vqs[i]);
2420 
2421 	return err;
2422 }
2423 
2424 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2425 {
2426 	struct mlx5_vdpa_virtqueue *mvq;
2427 	int i;
2428 
2429 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2430 		mvq = &ndev->vqs[i];
2431 		if (!mvq->initialized)
2432 			continue;
2433 
2434 		teardown_vq(ndev, mvq);
2435 	}
2436 }
2437 
2438 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2439 {
2440 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2441 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2442 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2443 			mvdev->max_idx = mvdev->max_vqs;
2444 		} else {
			/* Only CVQ is supported. Data virtqueues occupy indices
			 * 0 and 1; CVQ gets index 2.
			 */
2448 			mvdev->max_idx = 2;
2449 		}
2450 	} else {
2451 		/* Two data virtqueues only: one for rx and one for tx */
2452 		mvdev->max_idx = 1;
2453 	}
2454 }
2455 
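/* Query the state of the given vport with the QUERY_VPORT_STATE command. On
 * command failure 0 is returned, which get_link_state() treats as link down.
 */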
2456 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2457 {
2458 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2459 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2460 	int err;
2461 
2462 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2463 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2464 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2465 	if (vport)
2466 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2467 
2468 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2469 	if (err)
2470 		return 0;
2471 
2472 	return MLX5_GET(query_vport_state_out, out, state);
2473 }
2474 
2475 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2476 {
2477 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2478 	    VPORT_STATE_UP)
2479 		return true;
2480 
2481 	return false;
2482 }
2483 
2484 static void update_carrier(struct work_struct *work)
2485 {
2486 	struct mlx5_vdpa_wq_ent *wqent;
2487 	struct mlx5_vdpa_dev *mvdev;
2488 	struct mlx5_vdpa_net *ndev;
2489 
2490 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2491 	mvdev = wqent->mvdev;
2492 	ndev = to_mlx5_vdpa_ndev(mvdev);
2493 	if (get_link_state(mvdev))
2494 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2495 	else
2496 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2497 
2498 	if (ndev->config_cb.callback)
2499 		ndev->config_cb.callback(ndev->config_cb.private);
2500 
2501 	kfree(wqent);
2502 }
2503 
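/* Defer the carrier update to the vdpa workqueue. This can be called from the
 * mlx5 event notifier (atomic context), hence the GFP_ATOMIC allocation.
 */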
2504 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2505 {
2506 	struct mlx5_vdpa_wq_ent *wqent;
2507 
2508 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2509 	if (!wqent)
2510 		return -ENOMEM;
2511 
2512 	wqent->mvdev = &ndev->mvdev;
2513 	INIT_WORK(&wqent->work, update_carrier);
2514 	queue_work(ndev->mvdev.wq, &wqent->work);
2515 	return 0;
2516 }
2517 
2518 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2519 {
2520 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2521 	struct mlx5_eqe *eqe = param;
2522 	int ret = NOTIFY_DONE;
2523 
2524 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2525 		switch (eqe->sub_type) {
2526 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2527 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2528 			if (queue_link_work(ndev))
2529 				return NOTIFY_DONE;
2530 
2531 			ret = NOTIFY_OK;
2532 			break;
2533 		default:
2534 			return NOTIFY_DONE;
2535 		}
2536 		return ret;
2537 	}
2538 	return ret;
2539 }
2540 
2541 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2542 {
2543 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2544 		return;
2545 
2546 	ndev->nb.notifier_call = event_handler;
2547 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2548 	ndev->nb_registered = true;
2549 	queue_link_work(ndev);
2550 }
2551 
2552 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2553 {
2554 	if (!ndev->nb_registered)
2555 		return;
2556 
2557 	ndev->nb_registered = false;
2558 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2559 	if (ndev->mvdev.wq)
2560 		flush_workqueue(ndev->mvdev.wq);
2561 }
2562 
2563 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2564 {
2565 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2566 }
2567 
2568 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2569 {
2570 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2571 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2572 	int err;
2573 
2574 	print_features(mvdev, features, true);
2575 
2576 	err = verify_driver_features(mvdev, features);
2577 	if (err)
2578 		return err;
2579 
2580 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2581 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2582 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2583 	else
2584 		ndev->rqt_size = 1;
2585 
2586 	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2587 	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2588 	 *
2589 	 * Multiqueue is disabled by default.
2590 	 * The driver enables multiqueue by sending a command using class
2591 	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2592 	 * operation, as follows: ...
2593 	 */
2594 	ndev->cur_num_vqs = 2;
2595 
2596 	update_cvq_info(mvdev);
2597 	return err;
2598 }
2599 
2600 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2601 {
2602 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2603 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2604 
2605 	ndev->config_cb = *cb;
2606 }
2607 
2608 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2609 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2610 {
2611 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2612 }
2613 
2614 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2615 {
2616 	return VIRTIO_ID_NET;
2617 }
2618 
2619 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2620 {
2621 	return PCI_VENDOR_ID_MELLANOX;
2622 }
2623 
2624 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2625 {
2626 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2627 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2628 
2629 	print_status(mvdev, ndev->mvdev.status, false);
2630 	return ndev->mvdev.status;
2631 }
2632 
2633 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2634 {
2635 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2636 	struct mlx5_virtq_attr attr = {};
2637 	int err;
2638 
2639 	if (mvq->initialized) {
2640 		err = query_virtqueue(ndev, mvq, &attr);
2641 		if (err)
2642 			return err;
2643 	}
2644 
2645 	ri->avail_index = attr.available_index;
2646 	ri->used_index = attr.used_index;
2647 	ri->ready = mvq->ready;
2648 	ri->num_ent = mvq->num_ent;
2649 	ri->desc_addr = mvq->desc_addr;
2650 	ri->device_addr = mvq->device_addr;
2651 	ri->driver_addr = mvq->driver_addr;
2652 	ri->map = mvq->map;
2653 	ri->restore = true;
2654 	return 0;
2655 }
2656 
2657 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2658 {
2659 	int i;
2660 
2661 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2662 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2663 		save_channel_info(ndev, &ndev->vqs[i]);
2664 	}
2665 	return 0;
2666 }
2667 
2668 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2669 {
2670 	int i;
2671 
2672 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2673 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2674 }
2675 
2676 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2677 {
2678 	struct mlx5_vdpa_virtqueue *mvq;
2679 	struct mlx5_vq_restore_info *ri;
2680 	int i;
2681 
2682 	mlx5_clear_vqs(ndev);
2683 	init_mvqs(ndev);
2684 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2685 		mvq = &ndev->vqs[i];
2686 		ri = &mvq->ri;
2687 		if (!ri->restore)
2688 			continue;
2689 
2690 		mvq->avail_idx = ri->avail_index;
2691 		mvq->used_idx = ri->used_index;
2692 		mvq->ready = ri->ready;
2693 		mvq->num_ent = ri->num_ent;
2694 		mvq->desc_addr = ri->desc_addr;
2695 		mvq->device_addr = ri->device_addr;
2696 		mvq->driver_addr = ri->driver_addr;
2697 		mvq->map = ri->map;
2698 	}
2699 }
2700 
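/* Switch the device over to a new memory mapping: suspend the virtqueues,
 * save their state, tear down the driver resources and install the new MR.
 * If the device is running and not suspended, the saved state is restored and
 * the resources are recreated on top of the new mapping.
 */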
2701 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2702 				struct mlx5_vdpa_mr *new_mr,
2703 				unsigned int asid)
2704 {
2705 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2706 	int err;
2707 
2708 	suspend_vqs(ndev);
2709 	err = save_channels_info(ndev);
2710 	if (err)
2711 		return err;
2712 
2713 	teardown_driver(ndev);
2714 
2715 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2716 
2717 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2718 		return 0;
2719 
2720 	restore_channels_info(ndev);
2721 	err = setup_driver(mvdev);
2722 	if (err)
2723 		return err;
2724 
2725 	return 0;
2726 }
2727 
2728 /* reslock must be held for this function */
2729 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2730 {
2731 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2732 	int err;
2733 
2734 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2735 
2736 	if (ndev->setup) {
2737 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2738 		err = 0;
2739 		goto out;
2740 	}
2741 	mlx5_vdpa_add_debugfs(ndev);
2742 
2743 	err = read_umem_params(ndev);
2744 	if (err)
2745 		goto err_setup;
2746 
2747 	err = setup_virtqueues(mvdev);
2748 	if (err) {
2749 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2750 		goto err_setup;
2751 	}
2752 
2753 	err = create_rqt(ndev);
2754 	if (err) {
2755 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2756 		goto err_rqt;
2757 	}
2758 
2759 	err = create_tir(ndev);
2760 	if (err) {
2761 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2762 		goto err_tir;
2763 	}
2764 
2765 	err = setup_steering(ndev);
2766 	if (err) {
2767 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2768 		goto err_fwd;
2769 	}
2770 	ndev->setup = true;
2771 
2772 	return 0;
2773 
2774 err_fwd:
2775 	destroy_tir(ndev);
2776 err_tir:
2777 	destroy_rqt(ndev);
2778 err_rqt:
2779 	teardown_virtqueues(ndev);
2780 err_setup:
2781 	mlx5_vdpa_remove_debugfs(ndev);
2782 out:
2783 	return err;
2784 }
2785 
2786 /* reslock must be held for this function */
2787 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2788 {
2789 
2790 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2791 
2792 	if (!ndev->setup)
2793 		return;
2794 
2795 	mlx5_vdpa_remove_debugfs(ndev);
2796 	teardown_steering(ndev);
2797 	destroy_tir(ndev);
2798 	destroy_rqt(ndev);
2799 	teardown_virtqueues(ndev);
2800 	ndev->setup = false;
2801 }
2802 
2803 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2804 {
2805 	int i;
2806 
2807 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2808 		ndev->vqs[i].ready = false;
2809 
2810 	ndev->mvdev.cvq.ready = false;
2811 }
2812 
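/* Initialize the vringh IOTLB vring used to service the control VQ in
 * software, based on the addresses programmed through set_vq_address(). Only
 * done when VIRTIO_NET_F_CTRL_VQ has been negotiated.
 */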
2813 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2814 {
2815 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2816 	int err = 0;
2817 
2818 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2819 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2820 					MLX5_CVQ_MAX_ENT, false,
2821 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2822 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2823 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2824 
2825 	return err;
2826 }
2827 
2828 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2829 {
2830 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2831 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2832 	int err;
2833 
2834 	print_status(mvdev, status, true);
2835 
2836 	down_write(&ndev->reslock);
2837 
2838 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2839 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2840 			err = setup_cvq_vring(mvdev);
2841 			if (err) {
2842 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2843 				goto err_setup;
2844 			}
2845 			register_link_notifier(ndev);
2846 			err = setup_driver(mvdev);
2847 			if (err) {
2848 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2849 				goto err_driver;
2850 			}
2851 		} else {
2852 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2853 			goto err_clear;
2854 		}
2855 	}
2856 
2857 	ndev->mvdev.status = status;
2858 	up_write(&ndev->reslock);
2859 	return;
2860 
2861 err_driver:
2862 	unregister_link_notifier(ndev);
2863 err_setup:
2864 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
2865 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2866 err_clear:
2867 	up_write(&ndev->reslock);
2868 }
2869 
2870 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2871 {
2872 	int i;
2873 
	/* By default, all groups are mapped to ASID 0 */
2875 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2876 		mvdev->group2asid[i] = 0;
2877 }
2878 
2879 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
2880 {
2881 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2882 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2883 
2884 	print_status(mvdev, 0, true);
2885 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2886 
2887 	down_write(&ndev->reslock);
2888 	unregister_link_notifier(ndev);
2889 	teardown_driver(ndev);
2890 	clear_vqs_ready(ndev);
2891 	if (flags & VDPA_RESET_F_CLEAN_MAP)
2892 		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
2893 	ndev->mvdev.status = 0;
2894 	ndev->mvdev.suspended = false;
2895 	ndev->cur_num_vqs = 0;
2896 	ndev->mvdev.cvq.received_desc = 0;
2897 	ndev->mvdev.cvq.completed_desc = 0;
2898 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2899 	ndev->mvdev.actual_features = 0;
2900 	init_group_to_asid_map(mvdev);
2901 	++mvdev->generation;
2902 
2903 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
2904 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2905 		if (mlx5_vdpa_create_dma_mr(mvdev))
2906 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2907 	}
2908 	up_write(&ndev->reslock);
2909 
2910 	return 0;
2911 }
2912 
2913 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2914 {
2915 	return mlx5_vdpa_compat_reset(vdev, 0);
2916 }
2917 
2918 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2919 {
2920 	return sizeof(struct virtio_net_config);
2921 }
2922 
2923 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2924 				 unsigned int len)
2925 {
2926 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2927 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2928 
2929 	if (offset + len <= sizeof(struct virtio_net_config))
2930 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2931 }
2932 
2933 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2934 				 unsigned int len)
2935 {
2936 	/* not supported */
2937 }
2938 
2939 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2940 {
2941 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2942 
2943 	return mvdev->generation;
2944 }
2945 
2946 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2947 			unsigned int asid)
2948 {
2949 	struct mlx5_vdpa_mr *new_mr;
2950 	int err;
2951 
2952 	if (asid >= MLX5_VDPA_NUM_AS)
2953 		return -EINVAL;
2954 
2955 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
2956 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
2957 		if (IS_ERR(new_mr)) {
2958 			err = PTR_ERR(new_mr);
2959 			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
2960 			return err;
2961 		}
2962 	} else {
2963 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
2964 		new_mr = NULL;
2965 	}
2966 
2967 	if (!mvdev->mr[asid]) {
2968 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2969 	} else {
2970 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
2971 		if (err) {
2972 			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
2973 			goto out_err;
2974 		}
2975 	}
2976 
2977 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
2978 
2979 out_err:
2980 	mlx5_vdpa_destroy_mr(mvdev, new_mr);
2981 	return err;
2982 }
2983 
2984 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2985 			     struct vhost_iotlb *iotlb)
2986 {
2987 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2988 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2989 	int err = -EINVAL;
2990 
2991 	down_write(&ndev->reslock);
2992 	err = set_map_data(mvdev, iotlb, asid);
2993 	up_write(&ndev->reslock);
2994 	return err;
2995 }
2996 
2997 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
2998 {
2999 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3000 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3001 	int err;
3002 
3003 	down_write(&ndev->reslock);
3004 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3005 	up_write(&ndev->reslock);
3006 	return err;
3007 }
3008 
3009 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3010 {
3011 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3012 
3013 	if (is_ctrl_vq_idx(mvdev, idx))
3014 		return &vdev->dev;
3015 
3016 	return mvdev->vdev.dma_dev;
3017 }
3018 
3019 static void free_irqs(struct mlx5_vdpa_net *ndev)
3020 {
3021 	struct mlx5_vdpa_irq_pool_entry *ent;
3022 	int i;
3023 
3024 	if (!msix_mode_supported(&ndev->mvdev))
3025 		return;
3026 
3027 	if (!ndev->irqp.entries)
3028 		return;
3029 
3030 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3031 		ent = ndev->irqp.entries + i;
3032 		if (ent->map.virq)
3033 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3034 	}
3035 	kfree(ndev->irqp.entries);
3036 }
3037 
3038 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3039 {
3040 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3041 	struct mlx5_core_dev *pfmdev;
3042 	struct mlx5_vdpa_net *ndev;
3043 
3044 	ndev = to_mlx5_vdpa_ndev(mvdev);
3045 
3046 	free_resources(ndev);
3047 	mlx5_vdpa_destroy_mr_resources(mvdev);
3048 	if (!is_zero_ether_addr(ndev->config.mac)) {
3049 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3050 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3051 	}
3052 	mlx5_vdpa_free_resources(&ndev->mvdev);
3053 	free_irqs(ndev);
3054 	kfree(ndev->event_cbs);
3055 	kfree(ndev->vqs);
3056 }
3057 
3058 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3059 {
3060 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3061 	struct vdpa_notification_area ret = {};
3062 	struct mlx5_vdpa_net *ndev;
3063 	phys_addr_t addr;
3064 
3065 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3066 		return ret;
3067 
	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
	 * notification to avoid the risk of mapping pages that contain BARs of
	 * more than one SF.
	 */
3072 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3073 		return ret;
3074 
3075 	ndev = to_mlx5_vdpa_ndev(mvdev);
3076 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3077 	ret.addr = addr;
3078 	ret.size = PAGE_SIZE;
3079 	return ret;
3080 }
3081 
3082 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3083 {
3084 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3085 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3086 	struct mlx5_vdpa_virtqueue *mvq;
3087 
3088 	if (!is_index_valid(mvdev, idx))
3089 		return -EINVAL;
3090 
3091 	if (is_ctrl_vq_idx(mvdev, idx))
3092 		return -EOPNOTSUPP;
3093 
3094 	mvq = &ndev->vqs[idx];
3095 	if (!mvq->map.virq)
3096 		return -EOPNOTSUPP;
3097 
3098 	return mvq->map.virq;
3099 }
3100 
3101 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3102 {
3103 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3104 
3105 	return mvdev->actual_features;
3106 }
3107 
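/* Read the received_desc/completed_desc counters from the virtqueue's
 * virtio_q_counters object. Returns -EOPNOTSUPP if the device does not
 * support virtqueue counters and -EAGAIN if the virtqueue is not in the RDY
 * state.
 */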
3108 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3109 			     u64 *received_desc, u64 *completed_desc)
3110 {
3111 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3112 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3113 	void *cmd_hdr;
3114 	void *ctx;
3115 	int err;
3116 
3117 	if (!counters_supported(&ndev->mvdev))
3118 		return -EOPNOTSUPP;
3119 
3120 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3121 		return -EAGAIN;
3122 
3123 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3124 
3125 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3126 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3127 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3128 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3129 
3130 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3131 	if (err)
3132 		return err;
3133 
3134 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3135 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3136 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3137 	return 0;
3138 }
3139 
3140 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3141 					 struct sk_buff *msg,
3142 					 struct netlink_ext_ack *extack)
3143 {
3144 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3145 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3146 	struct mlx5_vdpa_virtqueue *mvq;
3147 	struct mlx5_control_vq *cvq;
3148 	u64 received_desc;
3149 	u64 completed_desc;
3150 	int err = 0;
3151 
3152 	down_read(&ndev->reslock);
3153 	if (!is_index_valid(mvdev, idx)) {
3154 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3155 		err = -EINVAL;
3156 		goto out_err;
3157 	}
3158 
3159 	if (idx == ctrl_vq_idx(mvdev)) {
3160 		cvq = &mvdev->cvq;
3161 		received_desc = cvq->received_desc;
3162 		completed_desc = cvq->completed_desc;
3163 		goto out;
3164 	}
3165 
3166 	mvq = &ndev->vqs[idx];
3167 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3168 	if (err) {
3169 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3170 		goto out_err;
3171 	}
3172 
3173 out:
3174 	err = -EMSGSIZE;
3175 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3176 		goto out_err;
3177 
3178 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3179 			      VDPA_ATTR_PAD))
3180 		goto out_err;
3181 
3182 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3183 		goto out_err;
3184 
3185 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3186 			      VDPA_ATTR_PAD))
3187 		goto out_err;
3188 
3189 	err = 0;
3190 out_err:
3191 	up_read(&ndev->reslock);
3192 	return err;
3193 }
3194 
3195 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3196 {
3197 	struct mlx5_control_vq *cvq;
3198 
3199 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3200 		return;
3201 
3202 	cvq = &mvdev->cvq;
3203 	cvq->ready = false;
3204 }
3205 
3206 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3207 {
3208 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3209 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3210 	struct mlx5_vdpa_virtqueue *mvq;
3211 	int i;
3212 
3213 	mlx5_vdpa_info(mvdev, "suspending device\n");
3214 
3215 	down_write(&ndev->reslock);
3216 	unregister_link_notifier(ndev);
3217 	for (i = 0; i < ndev->cur_num_vqs; i++) {
3218 		mvq = &ndev->vqs[i];
3219 		suspend_vq(ndev, mvq);
3220 	}
3221 	mlx5_vdpa_cvq_suspend(mvdev);
3222 	mvdev->suspended = true;
3223 	up_write(&ndev->reslock);
3224 	return 0;
3225 }
3226 
3227 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3228 			       unsigned int asid)
3229 {
3230 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3231 	int err = 0;
3232 
3233 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3234 		return -EINVAL;
3235 
3236 	mvdev->group2asid[group] = asid;
3237 
3238 	mutex_lock(&mvdev->mr_mtx);
3239 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
3240 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
3241 	mutex_unlock(&mvdev->mr_mtx);
3242 
3243 	return err;
3244 }
3245 
3246 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3247 	.set_vq_address = mlx5_vdpa_set_vq_address,
3248 	.set_vq_num = mlx5_vdpa_set_vq_num,
3249 	.kick_vq = mlx5_vdpa_kick_vq,
3250 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3251 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3252 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3253 	.set_vq_state = mlx5_vdpa_set_vq_state,
3254 	.get_vq_state = mlx5_vdpa_get_vq_state,
3255 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3256 	.get_vq_notification = mlx5_get_vq_notification,
3257 	.get_vq_irq = mlx5_get_vq_irq,
3258 	.get_vq_align = mlx5_vdpa_get_vq_align,
3259 	.get_vq_group = mlx5_vdpa_get_vq_group,
3260 	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3261 	.get_device_features = mlx5_vdpa_get_device_features,
3262 	.get_backend_features = mlx5_vdpa_get_backend_features,
3263 	.set_driver_features = mlx5_vdpa_set_driver_features,
3264 	.get_driver_features = mlx5_vdpa_get_driver_features,
3265 	.set_config_cb = mlx5_vdpa_set_config_cb,
3266 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3267 	.get_device_id = mlx5_vdpa_get_device_id,
3268 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3269 	.get_status = mlx5_vdpa_get_status,
3270 	.set_status = mlx5_vdpa_set_status,
3271 	.reset = mlx5_vdpa_reset,
3272 	.compat_reset = mlx5_vdpa_compat_reset,
3273 	.get_config_size = mlx5_vdpa_get_config_size,
3274 	.get_config = mlx5_vdpa_get_config,
3275 	.set_config = mlx5_vdpa_set_config,
3276 	.get_generation = mlx5_vdpa_get_generation,
3277 	.set_map = mlx5_vdpa_set_map,
3278 	.reset_map = mlx5_vdpa_reset_map,
3279 	.set_group_asid = mlx5_set_group_asid,
3280 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3281 	.free = mlx5_vdpa_free,
3282 	.suspend = mlx5_vdpa_suspend,
3283 };
3284 
3285 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3286 {
3287 	u16 hw_mtu;
3288 	int err;
3289 
3290 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3291 	if (err)
3292 		return err;
3293 
3294 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3295 	return 0;
3296 }
3297 
3298 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3299 {
3300 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3301 	int err;
3302 
3303 	if (res->valid) {
3304 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3305 		return -EEXIST;
3306 	}
3307 
3308 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3309 	if (err)
3310 		return err;
3311 
3312 	err = create_tis(ndev);
3313 	if (err)
3314 		goto err_tis;
3315 
3316 	res->valid = true;
3317 
3318 	return 0;
3319 
3320 err_tis:
3321 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3322 	return err;
3323 }
3324 
3325 static void free_resources(struct mlx5_vdpa_net *ndev)
3326 {
3327 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3328 
3329 	if (!res->valid)
3330 		return;
3331 
3332 	destroy_tis(ndev);
3333 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3334 	res->valid = false;
3335 }
3336 
3337 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3338 {
3339 	struct mlx5_vdpa_virtqueue *mvq;
3340 	int i;
3341 
3342 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3343 		mvq = &ndev->vqs[i];
3344 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3345 		mvq->index = i;
3346 		mvq->ndev = ndev;
3347 		mvq->fwqp.fw = true;
3348 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3349 	}
3350 	for (; i < ndev->mvdev.max_vqs; i++) {
3351 		mvq = &ndev->vqs[i];
3352 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3353 		mvq->index = i;
3354 		mvq->ndev = ndev;
3355 	}
3356 }
3357 
3358 struct mlx5_vdpa_mgmtdev {
3359 	struct vdpa_mgmt_dev mgtdev;
3360 	struct mlx5_adev *madev;
3361 	struct mlx5_vdpa_net *ndev;
3362 	struct vdpa_config_ops vdpa_ops;
3363 };
3364 
3365 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
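/* Program the NIC vport MTU for the requested virtio MTU, adding the Ethernet
 * overhead (MLX5V_ETH_HARD_MTU) that query_mtu() subtracts when reporting the
 * virtio MTU.
 */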
3366 {
3367 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3368 	void *in;
3369 	int err;
3370 
3371 	in = kvzalloc(inlen, GFP_KERNEL);
3372 	if (!in)
3373 		return -ENOMEM;
3374 
3375 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3376 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3377 		 mtu + MLX5V_ETH_HARD_MTU);
3378 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3379 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3380 
3381 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3382 
3383 	kvfree(in);
3384 	return err;
3385 }
3386 
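/* Best-effort pre-allocation of one dynamic MSI-X vector per data virtqueue.
 * Allocation stops at the first failure; virtqueues without a vector simply
 * have no map.virq, and mlx5_get_vq_irq() reports -EOPNOTSUPP for them.
 */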
3387 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3388 {
3389 	struct mlx5_vdpa_irq_pool_entry *ent;
3390 	int i;
3391 
3392 	if (!msix_mode_supported(&ndev->mvdev))
3393 		return;
3394 
3395 	if (!ndev->mvdev.mdev->pdev)
3396 		return;
3397 
3398 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3399 	if (!ndev->irqp.entries)
3400 		return;
3401 
3402 
3403 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3404 		ent = ndev->irqp.entries + i;
3405 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3406 			 dev_name(&ndev->mvdev.vdev.dev), i);
3407 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3408 		if (!ent->map.virq)
3409 			return;
3410 
3411 		ndev->irqp.num_ent++;
3412 	}
3413 }
3414 
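/* Management device .dev_add callback: validate the provisioned features
 * against the device capabilities, allocate the vdpa net device, initialize
 * virtqueues, IRQs, config space and resources, and register the device with
 * the vdpa core.
 */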
3415 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3416 			     const struct vdpa_dev_set_config *add_config)
3417 {
3418 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3419 	struct virtio_net_config *config;
3420 	struct mlx5_core_dev *pfmdev;
3421 	struct mlx5_vdpa_dev *mvdev;
3422 	struct mlx5_vdpa_net *ndev;
3423 	struct mlx5_core_dev *mdev;
3424 	u64 device_features;
3425 	u32 max_vqs;
3426 	u16 mtu;
3427 	int err;
3428 
3429 	if (mgtdev->ndev)
3430 		return -ENOSPC;
3431 
3432 	mdev = mgtdev->madev->mdev;
3433 	device_features = mgtdev->mgtdev.supported_features;
3434 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3435 		if (add_config->device_features & ~device_features) {
3436 			dev_warn(mdev->device,
3437 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3438 				 add_config->device_features, device_features);
3439 			return -EINVAL;
3440 		}
3441 		device_features &= add_config->device_features;
3442 	} else {
3443 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3444 	}
3445 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3446 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3447 		dev_warn(mdev->device,
3448 			 "Must provision minimum features 0x%llx for this device",
3449 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3450 		return -EOPNOTSUPP;
3451 	}
3452 
3453 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3454 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3455 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3456 		return -EOPNOTSUPP;
3457 	}
3458 
3459 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3460 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3461 	if (max_vqs < 2) {
3462 		dev_warn(mdev->device,
3463 			 "%d virtqueues are supported. At least 2 are required\n",
3464 			 max_vqs);
3465 		return -EAGAIN;
3466 	}
3467 
3468 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3469 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3470 			return -EINVAL;
3471 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3472 	} else {
3473 		max_vqs = 2;
3474 	}
3475 
3476 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3477 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3478 	if (IS_ERR(ndev))
3479 		return PTR_ERR(ndev);
3480 
3481 	ndev->mvdev.max_vqs = max_vqs;
3482 	mvdev = &ndev->mvdev;
3483 	mvdev->mdev = mdev;
3484 
3485 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3486 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3487 	if (!ndev->vqs || !ndev->event_cbs) {
3488 		err = -ENOMEM;
3489 		goto err_alloc;
3490 	}
3491 
3492 	init_mvqs(ndev);
3493 	allocate_irqs(ndev);
3494 	init_rwsem(&ndev->reslock);
3495 	config = &ndev->config;
3496 
3497 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3498 		err = config_func_mtu(mdev, add_config->net.mtu);
3499 		if (err)
3500 			goto err_alloc;
3501 	}
3502 
3503 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3504 		err = query_mtu(mdev, &mtu);
3505 		if (err)
3506 			goto err_alloc;
3507 
3508 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3509 	}
3510 
3511 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3512 		if (get_link_state(mvdev))
3513 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3514 		else
3515 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3516 	}
3517 
3518 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3519 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	/* Don't bother setting the MAC address in config if not going to provision _F_MAC */
3521 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3522 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3523 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3524 		if (err)
3525 			goto err_alloc;
3526 	}
3527 
3528 	if (!is_zero_ether_addr(config->mac)) {
3529 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3530 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3531 		if (err)
3532 			goto err_alloc;
3533 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3534 		/*
3535 		 * We used to clear _F_MAC feature bit if seeing
3536 		 * zero mac address when device features are not
3537 		 * specifically provisioned. Keep the behaviour
3538 		 * so old scripts do not break.
3539 		 */
3540 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3541 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3542 		/* Don't provision zero mac address for _F_MAC */
3543 		mlx5_vdpa_warn(&ndev->mvdev,
3544 			       "No mac address provisioned?\n");
3545 		err = -EINVAL;
3546 		goto err_alloc;
3547 	}
3548 
3549 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3550 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3551 
3552 	ndev->mvdev.mlx_features = device_features;
3553 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3554 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3555 	if (err)
3556 		goto err_mpfs;
3557 
3558 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3559 		err = mlx5_vdpa_create_dma_mr(mvdev);
3560 		if (err)
3561 			goto err_res;
3562 	}
3563 
3564 	err = alloc_resources(ndev);
3565 	if (err)
3566 		goto err_mr;
3567 
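	/* Control VQ commands run on a dedicated single-threaded workqueue. */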
3568 	ndev->cvq_ent.mvdev = mvdev;
3569 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3570 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3571 	if (!mvdev->wq) {
3572 		err = -ENOMEM;
3573 		goto err_res2;
3574 	}
3575 
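	/* Register with the vdpa core: max_vqs data virtqueues plus one control VQ. */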
3576 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3577 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3578 	if (err)
3579 		goto err_reg;
3580 
3581 	mgtdev->ndev = ndev;
3582 	return 0;
3583 
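/* Error unwind: undo the setup above in reverse order. */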
3584 err_reg:
3585 	destroy_workqueue(mvdev->wq);
3586 err_res2:
3587 	free_resources(ndev);
3588 err_mr:
3589 	mlx5_vdpa_destroy_mr_resources(mvdev);
3590 err_res:
3591 	mlx5_vdpa_free_resources(&ndev->mvdev);
3592 err_mpfs:
3593 	if (!is_zero_ether_addr(config->mac))
3594 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3595 err_alloc:
3596 	put_device(&mvdev->vdev.dev);
3597 	return err;
3598 }
3599 
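/*
 * Tear down in the reverse order of dev_add: stop link notifications,
 * unregister the vdpa device, then destroy its workqueue.
 */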
3600 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3601 {
3602 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3603 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3604 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3605 	struct workqueue_struct *wq;
3606 
3607 	unregister_link_notifier(ndev);
3608 	_vdpa_unregister_device(dev);
3609 	wq = mvdev->wq;
3610 	mvdev->wq = NULL;
3611 	destroy_workqueue(wq);
3612 	mgtdev->ndev = NULL;
3613 }
3614 
3615 static const struct vdpa_mgmtdev_ops mdev_ops = {
3616 	.dev_add = mlx5_vdpa_dev_add,
3617 	.dev_del = mlx5_vdpa_dev_del,
3618 };
3619 
3620 static struct virtio_device_id id_table[] = {
3621 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3622 	{ 0 },
3623 };
3624 
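/*
 * Auxiliary bus probe: allocate the management device, describe the
 * config attributes and features it supports, and register it with
 * the vdpa core.
 */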
3625 static int mlx5v_probe(struct auxiliary_device *adev,
3626 		       const struct auxiliary_device_id *id)
3628 {
3629 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3630 	struct mlx5_core_dev *mdev = madev->mdev;
3631 	struct mlx5_vdpa_mgmtdev *mgtdev;
3632 	int err;
3633 
3634 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3635 	if (!mgtdev)
3636 		return -ENOMEM;
3637 
3638 	mgtdev->mgtdev.ops = &mdev_ops;
3639 	mgtdev->mgtdev.device = mdev->device;
3640 	mgtdev->mgtdev.id_table = id_table;
3641 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3642 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3643 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3644 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
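	/* Data virtqueues reported by the device, plus one for the control VQ. */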
3645 	mgtdev->mgtdev.max_supported_vqs =
3646 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3647 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3648 	mgtdev->madev = madev;
3649 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
3650 
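	/* Hide the descriptor group op when the device lacks support for it. */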
3651 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
3652 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
3653 
3654 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3655 	if (err)
3656 		goto reg_err;
3657 
3658 	auxiliary_set_drvdata(adev, mgtdev);
3659 
3660 	return 0;
3661 
3662 reg_err:
3663 	kfree(mgtdev);
3664 	return err;
3665 }
3666 
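/* Auxiliary bus remove: unregister the management device and free it. */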
3667 static void mlx5v_remove(struct auxiliary_device *adev)
3668 {
3669 	struct mlx5_vdpa_mgmtdev *mgtdev;
3670 
3671 	mgtdev = auxiliary_get_drvdata(adev);
3672 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3673 	kfree(mgtdev);
3674 }
3675 
3676 static const struct auxiliary_device_id mlx5v_id_table[] = {
3677 	{ .name = MLX5_ADEV_NAME ".vnet", },
3678 	{},
3679 };
3680 
3681 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3682 
3683 static struct auxiliary_driver mlx5v_driver = {
3684 	.name = "vnet",
3685 	.probe = mlx5v_probe,
3686 	.remove = mlx5v_remove,
3687 	.id_table = mlx5v_id_table,
3688 };
3689 
3690 module_auxiliary_driver(mlx5v_driver);
3691