xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 0a149ab78ee220c75eef797abea7a29f4490e226)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 struct mlx5_vdpa_cq_buf {
52 	struct mlx5_frag_buf_ctrl fbc;
53 	struct mlx5_frag_buf frag_buf;
54 	int cqe_size;
55 	int nent;
56 };
57 
58 struct mlx5_vdpa_cq {
59 	struct mlx5_core_cq mcq;
60 	struct mlx5_vdpa_cq_buf buf;
61 	struct mlx5_db db;
62 	int cqe;
63 };
64 
65 struct mlx5_vdpa_umem {
66 	struct mlx5_frag_buf_ctrl fbc;
67 	struct mlx5_frag_buf frag_buf;
68 	int size;
69 	u32 id;
70 };
71 
72 struct mlx5_vdpa_qp {
73 	struct mlx5_core_qp mqp;
74 	struct mlx5_frag_buf frag_buf;
75 	struct mlx5_db db;
76 	u16 head;
77 	bool fw;
78 };
79 
80 struct mlx5_vq_restore_info {
81 	u32 num_ent;
82 	u64 desc_addr;
83 	u64 device_addr;
84 	u64 driver_addr;
85 	u16 avail_index;
86 	u16 used_index;
87 	struct msi_map map;
88 	bool ready;
89 	bool restore;
90 };
91 
92 struct mlx5_vdpa_virtqueue {
93 	bool ready;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u32 num_ent;
98 
99 	/* Resources for implementing the notification channel from the device
100 	 * to the driver. fwqp is the firmware end of an RC connection; the
101 	 * other end is vqqp used by the driver. cq is where completions are
102 	 * reported.
103 	 */
104 	struct mlx5_vdpa_cq cq;
105 	struct mlx5_vdpa_qp fwqp;
106 	struct mlx5_vdpa_qp vqqp;
107 
108 	/* umem resources are required for the virtqueue operation. Their use
109 	 * is internal and they must be provided by the driver.
110 	 */
111 	struct mlx5_vdpa_umem umem1;
112 	struct mlx5_vdpa_umem umem2;
113 	struct mlx5_vdpa_umem umem3;
114 
115 	u32 counter_set_id;
116 	bool initialized;
117 	int index;
118 	u32 virtq_id;
119 	struct mlx5_vdpa_net *ndev;
120 	u16 avail_idx;
121 	u16 used_idx;
122 	int fw_state;
123 	struct msi_map map;
124 
125 	/* keep last in the struct */
126 	struct mlx5_vq_restore_info ri;
127 };
128 
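/* A virtqueue index is valid only if the negotiated features expose it:
 * without VIRTIO_NET_F_MQ there is a single RX/TX pair (indices 0 and 1),
 * plus index 2 for the control VQ if VIRTIO_NET_F_CTRL_VQ was negotiated;
 * with VIRTIO_NET_F_MQ any index up to max_idx is valid.
 */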
129 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
130 {
131 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
132 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
133 			return idx < 2;
134 		else
135 			return idx < 3;
136 	}
137 
138 	return idx <= mvdev->max_idx;
139 }
140 
141 static void free_resources(struct mlx5_vdpa_net *ndev);
142 static void init_mvqs(struct mlx5_vdpa_net *ndev);
143 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
144 static void teardown_driver(struct mlx5_vdpa_net *ndev);
145 
146 static bool mlx5_vdpa_debug;
147 
148 #define MLX5_CVQ_MAX_ENT 16
149 
150 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
151 	do {                                                                                       \
152 		if (features & BIT_ULL(_feature))                                                  \
153 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
154 	} while (0)
155 
156 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
157 	do {                                                                                       \
158 		if (status & (_status))                                                            \
159 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
160 	} while (0)
161 
162 /* TODO: cross-endian support */
163 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
164 {
165 	return virtio_legacy_is_little_endian() ||
166 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
167 }
168 
169 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
170 {
171 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
172 }
173 
174 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
175 {
176 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
177 }
178 
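/* The control VQ always follows the data virtqueues: index 2 when only a
 * single RX/TX pair is exposed (no VIRTIO_NET_F_MQ), max_vqs otherwise.
 */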
179 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
180 {
181 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
182 		return 2;
183 
184 	return mvdev->max_vqs;
185 }
186 
187 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
188 {
189 	return idx == ctrl_vq_idx(mvdev);
190 }
191 
192 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
193 {
194 	if (status & ~VALID_STATUS_MASK)
195 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
196 			       status & ~VALID_STATUS_MASK);
197 
198 	if (!mlx5_vdpa_debug)
199 		return;
200 
201 	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
202 	if (set && !status) {
203 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
204 		return;
205 	}
206 
207 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
208 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
209 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
210 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
211 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
212 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
213 }
214 
215 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
216 {
217 	if (features & ~VALID_FEATURES_MASK)
218 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
219 			       features & ~VALID_FEATURES_MASK);
220 
221 	if (!mlx5_vdpa_debug)
222 		return;
223 
224 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
225 	if (!features)
226 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
227 
228 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
229 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
230 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
231 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
232 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
233 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
262 }
263 
264 static int create_tis(struct mlx5_vdpa_net *ndev)
265 {
266 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
267 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
268 	void *tisc;
269 	int err;
270 
271 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
272 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
273 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
274 	if (err)
275 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
276 
277 	return err;
278 }
279 
280 static void destroy_tis(struct mlx5_vdpa_net *ndev)
281 {
282 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
283 }
284 
285 #define MLX5_VDPA_CQE_SIZE 64
286 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
287 
288 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
289 {
290 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
291 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
292 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
293 	int err;
294 
295 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
296 				       ndev->mvdev.mdev->priv.numa_node);
297 	if (err)
298 		return err;
299 
300 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
301 
302 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
303 	buf->nent = nent;
304 
305 	return 0;
306 }
307 
308 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
309 {
310 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
311 
312 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
313 					ndev->mvdev.mdev->priv.numa_node);
314 }
315 
316 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
317 {
318 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
319 }
320 
321 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
322 {
323 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
324 }
325 
326 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
327 {
328 	struct mlx5_cqe64 *cqe64;
329 	void *cqe;
330 	int i;
331 
332 	for (i = 0; i < buf->nent; i++) {
333 		cqe = get_cqe(vcq, i);
334 		cqe64 = cqe;
335 		cqe64->op_own = MLX5_CQE_INVALID << 4;
336 	}
337 }
338 
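/* Return the CQE at slot n if it is owned by software, i.e. its opcode is
 * valid and its ownership bit matches the parity of the current pass over
 * the ring; otherwise return NULL.
 */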
339 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
340 {
341 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
342 
343 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
344 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
345 		return cqe64;
346 
347 	return NULL;
348 }
349 
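/* Advance the RQ head by n entries and publish it through the receive
 * doorbell record so the hardware can reuse the posted WQEs.
 */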
350 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
351 {
352 	vqp->head += n;
353 	vqp->db.db[0] = cpu_to_be32(vqp->head);
354 }
355 
356 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
357 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
358 {
359 	struct mlx5_vdpa_qp *vqp;
360 	__be64 *pas;
361 	void *qpc;
362 
363 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
364 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
365 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
366 	if (vqp->fw) {
367 		/* The firmware QP is allocated by the driver on behalf of firmware,
368 		 * so we can skip part of the params as they will be chosen by firmware.
369 		 */
370 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
371 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
372 		MLX5_SET(qpc, qpc, no_sq, 1);
373 		return;
374 	}
375 
376 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
377 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
378 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
379 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
380 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
381 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
382 	MLX5_SET(qpc, qpc, no_sq, 1);
383 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
384 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
385 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
386 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
387 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
388 }
389 
390 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
391 {
392 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
393 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
394 					ndev->mvdev.mdev->priv.numa_node);
395 }
396 
397 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
398 {
399 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
400 }
401 
402 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
403 		     struct mlx5_vdpa_qp *vqp)
404 {
405 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
406 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
407 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
408 	void *qpc;
409 	void *in;
410 	int err;
411 
412 	if (!vqp->fw) {
413 		vqp = &mvq->vqqp;
414 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
415 		if (err)
416 			return err;
417 
418 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
419 		if (err)
420 			goto err_db;
421 		inlen += vqp->frag_buf.npages * sizeof(__be64);
422 	}
423 
424 	in = kzalloc(inlen, GFP_KERNEL);
425 	if (!in) {
426 		err = -ENOMEM;
427 		goto err_kzalloc;
428 	}
429 
430 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
431 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
432 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
433 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
434 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
435 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
436 	if (!vqp->fw)
437 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
438 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
439 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
440 	kfree(in);
441 	if (err)
442 		goto err_kzalloc;
443 
444 	vqp->mqp.uid = ndev->mvdev.res.uid;
445 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
446 
447 	if (!vqp->fw)
448 		rx_post(vqp, mvq->num_ent);
449 
450 	return 0;
451 
452 err_kzalloc:
453 	if (!vqp->fw)
454 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
455 err_db:
456 	if (!vqp->fw)
457 		rq_buf_free(ndev, vqp);
458 
459 	return err;
460 }
461 
462 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
463 {
464 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
465 
466 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
467 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
468 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
469 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
470 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
471 	if (!vqp->fw) {
472 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
473 		rq_buf_free(ndev, vqp);
474 	}
475 }
476 
477 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
478 {
479 	return get_sw_cqe(cq, cq->mcq.cons_index);
480 }
481 
482 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
483 {
484 	struct mlx5_cqe64 *cqe64;
485 
486 	cqe64 = next_cqe_sw(vcq);
487 	if (!cqe64)
488 		return -EAGAIN;
489 
490 	vcq->mcq.cons_index++;
491 	return 0;
492 }
493 
494 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
495 {
496 	struct mlx5_vdpa_net *ndev = mvq->ndev;
497 	struct vdpa_callback *event_cb;
498 
499 	event_cb = &ndev->event_cbs[mvq->index];
500 	mlx5_cq_set_ci(&mvq->cq.mcq);
501 
502 	/* make sure the CQ consumer update is visible to the hardware before
503 	 * updating the RX doorbell record.
504 	 */
505 	dma_wmb();
506 	rx_post(&mvq->vqqp, num);
507 	if (event_cb->callback)
508 		event_cb->callback(event_cb->private);
509 }
510 
511 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
512 {
513 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
514 	struct mlx5_vdpa_net *ndev = mvq->ndev;
515 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
516 	int num = 0;
517 
518 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
519 		num++;
520 		if (num > mvq->num_ent / 2) {
521 			/* If completions keep coming while we poll, we want to
522 			 * let the hardware know that we consumed them by
523 			 * updating the doorbell record.  We also let the vdpa core
524 			 * know about this so it passes it on to the virtio driver
525 			 * in the guest.
526 			 */
527 			mlx5_vdpa_handle_completions(mvq, num);
528 			num = 0;
529 		}
530 	}
531 
532 	if (num)
533 		mlx5_vdpa_handle_completions(mvq, num);
534 
535 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
536 }
537 
538 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
539 {
540 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
541 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
542 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
543 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
544 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
545 	__be64 *pas;
546 	int inlen;
547 	void *cqc;
548 	void *in;
549 	int err;
550 	int eqn;
551 
552 	err = mlx5_db_alloc(mdev, &vcq->db);
553 	if (err)
554 		return err;
555 
556 	vcq->mcq.set_ci_db = vcq->db.db;
557 	vcq->mcq.arm_db = vcq->db.db + 1;
558 	vcq->mcq.cqe_sz = 64;
559 
560 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
561 	if (err)
562 		goto err_db;
563 
564 	cq_frag_buf_init(vcq, &vcq->buf);
565 
566 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
567 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
568 	in = kzalloc(inlen, GFP_KERNEL);
569 	if (!in) {
570 		err = -ENOMEM;
571 		goto err_vzalloc;
572 	}
573 
574 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
575 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
576 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
577 
578 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
579 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
580 
581 	/* Use vector 0 by default. Consider adding code to choose the least
582 	 * used vector.
583 	 */
584 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
585 	if (err)
586 		goto err_vec;
587 
588 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
589 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
590 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
591 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
592 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
593 
594 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
595 	if (err)
596 		goto err_vec;
597 
598 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
599 	vcq->cqe = num_ent;
600 	vcq->mcq.set_ci_db = vcq->db.db;
601 	vcq->mcq.arm_db = vcq->db.db + 1;
602 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
603 	kfree(in);
604 	return 0;
605 
606 err_vec:
607 	kfree(in);
608 err_vzalloc:
609 	cq_frag_buf_free(ndev, &vcq->buf);
610 err_db:
611 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
612 	return err;
613 }
614 
615 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
616 {
617 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
618 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
619 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
620 
621 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
622 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
623 		return;
624 	}
625 	cq_frag_buf_free(ndev, &vcq->buf);
626 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
627 }
628 
629 static int read_umem_params(struct mlx5_vdpa_net *ndev)
630 {
631 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
632 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
633 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
634 	int out_size;
635 	void *caps;
636 	void *out;
637 	int err;
638 
639 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
640 	out = kzalloc(out_size, GFP_KERNEL);
641 	if (!out)
642 		return -ENOMEM;
643 
644 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
645 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
646 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
647 	if (err) {
648 		mlx5_vdpa_warn(&ndev->mvdev,
649 			"Failed reading vdpa umem capabilities with err %d\n", err);
650 		goto out;
651 	}
652 
653 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
654 
655 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
656 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
657 
658 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
659 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
660 
661 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
662 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
663 
664 out:
665 	kfree(out);
666 	return 0;
667 }
668 
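/* The required umem size is a linear function of the queue depth using the
 * per-umem parameters reported by firmware: size = param_a * num_ent + param_b.
 */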
669 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
670 			  struct mlx5_vdpa_umem **umemp)
671 {
672 	u32 p_a;
673 	u32 p_b;
674 
675 	switch (num) {
676 	case 1:
677 		p_a = ndev->umem_1_buffer_param_a;
678 		p_b = ndev->umem_1_buffer_param_b;
679 		*umemp = &mvq->umem1;
680 		break;
681 	case 2:
682 		p_a = ndev->umem_2_buffer_param_a;
683 		p_b = ndev->umem_2_buffer_param_b;
684 		*umemp = &mvq->umem2;
685 		break;
686 	case 3:
687 		p_a = ndev->umem_3_buffer_param_a;
688 		p_b = ndev->umem_3_buffer_param_b;
689 		*umemp = &mvq->umem3;
690 		break;
691 	}
692 
693 	(*umemp)->size = p_a * mvq->num_ent + p_b;
694 }
695 
696 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
697 {
698 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
699 }
700 
701 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
702 {
703 	int inlen;
704 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
705 	void *um;
706 	void *in;
707 	int err;
708 	__be64 *pas;
709 	struct mlx5_vdpa_umem *umem;
710 
711 	set_umem_size(ndev, mvq, num, &umem);
712 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
713 	if (err)
714 		return err;
715 
716 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
717 
718 	in = kzalloc(inlen, GFP_KERNEL);
719 	if (!in) {
720 		err = -ENOMEM;
721 		goto err_in;
722 	}
723 
724 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
725 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
726 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
727 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
728 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
729 
730 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
731 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
732 
733 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
734 	if (err) {
735 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
736 		goto err_cmd;
737 	}
738 
739 	kfree(in);
740 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
741 
742 	return 0;
743 
744 err_cmd:
745 	kfree(in);
746 err_in:
747 	umem_frag_buf_free(ndev, umem);
748 	return err;
749 }
750 
751 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
752 {
753 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
754 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
755 	struct mlx5_vdpa_umem *umem;
756 
757 	switch (num) {
758 	case 1:
759 		umem = &mvq->umem1;
760 		break;
761 	case 2:
762 		umem = &mvq->umem2;
763 		break;
764 	case 3:
765 		umem = &mvq->umem3;
766 		break;
767 	}
768 
769 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
770 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
771 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
772 		return;
773 
774 	umem_frag_buf_free(ndev, umem);
775 }
776 
777 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
778 {
779 	int num;
780 	int err;
781 
782 	for (num = 1; num <= 3; num++) {
783 		err = create_umem(ndev, mvq, num);
784 		if (err)
785 			goto err_umem;
786 	}
787 	return 0;
788 
789 err_umem:
790 	for (num--; num > 0; num--)
791 		umem_destroy(ndev, mvq, num);
792 
793 	return err;
794 }
795 
796 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
797 {
798 	int num;
799 
800 	for (num = 3; num > 0; num--)
801 		umem_destroy(ndev, mvq, num);
802 }
803 
804 static int get_queue_type(struct mlx5_vdpa_net *ndev)
805 {
806 	u32 type_mask;
807 
808 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
809 
810 	/* prefer split queue */
811 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
812 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
813 
814 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
815 
816 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
817 }
818 
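/* Per the virtio-net queue layout, even data queue indices are RX and odd are TX. */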
819 static bool vq_is_tx(u16 idx)
820 {
821 	return idx % 2;
822 }
823 
824 enum {
825 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
826 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
827 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
828 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
829 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
830 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
831 	MLX5_VIRTIO_NET_F_CSUM = 10,
832 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
833 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
834 };
835 
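/* Translate negotiated virtio-net feature bits into the compressed bitmask
 * layout of the virtio_net_q object (MLX5_VIRTIO_NET_F_* positions), which is
 * later split into queue_feature_bit_mask_12_3 and queue_feature_bit_mask_2_0.
 */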
836 static u16 get_features(u64 features)
837 {
838 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
839 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
840 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
841 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
842 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
843 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
844 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
845 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
846 }
847 
848 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
849 {
850 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
851 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
852 }
853 
854 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
855 {
856 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
857 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
858 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
859 }
860 
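/* Create the VIRTIO_NET_Q object for this virtqueue: program the ring
 * addresses, queue size, event mode (MSI-X vector if one was allocated,
 * otherwise the firmware QP), the memory keys for the data and descriptor
 * mappings, the three umems and, when supported, the queue counter set.
 */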
861 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
862 {
863 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
864 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
865 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
866 	struct mlx5_vdpa_mr *vq_mr;
867 	struct mlx5_vdpa_mr *vq_desc_mr;
868 	void *obj_context;
869 	u16 mlx_features;
870 	void *cmd_hdr;
871 	void *vq_ctx;
872 	void *in;
873 	int err;
874 
875 	err = umems_create(ndev, mvq);
876 	if (err)
877 		return err;
878 
879 	in = kzalloc(inlen, GFP_KERNEL);
880 	if (!in) {
881 		err = -ENOMEM;
882 		goto err_alloc;
883 	}
884 
885 	mlx_features = get_features(ndev->mvdev.actual_features);
886 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
887 
888 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
889 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
890 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
891 
892 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
893 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
894 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
895 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
896 		 mlx_features >> 3);
897 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
898 		 mlx_features & 7);
899 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
900 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
901 
902 	if (vq_is_tx(mvq->index))
903 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
904 
905 	if (mvq->map.virq) {
906 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
907 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
908 	} else {
909 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
910 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
911 	}
912 
913 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
914 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
915 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
916 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
917 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
918 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
919 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
920 	vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
921 	if (vq_mr)
922 		MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
923 
924 	vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
925 	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
926 		MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
927 
928 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
929 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
930 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
931 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
932 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
933 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
934 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
935 	if (counters_supported(&ndev->mvdev))
936 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
937 
938 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
939 	if (err)
940 		goto err_cmd;
941 
942 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
943 	kfree(in);
944 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
945 
946 	return 0;
947 
948 err_cmd:
949 	kfree(in);
950 err_alloc:
951 	umems_destroy(ndev, mvq);
952 	return err;
953 }
954 
955 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
956 {
957 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
958 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
959 
960 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
961 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
962 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
963 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
964 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
965 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
966 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
967 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
968 		return;
969 	}
970 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
971 	umems_destroy(ndev, mvq);
972 }
973 
974 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
975 {
976 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
977 }
978 
979 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
980 {
981 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
982 }
983 
984 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
985 			int *outlen, u32 qpn, u32 rqpn)
986 {
987 	void *qpc;
988 	void *pp;
989 
990 	switch (cmd) {
991 	case MLX5_CMD_OP_2RST_QP:
992 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
993 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
994 		*in = kzalloc(*inlen, GFP_KERNEL);
995 		*out = kzalloc(*outlen, GFP_KERNEL);
996 		if (!*in || !*out)
997 			goto outerr;
998 
999 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1000 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1001 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1002 		break;
1003 	case MLX5_CMD_OP_RST2INIT_QP:
1004 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1005 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1006 		*in = kzalloc(*inlen, GFP_KERNEL);
1007 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1008 		if (!*in || !*out)
1009 			goto outerr;
1010 
1011 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1012 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1013 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1014 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1015 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1016 		MLX5_SET(qpc, qpc, rwe, 1);
1017 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1018 		MLX5_SET(ads, pp, vhca_port_num, 1);
1019 		break;
1020 	case MLX5_CMD_OP_INIT2RTR_QP:
1021 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1022 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1023 		*in = kzalloc(*inlen, GFP_KERNEL);
1024 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1025 		if (!*in || !*out)
1026 			goto outerr;
1027 
1028 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1029 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1030 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1031 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1032 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1033 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1034 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1035 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1036 		MLX5_SET(ads, pp, fl, 1);
1037 		break;
1038 	case MLX5_CMD_OP_RTR2RTS_QP:
1039 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1040 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1041 		*in = kzalloc(*inlen, GFP_KERNEL);
1042 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1043 		if (!*in || !*out)
1044 			goto outerr;
1045 
1046 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1047 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1048 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1049 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1050 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1051 		MLX5_SET(ads, pp, ack_timeout, 14);
1052 		MLX5_SET(qpc, qpc, retry_count, 7);
1053 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1054 		break;
1055 	default:
1056 		goto outerr_nullify;
1057 	}
1058 
1059 	return;
1060 
1061 outerr:
1062 	kfree(*in);
1063 	kfree(*out);
1064 outerr_nullify:
1065 	*in = NULL;
1066 	*out = NULL;
1067 }
1068 
1069 static void free_inout(void *in, void *out)
1070 {
1071 	kfree(in);
1072 	kfree(out);
1073 }
1074 
1075 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1076  * firmware. The fw argument indicates whether the QP being modified is the one
1077  * used by firmware.
1078  */
1079 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1080 {
1081 	int outlen;
1082 	int inlen;
1083 	void *out;
1084 	void *in;
1085 	int err;
1086 
1087 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1088 	if (!in || !out)
1089 		return -ENOMEM;
1090 
1091 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1092 	free_inout(in, out);
1093 	return err;
1094 }
1095 
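/* Connect the firmware and driver QPs into an RC loopback: move both through
 * RST and INIT to RTR, then move the firmware QP to RTS so it can send
 * notifications towards the driver's receive queue.
 */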
1096 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1097 {
1098 	int err;
1099 
1100 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1101 	if (err)
1102 		return err;
1103 
1104 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1105 	if (err)
1106 		return err;
1107 
1108 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1109 	if (err)
1110 		return err;
1111 
1112 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1113 	if (err)
1114 		return err;
1115 
1116 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1117 	if (err)
1118 		return err;
1119 
1120 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1121 	if (err)
1122 		return err;
1123 
1124 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1125 }
1126 
1127 struct mlx5_virtq_attr {
1128 	u8 state;
1129 	u16 available_index;
1130 	u16 used_index;
1131 };
1132 
1133 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1134 			   struct mlx5_virtq_attr *attr)
1135 {
1136 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1137 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1138 	void *out;
1139 	void *obj_context;
1140 	void *cmd_hdr;
1141 	int err;
1142 
1143 	out = kzalloc(outlen, GFP_KERNEL);
1144 	if (!out)
1145 		return -ENOMEM;
1146 
1147 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1148 
1149 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1150 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1151 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1152 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1153 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1154 	if (err)
1155 		goto err_cmd;
1156 
1157 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1158 	memset(attr, 0, sizeof(*attr));
1159 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1160 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1161 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1162 	kfree(out);
1163 	return 0;
1164 
1165 err_cmd:
1166 	kfree(out);
1167 	return err;
1168 }
1169 
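/* Only forward transitions are accepted: INIT -> RDY and RDY -> SUSPEND.
 * A queue in SUSPEND or ERR cannot be modified to another state here.
 */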
1170 static bool is_valid_state_change(int oldstate, int newstate)
1171 {
1172 	switch (oldstate) {
1173 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1174 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1175 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1176 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1177 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1178 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1179 	default:
1180 		return false;
1181 	}
1182 }
1183 
1184 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1185 {
1186 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1187 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1188 	void *obj_context;
1189 	void *cmd_hdr;
1190 	void *in;
1191 	int err;
1192 
1193 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1194 		return 0;
1195 
1196 	if (!is_valid_state_change(mvq->fw_state, state))
1197 		return -EINVAL;
1198 
1199 	in = kzalloc(inlen, GFP_KERNEL);
1200 	if (!in)
1201 		return -ENOMEM;
1202 
1203 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1204 
1205 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1206 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1207 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1208 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1209 
1210 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1211 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1212 		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1213 	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1214 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1215 	kfree(in);
1216 	if (!err)
1217 		mvq->fw_state = state;
1218 
1219 	return err;
1220 }
1221 
1222 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1223 {
1224 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1225 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1226 	void *cmd_hdr;
1227 	int err;
1228 
1229 	if (!counters_supported(&ndev->mvdev))
1230 		return 0;
1231 
1232 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1233 
1234 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1235 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1236 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1237 
1238 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1239 	if (err)
1240 		return err;
1241 
1242 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1243 
1244 	return 0;
1245 }
1246 
1247 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1248 {
1249 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1250 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1251 
1252 	if (!counters_supported(&ndev->mvdev))
1253 		return;
1254 
1255 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1256 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1257 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1258 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1259 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1260 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1261 }
1262 
1263 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1264 {
1265 	struct vdpa_callback *cb = priv;
1266 
1267 	if (cb->callback)
1268 		return cb->callback(cb->private);
1269 
1270 	return IRQ_HANDLED;
1271 }
1272 
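/* Try to take a free MSI-X vector from the pool for this virtqueue. If none
 * is available or request_irq() fails, mvq->map is left unset and the
 * virtqueue falls back to QP-based event mode in create_virtqueue().
 */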
1273 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1274 			 struct mlx5_vdpa_virtqueue *mvq)
1275 {
1276 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1277 	struct mlx5_vdpa_irq_pool_entry *ent;
1278 	int err;
1279 	int i;
1280 
1281 	for (i = 0; i < irqp->num_ent; i++) {
1282 		ent = &irqp->entries[i];
1283 		if (!ent->used) {
1284 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1285 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1286 			ent->dev_id = &ndev->event_cbs[mvq->index];
1287 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1288 					  ent->name, ent->dev_id);
1289 			if (err)
1290 				return;
1291 
1292 			ent->used = true;
1293 			mvq->map = ent->map;
1294 			return;
1295 		}
1296 	}
1297 }
1298 
1299 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1300 			   struct mlx5_vdpa_virtqueue *mvq)
1301 {
1302 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1303 	int i;
1304 
1305 	for (i = 0; i < irqp->num_ent; i++)
1306 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1307 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1308 			irqp->entries[i].used = false;
1309 			return;
1310 		}
1311 }
1312 
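/* Bring up all resources backing one virtqueue: the completion queue, the
 * firmware/driver QP pair and their RC connection, the counter set, an
 * optional MSI-X vector and finally the virtio_net_q object, which is moved
 * to RDY if the queue was already marked ready.
 */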
1313 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1314 {
1315 	u16 idx = mvq->index;
1316 	int err;
1317 
1318 	if (!mvq->num_ent)
1319 		return 0;
1320 
1321 	if (mvq->initialized)
1322 		return 0;
1323 
1324 	err = cq_create(ndev, idx, mvq->num_ent);
1325 	if (err)
1326 		return err;
1327 
1328 	err = qp_create(ndev, mvq, &mvq->fwqp);
1329 	if (err)
1330 		goto err_fwqp;
1331 
1332 	err = qp_create(ndev, mvq, &mvq->vqqp);
1333 	if (err)
1334 		goto err_vqqp;
1335 
1336 	err = connect_qps(ndev, mvq);
1337 	if (err)
1338 		goto err_connect;
1339 
1340 	err = counter_set_alloc(ndev, mvq);
1341 	if (err)
1342 		goto err_connect;
1343 
1344 	alloc_vector(ndev, mvq);
1345 	err = create_virtqueue(ndev, mvq);
1346 	if (err)
1347 		goto err_vq;
1348 
1349 	if (mvq->ready) {
1350 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1351 		if (err) {
1352 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1353 				       idx, err);
1354 			goto err_modify;
1355 		}
1356 	}
1357 
1358 	mvq->initialized = true;
1359 	return 0;
1360 
1361 err_modify:
1362 	destroy_virtqueue(ndev, mvq);
1363 err_vq:
1364 	dealloc_vector(ndev, mvq);
1365 	counter_set_dealloc(ndev, mvq);
1366 err_connect:
1367 	qp_destroy(ndev, &mvq->vqqp);
1368 err_vqqp:
1369 	qp_destroy(ndev, &mvq->fwqp);
1370 err_fwqp:
1371 	cq_destroy(ndev, idx);
1372 	return err;
1373 }
1374 
1375 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1376 {
1377 	struct mlx5_virtq_attr attr;
1378 
1379 	if (!mvq->initialized)
1380 		return;
1381 
1382 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1383 		return;
1384 
1385 	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1386 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1387 
1388 	if (query_virtqueue(ndev, mvq, &attr)) {
1389 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1390 		return;
1391 	}
1392 	mvq->avail_idx = attr.available_index;
1393 	mvq->used_idx = attr.used_index;
1394 }
1395 
1396 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1397 {
1398 	int i;
1399 
1400 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1401 		suspend_vq(ndev, &ndev->vqs[i]);
1402 }
1403 
1404 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1405 {
1406 	if (!mvq->initialized)
1407 		return;
1408 
1409 	suspend_vq(ndev, mvq);
1410 	destroy_virtqueue(ndev, mvq);
1411 	dealloc_vector(ndev, mvq);
1412 	counter_set_dealloc(ndev, mvq);
1413 	qp_destroy(ndev, &mvq->vqqp);
1414 	qp_destroy(ndev, &mvq->fwqp);
1415 	cq_destroy(ndev, mvq->index);
1416 	mvq->initialized = false;
1417 }
1418 
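/* The RQT lists the receive virtqueues (even indices) that the TIR spreads
 * traffic over. Its maximum size is the rounded-up power of two of rqt_size,
 * while the actual size reflects the currently used number of VQ pairs.
 */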
1419 static int create_rqt(struct mlx5_vdpa_net *ndev)
1420 {
1421 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1422 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1423 	__be32 *list;
1424 	void *rqtc;
1425 	int inlen;
1426 	void *in;
1427 	int i, j;
1428 	int err;
1429 
1430 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1431 	in = kzalloc(inlen, GFP_KERNEL);
1432 	if (!in)
1433 		return -ENOMEM;
1434 
1435 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1436 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1437 
1438 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1439 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1440 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1441 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1442 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1443 
1444 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1445 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1446 	kfree(in);
1447 	if (err)
1448 		return err;
1449 
1450 	return 0;
1451 }
1452 
1453 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1454 
1455 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1456 {
1457 	int act_sz = roundup_pow_of_two(num / 2);
1458 	__be32 *list;
1459 	void *rqtc;
1460 	int inlen;
1461 	void *in;
1462 	int i, j;
1463 	int err;
1464 
1465 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1466 	in = kzalloc(inlen, GFP_KERNEL);
1467 	if (!in)
1468 		return -ENOMEM;
1469 
1470 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1471 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1472 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1473 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1474 
1475 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1476 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1477 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1478 
1479 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1480 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1481 	kfree(in);
1482 	if (err)
1483 		return err;
1484 
1485 	return 0;
1486 }
1487 
1488 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1489 {
1490 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1491 }
1492 
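/* Create an indirect TIR that spreads received packets over the RQT using a
 * symmetric Toeplitz hash of the outer IPv4 addresses and TCP ports.
 */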
1493 static int create_tir(struct mlx5_vdpa_net *ndev)
1494 {
1495 #define HASH_IP_L4PORTS                                                                            \
1496 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1497 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1498 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1499 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1500 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1501 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1502 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1503 	void *rss_key;
1504 	void *outer;
1505 	void *tirc;
1506 	void *in;
1507 	int err;
1508 
1509 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1510 	if (!in)
1511 		return -ENOMEM;
1512 
1513 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1514 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1515 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1516 
1517 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1518 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1519 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1520 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1521 
1522 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1523 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1524 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1525 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1526 
1527 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1528 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1529 
1530 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1531 	kfree(in);
1532 	if (err)
1533 		return err;
1534 
1535 	mlx5_vdpa_add_tirn(ndev);
1536 	return err;
1537 }
1538 
1539 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1540 {
1541 	mlx5_vdpa_remove_tirn(ndev);
1542 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1543 }
1544 
1545 #define MAX_STEERING_ENT 0x8000
1546 #define MAX_STEERING_GROUPS 2
1547 
1548 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1549        #define NUM_DESTS 2
1550 #else
1551        #define NUM_DESTS 1
1552 #endif
1553 
1554 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1555 				 struct macvlan_node *node,
1556 				 struct mlx5_flow_act *flow_act,
1557 				 struct mlx5_flow_destination *dests)
1558 {
1559 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1560 	int err;
1561 
1562 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1563 	if (IS_ERR(node->ucast_counter.counter))
1564 		return PTR_ERR(node->ucast_counter.counter);
1565 
1566 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1567 	if (IS_ERR(node->mcast_counter.counter)) {
1568 		err = PTR_ERR(node->mcast_counter.counter);
1569 		goto err_mcast_counter;
1570 	}
1571 
1572 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1573 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1574 	return 0;
1575 
1576 err_mcast_counter:
1577 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1578 	return err;
1579 #else
1580 	return 0;
1581 #endif
1582 }
1583 
1584 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1585 				     struct macvlan_node *node)
1586 {
1587 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1588 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1589 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1590 #endif
1591 }
1592 
1593 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1594 					struct macvlan_node *node)
1595 {
1596 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1597 	struct mlx5_flow_act flow_act = {};
1598 	struct mlx5_flow_spec *spec;
1599 	void *headers_c;
1600 	void *headers_v;
1601 	u8 *dmac_c;
1602 	u8 *dmac_v;
1603 	int err;
1604 	u16 vid;
1605 
1606 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1607 	if (!spec)
1608 		return -ENOMEM;
1609 
1610 	vid = key2vid(node->macvlan);
1611 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1612 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1613 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1614 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1615 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1616 	eth_broadcast_addr(dmac_c);
1617 	ether_addr_copy(dmac_v, mac);
1618 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1619 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1620 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1621 	}
1622 	if (node->tagged) {
1623 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1624 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1625 	}
1626 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1627 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1628 	dests[0].tir_num = ndev->res.tirn;
1629 	err = add_steering_counters(ndev, node, &flow_act, dests);
1630 	if (err)
1631 		goto out_free;
1632 
1633 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1634 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1635 #endif
1636 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1637 	if (IS_ERR(node->ucast_rule)) {
1638 		err = PTR_ERR(node->ucast_rule);
1639 		goto err_ucast;
1640 	}
1641 
1642 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1643 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1644 #endif
1645 
1646 	memset(dmac_c, 0, ETH_ALEN);
1647 	memset(dmac_v, 0, ETH_ALEN);
1648 	dmac_c[0] = 1;
1649 	dmac_v[0] = 1;
1650 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1651 	if (IS_ERR(node->mcast_rule)) {
1652 		err = PTR_ERR(node->mcast_rule);
1653 		goto err_mcast;
1654 	}
1655 	kvfree(spec);
1656 	mlx5_vdpa_add_rx_counters(ndev, node);
1657 	return 0;
1658 
1659 err_mcast:
1660 	mlx5_del_flow_rules(node->ucast_rule);
1661 err_ucast:
1662 	remove_steering_counters(ndev, node);
1663 out_free:
1664 	kvfree(spec);
1665 	return err;
1666 }
1667 
1668 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1669 					 struct macvlan_node *node)
1670 {
1671 	mlx5_vdpa_remove_rx_counters(ndev, node);
1672 	mlx5_del_flow_rules(node->ucast_rule);
1673 	mlx5_del_flow_rules(node->mcast_rule);
1674 }
1675 
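/* Build the 64-bit hash key for a MAC/VLAN pair: the VLAN id (or
 * MLX5V_UNTAGGED for untagged entries) in bits 48..63 and the MAC address in
 * bits 0..47.
 */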
1676 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1677 {
1678 	u64 val;
1679 
1680 	if (!tagged)
1681 		vlan = MLX5V_UNTAGGED;
1682 
1683 	val = (u64)vlan << 48 |
1684 	      (u64)mac[0] << 40 |
1685 	      (u64)mac[1] << 32 |
1686 	      (u64)mac[2] << 24 |
1687 	      (u64)mac[3] << 16 |
1688 	      (u64)mac[4] << 8 |
1689 	      (u64)mac[5];
1690 
1691 	return val;
1692 }
1693 
1694 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1695 {
1696 	struct macvlan_node *pos;
1697 	u32 idx;
1698 
1699 	idx = hash_64(value, 8); /* 8 hash bits select the macvlan_hash bucket */
1700 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1701 		if (pos->macvlan == value)
1702 			return pos;
1703 	}
1704 	return NULL;
1705 }
1706 
1707 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1708 {
1709 	struct macvlan_node *ptr;
1710 	u64 val;
1711 	u32 idx;
1712 	int err;
1713 
1714 	val = search_val(mac, vid, tagged);
1715 	if (mac_vlan_lookup(ndev, val))
1716 		return -EEXIST;
1717 
1718 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1719 	if (!ptr)
1720 		return -ENOMEM;
1721 
1722 	ptr->tagged = tagged;
1723 	ptr->macvlan = val;
1724 	ptr->ndev = ndev;
1725 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1726 	if (err)
1727 		goto err_add;
1728 
1729 	idx = hash_64(val, 8);
1730 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1731 	return 0;
1732 
1733 err_add:
1734 	kfree(ptr);
1735 	return err;
1736 }
1737 
1738 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1739 {
1740 	struct macvlan_node *ptr;
1741 
1742 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1743 	if (!ptr)
1744 		return;
1745 
1746 	hlist_del(&ptr->hlist);
1747 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1748 	remove_steering_counters(ndev, ptr);
1749 	kfree(ptr);
1750 }
1751 
1752 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1753 {
1754 	struct macvlan_node *pos;
1755 	struct hlist_node *n;
1756 	int i;
1757 
1758 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1759 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1760 			hlist_del(&pos->hlist);
1761 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1762 			remove_steering_counters(ndev, pos);
1763 			kfree(pos);
1764 		}
1765 	}
1766 }
1767 
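/*
 * Receive steering: an auto-grouped flow table with room for
 * MAX_STEERING_ENT entries is created in the bypass namespace, and a default
 * untagged rule for the currently configured MAC is installed. Further
 * MAC/VLAN rules are added and removed through the control VQ handlers.
 */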
1768 static int setup_steering(struct mlx5_vdpa_net *ndev)
1769 {
1770 	struct mlx5_flow_table_attr ft_attr = {};
1771 	struct mlx5_flow_namespace *ns;
1772 	int err;
1773 
1774 	ft_attr.max_fte = MAX_STEERING_ENT;
1775 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1776 
1777 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1778 	if (!ns) {
1779 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1780 		return -EOPNOTSUPP;
1781 	}
1782 
1783 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1784 	if (IS_ERR(ndev->rxft)) {
1785 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1786 		return PTR_ERR(ndev->rxft);
1787 	}
1788 	mlx5_vdpa_add_rx_flow_table(ndev);
1789 
1790 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1791 	if (err)
1792 		goto err_add;
1793 
1794 	return 0;
1795 
1796 err_add:
1797 	mlx5_vdpa_remove_rx_flow_table(ndev);
1798 	mlx5_destroy_flow_table(ndev->rxft);
1799 	return err;
1800 }
1801 
1802 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1803 {
1804 	clear_mac_vlan_table(ndev);
1805 	mlx5_vdpa_remove_rx_flow_table(ndev);
1806 	mlx5_destroy_flow_table(ndev->rxft);
1807 }
1808 
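/*
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the new MAC replaces the old one in the
 * physical function's MPFS table and the untagged unicast/multicast steering
 * rules are recreated for it. If recreating the rules fails, the old MAC is
 * restored in both the MPFS table and the steering rules so the device keeps
 * forwarding with its previous address.
 */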
1809 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1810 {
1811 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1812 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1813 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1814 	struct mlx5_core_dev *pfmdev;
1815 	size_t read;
1816 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1817 
1818 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1819 	switch (cmd) {
1820 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1821 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1822 		if (read != ETH_ALEN)
1823 			break;
1824 
1825 		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1826 			status = VIRTIO_NET_OK;
1827 			break;
1828 		}
1829 
1830 		if (is_zero_ether_addr(mac))
1831 			break;
1832 
1833 		if (!is_zero_ether_addr(ndev->config.mac)) {
1834 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1835 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1836 					       ndev->config.mac);
1837 				break;
1838 			}
1839 		}
1840 
1841 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1842 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1843 				       mac);
1844 			break;
1845 		}
1846 
1847 		/* Back up the original MAC address so that it can be restored
1848 		 * if adding the forward rules fails.
1849 		 */
1850 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1851 
1852 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1853 
1854 		/* The flow table entry must be recreated so that packets with
1855 		 * the new MAC address are forwarded properly. */
1856 		mac_vlan_del(ndev, mac_back, 0, false);
1857 
1858 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1859 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1860 
1861 			/* This path is unlikely to be taken, but double check anyway */
1862 			if (is_zero_ether_addr(mac_back)) {
1863 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1864 				break;
1865 			}
1866 
1867 			/* Try to restore the original MAC address to the MPFS table
1868 			 * and to restore the forward rule entry.
1869 			 */
1870 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1871 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1872 					       ndev->config.mac);
1873 			}
1874 
1875 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1876 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1877 					       mac_back);
1878 			}
1879 
1880 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1881 
1882 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1883 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1884 
1885 			break;
1886 		}
1887 
1888 		status = VIRTIO_NET_OK;
1889 		break;
1890 
1891 	default:
1892 		break;
1893 	}
1894 
1895 	return status;
1896 }
1897 
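/*
 * change_num_qps() works in units of queue pairs while ndev->cur_num_vqs
 * counts individual virtqueues (two per pair). When scaling down, the RQT is
 * shrunk first and the now-unused virtqueues are torn down afterwards; when
 * scaling up, the additional virtqueues are set up first and only then does
 * the RQT start pointing at them.
 */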
1898 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1899 {
1900 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1901 	int cur_qps = ndev->cur_num_vqs / 2;
1902 	int err;
1903 	int i;
1904 
1905 	if (cur_qps > newqps) {
1906 		err = modify_rqt(ndev, 2 * newqps);
1907 		if (err)
1908 			return err;
1909 
1910 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1911 			teardown_vq(ndev, &ndev->vqs[i]);
1912 
1913 		ndev->cur_num_vqs = 2 * newqps;
1914 	} else {
1915 		ndev->cur_num_vqs = 2 * newqps;
1916 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1917 			err = setup_vq(ndev, &ndev->vqs[i]);
1918 			if (err)
1919 				goto clean_added;
1920 		}
1921 		err = modify_rqt(ndev, 2 * newqps);
1922 		if (err)
1923 			goto clean_added;
1924 	}
1925 	return 0;
1926 
1927 clean_added:
1928 	for (--i; i >= 2 * cur_qps; --i)
1929 		teardown_vq(ndev, &ndev->vqs[i]);
1930 
1931 	ndev->cur_num_vqs = 2 * cur_qps;
1932 
1933 	return err;
1934 }
1935 
1936 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1937 {
1938 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1939 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1940 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1941 	struct virtio_net_ctrl_mq mq;
1942 	size_t read;
1943 	u16 newqps;
1944 
1945 	switch (cmd) {
1946 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1947 		/* This mq feature check aligns with the pre-existing userspace
1948 		 * implementation.
1949 		 *
1950 		 * Without it, an untrusted driver could send a fake multiqueue
1951 		 * config request down to a non-mq device, which may cause the
1952 		 * kernel to panic due to uninitialized resources for the extra
1953 		 * vqs. Even with a well-behaving guest driver, changing the
1954 		 * number of vqs on a non-mq device should not be allowed.
1955 		 */
1956 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1957 			break;
1958 
1959 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1960 		if (read != sizeof(mq))
1961 			break;
1962 
1963 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1964 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1965 		    newqps > ndev->rqt_size)
1966 			break;
1967 
1968 		if (ndev->cur_num_vqs == 2 * newqps) {
1969 			status = VIRTIO_NET_OK;
1970 			break;
1971 		}
1972 
1973 		if (!change_num_qps(mvdev, newqps))
1974 			status = VIRTIO_NET_OK;
1975 
1976 		break;
1977 	default:
1978 		break;
1979 	}
1980 
1981 	return status;
1982 }
1983 
1984 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1985 {
1986 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1987 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1988 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1989 	__virtio16 vlan;
1990 	size_t read;
1991 	u16 id;
1992 
1993 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1994 		return status;
1995 
1996 	switch (cmd) {
1997 	case VIRTIO_NET_CTRL_VLAN_ADD:
1998 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1999 		if (read != sizeof(vlan))
2000 			break;
2001 
2002 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2003 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2004 			break;
2005 
2006 		status = VIRTIO_NET_OK;
2007 		break;
2008 	case VIRTIO_NET_CTRL_VLAN_DEL:
2009 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2010 		if (read != sizeof(vlan))
2011 			break;
2012 
2013 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2014 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2015 		status = VIRTIO_NET_OK;
2016 		break;
2017 	default:
2018 		break;
2019 	}
2020 
2021 	return status;
2022 }
2023 
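/*
 * Control VQ kicks are serviced from the vdpa workqueue. Each invocation
 * handles a single control command under the reslock write lock and then
 * requeues itself, so a burst of commands is processed one descriptor at a
 * time without monopolizing the worker.
 */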
2024 static void mlx5_cvq_kick_handler(struct work_struct *work)
2025 {
2026 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2027 	struct virtio_net_ctrl_hdr ctrl;
2028 	struct mlx5_vdpa_wq_ent *wqent;
2029 	struct mlx5_vdpa_dev *mvdev;
2030 	struct mlx5_control_vq *cvq;
2031 	struct mlx5_vdpa_net *ndev;
2032 	size_t read, write;
2033 	int err;
2034 
2035 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2036 	mvdev = wqent->mvdev;
2037 	ndev = to_mlx5_vdpa_ndev(mvdev);
2038 	cvq = &mvdev->cvq;
2039 
2040 	down_write(&ndev->reslock);
2041 
2042 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2043 		goto out;
2044 
2045 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2046 		goto out;
2047 
2048 	if (!cvq->ready)
2049 		goto out;
2050 
2051 	while (true) {
2052 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2053 					   GFP_ATOMIC);
2054 		if (err <= 0)
2055 			break;
2056 
2057 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2058 		if (read != sizeof(ctrl))
2059 			break;
2060 
2061 		cvq->received_desc++;
2062 		switch (ctrl.class) {
2063 		case VIRTIO_NET_CTRL_MAC:
2064 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2065 			break;
2066 		case VIRTIO_NET_CTRL_MQ:
2067 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2068 			break;
2069 		case VIRTIO_NET_CTRL_VLAN:
2070 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2071 			break;
2072 		default:
2073 			break;
2074 		}
2075 
2076 		/* Make sure data is written before advancing index */
2077 		smp_wmb();
2078 
2079 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2080 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2081 		vringh_kiov_cleanup(&cvq->riov);
2082 		vringh_kiov_cleanup(&cvq->wiov);
2083 
2084 		if (vringh_need_notify_iotlb(&cvq->vring))
2085 			vringh_notify(&cvq->vring);
2086 
2087 		cvq->completed_desc++;
2088 		queue_work(mvdev->wq, &wqent->work);
2089 		break;
2090 	}
2091 
2092 out:
2093 	up_write(&ndev->reslock);
2094 }
2095 
2096 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2097 {
2098 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2099 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2100 	struct mlx5_vdpa_virtqueue *mvq;
2101 
2102 	if (!is_index_valid(mvdev, idx))
2103 		return;
2104 
2105 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2106 		if (!mvdev->wq || !mvdev->cvq.ready)
2107 			return;
2108 
2109 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2110 		return;
2111 	}
2112 
2113 	mvq = &ndev->vqs[idx];
2114 	if (unlikely(!mvq->ready))
2115 		return;
2116 
2117 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2118 }
2119 
2120 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2121 				    u64 driver_area, u64 device_area)
2122 {
2123 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2124 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2125 	struct mlx5_vdpa_virtqueue *mvq;
2126 
2127 	if (!is_index_valid(mvdev, idx))
2128 		return -EINVAL;
2129 
2130 	if (is_ctrl_vq_idx(mvdev, idx)) {
2131 		mvdev->cvq.desc_addr = desc_area;
2132 		mvdev->cvq.device_addr = device_area;
2133 		mvdev->cvq.driver_addr = driver_area;
2134 		return 0;
2135 	}
2136 
2137 	mvq = &ndev->vqs[idx];
2138 	mvq->desc_addr = desc_area;
2139 	mvq->device_addr = device_area;
2140 	mvq->driver_addr = driver_area;
2141 	return 0;
2142 }
2143 
2144 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2145 {
2146 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2147 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2148 	struct mlx5_vdpa_virtqueue *mvq;
2149 
2150 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2151 		return;
2152 
2153 	mvq = &ndev->vqs[idx];
2154 	mvq->num_ent = num;
2155 }
2156 
2157 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2158 {
2159 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2160 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2161 
2162 	ndev->event_cbs[idx] = *cb;
2163 	if (is_ctrl_vq_idx(mvdev, idx))
2164 		mvdev->cvq.event_cb = *cb;
2165 }
2166 
2167 static void mlx5_cvq_notify(struct vringh *vring)
2168 {
2169 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2170 
2171 	if (!cvq->event_cb.callback)
2172 		return;
2173 
2174 	cvq->event_cb.callback(cvq->event_cb.private);
2175 }
2176 
2177 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2178 {
2179 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2180 
2181 	cvq->ready = ready;
2182 	if (!ready)
2183 		return;
2184 
2185 	cvq->vring.notify = mlx5_cvq_notify;
2186 }
2187 
2188 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2189 {
2190 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2191 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2192 	struct mlx5_vdpa_virtqueue *mvq;
2193 	int err;
2194 
2195 	if (!mvdev->actual_features)
2196 		return;
2197 
2198 	if (!is_index_valid(mvdev, idx))
2199 		return;
2200 
2201 	if (is_ctrl_vq_idx(mvdev, idx)) {
2202 		set_cvq_ready(mvdev, ready);
2203 		return;
2204 	}
2205 
2206 	mvq = &ndev->vqs[idx];
2207 	if (!ready) {
2208 		suspend_vq(ndev, mvq);
2209 	} else {
2210 		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2211 		if (err) {
2212 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2213 			ready = false;
2214 		}
2215 	}
2216 
2218 	mvq->ready = ready;
2219 }
2220 
2221 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2222 {
2223 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2224 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2225 
2226 	if (!is_index_valid(mvdev, idx))
2227 		return false;
2228 
2229 	if (is_ctrl_vq_idx(mvdev, idx))
2230 		return mvdev->cvq.ready;
2231 
2232 	return ndev->vqs[idx].ready;
2233 }
2234 
2235 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2236 				  const struct vdpa_vq_state *state)
2237 {
2238 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2239 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2240 	struct mlx5_vdpa_virtqueue *mvq;
2241 
2242 	if (!is_index_valid(mvdev, idx))
2243 		return -EINVAL;
2244 
2245 	if (is_ctrl_vq_idx(mvdev, idx)) {
2246 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2247 		return 0;
2248 	}
2249 
2250 	mvq = &ndev->vqs[idx];
2251 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2252 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2253 		return -EINVAL;
2254 	}
2255 
2256 	mvq->used_idx = state->split.avail_index;
2257 	mvq->avail_idx = state->split.avail_index;
2258 	return 0;
2259 }
2260 
2261 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2262 {
2263 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2264 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2265 	struct mlx5_vdpa_virtqueue *mvq;
2266 	struct mlx5_virtq_attr attr;
2267 	int err;
2268 
2269 	if (!is_index_valid(mvdev, idx))
2270 		return -EINVAL;
2271 
2272 	if (is_ctrl_vq_idx(mvdev, idx)) {
2273 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2274 		return 0;
2275 	}
2276 
2277 	mvq = &ndev->vqs[idx];
2278 	/* If the virtq object was destroyed, use the value saved at
2279 	 * the last minute of suspend_vq. This caters for userspace
2280 	 * that cares about emulating the index after vq is stopped.
2281 	 */
2282 	if (!mvq->initialized) {
2283 		/* Firmware returns a wrong value for the available index.
2284 		 * Since both values should be identical, we take the value of
2285 		 * used_idx which is reported correctly.
2286 		 */
2287 		state->split.avail_index = mvq->used_idx;
2288 		return 0;
2289 	}
2290 
2291 	err = query_virtqueue(ndev, mvq, &attr);
2292 	if (err) {
2293 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2294 		return err;
2295 	}
2296 	state->split.avail_index = attr.used_index;
2297 	return 0;
2298 }
2299 
2300 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2301 {
2302 	return PAGE_SIZE;
2303 }
2304 
2305 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2306 {
2307 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2308 
2309 	if (is_ctrl_vq_idx(mvdev, idx))
2310 		return MLX5_VDPA_CVQ_GROUP;
2311 
2312 	return MLX5_VDPA_DATAVQ_GROUP;
2313 }
2314 
2315 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2316 {
2317 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2318 
2319 	if (is_ctrl_vq_idx(mvdev, idx))
2320 		return MLX5_VDPA_CVQ_GROUP;
2321 
2322 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2323 }
2324 
2325 static u64 mlx_to_virtio_features(u16 dev_features)
2326 {
2327 	u64 result = 0;
2328 
2329 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2330 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2331 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2332 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2333 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2334 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2335 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2336 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2337 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2338 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2339 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2340 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2341 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2342 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2343 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2344 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2345 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2346 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2347 
2348 	return result;
2349 }
2350 
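/*
 * The advertised device features combine the offload bits reported by the
 * VDPA emulation capabilities with a fixed set the driver always exposes
 * (CTRL_VQ, CTRL_MAC_ADDR, MQ, STATUS, MTU, CTRL_VLAN, MAC and
 * VIRTIO_F_ACCESS_PLATFORM); VIRTIO_F_VERSION_1 is added only when the
 * device reports virtio 1.0 support.
 */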
2351 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2352 {
2353 	u64 mlx_vdpa_features = 0;
2354 	u16 dev_features;
2355 
2356 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2357 	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2358 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2359 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2360 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2361 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2362 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2363 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2364 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2365 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2366 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2367 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2368 
2369 	return mlx_vdpa_features;
2370 }
2371 
2372 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2373 {
2374 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2375 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2376 
2377 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2378 	return ndev->mvdev.mlx_features;
2379 }
2380 
2381 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2382 {
2383 	/* Minimum features to expect */
2384 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2385 		return -EOPNOTSUPP;
2386 
2387 	/* Double check the feature combination sent down by the driver.
2388 	 * Fail invalid features whose dependent feature is absent.
2389 	 *
2390 	 * Per the VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2391 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2392 	 * By failing the invalid features sent down by untrusted drivers,
2393 	 * we ensure that the assumptions made by is_index_valid() and
2394 	 * is_ctrl_vq_idx() are not compromised.
2395 	 */
2396 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2397 	    BIT_ULL(VIRTIO_NET_F_MQ))
2398 		return -EINVAL;
2399 
2400 	return 0;
2401 }
2402 
2403 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2404 {
2405 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2406 	int err;
2407 	int i;
2408 
2409 	for (i = 0; i < mvdev->max_vqs; i++) {
2410 		err = setup_vq(ndev, &ndev->vqs[i]);
2411 		if (err)
2412 			goto err_vq;
2413 	}
2414 
2415 	return 0;
2416 
2417 err_vq:
2418 	for (--i; i >= 0; i--)
2419 		teardown_vq(ndev, &ndev->vqs[i]);
2420 
2421 	return err;
2422 }
2423 
2424 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2425 {
2426 	struct mlx5_vdpa_virtqueue *mvq;
2427 	int i;
2428 
2429 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2430 		mvq = &ndev->vqs[i];
2431 		if (!mvq->initialized)
2432 			continue;
2433 
2434 		teardown_vq(ndev, mvq);
2435 	}
2436 }
2437 
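/*
 * Virtqueue index layout depends on the negotiated features:
 * - CTRL_VQ and MQ: data VQs use indices 0..max_vqs - 1, CVQ sits at max_vqs.
 * - CTRL_VQ only:   data VQs use indices 0 and 1, CVQ sits at index 2.
 * - no CTRL_VQ:     only the two data VQs exist, so the last index is 1.
 * max_idx holds the highest valid virtqueue index.
 */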
2438 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2439 {
2440 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2441 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2442 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2443 			mvdev->max_idx = mvdev->max_vqs;
2444 		} else {
2445 			/* Only CVQ is supported. Data virtqueues occupy indices 0 and 1.
2446 			 * CVQ gets index 2.
2447 			 */
2448 			mvdev->max_idx = 2;
2449 		}
2450 	} else {
2451 		/* Two data virtqueues only: one for rx and one for tx */
2452 		mvdev->max_idx = 1;
2453 	}
2454 }
2455 
2456 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2457 {
2458 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2459 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2460 	int err;
2461 
2462 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2463 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2464 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2465 	if (vport)
2466 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2467 
2468 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2469 	if (err)
2470 		return 0;
2471 
2472 	return MLX5_GET(query_vport_state_out, out, state);
2473 }
2474 
2475 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2476 {
2477 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2478 	    VPORT_STATE_UP)
2479 		return true;
2480 
2481 	return false;
2482 }
2483 
2484 static void update_carrier(struct work_struct *work)
2485 {
2486 	struct mlx5_vdpa_wq_ent *wqent;
2487 	struct mlx5_vdpa_dev *mvdev;
2488 	struct mlx5_vdpa_net *ndev;
2489 
2490 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2491 	mvdev = wqent->mvdev;
2492 	ndev = to_mlx5_vdpa_ndev(mvdev);
2493 	if (get_link_state(mvdev))
2494 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2495 	else
2496 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2497 
2498 	if (ndev->config_cb.callback)
2499 		ndev->config_cb.callback(ndev->config_cb.private);
2500 
2501 	kfree(wqent);
2502 }
2503 
2504 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2505 {
2506 	struct mlx5_vdpa_wq_ent *wqent;
2507 
2508 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2509 	if (!wqent)
2510 		return -ENOMEM;
2511 
2512 	wqent->mvdev = &ndev->mvdev;
2513 	INIT_WORK(&wqent->work, update_carrier);
2514 	queue_work(ndev->mvdev.wq, &wqent->work);
2515 	return 0;
2516 }
2517 
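/*
 * Port state changes arrive as PORT_CHANGE events in notifier (atomic)
 * context, so the carrier update is deferred to the vdpa workqueue. The work
 * item re-reads the vport state and then fires the config change callback.
 */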
2518 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2519 {
2520 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2521 	struct mlx5_eqe *eqe = param;
2522 	int ret = NOTIFY_DONE;
2523 
2524 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2525 		switch (eqe->sub_type) {
2526 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2527 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2528 			if (queue_link_work(ndev))
2529 				return NOTIFY_DONE;
2530 
2531 			ret = NOTIFY_OK;
2532 			break;
2533 		default:
2534 			return NOTIFY_DONE;
2535 		}
2536 		return ret;
2537 	}
2538 	return ret;
2539 }
2540 
2541 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2542 {
2543 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2544 		return;
2545 
2546 	ndev->nb.notifier_call = event_handler;
2547 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2548 	ndev->nb_registered = true;
2549 	queue_link_work(ndev);
2550 }
2551 
2552 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2553 {
2554 	if (!ndev->nb_registered)
2555 		return;
2556 
2557 	ndev->nb_registered = false;
2558 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2559 	if (ndev->mvdev.wq)
2560 		flush_workqueue(ndev->mvdev.wq);
2561 }
2562 
2563 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2564 {
2565 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2566 }
2567 
2568 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2569 {
2570 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2571 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2572 	int err;
2573 
2574 	print_features(mvdev, features, true);
2575 
2576 	err = verify_driver_features(mvdev, features);
2577 	if (err)
2578 		return err;
2579 
2580 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2581 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2582 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2583 	else
2584 		ndev->rqt_size = 1;
2585 
2586 	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2587 	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2588 	 *
2589 	 * Multiqueue is disabled by default.
2590 	 * The driver enables multiqueue by sending a command using class
2591 	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2592 	 * operation, as follows: ...
2593 	 */
2594 	ndev->cur_num_vqs = 2;
2595 
2596 	update_cvq_info(mvdev);
2597 	return err;
2598 }
2599 
2600 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2601 {
2602 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2603 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2604 
2605 	ndev->config_cb = *cb;
2606 }
2607 
2608 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2609 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2610 {
2611 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2612 }
2613 
2614 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2615 {
2616 	return VIRTIO_ID_NET;
2617 }
2618 
2619 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2620 {
2621 	return PCI_VENDOR_ID_MELLANOX;
2622 }
2623 
2624 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2625 {
2626 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2627 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2628 
2629 	print_status(mvdev, ndev->mvdev.status, false);
2630 	return ndev->mvdev.status;
2631 }
2632 
2633 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2634 {
2635 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2636 	struct mlx5_virtq_attr attr = {};
2637 	int err;
2638 
2639 	if (mvq->initialized) {
2640 		err = query_virtqueue(ndev, mvq, &attr);
2641 		if (err)
2642 			return err;
2643 	}
2644 
2645 	ri->avail_index = attr.available_index;
2646 	ri->used_index = attr.used_index;
2647 	ri->ready = mvq->ready;
2648 	ri->num_ent = mvq->num_ent;
2649 	ri->desc_addr = mvq->desc_addr;
2650 	ri->device_addr = mvq->device_addr;
2651 	ri->driver_addr = mvq->driver_addr;
2652 	ri->map = mvq->map;
2653 	ri->restore = true;
2654 	return 0;
2655 }
2656 
2657 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2658 {
2659 	int i;
2660 
2661 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2662 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2663 		save_channel_info(ndev, &ndev->vqs[i]);
2664 	}
2665 	return 0;
2666 }
2667 
2668 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2669 {
2670 	int i;
2671 
2672 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2673 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2674 }
2675 
2676 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2677 {
2678 	struct mlx5_vdpa_virtqueue *mvq;
2679 	struct mlx5_vq_restore_info *ri;
2680 	int i;
2681 
2682 	mlx5_clear_vqs(ndev);
2683 	init_mvqs(ndev);
2684 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2685 		mvq = &ndev->vqs[i];
2686 		ri = &mvq->ri;
2687 		if (!ri->restore)
2688 			continue;
2689 
2690 		mvq->avail_idx = ri->avail_index;
2691 		mvq->used_idx = ri->used_index;
2692 		mvq->ready = ri->ready;
2693 		mvq->num_ent = ri->num_ent;
2694 		mvq->desc_addr = ri->desc_addr;
2695 		mvq->device_addr = ri->device_addr;
2696 		mvq->driver_addr = ri->driver_addr;
2697 		mvq->map = ri->map;
2698 	}
2699 }
2700 
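/*
 * Changing the memory map of a live device: the virtqueues are suspended and
 * their state saved, the driver resources are torn down, the new MR is
 * installed, and then the saved state is restored and the driver set up
 * again. If the device is not DRIVER_OK or has been suspended, the
 * restore/re-setup step is skipped here.
 */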
2701 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2702 				struct mlx5_vdpa_mr *new_mr,
2703 				unsigned int asid)
2704 {
2705 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2706 	int err;
2707 
2708 	suspend_vqs(ndev);
2709 	err = save_channels_info(ndev);
2710 	if (err)
2711 		return err;
2712 
2713 	teardown_driver(ndev);
2714 
2715 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2716 
2717 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2718 		return 0;
2719 
2720 	restore_channels_info(ndev);
2721 	err = setup_driver(mvdev);
2722 	if (err)
2723 		return err;
2724 
2725 	return 0;
2726 }
2727 
2728 /* reslock must be held for this function */
2729 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2730 {
2731 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2732 	int err;
2733 
2734 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2735 
2736 	if (ndev->setup) {
2737 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2738 		err = 0;
2739 		goto out;
2740 	}
2741 	mlx5_vdpa_add_debugfs(ndev);
2742 
2743 	err = read_umem_params(ndev);
2744 	if (err)
2745 		goto err_setup;
2746 
2747 	err = setup_virtqueues(mvdev);
2748 	if (err) {
2749 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2750 		goto err_setup;
2751 	}
2752 
2753 	err = create_rqt(ndev);
2754 	if (err) {
2755 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2756 		goto err_rqt;
2757 	}
2758 
2759 	err = create_tir(ndev);
2760 	if (err) {
2761 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2762 		goto err_tir;
2763 	}
2764 
2765 	err = setup_steering(ndev);
2766 	if (err) {
2767 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2768 		goto err_fwd;
2769 	}
2770 	ndev->setup = true;
2771 
2772 	return 0;
2773 
2774 err_fwd:
2775 	destroy_tir(ndev);
2776 err_tir:
2777 	destroy_rqt(ndev);
2778 err_rqt:
2779 	teardown_virtqueues(ndev);
2780 err_setup:
2781 	mlx5_vdpa_remove_debugfs(ndev);
2782 out:
2783 	return err;
2784 }
2785 
2786 /* reslock must be held for this function */
2787 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2788 {
2789 
2790 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2791 
2792 	if (!ndev->setup)
2793 		return;
2794 
2795 	mlx5_vdpa_remove_debugfs(ndev);
2796 	teardown_steering(ndev);
2797 	destroy_tir(ndev);
2798 	destroy_rqt(ndev);
2799 	teardown_virtqueues(ndev);
2800 	ndev->setup = false;
2801 }
2802 
2803 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2804 {
2805 	int i;
2806 
2807 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2808 		ndev->vqs[i].ready = false;
2809 
2810 	ndev->mvdev.cvq.ready = false;
2811 }
2812 
2813 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2814 {
2815 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2816 	int err = 0;
2817 
2818 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2819 		u16 idx = cvq->vring.last_avail_idx;
2820 
2821 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2822 					MLX5_CVQ_MAX_ENT, false,
2823 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2824 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2825 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2826 
2827 		if (!err)
2828 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2829 	}
2830 	return err;
2831 }
2832 
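/*
 * Status handling only acts on the DRIVER_OK transition: setting it brings up
 * the control VQ vring, the link state notifier and the datapath resources;
 * clearing it without a reset is unexpected and leaves the stored status
 * unchanged, with a warning.
 */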
2833 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2834 {
2835 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2836 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2837 	int err;
2838 
2839 	print_status(mvdev, status, true);
2840 
2841 	down_write(&ndev->reslock);
2842 
2843 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2844 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2845 			err = setup_cvq_vring(mvdev);
2846 			if (err) {
2847 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2848 				goto err_setup;
2849 			}
2850 			register_link_notifier(ndev);
2851 			err = setup_driver(mvdev);
2852 			if (err) {
2853 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2854 				goto err_driver;
2855 			}
2856 		} else {
2857 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2858 			goto err_clear;
2859 		}
2860 	}
2861 
2862 	ndev->mvdev.status = status;
2863 	up_write(&ndev->reslock);
2864 	return;
2865 
2866 err_driver:
2867 	unregister_link_notifier(ndev);
2868 err_setup:
2869 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
2870 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2871 err_clear:
2872 	up_write(&ndev->reslock);
2873 }
2874 
2875 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2876 {
2877 	int i;
2878 
2879 	/* By default, all groups are mapped to ASID 0 */
2880 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2881 		mvdev->group2asid[i] = 0;
2882 }
2883 
2884 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
2885 {
2886 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2887 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2888 
2889 	print_status(mvdev, 0, true);
2890 	mlx5_vdpa_info(mvdev, "performing device reset\n");
2891 
2892 	down_write(&ndev->reslock);
2893 	unregister_link_notifier(ndev);
2894 	teardown_driver(ndev);
2895 	clear_vqs_ready(ndev);
2896 	if (flags & VDPA_RESET_F_CLEAN_MAP)
2897 		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
2898 	ndev->mvdev.status = 0;
2899 	ndev->mvdev.suspended = false;
2900 	ndev->cur_num_vqs = 0;
2901 	ndev->mvdev.cvq.received_desc = 0;
2902 	ndev->mvdev.cvq.completed_desc = 0;
2903 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2904 	ndev->mvdev.actual_features = 0;
2905 	init_group_to_asid_map(mvdev);
2906 	++mvdev->generation;
2907 
2908 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
2909 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2910 		if (mlx5_vdpa_create_dma_mr(mvdev))
2911 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2912 	}
2913 	up_write(&ndev->reslock);
2914 
2915 	return 0;
2916 }
2917 
2918 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2919 {
2920 	return mlx5_vdpa_compat_reset(vdev, 0);
2921 }
2922 
2923 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2924 {
2925 	return sizeof(struct virtio_net_config);
2926 }
2927 
2928 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2929 				 unsigned int len)
2930 {
2931 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2932 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2933 
2934 	if (offset + len <= sizeof(struct virtio_net_config))
2935 		memcpy(buf, (u8 *)&ndev->config + offset, len);
2936 }
2937 
2938 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2939 				 unsigned int len)
2940 {
2941 	/* not supported */
2942 }
2943 
2944 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2945 {
2946 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2947 
2948 	return mvdev->generation;
2949 }
2950 
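/*
 * A non-empty iotlb creates a new MR describing the mapping, while an empty
 * iotlb simply clears the previous one. The first map for an address space is
 * installed directly; replacing an existing map goes through
 * mlx5_vdpa_change_map() so that the virtqueues are quiesced around the
 * switch. The control VQ's software iotlb is updated in either case.
 */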
2951 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2952 			unsigned int asid)
2953 {
2954 	struct mlx5_vdpa_mr *new_mr;
2955 	int err;
2956 
2957 	if (asid >= MLX5_VDPA_NUM_AS)
2958 		return -EINVAL;
2959 
2960 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
2961 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
2962 		if (IS_ERR(new_mr)) {
2963 			err = PTR_ERR(new_mr);
2964 			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
2965 			return err;
2966 		}
2967 	} else {
2968 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
2969 		new_mr = NULL;
2970 	}
2971 
2972 	if (!mvdev->mr[asid]) {
2973 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2974 	} else {
2975 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
2976 		if (err) {
2977 			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
2978 			goto out_err;
2979 		}
2980 	}
2981 
2982 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
2983 
2984 out_err:
2985 	mlx5_vdpa_destroy_mr(mvdev, new_mr);
2986 	return err;
2987 }
2988 
2989 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2990 			     struct vhost_iotlb *iotlb)
2991 {
2992 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2993 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2994 	int err;
2995 
2996 	down_write(&ndev->reslock);
2997 	err = set_map_data(mvdev, iotlb, asid);
2998 	up_write(&ndev->reslock);
2999 	return err;
3000 }
3001 
3002 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3003 {
3004 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3005 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3006 	int err;
3007 
3008 	down_write(&ndev->reslock);
3009 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3010 	up_write(&ndev->reslock);
3011 	return err;
3012 }
3013 
3014 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3015 {
3016 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3017 
3018 	if (is_ctrl_vq_idx(mvdev, idx))
3019 		return &vdev->dev;
3020 
3021 	return mvdev->vdev.dma_dev;
3022 }
3023 
3024 static void free_irqs(struct mlx5_vdpa_net *ndev)
3025 {
3026 	struct mlx5_vdpa_irq_pool_entry *ent;
3027 	int i;
3028 
3029 	if (!msix_mode_supported(&ndev->mvdev))
3030 		return;
3031 
3032 	if (!ndev->irqp.entries)
3033 		return;
3034 
3035 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3036 		ent = ndev->irqp.entries + i;
3037 		if (ent->map.virq)
3038 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3039 	}
3040 	kfree(ndev->irqp.entries);
3041 }
3042 
3043 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3044 {
3045 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3046 	struct mlx5_core_dev *pfmdev;
3047 	struct mlx5_vdpa_net *ndev;
3048 
3049 	ndev = to_mlx5_vdpa_ndev(mvdev);
3050 
3051 	free_resources(ndev);
3052 	mlx5_vdpa_destroy_mr_resources(mvdev);
3053 	if (!is_zero_ether_addr(ndev->config.mac)) {
3054 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3055 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3056 	}
3057 	mlx5_vdpa_free_resources(&ndev->mvdev);
3058 	free_irqs(ndev);
3059 	kfree(ndev->event_cbs);
3060 	kfree(ndev->vqs);
3061 }
3062 
3063 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3064 {
3065 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3066 	struct vdpa_notification_area ret = {};
3067 	struct mlx5_vdpa_net *ndev;
3068 	phys_addr_t addr;
3069 
3070 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3071 		return ret;
3072 
3073 	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3074 	 * notification to avoid the risk of mapping pages that contain the
3075 	 * BARs of more than one SF.
3076 	 */
3077 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3078 		return ret;
3079 
3080 	ndev = to_mlx5_vdpa_ndev(mvdev);
3081 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3082 	ret.addr = addr;
3083 	ret.size = PAGE_SIZE;
3084 	return ret;
3085 }
3086 
3087 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3088 {
3089 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3090 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3091 	struct mlx5_vdpa_virtqueue *mvq;
3092 
3093 	if (!is_index_valid(mvdev, idx))
3094 		return -EINVAL;
3095 
3096 	if (is_ctrl_vq_idx(mvdev, idx))
3097 		return -EOPNOTSUPP;
3098 
3099 	mvq = &ndev->vqs[idx];
3100 	if (!mvq->map.virq)
3101 		return -EOPNOTSUPP;
3102 
3103 	return mvq->map.virq;
3104 }
3105 
3106 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3107 {
3108 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3109 
3110 	return mvdev->actual_features;
3111 }
3112 
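/*
 * Per-virtqueue descriptor counters are read with a QUERY_GENERAL_OBJECT
 * command on the virtio_q_counters object attached to the virtqueue. The
 * query is only meaningful while the virtqueue object is in the RDY state;
 * otherwise -EAGAIN is returned.
 */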
3113 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3114 			     u64 *received_desc, u64 *completed_desc)
3115 {
3116 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3117 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3118 	void *cmd_hdr;
3119 	void *ctx;
3120 	int err;
3121 
3122 	if (!counters_supported(&ndev->mvdev))
3123 		return -EOPNOTSUPP;
3124 
3125 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3126 		return -EAGAIN;
3127 
3128 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3129 
3130 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3131 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3132 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3133 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3134 
3135 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3136 	if (err)
3137 		return err;
3138 
3139 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3140 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3141 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3142 	return 0;
3143 }
3144 
3145 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3146 					 struct sk_buff *msg,
3147 					 struct netlink_ext_ack *extack)
3148 {
3149 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3150 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3151 	struct mlx5_vdpa_virtqueue *mvq;
3152 	struct mlx5_control_vq *cvq;
3153 	u64 received_desc;
3154 	u64 completed_desc;
3155 	int err = 0;
3156 
3157 	down_read(&ndev->reslock);
3158 	if (!is_index_valid(mvdev, idx)) {
3159 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3160 		err = -EINVAL;
3161 		goto out_err;
3162 	}
3163 
3164 	if (idx == ctrl_vq_idx(mvdev)) {
3165 		cvq = &mvdev->cvq;
3166 		received_desc = cvq->received_desc;
3167 		completed_desc = cvq->completed_desc;
3168 		goto out;
3169 	}
3170 
3171 	mvq = &ndev->vqs[idx];
3172 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3173 	if (err) {
3174 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3175 		goto out_err;
3176 	}
3177 
3178 out:
3179 	err = -EMSGSIZE;
3180 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3181 		goto out_err;
3182 
3183 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3184 			      VDPA_ATTR_PAD))
3185 		goto out_err;
3186 
3187 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3188 		goto out_err;
3189 
3190 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3191 			      VDPA_ATTR_PAD))
3192 		goto out_err;
3193 
3194 	err = 0;
3195 out_err:
3196 	up_read(&ndev->reslock);
3197 	return err;
3198 }
3199 
3200 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3201 {
3202 	struct mlx5_control_vq *cvq;
3203 
3204 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3205 		return;
3206 
3207 	cvq = &mvdev->cvq;
3208 	cvq->ready = false;
3209 }
3210 
3211 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3212 {
3213 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3214 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3215 	struct mlx5_vdpa_virtqueue *mvq;
3216 	int i;
3217 
3218 	mlx5_vdpa_info(mvdev, "suspending device\n");
3219 
3220 	down_write(&ndev->reslock);
3221 	unregister_link_notifier(ndev);
3222 	for (i = 0; i < ndev->cur_num_vqs; i++) {
3223 		mvq = &ndev->vqs[i];
3224 		suspend_vq(ndev, mvq);
3225 	}
3226 	mlx5_vdpa_cvq_suspend(mvdev);
3227 	mvdev->suspended = true;
3228 	up_write(&ndev->reslock);
3229 	return 0;
3230 }
3231 
3232 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3233 			       unsigned int asid)
3234 {
3235 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3236 	int err = 0;
3237 
3238 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3239 		return -EINVAL;
3240 
3241 	mvdev->group2asid[group] = asid;
3242 
3243 	mutex_lock(&mvdev->mr_mtx);
3244 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
3245 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
3246 	mutex_unlock(&mvdev->mr_mtx);
3247 
3248 	return err;
3249 }
3250 
3251 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3252 	.set_vq_address = mlx5_vdpa_set_vq_address,
3253 	.set_vq_num = mlx5_vdpa_set_vq_num,
3254 	.kick_vq = mlx5_vdpa_kick_vq,
3255 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3256 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3257 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3258 	.set_vq_state = mlx5_vdpa_set_vq_state,
3259 	.get_vq_state = mlx5_vdpa_get_vq_state,
3260 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3261 	.get_vq_notification = mlx5_get_vq_notification,
3262 	.get_vq_irq = mlx5_get_vq_irq,
3263 	.get_vq_align = mlx5_vdpa_get_vq_align,
3264 	.get_vq_group = mlx5_vdpa_get_vq_group,
3265 	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3266 	.get_device_features = mlx5_vdpa_get_device_features,
3267 	.get_backend_features = mlx5_vdpa_get_backend_features,
3268 	.set_driver_features = mlx5_vdpa_set_driver_features,
3269 	.get_driver_features = mlx5_vdpa_get_driver_features,
3270 	.set_config_cb = mlx5_vdpa_set_config_cb,
3271 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3272 	.get_device_id = mlx5_vdpa_get_device_id,
3273 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3274 	.get_status = mlx5_vdpa_get_status,
3275 	.set_status = mlx5_vdpa_set_status,
3276 	.reset = mlx5_vdpa_reset,
3277 	.compat_reset = mlx5_vdpa_compat_reset,
3278 	.get_config_size = mlx5_vdpa_get_config_size,
3279 	.get_config = mlx5_vdpa_get_config,
3280 	.set_config = mlx5_vdpa_set_config,
3281 	.get_generation = mlx5_vdpa_get_generation,
3282 	.set_map = mlx5_vdpa_set_map,
3283 	.reset_map = mlx5_vdpa_reset_map,
3284 	.set_group_asid = mlx5_set_group_asid,
3285 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3286 	.free = mlx5_vdpa_free,
3287 	.suspend = mlx5_vdpa_suspend,
3288 };
3289 
3290 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3291 {
3292 	u16 hw_mtu;
3293 	int err;
3294 
3295 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3296 	if (err)
3297 		return err;
3298 
3299 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3300 	return 0;
3301 }
3302 
3303 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3304 {
3305 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3306 	int err;
3307 
3308 	if (res->valid) {
3309 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3310 		return -EEXIST;
3311 	}
3312 
3313 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3314 	if (err)
3315 		return err;
3316 
3317 	err = create_tis(ndev);
3318 	if (err)
3319 		goto err_tis;
3320 
3321 	res->valid = true;
3322 
3323 	return 0;
3324 
3325 err_tis:
3326 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3327 	return err;
3328 }
3329 
3330 static void free_resources(struct mlx5_vdpa_net *ndev)
3331 {
3332 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3333 
3334 	if (!res->valid)
3335 		return;
3336 
3337 	destroy_tis(ndev);
3338 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3339 	res->valid = false;
3340 }
3341 
3342 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3343 {
3344 	struct mlx5_vdpa_virtqueue *mvq;
3345 	int i;
3346 
3347 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3348 		mvq = &ndev->vqs[i];
3349 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3350 		mvq->index = i;
3351 		mvq->ndev = ndev;
3352 		mvq->fwqp.fw = true;
3353 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3354 	}
3355 	for (; i < ndev->mvdev.max_vqs; i++) {
3356 		mvq = &ndev->vqs[i];
3357 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3358 		mvq->index = i;
3359 		mvq->ndev = ndev;
3360 	}
3361 }
3362 
3363 struct mlx5_vdpa_mgmtdev {
3364 	struct vdpa_mgmt_dev mgtdev;
3365 	struct mlx5_adev *madev;
3366 	struct mlx5_vdpa_net *ndev;
3367 	struct vdpa_config_ops vdpa_ops;
3368 };
3369 
3370 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3371 {
3372 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3373 	void *in;
3374 	int err;
3375 
3376 	in = kvzalloc(inlen, GFP_KERNEL);
3377 	if (!in)
3378 		return -ENOMEM;
3379 
3380 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3381 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3382 		 mtu + MLX5V_ETH_HARD_MTU);
3383 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3384 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3385 
3386 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3387 
3388 	kvfree(in);
3389 	return err;
3390 }
3391 
3392 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3393 {
3394 	struct mlx5_vdpa_irq_pool_entry *ent;
3395 	int i;
3396 
3397 	if (!msix_mode_supported(&ndev->mvdev))
3398 		return;
3399 
3400 	if (!ndev->mvdev.mdev->pdev)
3401 		return;
3402 
3403 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3404 	if (!ndev->irqp.entries)
3405 		return;
3406 
3407 
3408 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3409 		ent = ndev->irqp.entries + i;
3410 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3411 			 dev_name(&ndev->mvdev.vdev.dev), i);
3412 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3413 		if (!ent->map.virq)
3414 			return;
3415 
3416 		ndev->irqp.num_ent++;
3417 	}
3418 }
3419 
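/*
 * Device add path: validate the requested provisioning against the supported
 * features, derive the number of virtqueues from the device capabilities and
 * the optional max_vq_pairs attribute, provision MTU/MAC/link state into the
 * config space, allocate transport and MR resources, and finally register the
 * vDPA device with max_vqs + 1 queues (the extra one being the control VQ).
 */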
3420 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3421 			     const struct vdpa_dev_set_config *add_config)
3422 {
3423 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3424 	struct virtio_net_config *config;
3425 	struct mlx5_core_dev *pfmdev;
3426 	struct mlx5_vdpa_dev *mvdev;
3427 	struct mlx5_vdpa_net *ndev;
3428 	struct mlx5_core_dev *mdev;
3429 	u64 device_features;
3430 	u32 max_vqs;
3431 	u16 mtu;
3432 	int err;
3433 
3434 	if (mgtdev->ndev)
3435 		return -ENOSPC;
3436 
3437 	mdev = mgtdev->madev->mdev;
3438 	device_features = mgtdev->mgtdev.supported_features;
3439 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3440 		if (add_config->device_features & ~device_features) {
3441 			dev_warn(mdev->device,
3442 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3443 				 add_config->device_features, device_features);
3444 			return -EINVAL;
3445 		}
3446 		device_features &= add_config->device_features;
3447 	} else {
3448 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3449 	}
3450 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3451 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3452 		dev_warn(mdev->device,
3453 			 "Must provision minimum features 0x%llx for this device",
3454 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3455 		return -EOPNOTSUPP;
3456 	}
3457 
3458 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3459 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3460 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3461 		return -EOPNOTSUPP;
3462 	}
3463 
3464 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3465 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3466 	if (max_vqs < 2) {
3467 		dev_warn(mdev->device,
3468 			 "%d virtqueues are supported. At least 2 are required\n",
3469 			 max_vqs);
3470 		return -EAGAIN;
3471 	}
3472 
3473 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3474 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3475 			return -EINVAL;
3476 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3477 	} else {
3478 		max_vqs = 2;
3479 	}
3480 
3481 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3482 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3483 	if (IS_ERR(ndev))
3484 		return PTR_ERR(ndev);
3485 
3486 	ndev->mvdev.max_vqs = max_vqs;
3487 	mvdev = &ndev->mvdev;
3488 	mvdev->mdev = mdev;
3489 
3490 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3491 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3492 	if (!ndev->vqs || !ndev->event_cbs) {
3493 		err = -ENOMEM;
3494 		goto err_alloc;
3495 	}
3496 
3497 	init_mvqs(ndev);
3498 	allocate_irqs(ndev);
3499 	init_rwsem(&ndev->reslock);
3500 	config = &ndev->config;
3501 
3502 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3503 		err = config_func_mtu(mdev, add_config->net.mtu);
3504 		if (err)
3505 			goto err_alloc;
3506 	}
3507 
3508 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3509 		err = query_mtu(mdev, &mtu);
3510 		if (err)
3511 			goto err_alloc;
3512 
3513 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3514 	}
3515 
3516 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3517 		if (get_link_state(mvdev))
3518 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3519 		else
3520 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3521 	}
3522 
3523 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3524 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3525 	/* Don't bother setting the mac address in config if _F_MAC is not going to be provisioned */
3526 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3527 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3528 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3529 		if (err)
3530 			goto err_alloc;
3531 	}
3532 
3533 	if (!is_zero_ether_addr(config->mac)) {
3534 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3535 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3536 		if (err)
3537 			goto err_alloc;
3538 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3539 		/*
3540 		 * We used to clear the _F_MAC feature bit when a zero MAC
3541 		 * address was seen and device features were not specifically
3542 		 * provisioned. Keep that behaviour so old scripts do not
3543 		 * break.
3544 		 */
3545 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3546 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3547 		/* Don't provision zero mac address for _F_MAC */
3548 		mlx5_vdpa_warn(&ndev->mvdev,
3549 			       "No mac address provisioned?\n");
3550 		err = -EINVAL;
3551 		goto err_alloc;
3552 	}
3553 
3554 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3555 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3556 
3557 	ndev->mvdev.mlx_features = device_features;
3558 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3559 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3560 	if (err)
3561 		goto err_mpfs;
3562 
3563 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3564 		err = mlx5_vdpa_create_dma_mr(mvdev);
3565 		if (err)
3566 			goto err_res;
3567 	}
3568 
3569 	err = alloc_resources(ndev);
3570 	if (err)
3571 		goto err_mr;
3572 
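	/* Control virtqueue commands are handled from a dedicated single-threaded workqueue */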
3573 	ndev->cvq_ent.mvdev = mvdev;
3574 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3575 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3576 	if (!mvdev->wq) {
3577 		err = -ENOMEM;
3578 		goto err_res2;
3579 	}
3580 
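	/* Expose max_vqs data virtqueues plus the control virtqueue to the vdpa core */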
3581 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3582 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3583 	if (err)
3584 		goto err_reg;
3585 
3586 	mgtdev->ndev = ndev;
3587 	return 0;
3588 
3589 err_reg:
3590 	destroy_workqueue(mvdev->wq);
3591 err_res2:
3592 	free_resources(ndev);
3593 err_mr:
3594 	mlx5_vdpa_destroy_mr_resources(mvdev);
3595 err_res:
3596 	mlx5_vdpa_free_resources(&ndev->mvdev);
3597 err_mpfs:
3598 	if (!is_zero_ether_addr(config->mac))
3599 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3600 err_alloc:
3601 	put_device(&mvdev->vdev.dev);
3602 	return err;
3603 }
3604 
3605 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3606 {
3607 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3608 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3609 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3610 	struct workqueue_struct *wq;
3611 
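	/*
	 * Stop link notifications and unregister the device before the
	 * workqueue is destroyed so that no new work can be queued.
	 */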
3612 	unregister_link_notifier(ndev);
3613 	_vdpa_unregister_device(dev);
3614 	wq = mvdev->wq;
3615 	mvdev->wq = NULL;
3616 	destroy_workqueue(wq);
3617 	mgtdev->ndev = NULL;
3618 }
3619 
3620 static const struct vdpa_mgmtdev_ops mdev_ops = {
3621 	.dev_add = mlx5_vdpa_dev_add,
3622 	.dev_del = mlx5_vdpa_dev_del,
3623 };
3624 
3625 static struct virtio_device_id id_table[] = {
3626 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3627 	{ 0 },
3628 };
3629 
3630 static int mlx5v_probe(struct auxiliary_device *adev,
3631 		       const struct auxiliary_device_id *id)
3633 {
3634 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3635 	struct mlx5_core_dev *mdev = madev->mdev;
3636 	struct mlx5_vdpa_mgmtdev *mgtdev;
3637 	int err;
3638 
3639 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3640 	if (!mgtdev)
3641 		return -ENOMEM;
3642 
3643 	mgtdev->mgtdev.ops = &mdev_ops;
3644 	mgtdev->mgtdev.device = mdev->device;
3645 	mgtdev->mgtdev.id_table = id_table;
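	/*
	 * Config attributes that may be provisioned when the device is
	 * added, e.g. (illustrative iproute2 invocation):
	 *   vdpa dev add name vdpa0 mgmtdev pci/<BDF> mac <addr> max_vqp 4 mtu 1500
	 */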
3646 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3647 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3648 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3649 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
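	/* The extra queue beyond the device cap accounts for the control virtqueue */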
3650 	mgtdev->mgtdev.max_supported_vqs =
3651 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3652 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3653 	mgtdev->madev = madev;
3654 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
3655 
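	/* Hide the .get_vq_desc_group op when the device does not support descriptor group mkeys */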
3656 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
3657 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
3658 
3659 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3660 	if (err)
3661 		goto reg_err;
3662 
3663 	auxiliary_set_drvdata(adev, mgtdev);
3664 
3665 	return 0;
3666 
3667 reg_err:
3668 	kfree(mgtdev);
3669 	return err;
3670 }
3671 
3672 static void mlx5v_remove(struct auxiliary_device *adev)
3673 {
3674 	struct mlx5_vdpa_mgmtdev *mgtdev;
3675 
3676 	mgtdev = auxiliary_get_drvdata(adev);
3677 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3678 	kfree(mgtdev);
3679 }
3680 
3681 static const struct auxiliary_device_id mlx5v_id_table[] = {
3682 	{ .name = MLX5_ADEV_NAME ".vnet", },
3683 	{},
3684 };
3685 
3686 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3687 
3688 static struct auxiliary_driver mlx5v_driver = {
3689 	.name = "vnet",
3690 	.probe = mlx5v_probe,
3691 	.remove = mlx5v_remove,
3692 	.id_table = mlx5v_id_table,
3693 };
3694 
3695 module_auxiliary_driver(mlx5v_driver);
3696