xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 2638134f710364c9e696a155bf16c6847959b1d9)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
52  * 5.1.6.5.5 "Device operation in multiqueue mode":
53  *
54  * Multiqueue is disabled by default.
55  * The driver enables multiqueue by sending a command using class
56  * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
57  * operation, as follows: ...
58  */
59 #define MLX5V_DEFAULT_VQ_COUNT 2
60 
61 #define MLX5V_DEFAULT_VQ_SIZE 256
62 
63 struct mlx5_vdpa_cq_buf {
64 	struct mlx5_frag_buf_ctrl fbc;
65 	struct mlx5_frag_buf frag_buf;
66 	int cqe_size;
67 	int nent;
68 };
69 
70 struct mlx5_vdpa_cq {
71 	struct mlx5_core_cq mcq;
72 	struct mlx5_vdpa_cq_buf buf;
73 	struct mlx5_db db;
74 	int cqe;
75 };
76 
77 struct mlx5_vdpa_umem {
78 	struct mlx5_frag_buf_ctrl fbc;
79 	struct mlx5_frag_buf frag_buf;
80 	int size;
81 	u32 id;
82 };
83 
84 struct mlx5_vdpa_qp {
85 	struct mlx5_core_qp mqp;
86 	struct mlx5_frag_buf frag_buf;
87 	struct mlx5_db db;
88 	u16 head;
89 	bool fw;
90 };
91 
92 struct mlx5_vq_restore_info {
93 	u32 num_ent;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u16 avail_index;
98 	u16 used_index;
99 	struct msi_map map;
100 	bool ready;
101 	bool restore;
102 };
103 
104 struct mlx5_vdpa_virtqueue {
105 	bool ready;
106 	u64 desc_addr;
107 	u64 device_addr;
108 	u64 driver_addr;
109 	u32 num_ent;
110 
111 	/* Resources for implementing the notification channel from the device
112 	 * to the driver. fwqp is the firmware end of an RC connection; the
113 	 * other end is vqqp used by the driver. cq is where completions are
114 	 * reported.
115 	 */
116 	struct mlx5_vdpa_cq cq;
117 	struct mlx5_vdpa_qp fwqp;
118 	struct mlx5_vdpa_qp vqqp;
119 
120 	/* umem resources are required for the virtqueue operation. Their use
121 	 * is internal to the device and they must be provided by the driver.
122 	 */
123 	struct mlx5_vdpa_umem umem1;
124 	struct mlx5_vdpa_umem umem2;
125 	struct mlx5_vdpa_umem umem3;
126 
127 	u32 counter_set_id;
128 	bool initialized;
129 	int index;
130 	u32 virtq_id;
131 	struct mlx5_vdpa_net *ndev;
132 	u16 avail_idx;
133 	u16 used_idx;
134 	int fw_state;
135 
136 	u64 modified_fields;
137 
138 	struct mlx5_vdpa_mr *vq_mr;
139 	struct mlx5_vdpa_mr *desc_mr;
140 
141 	struct msi_map map;
142 
143 	/* keep last in the struct */
144 	struct mlx5_vq_restore_info ri;
145 };
146 
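/* Virtqueue index layout: without MQ there is a single data VQ pair at
 * indices 0 and 1, with the control VQ (if negotiated) at index 2. With MQ,
 * any index up to max_idx is valid.
 */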
147 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
148 {
149 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
150 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
151 			return idx < 2;
152 		else
153 			return idx < 3;
154 	}
155 
156 	return idx <= mvdev->max_idx;
157 }
158 
159 static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
160 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
161 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
162 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
163 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
164 
165 static bool mlx5_vdpa_debug;
166 
167 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
168 	do {                                                                                       \
169 		if (features & BIT_ULL(_feature))                                                  \
170 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
171 	} while (0)
172 
173 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
174 	do {                                                                                       \
175 		if (status & (_status))                                                            \
176 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
177 	} while (0)
178 
179 /* TODO: cross-endian support */
180 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
181 {
182 	return virtio_legacy_is_little_endian() ||
183 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
184 }
185 
186 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
187 {
188 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
189 }
190 
191 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
192 {
193 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
194 }
195 
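/* The control VQ index comes right after the data VQs: 2 when MQ is not
 * negotiated, max_vqs otherwise.
 */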
196 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
197 {
198 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
199 		return 2;
200 
201 	return mvdev->max_vqs;
202 }
203 
204 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
205 {
206 	return idx == ctrl_vq_idx(mvdev);
207 }
208 
209 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
210 {
211 	if (status & ~VALID_STATUS_MASK)
212 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
213 			       status & ~VALID_STATUS_MASK);
214 
215 	if (!mlx5_vdpa_debug)
216 		return;
217 
218 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
219 	if (set && !status) {
220 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
221 		return;
222 	}
223 
224 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
225 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
226 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
227 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
228 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
229 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
230 }
231 
232 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
233 {
234 	if (features & ~VALID_FEATURES_MASK)
235 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
236 			       features & ~VALID_FEATURES_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
242 	if (!features)
243 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
244 
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
279 }
280 
281 static int create_tis(struct mlx5_vdpa_net *ndev)
282 {
283 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
284 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
285 	void *tisc;
286 	int err;
287 
288 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
289 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
290 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
291 	if (err)
292 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
293 
294 	return err;
295 }
296 
297 static void destroy_tis(struct mlx5_vdpa_net *ndev)
298 {
299 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
300 }
301 
302 #define MLX5_VDPA_CQE_SIZE 64
303 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
304 
305 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
306 {
307 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
308 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
309 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
310 	int err;
311 
312 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
313 				       ndev->mvdev.mdev->priv.numa_node);
314 	if (err)
315 		return err;
316 
317 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
318 
319 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
320 	buf->nent = nent;
321 
322 	return 0;
323 }
324 
325 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
326 {
327 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
328 
329 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
330 					ndev->mvdev.mdev->priv.numa_node);
331 }
332 
333 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
334 {
335 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
336 }
337 
338 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
339 {
340 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
341 }
342 
343 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
344 {
345 	struct mlx5_cqe64 *cqe64;
346 	void *cqe;
347 	int i;
348 
349 	for (i = 0; i < buf->nent; i++) {
350 		cqe = get_cqe(vcq, i);
351 		cqe64 = cqe;
352 		cqe64->op_own = MLX5_CQE_INVALID << 4;
353 	}
354 }
355 
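/* Return the CQE at index n if it is owned by software, i.e. its opcode is
 * valid and its ownership bit matches the current polarity of the consumer
 * index; return NULL otherwise.
 */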
356 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
357 {
358 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
359 
360 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
361 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
362 		return cqe64;
363 
364 	return NULL;
365 }
366 
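/* Advance the receive queue head by n entries and publish the new value in
 * the doorbell record so the hardware can reuse the completed WQEs.
 */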
367 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
368 {
369 	vqp->head += n;
370 	vqp->db.db[0] = cpu_to_be32(vqp->head);
371 }
372 
373 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
374 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
375 {
376 	struct mlx5_vdpa_qp *vqp;
377 	__be64 *pas;
378 	void *qpc;
379 
380 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
381 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
382 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
383 	if (vqp->fw) {
384 		/* The firmware QP is allocated by the driver for the firmware's use,
385 		 * so we can skip some of the parameters; the firmware will choose them.
386 		 */
387 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
388 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
389 		MLX5_SET(qpc, qpc, no_sq, 1);
390 		return;
391 	}
392 
393 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
394 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
395 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
396 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
397 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
398 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
399 	MLX5_SET(qpc, qpc, no_sq, 1);
400 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
401 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
402 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
403 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
404 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
405 }
406 
407 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
408 {
409 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
410 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
411 					ndev->mvdev.mdev->priv.numa_node);
412 }
413 
414 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
415 {
416 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
417 }
418 
419 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
420 		     struct mlx5_vdpa_qp *vqp)
421 {
422 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
423 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
424 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
425 	void *qpc;
426 	void *in;
427 	int err;
428 
429 	if (!vqp->fw) {
430 		vqp = &mvq->vqqp;
431 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
432 		if (err)
433 			return err;
434 
435 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
436 		if (err)
437 			goto err_db;
438 		inlen += vqp->frag_buf.npages * sizeof(__be64);
439 	}
440 
441 	in = kzalloc(inlen, GFP_KERNEL);
442 	if (!in) {
443 		err = -ENOMEM;
444 		goto err_kzalloc;
445 	}
446 
447 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
448 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
449 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
450 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
451 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
452 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
453 	if (!vqp->fw)
454 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
455 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
456 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
457 	kfree(in);
458 	if (err)
459 		goto err_kzalloc;
460 
461 	vqp->mqp.uid = ndev->mvdev.res.uid;
462 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
463 
464 	if (!vqp->fw)
465 		rx_post(vqp, mvq->num_ent);
466 
467 	return 0;
468 
469 err_kzalloc:
470 	if (!vqp->fw)
471 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 err_db:
473 	if (!vqp->fw)
474 		rq_buf_free(ndev, vqp);
475 
476 	return err;
477 }
478 
479 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
480 {
481 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
482 
483 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
484 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
485 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
486 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
487 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
488 	if (!vqp->fw) {
489 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
490 		rq_buf_free(ndev, vqp);
491 	}
492 }
493 
494 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
495 {
496 	return get_sw_cqe(cq, cq->mcq.cons_index);
497 }
498 
499 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
500 {
501 	struct mlx5_cqe64 *cqe64;
502 
503 	cqe64 = next_cqe_sw(vcq);
504 	if (!cqe64)
505 		return -EAGAIN;
506 
507 	vcq->mcq.cons_index++;
508 	return 0;
509 }
510 
511 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
512 {
513 	struct mlx5_vdpa_net *ndev = mvq->ndev;
514 	struct vdpa_callback *event_cb;
515 
516 	event_cb = &ndev->event_cbs[mvq->index];
517 	mlx5_cq_set_ci(&mvq->cq.mcq);
518 
519 	/* make sure the CQ consumer update is visible to the hardware before
520 	 * updating the RX doorbell record.
521 	 */
522 	dma_wmb();
523 	rx_post(&mvq->vqqp, num);
524 	if (event_cb->callback)
525 		event_cb->callback(event_cb->private);
526 }
527 
528 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
529 {
530 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
531 	struct mlx5_vdpa_net *ndev = mvq->ndev;
532 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
533 	int num = 0;
534 
535 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
536 		num++;
537 		if (num > mvq->num_ent / 2) {
538 			/* If completions keep coming while we poll, we want to
539 			 * let the hardware know that we consumed them by
540 			 * updating the doorbell record. We also let the vdpa
541 			 * core know about this so it can pass it on to the
542 			 * virtio driver in the guest.
543 			 */
544 			mlx5_vdpa_handle_completions(mvq, num);
545 			num = 0;
546 		}
547 	}
548 
549 	if (num)
550 		mlx5_vdpa_handle_completions(mvq, num);
551 
552 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
553 }
554 
555 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
556 {
557 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
558 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
559 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
560 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
561 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
562 	__be64 *pas;
563 	int inlen;
564 	void *cqc;
565 	void *in;
566 	int err;
567 	int eqn;
568 
569 	err = mlx5_db_alloc(mdev, &vcq->db);
570 	if (err)
571 		return err;
572 
573 	vcq->mcq.set_ci_db = vcq->db.db;
574 	vcq->mcq.arm_db = vcq->db.db + 1;
575 	vcq->mcq.cqe_sz = 64;
576 
577 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
578 	if (err)
579 		goto err_db;
580 
581 	cq_frag_buf_init(vcq, &vcq->buf);
582 
583 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
584 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
585 	in = kzalloc(inlen, GFP_KERNEL);
586 	if (!in) {
587 		err = -ENOMEM;
588 		goto err_vzalloc;
589 	}
590 
591 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
592 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
593 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
594 
595 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
596 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
597 
598 	/* Use vector 0 by default. Consider adding code to choose the least
599 	 * used vector.
600 	 */
601 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
602 	if (err)
603 		goto err_vec;
604 
605 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
606 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
607 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
608 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
609 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
610 
611 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
612 	if (err)
613 		goto err_vec;
614 
615 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
616 	vcq->cqe = num_ent;
617 	vcq->mcq.set_ci_db = vcq->db.db;
618 	vcq->mcq.arm_db = vcq->db.db + 1;
619 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
620 	kfree(in);
621 	return 0;
622 
623 err_vec:
624 	kfree(in);
625 err_vzalloc:
626 	cq_frag_buf_free(ndev, &vcq->buf);
627 err_db:
628 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
629 	return err;
630 }
631 
632 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
633 {
634 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
635 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
636 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
637 
638 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
639 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
640 		return;
641 	}
642 	cq_frag_buf_free(ndev, &vcq->buf);
643 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
644 }
645 
646 static int read_umem_params(struct mlx5_vdpa_net *ndev)
647 {
648 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
649 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
650 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
651 	int out_size;
652 	void *caps;
653 	void *out;
654 	int err;
655 
656 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
657 	out = kzalloc(out_size, GFP_KERNEL);
658 	if (!out)
659 		return -ENOMEM;
660 
661 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
662 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
663 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
664 	if (err) {
665 		mlx5_vdpa_warn(&ndev->mvdev,
666 			"Failed reading vdpa umem capabilities with err %d\n", err);
667 		goto out;
668 	}
669 
670 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
671 
672 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
673 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
674 
675 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
676 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
677 
678 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
679 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
680 
681 out:
682 	kfree(out);
683 	return err;
684 }
685 
686 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
687 			  struct mlx5_vdpa_umem **umemp)
688 {
689 	u32 p_a;
690 	u32 p_b;
691 
692 	switch (num) {
693 	case 1:
694 		p_a = ndev->umem_1_buffer_param_a;
695 		p_b = ndev->umem_1_buffer_param_b;
696 		*umemp = &mvq->umem1;
697 		break;
698 	case 2:
699 		p_a = ndev->umem_2_buffer_param_a;
700 		p_b = ndev->umem_2_buffer_param_b;
701 		*umemp = &mvq->umem2;
702 		break;
703 	case 3:
704 		p_a = ndev->umem_3_buffer_param_a;
705 		p_b = ndev->umem_3_buffer_param_b;
706 		*umemp = &mvq->umem3;
707 		break;
708 	}
709 
710 	(*umemp)->size = p_a * mvq->num_ent + p_b;
711 }
712 
713 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
714 {
715 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
716 }
717 
718 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
719 {
720 	int inlen;
721 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
722 	void *um;
723 	void *in;
724 	int err;
725 	__be64 *pas;
726 	struct mlx5_vdpa_umem *umem;
727 
728 	set_umem_size(ndev, mvq, num, &umem);
729 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
730 	if (err)
731 		return err;
732 
733 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
734 
735 	in = kzalloc(inlen, GFP_KERNEL);
736 	if (!in) {
737 		err = -ENOMEM;
738 		goto err_in;
739 	}
740 
741 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
742 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
743 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
744 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
745 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
746 
747 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
748 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
749 
750 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
751 	if (err) {
752 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
753 		goto err_cmd;
754 	}
755 
756 	kfree(in);
757 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
758 
759 	return 0;
760 
761 err_cmd:
762 	kfree(in);
763 err_in:
764 	umem_frag_buf_free(ndev, umem);
765 	return err;
766 }
767 
768 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
769 {
770 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
771 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
772 	struct mlx5_vdpa_umem *umem;
773 
774 	switch (num) {
775 	case 1:
776 		umem = &mvq->umem1;
777 		break;
778 	case 2:
779 		umem = &mvq->umem2;
780 		break;
781 	case 3:
782 		umem = &mvq->umem3;
783 		break;
784 	}
785 
786 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
787 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
788 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
789 		return;
790 
791 	umem_frag_buf_free(ndev, umem);
792 }
793 
794 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
795 {
796 	int num;
797 	int err;
798 
799 	for (num = 1; num <= 3; num++) {
800 		err = create_umem(ndev, mvq, num);
801 		if (err)
802 			goto err_umem;
803 	}
804 	return 0;
805 
806 err_umem:
807 	for (num--; num > 0; num--)
808 		umem_destroy(ndev, mvq, num);
809 
810 	return err;
811 }
812 
813 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
814 {
815 	int num;
816 
817 	for (num = 3; num > 0; num--)
818 		umem_destroy(ndev, mvq, num);
819 }
820 
821 static int get_queue_type(struct mlx5_vdpa_net *ndev)
822 {
823 	u32 type_mask;
824 
825 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
826 
827 	/* prefer split queue */
828 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
829 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
830 
831 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
832 
833 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
834 }
835 
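/* Per the virtio-net queue layout, even virtqueue indices are RX queues and
 * odd indices are TX queues.
 */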
836 static bool vq_is_tx(u16 idx)
837 {
838 	return idx % 2;
839 }
840 
841 enum {
842 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
843 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
844 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
845 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
846 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
847 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
848 	MLX5_VIRTIO_NET_F_CSUM = 10,
849 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
850 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
851 };
852 
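/* Translate negotiated virtio-net feature bits into the bit positions the
 * device expects in the queue_feature_bit_mask fields (see the enum above).
 */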
853 static u16 get_features(u64 features)
854 {
855 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
856 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
857 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
858 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
859 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
860 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
861 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
862 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
863 }
864 
865 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
866 {
867 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
868 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
869 }
870 
871 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
872 {
873 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
874 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
875 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
876 }
877 
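/* Create the virtio_net_q object in firmware. When 'filled' is set, the ring
 * addresses, the available/used indices and the mkeys are programmed at
 * creation time; otherwise they are deferred to a later modify command.
 */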
878 static int create_virtqueue(struct mlx5_vdpa_net *ndev,
879 			    struct mlx5_vdpa_virtqueue *mvq,
880 			    bool filled)
881 {
882 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
883 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
884 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
885 	struct mlx5_vdpa_mr *vq_mr;
886 	struct mlx5_vdpa_mr *vq_desc_mr;
887 	u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
888 	void *obj_context;
889 	u16 mlx_features;
890 	void *cmd_hdr;
891 	void *vq_ctx;
892 	void *in;
893 	int err;
894 
895 	err = umems_create(ndev, mvq);
896 	if (err)
897 		return err;
898 
899 	in = kzalloc(inlen, GFP_KERNEL);
900 	if (!in) {
901 		err = -ENOMEM;
902 		goto err_alloc;
903 	}
904 
905 	mlx_features = get_features(features);
906 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
907 
908 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
909 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
910 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
911 
912 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
913 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
914 		 mlx_features >> 3);
915 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
916 		 mlx_features & 7);
917 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
918 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
919 
920 	if (vq_is_tx(mvq->index))
921 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
922 
923 	if (mvq->map.virq) {
924 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
925 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
926 	} else {
927 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
928 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
929 	}
930 
931 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
932 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
933 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
934 		 !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
935 
936 	if (filled) {
937 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
938 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
939 
940 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
941 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
942 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
943 
944 		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
945 		if (vq_mr)
946 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
947 
948 		vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
949 		if (vq_desc_mr &&
950 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
951 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
952 	} else {
953 		/* If there is no mr update now, make sure the existing mkeys get
954 		 * programmed when the queue is later modified to ready.
955 		 */
956 		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
957 		if (vq_mr)
958 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
959 
960 		vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
961 		if (vq_desc_mr)
962 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
963 	}
964 
965 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
966 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
967 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
968 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
969 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
970 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
971 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
972 	if (counters_supported(&ndev->mvdev))
973 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
974 
975 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
976 	if (err)
977 		goto err_cmd;
978 
979 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
980 	kfree(in);
981 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
982 
983 	if (filled) {
984 		mlx5_vdpa_get_mr(mvdev, vq_mr);
985 		mvq->vq_mr = vq_mr;
986 
987 		if (vq_desc_mr &&
988 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
989 			mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
990 			mvq->desc_mr = vq_desc_mr;
991 		}
992 	}
993 
994 	return 0;
995 
996 err_cmd:
997 	kfree(in);
998 err_alloc:
999 	umems_destroy(ndev, mvq);
1000 	return err;
1001 }
1002 
1003 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1004 {
1005 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
1006 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
1007 
1008 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
1009 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1010 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
1011 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
1012 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
1013 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1014 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
1015 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
1016 		return;
1017 	}
1018 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
1019 	umems_destroy(ndev, mvq);
1020 
1021 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
1022 	mvq->vq_mr = NULL;
1023 
1024 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
1025 	mvq->desc_mr = NULL;
1026 }
1027 
1028 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1029 {
1030 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
1031 }
1032 
1033 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1034 {
1035 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1036 }
1037 
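/* Allocate and fill the mailboxes for the requested modify-QP command. On
 * failure, both *in and *out are set to NULL so the caller can detect it.
 * Note: the qpc field is assumed to sit at the same offset in the
 * rst2init/init2rtr/rtr2rts input layouts, which is why rst2init_qp_in is
 * reused below to locate it.
 */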
1038 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1039 			int *outlen, u32 qpn, u32 rqpn)
1040 {
1041 	void *qpc;
1042 	void *pp;
1043 
1044 	switch (cmd) {
1045 	case MLX5_CMD_OP_2RST_QP:
1046 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1047 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1048 		*in = kzalloc(*inlen, GFP_KERNEL);
1049 		*out = kzalloc(*outlen, GFP_KERNEL);
1050 		if (!*in || !*out)
1051 			goto outerr;
1052 
1053 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1054 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1055 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1056 		break;
1057 	case MLX5_CMD_OP_RST2INIT_QP:
1058 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1059 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1060 		*in = kzalloc(*inlen, GFP_KERNEL);
1061 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1062 		if (!*in || !*out)
1063 			goto outerr;
1064 
1065 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1066 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1067 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1068 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1069 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1070 		MLX5_SET(qpc, qpc, rwe, 1);
1071 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1072 		MLX5_SET(ads, pp, vhca_port_num, 1);
1073 		break;
1074 	case MLX5_CMD_OP_INIT2RTR_QP:
1075 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1076 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1077 		*in = kzalloc(*inlen, GFP_KERNEL);
1078 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1079 		if (!*in || !*out)
1080 			goto outerr;
1081 
1082 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1083 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1084 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1085 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1086 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1087 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1088 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1089 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1090 		MLX5_SET(ads, pp, fl, 1);
1091 		break;
1092 	case MLX5_CMD_OP_RTR2RTS_QP:
1093 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1094 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1095 		*in = kzalloc(*inlen, GFP_KERNEL);
1096 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1097 		if (!*in || !*out)
1098 			goto outerr;
1099 
1100 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1101 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1102 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1103 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1104 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1105 		MLX5_SET(ads, pp, ack_timeout, 14);
1106 		MLX5_SET(qpc, qpc, retry_count, 7);
1107 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1108 		break;
1109 	default:
1110 		goto outerr_nullify;
1111 	}
1112 
1113 	return;
1114 
1115 outerr:
1116 	kfree(*in);
1117 	kfree(*out);
1118 outerr_nullify:
1119 	*in = NULL;
1120 	*out = NULL;
1121 }
1122 
1123 static void free_inout(void *in, void *out)
1124 {
1125 	kfree(in);
1126 	kfree(out);
1127 }
1128 
1129 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1130  * firmware. The fw argument indicates whether the QP being modified is the
1131  * one used by firmware.
1132  */
1133 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1134 {
1135 	int outlen;
1136 	int inlen;
1137 	void *out;
1138 	void *in;
1139 	int err;
1140 
1141 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1142 	if (!in || !out)
1143 		return -ENOMEM;
1144 
1145 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1146 	free_inout(in, out);
1147 	return err;
1148 }
1149 
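/* Walk both QPs through the RC state machine (RST -> INIT -> RTR) and move
 * the firmware QP to RTS, establishing the loopback connection used for the
 * notification channel.
 */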
1150 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1151 {
1152 	int err;
1153 
1154 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1155 	if (err)
1156 		return err;
1157 
1158 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1159 	if (err)
1160 		return err;
1161 
1162 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1163 	if (err)
1164 		return err;
1165 
1166 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1167 	if (err)
1168 		return err;
1169 
1170 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1171 	if (err)
1172 		return err;
1173 
1174 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1175 	if (err)
1176 		return err;
1177 
1178 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1179 }
1180 
1181 struct mlx5_virtq_attr {
1182 	u8 state;
1183 	u16 available_index;
1184 	u16 used_index;
1185 };
1186 
1187 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1188 			   struct mlx5_virtq_attr *attr)
1189 {
1190 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1191 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1192 	void *out;
1193 	void *obj_context;
1194 	void *cmd_hdr;
1195 	int err;
1196 
1197 	out = kzalloc(outlen, GFP_KERNEL);
1198 	if (!out)
1199 		return -ENOMEM;
1200 
1201 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1202 
1203 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1204 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1205 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1206 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1207 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1208 	if (err)
1209 		goto err_cmd;
1210 
1211 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1212 	memset(attr, 0, sizeof(*attr));
1213 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1214 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1215 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1216 	kfree(out);
1217 	return 0;
1218 
1219 err_cmd:
1220 	kfree(out);
1221 	return err;
1222 }
1223 
1224 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1225 {
1226 	return ndev->mvdev.vdev.config->resume;
1227 }
1228 
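/* Virtqueue object state machine: INIT -> RDY -> SUSPEND, with SUSPEND -> RDY
 * allowed only when the device is resumable.
 */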
1229 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1230 {
1231 	switch (oldstate) {
1232 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1233 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1234 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1235 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1236 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1237 		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1238 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1239 	default:
1240 		return false;
1241 	}
1242 }
1243 
1244 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1245 {
1246 	/* Only state is always modifiable */
1247 	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1248 		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1249 		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1250 
1251 	return true;
1252 }
1253 
1254 static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
1255 			    struct mlx5_vdpa_virtqueue *mvq,
1256 			    int state)
1257 {
1258 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1259 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1260 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1261 	struct mlx5_vdpa_mr *desc_mr = NULL;
1262 	struct mlx5_vdpa_mr *vq_mr = NULL;
1263 	bool state_change = false;
1264 	void *obj_context;
1265 	void *cmd_hdr;
1266 	void *vq_ctx;
1267 	void *in;
1268 	int err;
1269 
1270 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1271 		return 0;
1272 
1273 	if (!modifiable_virtqueue_fields(mvq))
1274 		return -EINVAL;
1275 
1276 	in = kzalloc(inlen, GFP_KERNEL);
1277 	if (!in)
1278 		return -ENOMEM;
1279 
1280 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1281 
1282 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1283 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1284 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1285 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1286 
1287 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1288 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1289 
1290 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
1291 		if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1292 			err = -EINVAL;
1293 			goto done;
1294 		}
1295 
1296 		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1297 		state_change = true;
1298 	}
1299 
1300 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1301 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1302 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1303 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1304 	}
1305 
1306 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1307 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1308 
1309 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1310 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1311 
1312 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
1313 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
1314 			!!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
1315 
1316 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
1317 		u16 mlx_features = get_features(ndev->mvdev.actual_features);
1318 
1319 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
1320 			 mlx_features >> 3);
1321 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
1322 			 mlx_features & 7);
1323 	}
1324 
1325 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1326 		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1327 
1328 		if (vq_mr)
1329 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1330 		else
1331 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1332 	}
1333 
1334 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1335 		desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1336 
1337 		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1338 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1339 		else
1340 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1341 	}
1342 
1343 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1344 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1345 	if (err)
1346 		goto done;
1347 
1348 	if (state_change)
1349 		mvq->fw_state = state;
1350 
1351 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1352 		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1353 		mlx5_vdpa_get_mr(mvdev, vq_mr);
1354 		mvq->vq_mr = vq_mr;
1355 	}
1356 
1357 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1358 		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1359 		mlx5_vdpa_get_mr(mvdev, desc_mr);
1360 		mvq->desc_mr = desc_mr;
1361 	}
1362 
1363 	mvq->modified_fields = 0;
1364 
1365 done:
1366 	kfree(in);
1367 	return err;
1368 }
1369 
1370 static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
1371 				  struct mlx5_vdpa_virtqueue *mvq,
1372 				  unsigned int state)
1373 {
1374 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1375 	return modify_virtqueue(ndev, mvq, state);
1376 }
1377 
1378 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1379 {
1380 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1381 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1382 	void *cmd_hdr;
1383 	int err;
1384 
1385 	if (!counters_supported(&ndev->mvdev))
1386 		return 0;
1387 
1388 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1389 
1390 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1391 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1392 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1393 
1394 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1395 	if (err)
1396 		return err;
1397 
1398 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1399 
1400 	return 0;
1401 }
1402 
1403 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1404 {
1405 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1406 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1407 
1408 	if (!counters_supported(&ndev->mvdev))
1409 		return;
1410 
1411 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1412 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1413 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1414 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1415 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1416 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1417 }
1418 
1419 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1420 {
1421 	struct vdpa_callback *cb = priv;
1422 
1423 	if (cb->callback)
1424 		return cb->callback(cb->private);
1425 
1426 	return IRQ_HANDLED;
1427 }
1428 
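/* Try to take a free entry from the pre-allocated MSI-X pool and attach the
 * VQ's interrupt handler to it. If no entry is available or request_irq()
 * fails, the VQ keeps using QP-based notification.
 */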
1429 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1430 			 struct mlx5_vdpa_virtqueue *mvq)
1431 {
1432 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1433 	struct mlx5_vdpa_irq_pool_entry *ent;
1434 	int err;
1435 	int i;
1436 
1437 	for (i = 0; i < irqp->num_ent; i++) {
1438 		ent = &irqp->entries[i];
1439 		if (!ent->used) {
1440 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1441 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1442 			ent->dev_id = &ndev->event_cbs[mvq->index];
1443 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1444 					  ent->name, ent->dev_id);
1445 			if (err)
1446 				return;
1447 
1448 			ent->used = true;
1449 			mvq->map = ent->map;
1450 			return;
1451 		}
1452 	}
1453 }
1454 
1455 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1456 			   struct mlx5_vdpa_virtqueue *mvq)
1457 {
1458 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1459 	int i;
1460 
1461 	for (i = 0; i < irqp->num_ent; i++)
1462 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1463 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1464 			irqp->entries[i].used = false;
1465 			return;
1466 		}
1467 }
1468 
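/* Create all per-VQ resources: the CQ, the firmware/driver QP pair for the
 * notification channel, the counter set, an MSI-X vector if one is available,
 * and finally the virtio_net_q object itself.
 */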
1469 static int setup_vq(struct mlx5_vdpa_net *ndev,
1470 		    struct mlx5_vdpa_virtqueue *mvq,
1471 		    bool filled)
1472 {
1473 	u16 idx = mvq->index;
1474 	int err;
1475 
1476 	if (mvq->initialized)
1477 		return 0;
1478 
1479 	err = cq_create(ndev, idx, mvq->num_ent);
1480 	if (err)
1481 		return err;
1482 
1483 	err = qp_create(ndev, mvq, &mvq->fwqp);
1484 	if (err)
1485 		goto err_fwqp;
1486 
1487 	err = qp_create(ndev, mvq, &mvq->vqqp);
1488 	if (err)
1489 		goto err_vqqp;
1490 
1491 	err = connect_qps(ndev, mvq);
1492 	if (err)
1493 		goto err_connect;
1494 
1495 	err = counter_set_alloc(ndev, mvq);
1496 	if (err)
1497 		goto err_connect;
1498 
1499 	alloc_vector(ndev, mvq);
1500 	err = create_virtqueue(ndev, mvq, filled);
1501 	if (err)
1502 		goto err_vq;
1503 
1504 	mvq->initialized = true;
1505 
1506 	if (mvq->ready) {
1507 		err = resume_vq(ndev, mvq);
1508 		if (err)
1509 			goto err_modify;
1510 	}
1511 
1512 	return 0;
1513 
1514 err_modify:
1515 	destroy_virtqueue(ndev, mvq);
1516 err_vq:
1517 	dealloc_vector(ndev, mvq);
1518 	counter_set_dealloc(ndev, mvq);
1519 err_connect:
1520 	qp_destroy(ndev, &mvq->vqqp);
1521 err_vqqp:
1522 	qp_destroy(ndev, &mvq->fwqp);
1523 err_fwqp:
1524 	cq_destroy(ndev, idx);
1525 	return err;
1526 }
1527 
1528 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1529 {
1530 	struct mlx5_virtq_attr attr;
1531 	int err;
1532 
1533 	if (!mvq->initialized)
1534 		return 0;
1535 
1536 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1537 		return 0;
1538 
1539 	err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
1540 	if (err) {
1541 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed, err: %d\n", err);
1542 		return err;
1543 	}
1544 
1545 	err = query_virtqueue(ndev, mvq, &attr);
1546 	if (err) {
1547 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue, err: %d\n", err);
1548 		return err;
1549 	}
1550 
1551 	mvq->avail_idx = attr.available_index;
1552 	mvq->used_idx = attr.used_index;
1553 
1554 	return 0;
1555 }
1556 
1557 static int suspend_vqs(struct mlx5_vdpa_net *ndev)
1558 {
1559 	int err = 0;
1560 	int i;
1561 
1562 	for (i = 0; i < ndev->cur_num_vqs; i++) {
1563 		int local_err = suspend_vq(ndev, &ndev->vqs[i]);
1564 
1565 		err = local_err ? local_err : err;
1566 	}
1567 
1568 	return err;
1569 }
1570 
1571 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1572 {
1573 	int err;
1574 
1575 	if (!mvq->initialized)
1576 		return 0;
1577 
1578 	switch (mvq->fw_state) {
1579 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1580 		/* Due to a FW quirk we need to modify the VQ fields first, then change state.
1581 		 * This should be fixed soon; after that, a single command can be used.
1582 		 */
1583 		err = modify_virtqueue(ndev, mvq, 0);
1584 		if (err) {
1585 			mlx5_vdpa_warn(&ndev->mvdev,
1586 				"modify vq properties failed for vq %u, err: %d\n",
1587 				mvq->index, err);
1588 			return err;
1589 		}
1590 		break;
1591 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1592 		if (!is_resumable(ndev)) {
1593 			mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
1594 			return -EINVAL;
1595 		}
1596 		break;
1597 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1598 		return 0;
1599 	default:
1600 		mlx5_vdpa_warn(&ndev->mvdev, "resume vq %u called from bad state %d\n",
1601 			       mvq->index, mvq->fw_state);
1602 		return -EINVAL;
1603 	}
1604 
1605 	err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1606 	if (err)
1607 		mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u, err: %d\n",
1608 			       mvq->index, err);
1609 
1610 	return err;
1611 }
1612 
1613 static int resume_vqs(struct mlx5_vdpa_net *ndev)
1614 {
1615 	int err = 0;
1616 
1617 	for (int i = 0; i < ndev->cur_num_vqs; i++) {
1618 		int local_err = resume_vq(ndev, &ndev->vqs[i]);
1619 
1620 		err = local_err ? local_err : err;
1621 	}
1622 
1623 	return err;
1624 }
1625 
1626 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1627 {
1628 	if (!mvq->initialized)
1629 		return;
1630 
1631 	suspend_vq(ndev, mvq);
1632 	mvq->modified_fields = 0;
1633 	destroy_virtqueue(ndev, mvq);
1634 	dealloc_vector(ndev, mvq);
1635 	counter_set_dealloc(ndev, mvq);
1636 	qp_destroy(ndev, &mvq->vqqp);
1637 	qp_destroy(ndev, &mvq->fwqp);
1638 	cq_destroy(ndev, mvq->index);
1639 	mvq->initialized = false;
1640 }
1641 
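/* The RQT lists only the receive virtqueues, i.e. the even VQ indices, hence
 * the step of two when filling the list.
 */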
1642 static int create_rqt(struct mlx5_vdpa_net *ndev)
1643 {
1644 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1645 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1646 	__be32 *list;
1647 	void *rqtc;
1648 	int inlen;
1649 	void *in;
1650 	int i, j;
1651 	int err;
1652 
1653 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1654 	in = kzalloc(inlen, GFP_KERNEL);
1655 	if (!in)
1656 		return -ENOMEM;
1657 
1658 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1659 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1660 
1661 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1662 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1663 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1664 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1665 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1666 
1667 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1668 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1669 	kfree(in);
1670 	if (err)
1671 		return err;
1672 
1673 	return 0;
1674 }
1675 
1676 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1677 
1678 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1679 {
1680 	int act_sz = roundup_pow_of_two(num / 2);
1681 	__be32 *list;
1682 	void *rqtc;
1683 	int inlen;
1684 	void *in;
1685 	int i, j;
1686 	int err;
1687 
1688 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1689 	in = kzalloc(inlen, GFP_KERNEL);
1690 	if (!in)
1691 		return -ENOMEM;
1692 
1693 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1694 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1695 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1696 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1697 
1698 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1699 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1700 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1701 
1702 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1703 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1704 	kfree(in);
1705 	if (err)
1706 		return err;
1707 
1708 	return 0;
1709 }
1710 
1711 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1712 {
1713 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1714 }
1715 
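/*
 * Create an indirect TIR on top of the RQT. Received packets are spread
 * over the table with a symmetric Toeplitz hash computed over the outer
 * IPv4 source/destination addresses and TCP ports.
 */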
1716 static int create_tir(struct mlx5_vdpa_net *ndev)
1717 {
1718 #define HASH_IP_L4PORTS                                                                            \
1719 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1720 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1721 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1722 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1723 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1724 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1725 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1726 	void *rss_key;
1727 	void *outer;
1728 	void *tirc;
1729 	void *in;
1730 	int err;
1731 
1732 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1733 	if (!in)
1734 		return -ENOMEM;
1735 
1736 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1737 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1738 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1739 
1740 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1741 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1742 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1743 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1744 
1745 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1746 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1747 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1748 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1749 
1750 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1751 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1752 
1753 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1754 	kfree(in);
1755 	if (err)
1756 		return err;
1757 
1758 	mlx5_vdpa_add_tirn(ndev);
1759 	return err;
1760 }
1761 
1762 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1763 {
1764 	mlx5_vdpa_remove_tirn(ndev);
1765 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1766 }
1767 
1768 #define MAX_STEERING_ENT 0x8000
1769 #define MAX_STEERING_GROUPS 2
1770 
1771 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1772 #define NUM_DESTS 2
1773 #else
1774 #define NUM_DESTS 1
1775 #endif
1776 
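/*
 * With CONFIG_MLX5_VDPA_STEERING_DEBUG, attach flow counters to the
 * unicast and multicast steering rules so per-rule receive statistics can
 * be collected. Without the config option this is a no-op.
 */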
1777 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1778 				 struct macvlan_node *node,
1779 				 struct mlx5_flow_act *flow_act,
1780 				 struct mlx5_flow_destination *dests)
1781 {
1782 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1783 	int err;
1784 
1785 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1786 	if (IS_ERR(node->ucast_counter.counter))
1787 		return PTR_ERR(node->ucast_counter.counter);
1788 
1789 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1790 	if (IS_ERR(node->mcast_counter.counter)) {
1791 		err = PTR_ERR(node->mcast_counter.counter);
1792 		goto err_mcast_counter;
1793 	}
1794 
1795 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1796 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1797 	return 0;
1798 
1799 err_mcast_counter:
1800 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1801 	return err;
1802 #else
1803 	return 0;
1804 #endif
1805 }
1806 
1807 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1808 				     struct macvlan_node *node)
1809 {
1810 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1811 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1812 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1813 #endif
1814 }
1815 
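/*
 * Install the steering rules for a MAC/VLAN node: a unicast rule matching
 * the exact destination MAC (and the VLAN id when the node is tagged), and
 * a multicast rule matching any destination with the multicast bit set.
 * Both rules forward to the device TIR.
 */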
1816 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1817 					struct macvlan_node *node)
1818 {
1819 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1820 	struct mlx5_flow_act flow_act = {};
1821 	struct mlx5_flow_spec *spec;
1822 	void *headers_c;
1823 	void *headers_v;
1824 	u8 *dmac_c;
1825 	u8 *dmac_v;
1826 	int err;
1827 	u16 vid;
1828 
1829 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1830 	if (!spec)
1831 		return -ENOMEM;
1832 
1833 	vid = key2vid(node->macvlan);
1834 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1835 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1836 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1837 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1838 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1839 	eth_broadcast_addr(dmac_c);
1840 	ether_addr_copy(dmac_v, mac);
1841 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1842 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1843 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1844 	}
1845 	if (node->tagged) {
1846 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1847 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1848 	}
1849 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1850 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1851 	dests[0].tir_num = ndev->res.tirn;
1852 	err = add_steering_counters(ndev, node, &flow_act, dests);
1853 	if (err)
1854 		goto out_free;
1855 
1856 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1857 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1858 #endif
1859 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1860 	if (IS_ERR(node->ucast_rule)) {
1861 		err = PTR_ERR(node->ucast_rule);
1862 		goto err_ucast;
1863 	}
1864 
1865 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1866 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1867 #endif
1868 
1869 	memset(dmac_c, 0, ETH_ALEN);
1870 	memset(dmac_v, 0, ETH_ALEN);
1871 	dmac_c[0] = 1;
1872 	dmac_v[0] = 1;
1873 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1874 	if (IS_ERR(node->mcast_rule)) {
1875 		err = PTR_ERR(node->mcast_rule);
1876 		goto err_mcast;
1877 	}
1878 	kvfree(spec);
1879 	mlx5_vdpa_add_rx_counters(ndev, node);
1880 	return 0;
1881 
1882 err_mcast:
1883 	mlx5_del_flow_rules(node->ucast_rule);
1884 err_ucast:
1885 	remove_steering_counters(ndev, node);
1886 out_free:
1887 	kvfree(spec);
1888 	return err;
1889 }
1890 
1891 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1892 					 struct macvlan_node *node)
1893 {
1894 	mlx5_vdpa_remove_rx_counters(ndev, node);
1895 	mlx5_del_flow_rules(node->ucast_rule);
1896 	mlx5_del_flow_rules(node->mcast_rule);
1897 }
1898 
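/*
 * Pack a MAC/VLAN pair into the 64-bit key used by the macvlan hash table:
 * the VLAN id (or MLX5V_UNTAGGED for untagged traffic) occupies bits 48-63
 * and the MAC address occupies the low 48 bits.
 */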
1899 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1900 {
1901 	u64 val;
1902 
1903 	if (!tagged)
1904 		vlan = MLX5V_UNTAGGED;
1905 
1906 	val = (u64)vlan << 48 |
1907 	      (u64)mac[0] << 40 |
1908 	      (u64)mac[1] << 32 |
1909 	      (u64)mac[2] << 24 |
1910 	      (u64)mac[3] << 16 |
1911 	      (u64)mac[4] << 8 |
1912 	      (u64)mac[5];
1913 
1914 	return val;
1915 }
1916 
1917 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1918 {
1919 	struct macvlan_node *pos;
1920 	u32 idx;
1921 
1922 	idx = hash_64(value, 8); /* 8-bit hash index into the macvlan_hash table */
1923 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1924 		if (pos->macvlan == value)
1925 			return pos;
1926 	}
1927 	return NULL;
1928 }
1929 
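/*
 * Add a MAC/VLAN entry: install its steering rules and insert the node
 * into the macvlan hash table. Returns -EEXIST if the entry already exists.
 */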
1930 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1931 {
1932 	struct macvlan_node *ptr;
1933 	u64 val;
1934 	u32 idx;
1935 	int err;
1936 
1937 	val = search_val(mac, vid, tagged);
1938 	if (mac_vlan_lookup(ndev, val))
1939 		return -EEXIST;
1940 
1941 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1942 	if (!ptr)
1943 		return -ENOMEM;
1944 
1945 	ptr->tagged = tagged;
1946 	ptr->macvlan = val;
1947 	ptr->ndev = ndev;
1948 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1949 	if (err)
1950 		goto err_add;
1951 
1952 	idx = hash_64(val, 8);
1953 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1954 	return 0;
1955 
1956 err_add:
1957 	kfree(ptr);
1958 	return err;
1959 }
1960 
1961 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1962 {
1963 	struct macvlan_node *ptr;
1964 
1965 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1966 	if (!ptr)
1967 		return;
1968 
1969 	hlist_del(&ptr->hlist);
1970 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1971 	remove_steering_counters(ndev, ptr);
1972 	kfree(ptr);
1973 }
1974 
1975 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1976 {
1977 	struct macvlan_node *pos;
1978 	struct hlist_node *n;
1979 	int i;
1980 
1981 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1982 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1983 			hlist_del(&pos->hlist);
1984 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1985 			remove_steering_counters(ndev, pos);
1986 			kfree(pos);
1987 		}
1988 	}
1989 }
1990 
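/*
 * Create the receive flow table in the bypass namespace and install the
 * default untagged rule for the device MAC address.
 */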
1991 static int setup_steering(struct mlx5_vdpa_net *ndev)
1992 {
1993 	struct mlx5_flow_table_attr ft_attr = {};
1994 	struct mlx5_flow_namespace *ns;
1995 	int err;
1996 
1997 	ft_attr.max_fte = MAX_STEERING_ENT;
1998 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1999 
2000 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
2001 	if (!ns) {
2002 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
2003 		return -EOPNOTSUPP;
2004 	}
2005 
2006 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
2007 	if (IS_ERR(ndev->rxft)) {
2008 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
2009 		return PTR_ERR(ndev->rxft);
2010 	}
2011 	mlx5_vdpa_add_rx_flow_table(ndev);
2012 
2013 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
2014 	if (err)
2015 		goto err_add;
2016 
2017 	return 0;
2018 
2019 err_add:
2020 	mlx5_vdpa_remove_rx_flow_table(ndev);
2021 	mlx5_destroy_flow_table(ndev->rxft);
2022 	return err;
2023 }
2024 
2025 static void teardown_steering(struct mlx5_vdpa_net *ndev)
2026 {
2027 	clear_mac_vlan_table(ndev);
2028 	mlx5_vdpa_remove_rx_flow_table(ndev);
2029 	mlx5_destroy_flow_table(ndev->rxft);
2030 }
2031 
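/*
 * Handle VIRTIO_NET_CTRL_MAC commands from the control virtqueue. For
 * VIRTIO_NET_CTRL_MAC_ADDR_SET the new address is programmed into the
 * physical function's MPFS table and the receive steering rules are
 * recreated; on failure the old address and rules are restored as far as
 * possible.
 */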
2032 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2033 {
2034 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2035 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2036 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2037 	struct mlx5_core_dev *pfmdev;
2038 	size_t read;
2039 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
2040 
2041 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2042 	switch (cmd) {
2043 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
2044 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
2045 		if (read != ETH_ALEN)
2046 			break;
2047 
2048 		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
2049 			status = VIRTIO_NET_OK;
2050 			break;
2051 		}
2052 
2053 		if (is_zero_ether_addr(mac))
2054 			break;
2055 
2056 		if (!is_zero_ether_addr(ndev->config.mac)) {
2057 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2058 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
2059 					       ndev->config.mac);
2060 				break;
2061 			}
2062 		}
2063 
2064 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
2065 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2066 				       mac);
2067 			break;
2068 		}
2069 
2070 		/* Back up the original MAC address so that it can be restored
2071 		 * if adding the forward rules fails.
2072 		 */
2073 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
2074 
2075 		memcpy(ndev->config.mac, mac, ETH_ALEN);
2076 
2077 		/* Recreate the flow table entry so packets are forwarded to the new MAC.
2078 		 */
2079 		mac_vlan_del(ndev, mac_back, 0, false);
2080 
2081 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2082 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2083 
2084 			/* This path is unlikely, but double check just in case. */
2085 			if (is_zero_ether_addr(mac_back)) {
2086 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2087 				break;
2088 			}
2089 
2090 			/* Try to restore the original MAC address to the MPFS table
2091 			 * and to restore the forward rule entry.
2092 			 */
2093 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2094 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2095 					       ndev->config.mac);
2096 			}
2097 
2098 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2099 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2100 					       mac_back);
2101 			}
2102 
2103 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2104 
2105 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2106 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2107 
2108 			break;
2109 		}
2110 
2111 		status = VIRTIO_NET_OK;
2112 		break;
2113 
2114 	default:
2115 		break;
2116 	}
2117 
2118 	return status;
2119 }
2120 
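/*
 * Change the number of enabled queue pairs. When scaling down, shrink the
 * RQT first and then suspend (or tear down, if the device is not
 * resumable) the excess virtqueues. When scaling up, bring the new
 * virtqueues up first and only then grow the RQT, so the table never
 * references an inactive queue.
 */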
2121 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2122 {
2123 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2124 	int cur_qps = ndev->cur_num_vqs / 2;
2125 	int err;
2126 	int i;
2127 
2128 	if (cur_qps > newqps) {
2129 		err = modify_rqt(ndev, 2 * newqps);
2130 		if (err)
2131 			return err;
2132 
2133 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) {
2134 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2135 
2136 			if (is_resumable(ndev))
2137 				suspend_vq(ndev, mvq);
2138 			else
2139 				teardown_vq(ndev, mvq);
2140 		}
2141 
2142 		ndev->cur_num_vqs = 2 * newqps;
2143 	} else {
2144 		ndev->cur_num_vqs = 2 * newqps;
2145 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
2146 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2147 
2148 			err = mvq->initialized ? resume_vq(ndev, mvq) : setup_vq(ndev, mvq, true);
2149 			if (err)
2150 				goto clean_added;
2151 		}
2152 		err = modify_rqt(ndev, 2 * newqps);
2153 		if (err)
2154 			goto clean_added;
2155 	}
2156 	return 0;
2157 
2158 clean_added:
2159 	for (--i; i >= 2 * cur_qps; --i)
2160 		teardown_vq(ndev, &ndev->vqs[i]);
2161 
2162 	ndev->cur_num_vqs = 2 * cur_qps;
2163 
2164 	return err;
2165 }
2166 
2167 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2168 {
2169 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2170 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2171 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2172 	struct virtio_net_ctrl_mq mq;
2173 	size_t read;
2174 	u16 newqps;
2175 
2176 	switch (cmd) {
2177 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2178 		/* This mq feature check aligns with pre-existing userspace
2179 		 * implementation.
2180 		 *
2181 		 * Without it, an untrusted driver could fake a multiqueue config
2182 		 * request down to a non-mq device, which could cause the kernel
2183 		 * to panic due to uninitialized resources for the extra vqs.
2184 		 * Even with a well-behaving guest driver, changing the number of
2185 		 * vqs on a non-mq device is not expected to be allowed.
2186 		 */
2187 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2188 			break;
2189 
2190 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2191 		if (read != sizeof(mq))
2192 			break;
2193 
2194 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2195 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2196 		    newqps > ndev->rqt_size)
2197 			break;
2198 
2199 		if (ndev->cur_num_vqs == 2 * newqps) {
2200 			status = VIRTIO_NET_OK;
2201 			break;
2202 		}
2203 
2204 		if (!change_num_qps(mvdev, newqps))
2205 			status = VIRTIO_NET_OK;
2206 
2207 		break;
2208 	default:
2209 		break;
2210 	}
2211 
2212 	return status;
2213 }
2214 
2215 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2216 {
2217 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2218 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2219 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2220 	__virtio16 vlan;
2221 	size_t read;
2222 	u16 id;
2223 
2224 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2225 		return status;
2226 
2227 	switch (cmd) {
2228 	case VIRTIO_NET_CTRL_VLAN_ADD:
2229 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2230 		if (read != sizeof(vlan))
2231 			break;
2232 
2233 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2234 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2235 			break;
2236 
2237 		status = VIRTIO_NET_OK;
2238 		break;
2239 	case VIRTIO_NET_CTRL_VLAN_DEL:
2240 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2241 		if (read != sizeof(vlan))
2242 			break;
2243 
2244 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2245 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2246 		status = VIRTIO_NET_OK;
2247 		break;
2248 	default:
2249 		break;
2250 	}
2251 
2252 	return status;
2253 }
2254 
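/*
 * Work handler for control virtqueue kicks. With reslock held, pull one
 * control command from the vring, dispatch it to the class-specific
 * handler and push back the status. The work is requeued after each
 * processed command so remaining descriptors get handled as well.
 */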
2255 static void mlx5_cvq_kick_handler(struct work_struct *work)
2256 {
2257 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2258 	struct virtio_net_ctrl_hdr ctrl;
2259 	struct mlx5_vdpa_wq_ent *wqent;
2260 	struct mlx5_vdpa_dev *mvdev;
2261 	struct mlx5_control_vq *cvq;
2262 	struct mlx5_vdpa_net *ndev;
2263 	size_t read, write;
2264 	int err;
2265 
2266 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2267 	mvdev = wqent->mvdev;
2268 	ndev = to_mlx5_vdpa_ndev(mvdev);
2269 	cvq = &mvdev->cvq;
2270 
2271 	down_write(&ndev->reslock);
2272 
2273 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2274 		goto out;
2275 
2276 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2277 		goto out;
2278 
2279 	if (!cvq->ready)
2280 		goto out;
2281 
2282 	while (true) {
2283 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2284 					   GFP_ATOMIC);
2285 		if (err <= 0)
2286 			break;
2287 
2288 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2289 		if (read != sizeof(ctrl))
2290 			break;
2291 
2292 		cvq->received_desc++;
2293 		switch (ctrl.class) {
2294 		case VIRTIO_NET_CTRL_MAC:
2295 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2296 			break;
2297 		case VIRTIO_NET_CTRL_MQ:
2298 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2299 			break;
2300 		case VIRTIO_NET_CTRL_VLAN:
2301 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2302 			break;
2303 		default:
2304 			break;
2305 		}
2306 
2307 		/* Make sure data is written before advancing index */
2308 		smp_wmb();
2309 
2310 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2311 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2312 		vringh_kiov_cleanup(&cvq->riov);
2313 		vringh_kiov_cleanup(&cvq->wiov);
2314 
2315 		if (vringh_need_notify_iotlb(&cvq->vring))
2316 			vringh_notify(&cvq->vring);
2317 
2318 		cvq->completed_desc++;
2319 		queue_work(mvdev->wq, &wqent->work);
2320 		break;
2321 	}
2322 
2323 out:
2324 	up_write(&ndev->reslock);
2325 }
2326 
2327 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2328 {
2329 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2330 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2331 	struct mlx5_vdpa_virtqueue *mvq;
2332 
2333 	if (!is_index_valid(mvdev, idx))
2334 		return;
2335 
2336 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2337 		if (!mvdev->wq || !mvdev->cvq.ready)
2338 			return;
2339 
2340 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2341 		return;
2342 	}
2343 
2344 	mvq = &ndev->vqs[idx];
2345 	if (unlikely(!mvq->ready))
2346 		return;
2347 
2348 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2349 }
2350 
2351 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2352 				    u64 driver_area, u64 device_area)
2353 {
2354 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2355 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2356 	struct mlx5_vdpa_virtqueue *mvq;
2357 
2358 	if (!is_index_valid(mvdev, idx))
2359 		return -EINVAL;
2360 
2361 	if (is_ctrl_vq_idx(mvdev, idx)) {
2362 		mvdev->cvq.desc_addr = desc_area;
2363 		mvdev->cvq.device_addr = device_area;
2364 		mvdev->cvq.driver_addr = driver_area;
2365 		return 0;
2366 	}
2367 
2368 	mvq = &ndev->vqs[idx];
2369 	mvq->desc_addr = desc_area;
2370 	mvq->device_addr = device_area;
2371 	mvq->driver_addr = driver_area;
2372 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2373 	return 0;
2374 }
2375 
2376 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2377 {
2378 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2379 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2380 	struct mlx5_vdpa_virtqueue *mvq;
2381 
2382 	if (!is_index_valid(mvdev, idx))
2383 		return;
2384 
2385 	if (is_ctrl_vq_idx(mvdev, idx)) {
2386 		struct mlx5_control_vq *cvq = &mvdev->cvq;
2387 
2388 		cvq->vring.vring.num = num;
2389 		return;
2390 	}
2391 
2392 	mvq = &ndev->vqs[idx];
2393 	ndev->needs_teardown = num != mvq->num_ent;
2394 	mvq->num_ent = num;
2395 }
2396 
2397 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2398 {
2399 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2400 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2401 
2402 	ndev->event_cbs[idx] = *cb;
2403 	if (is_ctrl_vq_idx(mvdev, idx))
2404 		mvdev->cvq.event_cb = *cb;
2405 }
2406 
2407 static void mlx5_cvq_notify(struct vringh *vring)
2408 {
2409 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2410 
2411 	if (!cvq->event_cb.callback)
2412 		return;
2413 
2414 	cvq->event_cb.callback(cvq->event_cb.private);
2415 }
2416 
2417 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2418 {
2419 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2420 
2421 	cvq->ready = ready;
2422 	if (!ready)
2423 		return;
2424 
2425 	cvq->vring.notify = mlx5_cvq_notify;
2426 }
2427 
2428 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2429 {
2430 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2431 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2432 	struct mlx5_vdpa_virtqueue *mvq;
2433 
2434 	if (!mvdev->actual_features)
2435 		return;
2436 
2437 	if (!is_index_valid(mvdev, idx))
2438 		return;
2439 
2440 	if (is_ctrl_vq_idx(mvdev, idx)) {
2441 		set_cvq_ready(mvdev, ready);
2442 		return;
2443 	}
2444 
2445 	mvq = &ndev->vqs[idx];
2446 	if (!ready) {
2447 		suspend_vq(ndev, mvq);
2448 	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
2449 		if (resume_vq(ndev, mvq))
2450 			ready = false;
2451 	}
2452 
2453 	mvq->ready = ready;
2454 }
2455 
2456 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2457 {
2458 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2459 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2460 
2461 	if (!is_index_valid(mvdev, idx))
2462 		return false;
2463 
2464 	if (is_ctrl_vq_idx(mvdev, idx))
2465 		return mvdev->cvq.ready;
2466 
2467 	return ndev->vqs[idx].ready;
2468 }
2469 
2470 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2471 				  const struct vdpa_vq_state *state)
2472 {
2473 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2474 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2475 	struct mlx5_vdpa_virtqueue *mvq;
2476 
2477 	if (!is_index_valid(mvdev, idx))
2478 		return -EINVAL;
2479 
2480 	if (is_ctrl_vq_idx(mvdev, idx)) {
2481 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2482 		return 0;
2483 	}
2484 
2485 	mvq = &ndev->vqs[idx];
2486 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2487 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2488 		return -EINVAL;
2489 	}
2490 
2491 	mvq->used_idx = state->split.avail_index;
2492 	mvq->avail_idx = state->split.avail_index;
2493 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2494 				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2495 	return 0;
2496 }
2497 
2498 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2499 {
2500 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2501 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2502 	struct mlx5_vdpa_virtqueue *mvq;
2503 	struct mlx5_virtq_attr attr;
2504 	int err;
2505 
2506 	if (!is_index_valid(mvdev, idx))
2507 		return -EINVAL;
2508 
2509 	if (is_ctrl_vq_idx(mvdev, idx)) {
2510 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2511 		return 0;
2512 	}
2513 
2514 	mvq = &ndev->vqs[idx];
2515 	/* If the virtq object was destroyed, use the value saved at
2516 	 * the last minute of suspend_vq. This caters for userspace
2517 	 * that cares about emulating the index after vq is stopped.
2518 	 */
2519 	if (!mvq->initialized) {
2520 		/* Firmware returns a wrong value for the available index.
2521 		 * Since both values should be identical, we take the value of
2522 		 * used_idx which is reported correctly.
2523 		 */
2524 		state->split.avail_index = mvq->used_idx;
2525 		return 0;
2526 	}
2527 
2528 	err = query_virtqueue(ndev, mvq, &attr);
2529 	if (err) {
2530 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2531 		return err;
2532 	}
2533 	state->split.avail_index = attr.used_index;
2534 	return 0;
2535 }
2536 
2537 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2538 {
2539 	return PAGE_SIZE;
2540 }
2541 
2542 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2543 {
2544 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2545 
2546 	if (is_ctrl_vq_idx(mvdev, idx))
2547 		return MLX5_VDPA_CVQ_GROUP;
2548 
2549 	return MLX5_VDPA_DATAVQ_GROUP;
2550 }
2551 
2552 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2553 {
2554 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2555 
2556 	if (is_ctrl_vq_idx(mvdev, idx))
2557 		return MLX5_VDPA_CVQ_GROUP;
2558 
2559 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2560 }
2561 
2562 static u64 mlx_to_vritio_features(u16 dev_features)
2563 {
2564 	u64 result = 0;
2565 
2566 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2567 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2568 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2569 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2570 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2571 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2572 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2573 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2574 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2575 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2576 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2577 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2578 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2579 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2580 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2581 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2582 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2583 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2584 
2585 	return result;
2586 }
2587 
2588 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2589 {
2590 	u64 mlx_vdpa_features = 0;
2591 	u16 dev_features;
2592 
2593 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2594 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2595 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2596 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2597 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2598 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2599 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2600 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2601 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2602 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2603 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2604 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2605 
2606 	return mlx_vdpa_features;
2607 }
2608 
2609 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2610 {
2611 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2612 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2613 
2614 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2615 	return ndev->mvdev.mlx_features;
2616 }
2617 
2618 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2619 {
2620 	/* Minimum features to expect */
2621 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2622 		return -EOPNOTSUPP;
2623 
2624 	/* Double check the feature combination sent down by the driver.
2625 	 * Fail feature sets that lack a feature they depend on.
2626 	 *
2627 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2628 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2629 	 * By failing the invalid features sent down by untrusted drivers,
2630 	 * we're assured that the assumptions made by is_index_valid() and
2631 	 * is_ctrl_vq_idx() will not be compromised.
2632 	 */
2633 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2634 	    BIT_ULL(VIRTIO_NET_F_MQ))
2635 		return -EINVAL;
2636 
2637 	return 0;
2638 }
2639 
2640 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2641 {
2642 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2643 	int err;
2644 	int i;
2645 
2646 	for (i = 0; i < mvdev->max_vqs; i++) {
2647 		err = setup_vq(ndev, &ndev->vqs[i], filled);
2648 		if (err)
2649 			goto err_vq;
2650 	}
2651 
2652 	return 0;
2653 
2654 err_vq:
2655 	for (--i; i >= 0; i--)
2656 		teardown_vq(ndev, &ndev->vqs[i]);
2657 
2658 	return err;
2659 }
2660 
2661 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2662 {
2663 	int i;
2664 
2665 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2666 		teardown_vq(ndev, &ndev->vqs[i]);
2667 }
2668 
2669 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2670 {
2671 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2672 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2673 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2674 			mvdev->max_idx = mvdev->max_vqs;
2675 		} else {
2676 			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2677 			 * the CVQ gets index 2.
2678 			 */
2679 			mvdev->max_idx = 2;
2680 		}
2681 	} else {
2682 		/* Two data virtqueues only: one for rx and one for tx */
2683 		mvdev->max_idx = 1;
2684 	}
2685 }
2686 
2687 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2688 {
2689 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2690 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2691 	int err;
2692 
2693 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2694 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2695 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2696 	if (vport)
2697 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2698 
2699 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2700 	if (err)
2701 		return 0;
2702 
2703 	return MLX5_GET(query_vport_state_out, out, state);
2704 }
2705 
2706 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2707 {
2708 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2709 	    VPORT_STATE_UP)
2710 		return true;
2711 
2712 	return false;
2713 }
2714 
2715 static void update_carrier(struct work_struct *work)
2716 {
2717 	struct mlx5_vdpa_wq_ent *wqent;
2718 	struct mlx5_vdpa_dev *mvdev;
2719 	struct mlx5_vdpa_net *ndev;
2720 
2721 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2722 	mvdev = wqent->mvdev;
2723 	ndev = to_mlx5_vdpa_ndev(mvdev);
2724 	if (get_link_state(mvdev))
2725 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2726 	else
2727 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2728 
2729 	if (ndev->config_cb.callback)
2730 		ndev->config_cb.callback(ndev->config_cb.private);
2731 
2732 	kfree(wqent);
2733 }
2734 
2735 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2736 {
2737 	struct mlx5_vdpa_wq_ent *wqent;
2738 
2739 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2740 	if (!wqent)
2741 		return -ENOMEM;
2742 
2743 	wqent->mvdev = &ndev->mvdev;
2744 	INIT_WORK(&wqent->work, update_carrier);
2745 	queue_work(ndev->mvdev.wq, &wqent->work);
2746 	return 0;
2747 }
2748 
2749 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2750 {
2751 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2752 	struct mlx5_eqe *eqe = param;
2753 	int ret = NOTIFY_DONE;
2754 
2755 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2756 		switch (eqe->sub_type) {
2757 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2758 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2759 			if (queue_link_work(ndev))
2760 				return NOTIFY_DONE;
2761 
2762 			ret = NOTIFY_OK;
2763 			break;
2764 		default:
2765 			return NOTIFY_DONE;
2766 		}
2767 		return ret;
2768 	}
2769 	return ret;
2770 }
2771 
2772 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2773 {
2774 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2775 		return;
2776 
2777 	ndev->nb.notifier_call = event_handler;
2778 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2779 	ndev->nb_registered = true;
2780 	queue_link_work(ndev);
2781 }
2782 
2783 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2784 {
2785 	if (!ndev->nb_registered)
2786 		return;
2787 
2788 	ndev->nb_registered = false;
2789 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2790 	if (ndev->mvdev.wq)
2791 		flush_workqueue(ndev->mvdev.wq);
2792 }
2793 
2794 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2795 {
2796 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2797 }
2798 
2799 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2800 {
2801 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2802 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2803 	u64 old_features = mvdev->actual_features;
2804 	u64 diff_features;
2805 	int err;
2806 
2807 	print_features(mvdev, features, true);
2808 
2809 	err = verify_driver_features(mvdev, features);
2810 	if (err)
2811 		return err;
2812 
2813 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2814 
2815 	/* Interested in changes of vq features only. */
2816 	if (get_features(old_features) != get_features(mvdev->actual_features)) {
2817 		for (int i = 0; i < mvdev->max_vqs; ++i) {
2818 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2819 
2820 			mvq->modified_fields |= (
2821 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION |
2822 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES
2823 			);
2824 		}
2825 	}
2826 
2827 	/* When the features below diverge from the initial device features, the VQs need a full teardown. */
2828 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \
2829 			     BIT_ULL(VIRTIO_NET_F_CSUM) | \
2830 			     BIT_ULL(VIRTIO_F_VERSION_1))
2831 
2832 	diff_features = mvdev->mlx_features ^ mvdev->actual_features;
2833 	ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK);
2834 
2835 	update_cvq_info(mvdev);
2836 	return err;
2837 }
2838 
2839 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2840 {
2841 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2842 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2843 
2844 	ndev->config_cb = *cb;
2845 }
2846 
2847 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2848 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2849 {
2850 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2851 }
2852 
2853 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2854 {
2855 	return VIRTIO_ID_NET;
2856 }
2857 
2858 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2859 {
2860 	return PCI_VENDOR_ID_MELLANOX;
2861 }
2862 
2863 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2864 {
2865 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2866 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2867 
2868 	print_status(mvdev, ndev->mvdev.status, false);
2869 	return ndev->mvdev.status;
2870 }
2871 
2872 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2873 {
2874 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2875 	struct mlx5_virtq_attr attr = {};
2876 	int err;
2877 
2878 	if (mvq->initialized) {
2879 		err = query_virtqueue(ndev, mvq, &attr);
2880 		if (err)
2881 			return err;
2882 	}
2883 
2884 	ri->avail_index = attr.available_index;
2885 	ri->used_index = attr.used_index;
2886 	ri->ready = mvq->ready;
2887 	ri->num_ent = mvq->num_ent;
2888 	ri->desc_addr = mvq->desc_addr;
2889 	ri->device_addr = mvq->device_addr;
2890 	ri->driver_addr = mvq->driver_addr;
2891 	ri->map = mvq->map;
2892 	ri->restore = true;
2893 	return 0;
2894 }
2895 
2896 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2897 {
2898 	int i;
2899 
2900 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2901 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2902 		save_channel_info(ndev, &ndev->vqs[i]);
2903 	}
2904 	return 0;
2905 }
2906 
2907 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2908 {
2909 	int i;
2910 
2911 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2912 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2913 }
2914 
2915 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2916 {
2917 	struct mlx5_vdpa_virtqueue *mvq;
2918 	struct mlx5_vq_restore_info *ri;
2919 	int i;
2920 
2921 	mlx5_clear_vqs(ndev);
2922 	mvqs_set_defaults(ndev);
2923 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2924 		mvq = &ndev->vqs[i];
2925 		ri = &mvq->ri;
2926 		if (!ri->restore)
2927 			continue;
2928 
2929 		mvq->avail_idx = ri->avail_index;
2930 		mvq->used_idx = ri->used_index;
2931 		mvq->ready = ri->ready;
2932 		mvq->num_ent = ri->num_ent;
2933 		mvq->desc_addr = ri->desc_addr;
2934 		mvq->device_addr = ri->device_addr;
2935 		mvq->driver_addr = ri->driver_addr;
2936 		mvq->map = ri->map;
2937 	}
2938 }
2939 
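/*
 * Switch the data virtqueues to the memory key of a new mapping. Resumable
 * devices only need a suspend/resume cycle with the MKEY fields marked as
 * modified; otherwise the virtqueue objects are torn down and recreated
 * from the saved channel state. Nothing is restarted unless the driver is
 * in DRIVER_OK and the device is not suspended.
 */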
2940 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2941 				struct mlx5_vdpa_mr *new_mr,
2942 				unsigned int asid)
2943 {
2944 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2945 	bool teardown = !is_resumable(ndev);
2946 	int err;
2947 
2948 	suspend_vqs(ndev);
2949 	if (teardown) {
2950 		err = save_channels_info(ndev);
2951 		if (err)
2952 			return err;
2953 
2954 		teardown_vq_resources(ndev);
2955 	}
2956 
2957 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2958 
2959 	for (int i = 0; i < mvdev->max_vqs; i++)
2960 		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
2961 						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
2962 
2963 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2964 		return 0;
2965 
2966 	if (teardown) {
2967 		restore_channels_info(ndev);
2968 		err = setup_vq_resources(ndev, true);
2969 		if (err)
2970 			return err;
2971 	}
2972 
2973 	resume_vqs(ndev);
2974 
2975 	return 0;
2976 }
2977 
2978 /* reslock must be held for this function */
2979 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled)
2980 {
2981 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
2982 	int err;
2983 
2984 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2985 
2986 	if (ndev->setup) {
2987 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2988 		err = 0;
2989 		goto out;
2990 	}
2991 	mlx5_vdpa_add_debugfs(ndev);
2992 
2993 	err = read_umem_params(ndev);
2994 	if (err)
2995 		goto err_setup;
2996 
2997 	err = setup_virtqueues(mvdev, filled);
2998 	if (err) {
2999 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
3000 		goto err_setup;
3001 	}
3002 
3003 	err = create_rqt(ndev);
3004 	if (err) {
3005 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
3006 		goto err_rqt;
3007 	}
3008 
3009 	err = create_tir(ndev);
3010 	if (err) {
3011 		mlx5_vdpa_warn(mvdev, "create_tir\n");
3012 		goto err_tir;
3013 	}
3014 
3015 	err = setup_steering(ndev);
3016 	if (err) {
3017 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
3018 		goto err_fwd;
3019 	}
3020 	ndev->setup = true;
3021 
3022 	return 0;
3023 
3024 err_fwd:
3025 	destroy_tir(ndev);
3026 err_tir:
3027 	destroy_rqt(ndev);
3028 err_rqt:
3029 	teardown_virtqueues(ndev);
3030 err_setup:
3031 	mlx5_vdpa_remove_debugfs(ndev);
3032 out:
3033 	return err;
3034 }
3035 
3036 /* reslock must be held for this function */
3037 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev)
3038 {
3040 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3041 
3042 	if (!ndev->setup)
3043 		return;
3044 
3045 	mlx5_vdpa_remove_debugfs(ndev);
3046 	teardown_steering(ndev);
3047 	destroy_tir(ndev);
3048 	destroy_rqt(ndev);
3049 	teardown_virtqueues(ndev);
3050 	ndev->setup = false;
3051 	ndev->needs_teardown = false;
3052 }
3053 
3054 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
3055 {
3056 	struct mlx5_control_vq *cvq = &mvdev->cvq;
3057 	int err = 0;
3058 
3059 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
3060 		u16 idx = cvq->vring.last_avail_idx;
3061 
3062 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
3063 					cvq->vring.vring.num, false,
3064 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
3065 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
3066 					(struct vring_used *)(uintptr_t)cvq->device_addr);
3067 
3068 		if (!err)
3069 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
3070 	}
3071 	return err;
3072 }
3073 
3074 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
3075 {
3076 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3077 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3078 	int err;
3079 
3080 	print_status(mvdev, status, true);
3081 
3082 	down_write(&ndev->reslock);
3083 
3084 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
3085 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
3086 			err = setup_cvq_vring(mvdev);
3087 			if (err) {
3088 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
3089 				goto err_setup;
3090 			}
3091 			register_link_notifier(ndev);
3092 
3093 			if (ndev->needs_teardown)
3094 				teardown_vq_resources(ndev);
3095 
3096 			if (ndev->setup) {
3097 				err = resume_vqs(ndev);
3098 				if (err) {
3099 					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
3100 					goto err_driver;
3101 				}
3102 			} else {
3103 				err = setup_vq_resources(ndev, true);
3104 				if (err) {
3105 					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
3106 					goto err_driver;
3107 				}
3108 			}
3109 		} else {
3110 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3111 			goto err_clear;
3112 		}
3113 	}
3114 
3115 	ndev->mvdev.status = status;
3116 	up_write(&ndev->reslock);
3117 	return;
3118 
3119 err_driver:
3120 	unregister_link_notifier(ndev);
3121 err_setup:
3122 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3123 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3124 err_clear:
3125 	up_write(&ndev->reslock);
3126 }
3127 
3128 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3129 {
3130 	int i;
3131 
3132 	/* By default, all groups are mapped to ASID 0 */
3133 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3134 		mvdev->group2asid[i] = 0;
3135 }
3136 
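/*
 * Determine whether a device reset must also destroy and recreate the
 * virtqueue objects: required once the driver reached DRIVER_OK, once the
 * first VQ left the INIT state, or when there are pending modifications to
 * its state, addresses or indices.
 */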
3137 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev)
3138 {
3139 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3140 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0];
3141 
3142 	if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
3143 		return true;
3144 
3145 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
3146 		return true;
3147 
3148 	return mvq->modified_fields & (
3149 		MLX5_VIRTQ_MODIFY_MASK_STATE |
3150 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS |
3151 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
3152 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX
3153 	);
3154 }
3155 
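/*
 * Reset the device to its initial state. Virtqueue resources are recreated
 * only when needed, and the memory mappings are destroyed only when
 * VDPA_RESET_F_CLEAN_MAP is set, in which case a new DMA MR is created
 * when the umem_uid_0 capability is present.
 */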
3156 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3157 {
3158 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3159 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3160 	bool vq_reset;
3161 
3162 	print_status(mvdev, 0, true);
3163 	mlx5_vdpa_info(mvdev, "performing device reset\n");
3164 
3165 	down_write(&ndev->reslock);
3166 	unregister_link_notifier(ndev);
3167 	vq_reset = needs_vqs_reset(mvdev);
3168 	if (vq_reset) {
3169 		teardown_vq_resources(ndev);
3170 		mvqs_set_defaults(ndev);
3171 	}
3172 
3173 	if (flags & VDPA_RESET_F_CLEAN_MAP)
3174 		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3175 	ndev->mvdev.status = 0;
3176 	ndev->mvdev.suspended = false;
3177 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3178 	ndev->mvdev.cvq.ready = false;
3179 	ndev->mvdev.cvq.received_desc = 0;
3180 	ndev->mvdev.cvq.completed_desc = 0;
3181 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3182 	ndev->mvdev.actual_features = 0;
3183 	init_group_to_asid_map(mvdev);
3184 	++mvdev->generation;
3185 
3186 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3187 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3188 		if (mlx5_vdpa_create_dma_mr(mvdev))
3189 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
3190 	}
3191 	if (vq_reset)
3192 		setup_vq_resources(ndev, false);
3193 	up_write(&ndev->reslock);
3194 
3195 	return 0;
3196 }
3197 
3198 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3199 {
3200 	return mlx5_vdpa_compat_reset(vdev, 0);
3201 }
3202 
3203 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3204 {
3205 	return sizeof(struct virtio_net_config);
3206 }
3207 
3208 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3209 				 unsigned int len)
3210 {
3211 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3212 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3213 
3214 	if (offset + len <= sizeof(struct virtio_net_config))
3215 		memcpy(buf, (u8 *)&ndev->config + offset, len);
3216 }
3217 
3218 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3219 				 unsigned int len)
3220 {
3221 	/* not supported */
3222 }
3223 
3224 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3225 {
3226 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3227 
3228 	return mvdev->generation;
3229 }
3230 
3231 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3232 			unsigned int asid)
3233 {
3234 	struct mlx5_vdpa_mr *new_mr;
3235 	int err;
3236 
3237 	if (asid >= MLX5_VDPA_NUM_AS)
3238 		return -EINVAL;
3239 
3240 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3241 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3242 		if (IS_ERR(new_mr)) {
3243 			err = PTR_ERR(new_mr);
3244 			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
3245 			return err;
3246 		}
3247 	} else {
3248 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3249 		new_mr = NULL;
3250 	}
3251 
3252 	if (!mvdev->mr[asid]) {
3253 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3254 	} else {
3255 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3256 		if (err) {
3257 			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
3258 			goto out_err;
3259 		}
3260 	}
3261 
3262 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3263 
3264 out_err:
3265 	mlx5_vdpa_put_mr(mvdev, new_mr);
3266 	return err;
3267 }
3268 
3269 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3270 			     struct vhost_iotlb *iotlb)
3271 {
3272 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3273 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3274 	int err = -EINVAL;
3275 
3276 	down_write(&ndev->reslock);
3277 	err = set_map_data(mvdev, iotlb, asid);
3278 	up_write(&ndev->reslock);
3279 	return err;
3280 }
3281 
3282 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3283 {
3284 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3285 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3286 	int err;
3287 
3288 	down_write(&ndev->reslock);
3289 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3290 	up_write(&ndev->reslock);
3291 	return err;
3292 }
3293 
3294 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3295 {
3296 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3297 
3298 	if (is_ctrl_vq_idx(mvdev, idx))
3299 		return &vdev->dev;
3300 
3301 	return mvdev->vdev.dma_dev;
3302 }
3303 
3304 static void free_irqs(struct mlx5_vdpa_net *ndev)
3305 {
3306 	struct mlx5_vdpa_irq_pool_entry *ent;
3307 	int i;
3308 
3309 	if (!msix_mode_supported(&ndev->mvdev))
3310 		return;
3311 
3312 	if (!ndev->irqp.entries)
3313 		return;
3314 
3315 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3316 		ent = ndev->irqp.entries + i;
3317 		if (ent->map.virq)
3318 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3319 	}
3320 	kfree(ndev->irqp.entries);
3321 }
3322 
3323 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3324 {
3325 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3326 	struct mlx5_core_dev *pfmdev;
3327 	struct mlx5_vdpa_net *ndev;
3328 
3329 	ndev = to_mlx5_vdpa_ndev(mvdev);
3330 
3331 	free_fixed_resources(ndev);
3332 	mlx5_vdpa_destroy_mr_resources(mvdev);
3333 	if (!is_zero_ether_addr(ndev->config.mac)) {
3334 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3335 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3336 	}
3337 	mlx5_vdpa_free_resources(&ndev->mvdev);
3338 	free_irqs(ndev);
3339 	kfree(ndev->event_cbs);
3340 	kfree(ndev->vqs);
3341 }
3342 
3343 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3344 {
3345 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3346 	struct vdpa_notification_area ret = {};
3347 	struct mlx5_vdpa_net *ndev;
3348 	phys_addr_t addr;
3349 
3350 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3351 		return ret;
3352 
3353 	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3354 	 * notification to avoid the risk of mapping pages that contain the
3355 	 * BARs of more than one SF.
3356 	 */
3357 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3358 		return ret;
3359 
3360 	ndev = to_mlx5_vdpa_ndev(mvdev);
3361 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3362 	ret.addr = addr;
3363 	ret.size = PAGE_SIZE;
3364 	return ret;
3365 }
3366 
3367 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3368 {
3369 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3370 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3371 	struct mlx5_vdpa_virtqueue *mvq;
3372 
3373 	if (!is_index_valid(mvdev, idx))
3374 		return -EINVAL;
3375 
3376 	if (is_ctrl_vq_idx(mvdev, idx))
3377 		return -EOPNOTSUPP;
3378 
3379 	mvq = &ndev->vqs[idx];
3380 	if (!mvq->map.virq)
3381 		return -EOPNOTSUPP;
3382 
3383 	return mvq->map.virq;
3384 }
3385 
3386 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3387 {
3388 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3389 
3390 	return mvdev->actual_features;
3391 }
3392 
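/*
 * Query the virtio queue counter object attached to a virtqueue and return
 * the number of received and completed descriptors. Only valid while the
 * queue is in the RDY state.
 */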
3393 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3394 			     u64 *received_desc, u64 *completed_desc)
3395 {
3396 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3397 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3398 	void *cmd_hdr;
3399 	void *ctx;
3400 	int err;
3401 
3402 	if (!counters_supported(&ndev->mvdev))
3403 		return -EOPNOTSUPP;
3404 
3405 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3406 		return -EAGAIN;
3407 
3408 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3409 
3410 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3411 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3412 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3413 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3414 
3415 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3416 	if (err)
3417 		return err;
3418 
3419 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3420 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3421 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3422 	return 0;
3423 }
3424 
3425 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3426 					 struct sk_buff *msg,
3427 					 struct netlink_ext_ack *extack)
3428 {
3429 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3430 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3431 	struct mlx5_vdpa_virtqueue *mvq;
3432 	struct mlx5_control_vq *cvq;
3433 	u64 received_desc;
3434 	u64 completed_desc;
3435 	int err = 0;
3436 
3437 	down_read(&ndev->reslock);
3438 	if (!is_index_valid(mvdev, idx)) {
3439 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3440 		err = -EINVAL;
3441 		goto out_err;
3442 	}
3443 
3444 	if (idx == ctrl_vq_idx(mvdev)) {
3445 		cvq = &mvdev->cvq;
3446 		received_desc = cvq->received_desc;
3447 		completed_desc = cvq->completed_desc;
3448 		goto out;
3449 	}
3450 
3451 	mvq = &ndev->vqs[idx];
3452 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3453 	if (err) {
3454 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3455 		goto out_err;
3456 	}
3457 
3458 out:
3459 	err = -EMSGSIZE;
3460 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3461 		goto out_err;
3462 
3463 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3464 			      VDPA_ATTR_PAD))
3465 		goto out_err;
3466 
3467 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3468 		goto out_err;
3469 
3470 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3471 			      VDPA_ATTR_PAD))
3472 		goto out_err;
3473 
3474 	err = 0;
3475 out_err:
3476 	up_read(&ndev->reslock);
3477 	return err;
3478 }
3479 
3480 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3481 {
3482 	struct mlx5_control_vq *cvq;
3483 
3484 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3485 		return;
3486 
3487 	cvq = &mvdev->cvq;
3488 	cvq->ready = false;
3489 }
3490 
3491 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3492 {
3493 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3494 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3495 	int err;
3496 
3497 	mlx5_vdpa_info(mvdev, "suspending device\n");
3498 
3499 	down_write(&ndev->reslock);
3500 	unregister_link_notifier(ndev);
3501 	err = suspend_vqs(ndev);
3502 	mlx5_vdpa_cvq_suspend(mvdev);
3503 	mvdev->suspended = true;
3504 	up_write(&ndev->reslock);
3505 
3506 	return err;
3507 }
3508 
3509 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3510 {
3511 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3512 	struct mlx5_vdpa_net *ndev;
3513 	int err;
3514 
3515 	ndev = to_mlx5_vdpa_ndev(mvdev);
3516 
3517 	mlx5_vdpa_info(mvdev, "resuming device\n");
3518 
3519 	down_write(&ndev->reslock);
3520 	mvdev->suspended = false;
3521 	err = resume_vqs(ndev);
3522 	register_link_notifier(ndev);
3523 	up_write(&ndev->reslock);
3524 
3525 	return err;
3526 }
3527 
static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
			       unsigned int asid)
{
	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
	int err = 0;

	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
		return -EINVAL;

	mvdev->group2asid[group] = asid;

	mutex_lock(&mvdev->mr_mtx);
	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
	mutex_unlock(&mvdev->mr_mtx);

	return err;
}

static const struct vdpa_config_ops mlx5_vdpa_ops = {
	.set_vq_address = mlx5_vdpa_set_vq_address,
	.set_vq_num = mlx5_vdpa_set_vq_num,
	.kick_vq = mlx5_vdpa_kick_vq,
	.set_vq_cb = mlx5_vdpa_set_vq_cb,
	.set_vq_ready = mlx5_vdpa_set_vq_ready,
	.get_vq_ready = mlx5_vdpa_get_vq_ready,
	.set_vq_state = mlx5_vdpa_set_vq_state,
	.get_vq_state = mlx5_vdpa_get_vq_state,
	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
	.get_vq_notification = mlx5_get_vq_notification,
	.get_vq_irq = mlx5_get_vq_irq,
	.get_vq_align = mlx5_vdpa_get_vq_align,
	.get_vq_group = mlx5_vdpa_get_vq_group,
	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
	.get_device_features = mlx5_vdpa_get_device_features,
	.get_backend_features = mlx5_vdpa_get_backend_features,
	.set_driver_features = mlx5_vdpa_set_driver_features,
	.get_driver_features = mlx5_vdpa_get_driver_features,
	.set_config_cb = mlx5_vdpa_set_config_cb,
	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
	.get_device_id = mlx5_vdpa_get_device_id,
	.get_vendor_id = mlx5_vdpa_get_vendor_id,
	.get_status = mlx5_vdpa_get_status,
	.set_status = mlx5_vdpa_set_status,
	.reset = mlx5_vdpa_reset,
	.compat_reset = mlx5_vdpa_compat_reset,
	.get_config_size = mlx5_vdpa_get_config_size,
	.get_config = mlx5_vdpa_get_config,
	.set_config = mlx5_vdpa_set_config,
	.get_generation = mlx5_vdpa_get_generation,
	.set_map = mlx5_vdpa_set_map,
	.reset_map = mlx5_vdpa_reset_map,
	.set_group_asid = mlx5_set_group_asid,
	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
	.free = mlx5_vdpa_free,
	.suspend = mlx5_vdpa_suspend,
	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
};

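/*
 * Derive the virtio-net MTU from the NIC vport MTU by subtracting the
 * Ethernet overhead accounted for by MLX5V_ETH_HARD_MTU.
 */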
static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
{
	u16 hw_mtu;
	int err;

	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
	if (err)
		return err;

	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
	return 0;
}

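/*
 * Allocate the resources that live for the lifetime of the vdpa device
 * regardless of virtqueue state: the transport domain and the TIS.
 * Balanced by free_fixed_resources().
 */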
static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;
	int err;

	if (res->valid) {
		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
		return -EEXIST;
	}

	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
	if (err)
		return err;

	err = create_tis(ndev);
	if (err)
		goto err_tis;

	res->valid = true;

	return 0;

err_tis:
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	return err;
}

static void free_fixed_resources(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_net_resources *res = &ndev->res;

	if (!res->valid)
		return;

	destroy_tis(ndev);
	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
	res->valid = false;
}

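/*
 * Reset every virtqueue descriptor to its defaults. Only the fields up to
 * the 'ri' member are cleared; 'ri' and everything after it are preserved.
 */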
static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_virtqueue *mvq;
	int i;

	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
		mvq = &ndev->vqs[i];
		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
		mvq->index = i;
		mvq->ndev = ndev;
		mvq->fwqp.fw = true;
		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
		mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE;
	}
}

struct mlx5_vdpa_mgmtdev {
	struct vdpa_mgmt_dev mgtdev;
	struct mlx5_adev *madev;
	struct mlx5_vdpa_net *ndev;
	struct vdpa_config_ops vdpa_ops;
};

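/*
 * Program the requested MTU into the NIC vport context, adding back the
 * Ethernet overhead that query_mtu() subtracts.
 */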
static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
	void *in;
	int err;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
		 mtu + MLX5V_ETH_HARD_MTU);
	MLX5_SET(modify_nic_vport_context_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);

	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);

	kvfree(in);
	return err;
}

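/*
 * Best-effort pre-allocation of one MSI-X vector per virtqueue for
 * vq-to-irq mapping. Failures are not treated as errors: allocation simply
 * stops at the first vector that cannot be obtained.
 */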
static void allocate_irqs(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_irq_pool_entry *ent;
	int i;

	if (!msix_mode_supported(&ndev->mvdev))
		return;

	if (!ndev->mvdev.mdev->pdev)
		return;

	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
	if (!ndev->irqp.entries)
		return;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		ent = ndev->irqp.entries + i;
		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
			 dev_name(&ndev->mvdev.vdev.dev), i);
		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
		if (!ent->map.virq)
			return;

		ndev->irqp.num_ent++;
	}
}

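/*
 * Management-device dev_add callback: validate the provisioned features and
 * virtqueue limits, allocate and initialize the vdpa net device, program
 * MAC/MTU/link state into the virtio config space, allocate device
 * resources and register the device. Unless the data path was already set
 * up during device registration (virtio-vdpa), it is also set up here.
 */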
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
			     const struct vdpa_dev_set_config *add_config)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u64 device_features;
	u32 max_vqs;
	u16 mtu;
	int err;

	if (mgtdev->ndev)
		return -ENOSPC;

	mdev = mgtdev->madev->mdev;
	device_features = mgtdev->mgtdev.supported_features;
	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
		if (add_config->device_features & ~device_features) {
			dev_warn(mdev->device,
				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
				 add_config->device_features, device_features);
			return -EINVAL;
		}
		device_features &= add_config->device_features;
	} else {
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	}
	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
		dev_warn(mdev->device,
			 "Must provision minimum features 0x%llx for this device\n",
			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
		return -EOPNOTSUPP;
	}

	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
		dev_warn(mdev->device, "missing support for split virtqueues\n");
		return -EOPNOTSUPP;
	}

	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
	if (max_vqs < 2) {
		dev_warn(mdev->device,
			 "%d virtqueues are supported. At least 2 are required\n",
			 max_vqs);
		return -EAGAIN;
	}

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
		if (add_config->net.max_vq_pairs > max_vqs / 2)
			return -EINVAL;
		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
	} else {
		max_vqs = 2;
	}

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;

	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
	if (!ndev->vqs || !ndev->event_cbs) {
		err = -ENOMEM;
		goto err_alloc;
	}
	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;

	mvqs_set_defaults(ndev);
	allocate_irqs(ndev);
	init_rwsem(&ndev->reslock);
	config = &ndev->config;

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
		err = config_func_mtu(mdev, add_config->net.mtu);
		if (err)
			goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
		err = query_mtu(mdev, &mtu);
		if (err)
			goto err_alloc;

		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
		if (get_link_state(mvdev))
			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
		else
			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
	}

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	/* Don't bother setting a mac address in config if _F_MAC is not going to be provisioned */
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
		if (err)
			goto err_alloc;
	}

	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_alloc;
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
		/*
		 * We used to clear the _F_MAC feature bit when seeing a
		 * zero mac address while device features were not
		 * explicitly provisioned. Keep that behaviour so that
		 * old scripts do not break.
		 */
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		/* Don't provision zero mac address for _F_MAC */
		mlx5_vdpa_warn(&ndev->mvdev,
			       "No mac address provisioned?\n");
		err = -EINVAL;
		goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
		ndev->rqt_size = max_vqs / 2;
	} else {
		ndev->rqt_size = 1;
	}

	ndev->mvdev.mlx_features = device_features;
	mvdev->vdev.dma_dev = &mdev->pdev->dev;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_mpfs;

	INIT_LIST_HEAD(&mvdev->mr_list_head);

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		err = mlx5_vdpa_create_dma_mr(mvdev);
		if (err)
			goto err_res;
	}

	err = alloc_fixed_resources(ndev);
	if (err)
		goto err_mr;

	ndev->cvq_ent.mvdev = mvdev;
	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
	if (!mvdev->wq) {
		err = -ENOMEM;
		goto err_res2;
	}

	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;

	/* For virtio-vdpa, the device was already set up during device registration. */
	if (ndev->setup)
		return 0;

	down_write(&ndev->reslock);
	err = setup_vq_resources(ndev, false);
	up_write(&ndev->reslock);
	if (err)
		goto err_setup_vq_res;

	return 0;

err_setup_vq_res:
	_vdpa_unregister_device(&mvdev->vdev);
err_reg:
	destroy_workqueue(mvdev->wq);
err_res2:
	free_fixed_resources(ndev);
err_mr:
	mlx5_vdpa_destroy_mr_resources(mvdev);
err_res:
	mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
	if (!is_zero_ether_addr(config->mac))
		mlx5_mpfs_del_mac(pfmdev, config->mac);
err_alloc:
	put_device(&mvdev->vdev.dev);
	return err;
}

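/*
 * Management-device dev_del callback: stop link notifications, unregister
 * the vdpa device, tear down the virtqueue resources and destroy the
 * control-virtqueue workqueue. The remaining device state is released
 * through the .free callback (mlx5_vdpa_free) when the device is finally
 * released.
 */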
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct workqueue_struct *wq;

	unregister_link_notifier(ndev);
	_vdpa_unregister_device(dev);

	down_write(&ndev->reslock);
	teardown_vq_resources(ndev);
	up_write(&ndev->reslock);

	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
	mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

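/*
 * Auxiliary driver probe: allocate and register a vdpa management device
 * for the mlx5 ".vnet" auxiliary device. A per-mgmtdev copy of the config
 * ops is kept so that optional ops (get_vq_desc_group, resume) can be
 * disabled based on device capabilities.
 */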
static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
	mgtdev->mgtdev.max_supported_vqs =
		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
	mgtdev->madev = madev;
	mgtdev->vdpa_ops = mlx5_vdpa_ops;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
		mgtdev->vdpa_ops.get_vq_desc_group = NULL;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
		mgtdev->vdpa_ops.resume = NULL;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	auxiliary_set_drvdata(adev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = auxiliary_get_drvdata(adev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);