xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision c745b15c1f9cea5680c2906ae868302108f8daf0)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 struct mlx5_vdpa_cq_buf {
52 	struct mlx5_frag_buf_ctrl fbc;
53 	struct mlx5_frag_buf frag_buf;
54 	int cqe_size;
55 	int nent;
56 };
57 
58 struct mlx5_vdpa_cq {
59 	struct mlx5_core_cq mcq;
60 	struct mlx5_vdpa_cq_buf buf;
61 	struct mlx5_db db;
62 	int cqe;
63 };
64 
65 struct mlx5_vdpa_umem {
66 	struct mlx5_frag_buf_ctrl fbc;
67 	struct mlx5_frag_buf frag_buf;
68 	int size;
69 	u32 id;
70 };
71 
72 struct mlx5_vdpa_qp {
73 	struct mlx5_core_qp mqp;
74 	struct mlx5_frag_buf frag_buf;
75 	struct mlx5_db db;
76 	u16 head;
77 	bool fw;
78 };
79 
80 struct mlx5_vq_restore_info {
81 	u32 num_ent;
82 	u64 desc_addr;
83 	u64 device_addr;
84 	u64 driver_addr;
85 	u16 avail_index;
86 	u16 used_index;
87 	struct msi_map map;
88 	bool ready;
89 	bool restore;
90 };
91 
92 struct mlx5_vdpa_virtqueue {
93 	bool ready;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u32 num_ent;
98 
99 	/* Resources for implementing the notification channel from the device
100 	 * to the driver. fwqp is the firmware end of an RC connection; the
101 	 * other end is vqqp used by the driver. cq is where completions are
102 	 * reported.
103 	 */
104 	struct mlx5_vdpa_cq cq;
105 	struct mlx5_vdpa_qp fwqp;
106 	struct mlx5_vdpa_qp vqqp;
107 
108 	/* umem resources are required for the virtqueue operation. Their use
109 	 * is internal and they must be provided by the driver.
110 	 */
111 	struct mlx5_vdpa_umem umem1;
112 	struct mlx5_vdpa_umem umem2;
113 	struct mlx5_vdpa_umem umem3;
114 
115 	u32 counter_set_id;
116 	bool initialized;
117 	int index;
118 	u32 virtq_id;
119 	struct mlx5_vdpa_net *ndev;
120 	u16 avail_idx;
121 	u16 used_idx;
122 	int fw_state;
123 
124 	u64 modified_fields;
125 
126 	struct mlx5_vdpa_mr *vq_mr;
127 	struct mlx5_vdpa_mr *desc_mr;
128 
129 	struct msi_map map;
130 
131 	/* keep last in the struct */
132 	struct mlx5_vq_restore_info ri;
133 };
134 
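/* A virtqueue index is valid only within the range implied by the negotiated
 * features: without VIRTIO_NET_F_MQ the device exposes one RX/TX pair (plus a
 * control VQ at index 2 if VIRTIO_NET_F_CTRL_VQ was negotiated); otherwise any
 * index up to max_idx is accepted.
 */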
135 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
136 {
137 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
138 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
139 			return idx < 2;
140 		else
141 			return idx < 3;
142 	}
143 
144 	return idx <= mvdev->max_idx;
145 }
146 
147 static void free_resources(struct mlx5_vdpa_net *ndev);
148 static void init_mvqs(struct mlx5_vdpa_net *ndev);
149 static int setup_driver(struct mlx5_vdpa_dev *mvdev);
150 static void teardown_driver(struct mlx5_vdpa_net *ndev);
151 
152 static bool mlx5_vdpa_debug;
153 
154 #define MLX5_CVQ_MAX_ENT 16
155 
156 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
157 	do {                                                                                       \
158 		if (features & BIT_ULL(_feature))                                                  \
159 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
160 	} while (0)
161 
162 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
163 	do {                                                                                       \
164 		if (status & (_status))                                                            \
165 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
166 	} while (0)
167 
168 /* TODO: cross-endian support */
169 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
170 {
171 	return virtio_legacy_is_little_endian() ||
172 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
173 }
174 
175 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
176 {
177 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
178 }
179 
180 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
181 {
182 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
183 }
184 
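/* The control virtqueue sits after the data virtqueues: index 2 when
 * multiqueue is not negotiated, otherwise index max_vqs.
 */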
185 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
186 {
187 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
188 		return 2;
189 
190 	return mvdev->max_vqs;
191 }
192 
193 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
194 {
195 	return idx == ctrl_vq_idx(mvdev);
196 }
197 
198 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
199 {
200 	if (status & ~VALID_STATUS_MASK)
201 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
202 			       status & ~VALID_STATUS_MASK);
203 
204 	if (!mlx5_vdpa_debug)
205 		return;
206 
207 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
208 	if (set && !status) {
209 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
210 		return;
211 	}
212 
213 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
214 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
215 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
216 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
217 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
218 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
219 }
220 
221 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
222 {
223 	if (features & ~VALID_FEATURES_MASK)
224 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
225 			       features & ~VALID_FEATURES_MASK);
226 
227 	if (!mlx5_vdpa_debug)
228 		return;
229 
230 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
231 	if (!features)
232 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
233 
234 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
235 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
236 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
237 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
238 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
239 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
240 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
241 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
242 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
243 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
244 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
268 }
269 
270 static int create_tis(struct mlx5_vdpa_net *ndev)
271 {
272 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
273 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
274 	void *tisc;
275 	int err;
276 
277 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
278 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
279 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
280 	if (err)
281 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
282 
283 	return err;
284 }
285 
286 static void destroy_tis(struct mlx5_vdpa_net *ndev)
287 {
288 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
289 }
290 
291 #define MLX5_VDPA_CQE_SIZE 64
292 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
293 
294 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
295 {
296 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
297 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
298 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
299 	int err;
300 
301 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
302 				       ndev->mvdev.mdev->priv.numa_node);
303 	if (err)
304 		return err;
305 
306 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
307 
308 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
309 	buf->nent = nent;
310 
311 	return 0;
312 }
313 
314 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
315 {
316 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
317 
318 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
319 					ndev->mvdev.mdev->priv.numa_node);
320 }
321 
322 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
323 {
324 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
325 }
326 
327 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
328 {
329 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
330 }
331 
332 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
333 {
334 	struct mlx5_cqe64 *cqe64;
335 	void *cqe;
336 	int i;
337 
338 	for (i = 0; i < buf->nent; i++) {
339 		cqe = get_cqe(vcq, i);
340 		cqe64 = cqe;
341 		cqe64->op_own = MLX5_CQE_INVALID << 4;
342 	}
343 }
344 
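/* Return the CQE at index n if it is owned by software, i.e. its opcode is
 * valid and its ownership bit matches the current pass over the CQ ring;
 * otherwise return NULL.
 */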
345 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
346 {
347 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
348 
349 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
350 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
351 		return cqe64;
352 
353 	return NULL;
354 }
355 
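/* Advance the receive queue head by n entries and publish the new value
 * through the doorbell record.
 */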
356 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
357 {
358 	vqp->head += n;
359 	vqp->db.db[0] = cpu_to_be32(vqp->head);
360 }
361 
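/* Fill the CREATE_QP input for either end of the notification channel. The
 * firmware-owned QP only needs a zero-length RQ and no SQ; the driver-owned
 * QP gets a full RC QPC (PD, UAR, CQ, RQ size) plus the physical addresses
 * of its fragment buffer.
 */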
362 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
363 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
364 {
365 	struct mlx5_vdpa_qp *vqp;
366 	__be64 *pas;
367 	void *qpc;
368 
369 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
370 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
371 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
372 	if (vqp->fw) {
373 		/* The firmware QP is allocated by the driver on the firmware's behalf,
374 		 * so we can skip some of the params; the firmware will choose them.
375 		 */
376 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
377 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
378 		MLX5_SET(qpc, qpc, no_sq, 1);
379 		return;
380 	}
381 
382 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
383 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
384 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
385 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
386 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
387 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
388 	MLX5_SET(qpc, qpc, no_sq, 1);
389 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
390 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
391 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
392 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
393 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
394 }
395 
396 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
397 {
398 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
399 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
400 					ndev->mvdev.mdev->priv.numa_node);
401 }
402 
403 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
404 {
405 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
406 }
407 
408 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
409 		     struct mlx5_vdpa_qp *vqp)
410 {
411 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
412 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
413 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
414 	void *qpc;
415 	void *in;
416 	int err;
417 
418 	if (!vqp->fw) {
419 		vqp = &mvq->vqqp;
420 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
421 		if (err)
422 			return err;
423 
424 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
425 		if (err)
426 			goto err_db;
427 		inlen += vqp->frag_buf.npages * sizeof(__be64);
428 	}
429 
430 	in = kzalloc(inlen, GFP_KERNEL);
431 	if (!in) {
432 		err = -ENOMEM;
433 		goto err_kzalloc;
434 	}
435 
436 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
437 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
438 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
439 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
440 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
441 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
442 	if (!vqp->fw)
443 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
444 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
445 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
446 	kfree(in);
447 	if (err)
448 		goto err_kzalloc;
449 
450 	vqp->mqp.uid = ndev->mvdev.res.uid;
451 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
452 
453 	if (!vqp->fw)
454 		rx_post(vqp, mvq->num_ent);
455 
456 	return 0;
457 
458 err_kzalloc:
459 	if (!vqp->fw)
460 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
461 err_db:
462 	if (!vqp->fw)
463 		rq_buf_free(ndev, vqp);
464 
465 	return err;
466 }
467 
468 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
469 {
470 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
471 
472 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
473 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
474 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
475 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
476 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
477 	if (!vqp->fw) {
478 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
479 		rq_buf_free(ndev, vqp);
480 	}
481 }
482 
483 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
484 {
485 	return get_sw_cqe(cq, cq->mcq.cons_index);
486 }
487 
488 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
489 {
490 	struct mlx5_cqe64 *cqe64;
491 
492 	cqe64 = next_cqe_sw(vcq);
493 	if (!cqe64)
494 		return -EAGAIN;
495 
496 	vcq->mcq.cons_index++;
497 	return 0;
498 }
499 
500 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
501 {
502 	struct mlx5_vdpa_net *ndev = mvq->ndev;
503 	struct vdpa_callback *event_cb;
504 
505 	event_cb = &ndev->event_cbs[mvq->index];
506 	mlx5_cq_set_ci(&mvq->cq.mcq);
507 
508 	/* make sure the CQ consumer index update is visible to the hardware
509 	 * before updating the RX doorbell record.
510 	 */
511 	dma_wmb();
512 	rx_post(&mvq->vqqp, num);
513 	if (event_cb->callback)
514 		event_cb->callback(event_cb->private);
515 }
516 
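/* CQ completion handler: drain all software-owned CQEs, updating the consumer
 * index, replenishing the RQ and firing the vdpa event callback after every
 * half ring (and once at the end), then rearm the CQ for the next event.
 */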
517 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
518 {
519 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
520 	struct mlx5_vdpa_net *ndev = mvq->ndev;
521 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
522 	int num = 0;
523 
524 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
525 		num++;
526 		if (num > mvq->num_ent / 2) {
527 			/* If completions keep coming while we poll, we want to
528 			 * let the hardware know that we consumed them by
529 			 * updating the doorbell record.  We also let the vdpa core
530 			 * know about this so it can pass it on to the virtio driver
531 			 * in the guest.
532 			 */
533 			mlx5_vdpa_handle_completions(mvq, num);
534 			num = 0;
535 		}
536 	}
537 
538 	if (num)
539 		mlx5_vdpa_handle_completions(mvq, num);
540 
541 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
542 }
543 
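/* Create the completion queue used by the notification channel of virtqueue
 * idx: allocate the doorbell record and CQE buffer, pick a completion EQ
 * (vector 0 for now), issue CREATE_CQ and arm the CQ.
 */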
544 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
545 {
546 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
547 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
548 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
549 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
550 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
551 	__be64 *pas;
552 	int inlen;
553 	void *cqc;
554 	void *in;
555 	int err;
556 	int eqn;
557 
558 	err = mlx5_db_alloc(mdev, &vcq->db);
559 	if (err)
560 		return err;
561 
562 	vcq->mcq.set_ci_db = vcq->db.db;
563 	vcq->mcq.arm_db = vcq->db.db + 1;
564 	vcq->mcq.cqe_sz = 64;
565 
566 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
567 	if (err)
568 		goto err_db;
569 
570 	cq_frag_buf_init(vcq, &vcq->buf);
571 
572 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
573 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
574 	in = kzalloc(inlen, GFP_KERNEL);
575 	if (!in) {
576 		err = -ENOMEM;
577 		goto err_vzalloc;
578 	}
579 
580 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
581 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
582 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
583 
584 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
585 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
586 
587 	/* Use vector 0 by default. Consider adding code to choose the least
588 	 * used vector.
589 	 */
590 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
591 	if (err)
592 		goto err_vec;
593 
594 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
595 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
596 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
597 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
598 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
599 
600 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
601 	if (err)
602 		goto err_vec;
603 
604 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
605 	vcq->cqe = num_ent;
606 	vcq->mcq.set_ci_db = vcq->db.db;
607 	vcq->mcq.arm_db = vcq->db.db + 1;
608 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
609 	kfree(in);
610 	return 0;
611 
612 err_vec:
613 	kfree(in);
614 err_vzalloc:
615 	cq_frag_buf_free(ndev, &vcq->buf);
616 err_db:
617 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
618 	return err;
619 }
620 
621 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
622 {
623 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
624 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
625 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
626 
627 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
628 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
629 		return;
630 	}
631 	cq_frag_buf_free(ndev, &vcq->buf);
632 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
633 }
634 
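/* Query the VDPA emulation HCA capabilities to get the (a, b) size parameters
 * for the three umems a virtqueue object requires; set_umem_size() below uses
 * them as size = a * queue_size + b.
 */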
635 static int read_umem_params(struct mlx5_vdpa_net *ndev)
636 {
637 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
638 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
639 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
640 	int out_size;
641 	void *caps;
642 	void *out;
643 	int err;
644 
645 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
646 	out = kzalloc(out_size, GFP_KERNEL);
647 	if (!out)
648 		return -ENOMEM;
649 
650 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
651 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
652 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
653 	if (err) {
654 		mlx5_vdpa_warn(&ndev->mvdev,
655 			"Failed reading vdpa umem capabilities with err %d\n", err);
656 		goto out;
657 	}
658 
659 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
660 
661 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
662 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
663 
664 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
665 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
666 
667 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
668 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
669 
670 out:
671 	kfree(out);
672 	return 0;
673 }
674 
675 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
676 			  struct mlx5_vdpa_umem **umemp)
677 {
678 	u32 p_a;
679 	u32 p_b;
680 
681 	switch (num) {
682 	case 1:
683 		p_a = ndev->umem_1_buffer_param_a;
684 		p_b = ndev->umem_1_buffer_param_b;
685 		*umemp = &mvq->umem1;
686 		break;
687 	case 2:
688 		p_a = ndev->umem_2_buffer_param_a;
689 		p_b = ndev->umem_2_buffer_param_b;
690 		*umemp = &mvq->umem2;
691 		break;
692 	case 3:
693 		p_a = ndev->umem_3_buffer_param_a;
694 		p_b = ndev->umem_3_buffer_param_b;
695 		*umemp = &mvq->umem3;
696 		break;
697 	}
698 
699 	(*umemp)->size = p_a * mvq->num_ent + p_b;
700 }
701 
702 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
703 {
704 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
705 }
706 
707 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
708 {
709 	int inlen;
710 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
711 	void *um;
712 	void *in;
713 	int err;
714 	__be64 *pas;
715 	struct mlx5_vdpa_umem *umem;
716 
717 	set_umem_size(ndev, mvq, num, &umem);
718 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
719 	if (err)
720 		return err;
721 
722 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
723 
724 	in = kzalloc(inlen, GFP_KERNEL);
725 	if (!in) {
726 		err = -ENOMEM;
727 		goto err_in;
728 	}
729 
730 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
731 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
732 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
733 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
734 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
735 
736 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
737 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
738 
739 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
740 	if (err) {
741 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
742 		goto err_cmd;
743 	}
744 
745 	kfree(in);
746 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
747 
748 	return 0;
749 
750 err_cmd:
751 	kfree(in);
752 err_in:
753 	umem_frag_buf_free(ndev, umem);
754 	return err;
755 }
756 
757 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
758 {
759 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
760 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
761 	struct mlx5_vdpa_umem *umem;
762 
763 	switch (num) {
764 	case 1:
765 		umem = &mvq->umem1;
766 		break;
767 	case 2:
768 		umem = &mvq->umem2;
769 		break;
770 	case 3:
771 		umem = &mvq->umem3;
772 		break;
773 	}
774 
775 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
776 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
777 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
778 		return;
779 
780 	umem_frag_buf_free(ndev, umem);
781 }
782 
783 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
784 {
785 	int num;
786 	int err;
787 
788 	for (num = 1; num <= 3; num++) {
789 		err = create_umem(ndev, mvq, num);
790 		if (err)
791 			goto err_umem;
792 	}
793 	return 0;
794 
795 err_umem:
796 	for (num--; num > 0; num--)
797 		umem_destroy(ndev, mvq, num);
798 
799 	return err;
800 }
801 
802 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
803 {
804 	int num;
805 
806 	for (num = 3; num > 0; num--)
807 		umem_destroy(ndev, mvq, num);
808 }
809 
810 static int get_queue_type(struct mlx5_vdpa_net *ndev)
811 {
812 	u32 type_mask;
813 
814 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
815 
816 	/* prefer split queue */
817 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
818 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
819 
820 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
821 
822 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
823 }
824 
825 static bool vq_is_tx(u16 idx)
826 {
827 	return idx % 2;
828 }
829 
830 enum {
831 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
832 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
833 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
834 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
835 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
836 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
837 	MLX5_VIRTIO_NET_F_CSUM = 10,
838 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
839 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
840 };
841 
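/* Translate negotiated VIRTIO_NET feature bits into the device's internal bit
 * positions, which are programmed into the virtqueue object via the
 * queue_feature_bit_mask_12_3/2_0 fields.
 */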
842 static u16 get_features(u64 features)
843 {
844 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
845 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
846 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
847 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
848 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
849 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
850 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
851 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
852 }
853 
854 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
855 {
856 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
857 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
858 }
859 
860 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
861 {
862 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
863 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
864 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
865 }
866 
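/* Create the VIRTIO_NET_Q general object backing a virtqueue: create its
 * umems, then program features, queue addresses, event mode (MSI-X vector if
 * one was allocated, otherwise the firmware QP), memory keys, umem ids and the
 * counter set, and finally take references on the MRs the queue now uses.
 */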
867 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
868 {
869 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
870 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
871 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
872 	struct mlx5_vdpa_mr *vq_mr;
873 	struct mlx5_vdpa_mr *vq_desc_mr;
874 	void *obj_context;
875 	u16 mlx_features;
876 	void *cmd_hdr;
877 	void *vq_ctx;
878 	void *in;
879 	int err;
880 
881 	err = umems_create(ndev, mvq);
882 	if (err)
883 		return err;
884 
885 	in = kzalloc(inlen, GFP_KERNEL);
886 	if (!in) {
887 		err = -ENOMEM;
888 		goto err_alloc;
889 	}
890 
891 	mlx_features = get_features(ndev->mvdev.actual_features);
892 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
893 
894 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
895 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
896 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
897 
898 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
899 	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
900 	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
901 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
902 		 mlx_features >> 3);
903 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
904 		 mlx_features & 7);
905 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
906 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
907 
908 	if (vq_is_tx(mvq->index))
909 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
910 
911 	if (mvq->map.virq) {
912 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
913 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
914 	} else {
915 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
916 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
917 	}
918 
919 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
920 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
921 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
922 		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
923 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
924 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
925 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
926 	vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
927 	if (vq_mr)
928 		MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
929 
930 	vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
931 	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
932 		MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
933 
934 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
935 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
936 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
937 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
938 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
939 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
940 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
941 	if (counters_supported(&ndev->mvdev))
942 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
943 
944 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
945 	if (err)
946 		goto err_cmd;
947 
948 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
949 	kfree(in);
950 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
951 
952 	mlx5_vdpa_get_mr(mvdev, vq_mr);
953 	mvq->vq_mr = vq_mr;
954 
955 	if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
956 		mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
957 		mvq->desc_mr = vq_desc_mr;
958 	}
959 
960 	return 0;
961 
962 err_cmd:
963 	kfree(in);
964 err_alloc:
965 	umems_destroy(ndev, mvq);
966 	return err;
967 }
968 
969 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
970 {
971 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
972 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
973 
974 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
975 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
976 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
977 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
978 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
979 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
980 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
981 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
982 		return;
983 	}
984 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
985 	umems_destroy(ndev, mvq);
986 
987 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
988 	mvq->vq_mr = NULL;
989 
990 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
991 	mvq->desc_mr = NULL;
992 }
993 
994 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
995 {
996 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
997 }
998 
999 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1000 {
1001 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1002 }
1003 
1004 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1005 			int *outlen, u32 qpn, u32 rqpn)
1006 {
1007 	void *qpc;
1008 	void *pp;
1009 
1010 	switch (cmd) {
1011 	case MLX5_CMD_OP_2RST_QP:
1012 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1013 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1014 		*in = kzalloc(*inlen, GFP_KERNEL);
1015 		*out = kzalloc(*outlen, GFP_KERNEL);
1016 		if (!*in || !*out)
1017 			goto outerr;
1018 
1019 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1020 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1021 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1022 		break;
1023 	case MLX5_CMD_OP_RST2INIT_QP:
1024 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1025 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1026 		*in = kzalloc(*inlen, GFP_KERNEL);
1027 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1028 		if (!*in || !*out)
1029 			goto outerr;
1030 
1031 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1032 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1033 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1034 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1035 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1036 		MLX5_SET(qpc, qpc, rwe, 1);
1037 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1038 		MLX5_SET(ads, pp, vhca_port_num, 1);
1039 		break;
1040 	case MLX5_CMD_OP_INIT2RTR_QP:
1041 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1042 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1043 		*in = kzalloc(*inlen, GFP_KERNEL);
1044 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1045 		if (!*in || !*out)
1046 			goto outerr;
1047 
1048 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1049 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1050 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1051 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1052 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1053 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1054 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1055 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1056 		MLX5_SET(ads, pp, fl, 1);
1057 		break;
1058 	case MLX5_CMD_OP_RTR2RTS_QP:
1059 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1060 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1061 		*in = kzalloc(*inlen, GFP_KERNEL);
1062 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1063 		if (!*in || !*out)
1064 			goto outerr;
1065 
1066 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1067 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1068 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1069 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1070 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1071 		MLX5_SET(ads, pp, ack_timeout, 14);
1072 		MLX5_SET(qpc, qpc, retry_count, 7);
1073 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1074 		break;
1075 	default:
1076 		goto outerr_nullify;
1077 	}
1078 
1079 	return;
1080 
1081 outerr:
1082 	kfree(*in);
1083 	kfree(*out);
1084 outerr_nullify:
1085 	*in = NULL;
1086 	*out = NULL;
1087 }
1088 
1089 static void free_inout(void *in, void *out)
1090 {
1091 	kfree(in);
1092 	kfree(out);
1093 }
1094 
1095 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1096  * firmware. The fw argument indicates whether the QP being modified is the
1097  * one used by the firmware.
1098  */
1099 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1100 {
1101 	int outlen;
1102 	int inlen;
1103 	void *out;
1104 	void *in;
1105 	int err;
1106 
1107 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1108 	if (!in || !out)
1109 		return -ENOMEM;
1110 
1111 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1112 	free_inout(in, out);
1113 	return err;
1114 }
1115 
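/* Bring both ends of the notification channel to RTS by walking the RC QP
 * state machine on each side: reset both QPs, then RST->INIT and INIT->RTR on
 * both, and finally RTR->RTS on the firmware QP.
 */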
1116 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1117 {
1118 	int err;
1119 
1120 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1121 	if (err)
1122 		return err;
1123 
1124 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1125 	if (err)
1126 		return err;
1127 
1128 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1129 	if (err)
1130 		return err;
1131 
1132 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1133 	if (err)
1134 		return err;
1135 
1136 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1137 	if (err)
1138 		return err;
1139 
1140 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1141 	if (err)
1142 		return err;
1143 
1144 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1145 }
1146 
1147 struct mlx5_virtq_attr {
1148 	u8 state;
1149 	u16 available_index;
1150 	u16 used_index;
1151 };
1152 
1153 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1154 			   struct mlx5_virtq_attr *attr)
1155 {
1156 	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1157 	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1158 	void *out;
1159 	void *obj_context;
1160 	void *cmd_hdr;
1161 	int err;
1162 
1163 	out = kzalloc(outlen, GFP_KERNEL);
1164 	if (!out)
1165 		return -ENOMEM;
1166 
1167 	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1168 
1169 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1170 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1171 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1172 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1173 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1174 	if (err)
1175 		goto err_cmd;
1176 
1177 	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1178 	memset(attr, 0, sizeof(*attr));
1179 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1180 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1181 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1182 	kfree(out);
1183 	return 0;
1184 
1185 err_cmd:
1186 	kfree(out);
1187 	return err;
1188 }
1189 
1190 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1191 {
1192 	return ndev->mvdev.vdev.config->resume;
1193 }
1194 
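/* Virtqueue firmware state transitions allowed by the driver:
 * INIT -> RDY, RDY -> SUSPEND, and SUSPEND -> RDY only on devices that
 * support resume. Anything else (including leaving ERR) is rejected.
 */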
1195 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1196 {
1197 	switch (oldstate) {
1198 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1199 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1200 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1201 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1202 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1203 		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1204 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1205 	default:
1206 		return false;
1207 	}
1208 }
1209 
1210 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1211 {
1212 	/* Only state is always modifiable */
1213 	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1214 		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1215 		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1216 
1217 	return true;
1218 }
1219 
1220 static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
1221 			    struct mlx5_vdpa_virtqueue *mvq,
1222 			    int state)
1223 {
1224 	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1225 	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1226 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1227 	struct mlx5_vdpa_mr *desc_mr = NULL;
1228 	struct mlx5_vdpa_mr *vq_mr = NULL;
1229 	bool state_change = false;
1230 	void *obj_context;
1231 	void *cmd_hdr;
1232 	void *vq_ctx;
1233 	void *in;
1234 	int err;
1235 
1236 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1237 		return 0;
1238 
1239 	if (!modifiable_virtqueue_fields(mvq))
1240 		return -EINVAL;
1241 
1242 	in = kzalloc(inlen, GFP_KERNEL);
1243 	if (!in)
1244 		return -ENOMEM;
1245 
1246 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1247 
1248 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1249 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1250 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1251 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1252 
1253 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1254 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1255 
1256 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
1257 		if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1258 			err = -EINVAL;
1259 			goto done;
1260 		}
1261 
1262 		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1263 		state_change = true;
1264 	}
1265 
1266 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1267 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1268 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1269 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1270 	}
1271 
1272 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1273 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1274 
1275 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1276 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1277 
1278 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1279 		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1280 
1281 		if (vq_mr)
1282 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1283 		else
1284 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1285 	}
1286 
1287 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1288 		desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1289 
1290 		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1291 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1292 		else
1293 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1294 	}
1295 
1296 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1297 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1298 	if (err)
1299 		goto done;
1300 
1301 	if (state_change)
1302 		mvq->fw_state = state;
1303 
1304 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1305 		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1306 		mlx5_vdpa_get_mr(mvdev, vq_mr);
1307 		mvq->vq_mr = vq_mr;
1308 	}
1309 
1310 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1311 		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1312 		mlx5_vdpa_get_mr(mvdev, desc_mr);
1313 		mvq->desc_mr = desc_mr;
1314 	}
1315 
1316 	mvq->modified_fields = 0;
1317 
1318 done:
1319 	kfree(in);
1320 	return err;
1321 }
1322 
1323 static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
1324 				  struct mlx5_vdpa_virtqueue *mvq,
1325 				  unsigned int state)
1326 {
1327 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1328 	return modify_virtqueue(ndev, mvq, state);
1329 }
1330 
1331 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1332 {
1333 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1334 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1335 	void *cmd_hdr;
1336 	int err;
1337 
1338 	if (!counters_supported(&ndev->mvdev))
1339 		return 0;
1340 
1341 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1342 
1343 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1344 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1345 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1346 
1347 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1348 	if (err)
1349 		return err;
1350 
1351 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1352 
1353 	return 0;
1354 }
1355 
1356 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1357 {
1358 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1359 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1360 
1361 	if (!counters_supported(&ndev->mvdev))
1362 		return;
1363 
1364 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1365 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1366 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1367 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1368 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1369 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1370 }
1371 
1372 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1373 {
1374 	struct vdpa_callback *cb = priv;
1375 
1376 	if (cb->callback)
1377 		return cb->callback(cb->private);
1378 
1379 	return IRQ_HANDLED;
1380 }
1381 
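/* Try to take a free entry from the device's MSI-X pool for this virtqueue
 * and request its interrupt. Failure is not fatal: mvq->map stays empty and
 * the virtqueue falls back to QP-based event notification in
 * create_virtqueue().
 */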
1382 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1383 			 struct mlx5_vdpa_virtqueue *mvq)
1384 {
1385 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1386 	struct mlx5_vdpa_irq_pool_entry *ent;
1387 	int err;
1388 	int i;
1389 
1390 	for (i = 0; i < irqp->num_ent; i++) {
1391 		ent = &irqp->entries[i];
1392 		if (!ent->used) {
1393 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1394 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1395 			ent->dev_id = &ndev->event_cbs[mvq->index];
1396 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1397 					  ent->name, ent->dev_id);
1398 			if (err)
1399 				return;
1400 
1401 			ent->used = true;
1402 			mvq->map = ent->map;
1403 			return;
1404 		}
1405 	}
1406 }
1407 
1408 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1409 			   struct mlx5_vdpa_virtqueue *mvq)
1410 {
1411 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1412 	int i;
1413 
1414 	for (i = 0; i < irqp->num_ent; i++)
1415 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1416 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1417 			irqp->entries[i].used = false;
1418 			return;
1419 		}
1420 }
1421 
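/* Instantiate all hardware resources for a single virtqueue in dependency
 * order: CQ, firmware and driver QPs, the RC connection between them, the
 * counter set, an optional MSI-X vector and finally the virtqueue object,
 * which is moved to RDY if the queue was already marked ready.
 */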
1422 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1423 {
1424 	u16 idx = mvq->index;
1425 	int err;
1426 
1427 	if (!mvq->num_ent)
1428 		return 0;
1429 
1430 	if (mvq->initialized)
1431 		return 0;
1432 
1433 	err = cq_create(ndev, idx, mvq->num_ent);
1434 	if (err)
1435 		return err;
1436 
1437 	err = qp_create(ndev, mvq, &mvq->fwqp);
1438 	if (err)
1439 		goto err_fwqp;
1440 
1441 	err = qp_create(ndev, mvq, &mvq->vqqp);
1442 	if (err)
1443 		goto err_vqqp;
1444 
1445 	err = connect_qps(ndev, mvq);
1446 	if (err)
1447 		goto err_connect;
1448 
1449 	err = counter_set_alloc(ndev, mvq);
1450 	if (err)
1451 		goto err_connect;
1452 
1453 	alloc_vector(ndev, mvq);
1454 	err = create_virtqueue(ndev, mvq);
1455 	if (err)
1456 		goto err_vq;
1457 
1458 	if (mvq->ready) {
1459 		err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1460 		if (err) {
1461 			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1462 				       idx, err);
1463 			goto err_modify;
1464 		}
1465 	}
1466 
1467 	mvq->initialized = true;
1468 	return 0;
1469 
1470 err_modify:
1471 	destroy_virtqueue(ndev, mvq);
1472 err_vq:
1473 	dealloc_vector(ndev, mvq);
1474 	counter_set_dealloc(ndev, mvq);
1475 err_connect:
1476 	qp_destroy(ndev, &mvq->vqqp);
1477 err_vqqp:
1478 	qp_destroy(ndev, &mvq->fwqp);
1479 err_fwqp:
1480 	cq_destroy(ndev, idx);
1481 	return err;
1482 }
1483 
1484 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1485 {
1486 	struct mlx5_virtq_attr attr;
1487 
1488 	if (!mvq->initialized)
1489 		return;
1490 
1491 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1492 		return;
1493 
1494 	if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1495 		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1496 
1497 	if (query_virtqueue(ndev, mvq, &attr)) {
1498 		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1499 		return;
1500 	}
1501 	mvq->avail_idx = attr.available_index;
1502 	mvq->used_idx = attr.used_index;
1503 }
1504 
1505 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1506 {
1507 	int i;
1508 
1509 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1510 		suspend_vq(ndev, &ndev->vqs[i]);
1511 }
1512 
1513 static void resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1514 {
1515 	if (!mvq->initialized || !is_resumable(ndev))
1516 		return;
1517 
1518 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)
1519 		return;
1520 
1521 	if (modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY))
1522 		mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u\n", mvq->index);
1523 }
1524 
1525 static void resume_vqs(struct mlx5_vdpa_net *ndev)
1526 {
1527 	for (int i = 0; i < ndev->mvdev.max_vqs; i++)
1528 		resume_vq(ndev, &ndev->vqs[i]);
1529 }
1530 
1531 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1532 {
1533 	if (!mvq->initialized)
1534 		return;
1535 
1536 	suspend_vq(ndev, mvq);
1537 	mvq->modified_fields = 0;
1538 	destroy_virtqueue(ndev, mvq);
1539 	dealloc_vector(ndev, mvq);
1540 	counter_set_dealloc(ndev, mvq);
1541 	qp_destroy(ndev, &mvq->vqqp);
1542 	qp_destroy(ndev, &mvq->fwqp);
1543 	cq_destroy(ndev, mvq->index);
1544 	mvq->initialized = false;
1545 }
1546 
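/* Create the RQ table used for receive side steering by the TIR. Only the
 * even-indexed (RX) virtqueues are listed; both the maximum and the actual
 * table size are rounded up to a power of two.
 */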
1547 static int create_rqt(struct mlx5_vdpa_net *ndev)
1548 {
1549 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1550 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1551 	__be32 *list;
1552 	void *rqtc;
1553 	int inlen;
1554 	void *in;
1555 	int i, j;
1556 	int err;
1557 
1558 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1559 	in = kzalloc(inlen, GFP_KERNEL);
1560 	if (!in)
1561 		return -ENOMEM;
1562 
1563 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1564 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1565 
1566 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1567 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1568 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1569 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1570 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1571 
1572 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1573 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1574 	kfree(in);
1575 	if (err)
1576 		return err;
1577 
1578 	return 0;
1579 }
1580 
1581 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1582 
1583 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1584 {
1585 	int act_sz = roundup_pow_of_two(num / 2);
1586 	__be32 *list;
1587 	void *rqtc;
1588 	int inlen;
1589 	void *in;
1590 	int i, j;
1591 	int err;
1592 
1593 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1594 	in = kzalloc(inlen, GFP_KERNEL);
1595 	if (!in)
1596 		return -ENOMEM;
1597 
1598 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1599 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1600 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1601 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1602 
1603 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1604 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1605 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1606 
1607 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1608 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1609 	kfree(in);
1610 	if (err)
1611 		return err;
1612 
1613 	return 0;
1614 }
1615 
1616 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1617 {
1618 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1619 }
1620 
1621 static int create_tir(struct mlx5_vdpa_net *ndev)
1622 {
1623 #define HASH_IP_L4PORTS                                                                            \
1624 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1625 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1626 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1627 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1628 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1629 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1630 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1631 	void *rss_key;
1632 	void *outer;
1633 	void *tirc;
1634 	void *in;
1635 	int err;
1636 
1637 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1638 	if (!in)
1639 		return -ENOMEM;
1640 
1641 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1642 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1643 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1644 
1645 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1646 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1647 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1648 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1649 
1650 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1651 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1652 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1653 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1654 
1655 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1656 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1657 
1658 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1659 	kfree(in);
1660 	if (err)
1661 		return err;
1662 
1663 	mlx5_vdpa_add_tirn(ndev);
1664 	return err;
1665 }
1666 
1667 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1668 {
1669 	mlx5_vdpa_remove_tirn(ndev);
1670 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1671 }
1672 
1673 #define MAX_STEERING_ENT 0x8000
1674 #define MAX_STEERING_GROUPS 2
1675 
1676 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1677        #define NUM_DESTS 2
1678 #else
1679        #define NUM_DESTS 1
1680 #endif
1681 
1682 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1683 				 struct macvlan_node *node,
1684 				 struct mlx5_flow_act *flow_act,
1685 				 struct mlx5_flow_destination *dests)
1686 {
1687 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1688 	int err;
1689 
1690 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1691 	if (IS_ERR(node->ucast_counter.counter))
1692 		return PTR_ERR(node->ucast_counter.counter);
1693 
1694 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1695 	if (IS_ERR(node->mcast_counter.counter)) {
1696 		err = PTR_ERR(node->mcast_counter.counter);
1697 		goto err_mcast_counter;
1698 	}
1699 
1700 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1701 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1702 	return 0;
1703 
1704 err_mcast_counter:
1705 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1706 	return err;
1707 #else
1708 	return 0;
1709 #endif
1710 }
1711 
1712 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1713 				     struct macvlan_node *node)
1714 {
1715 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1716 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1717 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1718 #endif
1719 }
1720 
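/* Install the RX steering rules for one MAC/VLAN entry: a unicast rule that
 * matches the exact destination MAC, then a multicast rule that reuses the
 * same spec with only the multicast bit set. Both forward to the TIR. When
 * VIRTIO_NET_F_CTRL_VLAN is negotiated the VLAN tag is part of the match
 * criteria, and tagged entries match their specific VLAN id.
 */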
1721 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1722 					struct macvlan_node *node)
1723 {
1724 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1725 	struct mlx5_flow_act flow_act = {};
1726 	struct mlx5_flow_spec *spec;
1727 	void *headers_c;
1728 	void *headers_v;
1729 	u8 *dmac_c;
1730 	u8 *dmac_v;
1731 	int err;
1732 	u16 vid;
1733 
1734 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1735 	if (!spec)
1736 		return -ENOMEM;
1737 
1738 	vid = key2vid(node->macvlan);
1739 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1740 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1741 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1742 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1743 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1744 	eth_broadcast_addr(dmac_c);
1745 	ether_addr_copy(dmac_v, mac);
1746 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1747 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1748 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1749 	}
1750 	if (node->tagged) {
1751 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1752 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1753 	}
1754 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1755 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1756 	dests[0].tir_num = ndev->res.tirn;
1757 	err = add_steering_counters(ndev, node, &flow_act, dests);
1758 	if (err)
1759 		goto out_free;
1760 
1761 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1762 	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1763 #endif
1764 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1765 	if (IS_ERR(node->ucast_rule)) {
1766 		err = PTR_ERR(node->ucast_rule);
1767 		goto err_ucast;
1768 	}
1769 
1770 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1771 	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1772 #endif
1773 
1774 	memset(dmac_c, 0, ETH_ALEN);
1775 	memset(dmac_v, 0, ETH_ALEN);
1776 	dmac_c[0] = 1;
1777 	dmac_v[0] = 1;
1778 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1779 	if (IS_ERR(node->mcast_rule)) {
1780 		err = PTR_ERR(node->mcast_rule);
1781 		goto err_mcast;
1782 	}
1783 	kvfree(spec);
1784 	mlx5_vdpa_add_rx_counters(ndev, node);
1785 	return 0;
1786 
1787 err_mcast:
1788 	mlx5_del_flow_rules(node->ucast_rule);
1789 err_ucast:
1790 	remove_steering_counters(ndev, node);
1791 out_free:
1792 	kvfree(spec);
1793 	return err;
1794 }
1795 
1796 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1797 					 struct macvlan_node *node)
1798 {
1799 	mlx5_vdpa_remove_rx_counters(ndev, node);
1800 	mlx5_del_flow_rules(node->ucast_rule);
1801 	mlx5_del_flow_rules(node->mcast_rule);
1802 }
1803 
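/* Encode a MAC/VLAN pair into the 64-bit key used for the macvlan hash table:
 * bits 63..48 hold the VLAN id (or MLX5V_UNTAGGED for untagged traffic) and
 * bits 47..0 hold the MAC address. For example, MAC 00:11:22:33:44:55 on
 * VLAN 10 yields the key 0x000a001122334455.
 */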
1804 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1805 {
1806 	u64 val;
1807 
1808 	if (!tagged)
1809 		vlan = MLX5V_UNTAGGED;
1810 
1811 	val = (u64)vlan << 48 |
1812 	      (u64)mac[0] << 40 |
1813 	      (u64)mac[1] << 32 |
1814 	      (u64)mac[2] << 24 |
1815 	      (u64)mac[3] << 16 |
1816 	      (u64)mac[4] << 8 |
1817 	      (u64)mac[5];
1818 
1819 	return val;
1820 }
1821 
1822 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1823 {
1824 	struct macvlan_node *pos;
1825 	u32 idx;
1826 
1827 	idx = hash_64(value, 8); /* 8-bit hash into the macvlan_hash buckets (MLX5V_MACVLAN_SIZE) */
1828 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1829 		if (pos->macvlan == value)
1830 			return pos;
1831 	}
1832 	return NULL;
1833 }
1834 
1835 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1836 {
1837 	struct macvlan_node *ptr;
1838 	u64 val;
1839 	u32 idx;
1840 	int err;
1841 
1842 	val = search_val(mac, vid, tagged);
1843 	if (mac_vlan_lookup(ndev, val))
1844 		return -EEXIST;
1845 
1846 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1847 	if (!ptr)
1848 		return -ENOMEM;
1849 
1850 	ptr->tagged = tagged;
1851 	ptr->macvlan = val;
1852 	ptr->ndev = ndev;
1853 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1854 	if (err)
1855 		goto err_add;
1856 
1857 	idx = hash_64(val, 8);
1858 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1859 	return 0;
1860 
1861 err_add:
1862 	kfree(ptr);
1863 	return err;
1864 }
1865 
1866 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1867 {
1868 	struct macvlan_node *ptr;
1869 
1870 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1871 	if (!ptr)
1872 		return;
1873 
1874 	hlist_del(&ptr->hlist);
1875 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1876 	remove_steering_counters(ndev, ptr);
1877 	kfree(ptr);
1878 }
1879 
1880 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1881 {
1882 	struct macvlan_node *pos;
1883 	struct hlist_node *n;
1884 	int i;
1885 
1886 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1887 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1888 			hlist_del(&pos->hlist);
1889 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1890 			remove_steering_counters(ndev, pos);
1891 			kfree(pos);
1892 		}
1893 	}
1894 }
1895 
1896 static int setup_steering(struct mlx5_vdpa_net *ndev)
1897 {
1898 	struct mlx5_flow_table_attr ft_attr = {};
1899 	struct mlx5_flow_namespace *ns;
1900 	int err;
1901 
1902 	ft_attr.max_fte = MAX_STEERING_ENT;
1903 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1904 
1905 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1906 	if (!ns) {
1907 		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1908 		return -EOPNOTSUPP;
1909 	}
1910 
1911 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1912 	if (IS_ERR(ndev->rxft)) {
1913 		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1914 		return PTR_ERR(ndev->rxft);
1915 	}
1916 	mlx5_vdpa_add_rx_flow_table(ndev);
1917 
1918 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1919 	if (err)
1920 		goto err_add;
1921 
1922 	return 0;
1923 
1924 err_add:
1925 	mlx5_vdpa_remove_rx_flow_table(ndev);
1926 	mlx5_destroy_flow_table(ndev->rxft);
1927 	return err;
1928 }
1929 
1930 static void teardown_steering(struct mlx5_vdpa_net *ndev)
1931 {
1932 	clear_mac_vlan_table(ndev);
1933 	mlx5_vdpa_remove_rx_flow_table(ndev);
1934 	mlx5_destroy_flow_table(ndev->rxft);
1935 }
1936 
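/* Handle VIRTIO_NET_CTRL_MAC commands from the control VQ. For
 * VIRTIO_NET_CTRL_MAC_ADDR_SET the new MAC replaces the old one in the MPFS
 * table and the untagged steering entry is recreated; if installing the new
 * forward rules fails, the code attempts to roll back to the previous MAC.
 */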
1937 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1938 {
1939 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1940 	struct mlx5_control_vq *cvq = &mvdev->cvq;
1941 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1942 	struct mlx5_core_dev *pfmdev;
1943 	size_t read;
1944 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1945 
1946 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1947 	switch (cmd) {
1948 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1949 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1950 		if (read != ETH_ALEN)
1951 			break;
1952 
1953 		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1954 			status = VIRTIO_NET_OK;
1955 			break;
1956 		}
1957 
1958 		if (is_zero_ether_addr(mac))
1959 			break;
1960 
1961 		if (!is_zero_ether_addr(ndev->config.mac)) {
1962 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1963 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1964 					       ndev->config.mac);
1965 				break;
1966 			}
1967 		}
1968 
1969 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1970 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1971 				       mac);
1972 			break;
1973 		}
1974 
1975 		/* Back up the original MAC address so that it can be restored if
1976 		 * adding the forward rules fails.
1977 		 */
1978 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1979 
1980 		memcpy(ndev->config.mac, mac, ETH_ALEN);
1981 
1982 		/* Recreate the flow table entry so that packets can be forwarded back.
1983 		 */
1984 		mac_vlan_del(ndev, mac_back, 0, false);
1985 
1986 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1987 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1988 
1989 			/* This path is rarely taken, but double-check anyway. */
1990 			if (is_zero_ether_addr(mac_back)) {
1991 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1992 				break;
1993 			}
1994 
1995 			/* Try to restore the original MAC address to the MPFS table,
1996 			 * and try to restore the forward rule entry.
1997 			 */
1998 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1999 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2000 					       ndev->config.mac);
2001 			}
2002 
2003 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2004 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2005 					       mac_back);
2006 			}
2007 
2008 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2009 
2010 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2011 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2012 
2013 			break;
2014 		}
2015 
2016 		status = VIRTIO_NET_OK;
2017 		break;
2018 
2019 	default:
2020 		break;
2021 	}
2022 
2023 	return status;
2024 }
2025 
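/* Change the number of active queue pairs. When shrinking, the RQT is
 * modified first so no traffic is steered to the queues about to be torn
 * down; when growing, the new queues are set up before the RQT is expanded
 * to cover them.
 */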
2026 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2027 {
2028 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2029 	int cur_qps = ndev->cur_num_vqs / 2;
2030 	int err;
2031 	int i;
2032 
2033 	if (cur_qps > newqps) {
2034 		err = modify_rqt(ndev, 2 * newqps);
2035 		if (err)
2036 			return err;
2037 
2038 		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
2039 			teardown_vq(ndev, &ndev->vqs[i]);
2040 
2041 		ndev->cur_num_vqs = 2 * newqps;
2042 	} else {
2043 		ndev->cur_num_vqs = 2 * newqps;
2044 		for (i = cur_qps * 2; i < 2 * newqps; i++) {
2045 			err = setup_vq(ndev, &ndev->vqs[i]);
2046 			if (err)
2047 				goto clean_added;
2048 		}
2049 		err = modify_rqt(ndev, 2 * newqps);
2050 		if (err)
2051 			goto clean_added;
2052 	}
2053 	return 0;
2054 
2055 clean_added:
2056 	for (--i; i >= 2 * cur_qps; --i)
2057 		teardown_vq(ndev, &ndev->vqs[i]);
2058 
2059 	ndev->cur_num_vqs = 2 * cur_qps;
2060 
2061 	return err;
2062 }
2063 
2064 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2065 {
2066 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2067 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2068 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2069 	struct virtio_net_ctrl_mq mq;
2070 	size_t read;
2071 	u16 newqps;
2072 
2073 	switch (cmd) {
2074 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2075 		/* This mq feature check aligns with the pre-existing userspace
2076 		 * implementation.
2077 		 *
2078 		 * Without it, an untrusted driver could fake a multiqueue config
2079 		 * request down to a non-mq device, which may cause the kernel to
2080 		 * panic due to uninitialized resources for the extra vqs. Even with
2081 		 * a well-behaved guest driver, changing the number of vqs on a
2082 		 * non-mq device is not expected to be allowed.
2083 		 */
2084 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2085 			break;
2086 
2087 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2088 		if (read != sizeof(mq))
2089 			break;
2090 
2091 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2092 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2093 		    newqps > ndev->rqt_size)
2094 			break;
2095 
2096 		if (ndev->cur_num_vqs == 2 * newqps) {
2097 			status = VIRTIO_NET_OK;
2098 			break;
2099 		}
2100 
2101 		if (!change_num_qps(mvdev, newqps))
2102 			status = VIRTIO_NET_OK;
2103 
2104 		break;
2105 	default:
2106 		break;
2107 	}
2108 
2109 	return status;
2110 }
2111 
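/* Handle VIRTIO_NET_CTRL_VLAN commands by adding or removing the tagged
 * MAC/VLAN steering entry for the device MAC and the requested VLAN id.
 */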
2112 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2113 {
2114 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2115 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2116 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2117 	__virtio16 vlan;
2118 	size_t read;
2119 	u16 id;
2120 
2121 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2122 		return status;
2123 
2124 	switch (cmd) {
2125 	case VIRTIO_NET_CTRL_VLAN_ADD:
2126 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2127 		if (read != sizeof(vlan))
2128 			break;
2129 
2130 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2131 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2132 			break;
2133 
2134 		status = VIRTIO_NET_OK;
2135 		break;
2136 	case VIRTIO_NET_CTRL_VLAN_DEL:
2137 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2138 		if (read != sizeof(vlan))
2139 			break;
2140 
2141 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2142 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2143 		status = VIRTIO_NET_OK;
2144 		break;
2145 	default:
2146 		break;
2147 	}
2148 
2149 	return status;
2150 }
2151 
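/* Work handler servicing the control VQ. It completes a single command per
 * invocation and requeues itself to pick up the next descriptor, running
 * under the reslock write lock to serialize against device state changes.
 */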
2152 static void mlx5_cvq_kick_handler(struct work_struct *work)
2153 {
2154 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2155 	struct virtio_net_ctrl_hdr ctrl;
2156 	struct mlx5_vdpa_wq_ent *wqent;
2157 	struct mlx5_vdpa_dev *mvdev;
2158 	struct mlx5_control_vq *cvq;
2159 	struct mlx5_vdpa_net *ndev;
2160 	size_t read, write;
2161 	int err;
2162 
2163 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2164 	mvdev = wqent->mvdev;
2165 	ndev = to_mlx5_vdpa_ndev(mvdev);
2166 	cvq = &mvdev->cvq;
2167 
2168 	down_write(&ndev->reslock);
2169 
2170 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2171 		goto out;
2172 
2173 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2174 		goto out;
2175 
2176 	if (!cvq->ready)
2177 		goto out;
2178 
2179 	while (true) {
2180 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2181 					   GFP_ATOMIC);
2182 		if (err <= 0)
2183 			break;
2184 
2185 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2186 		if (read != sizeof(ctrl))
2187 			break;
2188 
2189 		cvq->received_desc++;
2190 		switch (ctrl.class) {
2191 		case VIRTIO_NET_CTRL_MAC:
2192 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2193 			break;
2194 		case VIRTIO_NET_CTRL_MQ:
2195 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2196 			break;
2197 		case VIRTIO_NET_CTRL_VLAN:
2198 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2199 			break;
2200 		default:
2201 			break;
2202 		}
2203 
2204 		/* Make sure data is written before advancing index */
2205 		smp_wmb();
2206 
2207 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2208 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2209 		vringh_kiov_cleanup(&cvq->riov);
2210 		vringh_kiov_cleanup(&cvq->wiov);
2211 
2212 		if (vringh_need_notify_iotlb(&cvq->vring))
2213 			vringh_notify(&cvq->vring);
2214 
2215 		cvq->completed_desc++;
2216 		queue_work(mvdev->wq, &wqent->work);
2217 		break;
2218 	}
2219 
2220 out:
2221 	up_write(&ndev->reslock);
2222 }
2223 
2224 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2225 {
2226 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2227 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2228 	struct mlx5_vdpa_virtqueue *mvq;
2229 
2230 	if (!is_index_valid(mvdev, idx))
2231 		return;
2232 
2233 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2234 		if (!mvdev->wq || !mvdev->cvq.ready)
2235 			return;
2236 
2237 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2238 		return;
2239 	}
2240 
2241 	mvq = &ndev->vqs[idx];
2242 	if (unlikely(!mvq->ready))
2243 		return;
2244 
2245 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2246 }
2247 
2248 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2249 				    u64 driver_area, u64 device_area)
2250 {
2251 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2252 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2253 	struct mlx5_vdpa_virtqueue *mvq;
2254 
2255 	if (!is_index_valid(mvdev, idx))
2256 		return -EINVAL;
2257 
2258 	if (is_ctrl_vq_idx(mvdev, idx)) {
2259 		mvdev->cvq.desc_addr = desc_area;
2260 		mvdev->cvq.device_addr = device_area;
2261 		mvdev->cvq.driver_addr = driver_area;
2262 		return 0;
2263 	}
2264 
2265 	mvq = &ndev->vqs[idx];
2266 	mvq->desc_addr = desc_area;
2267 	mvq->device_addr = device_area;
2268 	mvq->driver_addr = driver_area;
2269 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2270 	return 0;
2271 }
2272 
2273 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2274 {
2275 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2276 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2277 	struct mlx5_vdpa_virtqueue *mvq;
2278 
2279 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
2280 		return;
2281 
2282 	mvq = &ndev->vqs[idx];
2283 	mvq->num_ent = num;
2284 }
2285 
2286 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2287 {
2288 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2289 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2290 
2291 	ndev->event_cbs[idx] = *cb;
2292 	if (is_ctrl_vq_idx(mvdev, idx))
2293 		mvdev->cvq.event_cb = *cb;
2294 }
2295 
2296 static void mlx5_cvq_notify(struct vringh *vring)
2297 {
2298 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2299 
2300 	if (!cvq->event_cb.callback)
2301 		return;
2302 
2303 	cvq->event_cb.callback(cvq->event_cb.private);
2304 }
2305 
2306 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2307 {
2308 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2309 
2310 	cvq->ready = ready;
2311 	if (!ready)
2312 		return;
2313 
2314 	cvq->vring.notify = mlx5_cvq_notify;
2315 }
2316 
2317 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2318 {
2319 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2320 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2321 	struct mlx5_vdpa_virtqueue *mvq;
2322 	int err;
2323 
2324 	if (!mvdev->actual_features)
2325 		return;
2326 
2327 	if (!is_index_valid(mvdev, idx))
2328 		return;
2329 
2330 	if (is_ctrl_vq_idx(mvdev, idx)) {
2331 		set_cvq_ready(mvdev, ready);
2332 		return;
2333 	}
2334 
2335 	mvq = &ndev->vqs[idx];
2336 	if (!ready) {
2337 		suspend_vq(ndev, mvq);
2338 	} else {
2339 		err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2340 		if (err) {
2341 			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2342 			ready = false;
2343 		}
2344 	}
2345 
2346 
2347 	mvq->ready = ready;
2348 }
2349 
2350 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2351 {
2352 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2353 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2354 
2355 	if (!is_index_valid(mvdev, idx))
2356 		return false;
2357 
2358 	if (is_ctrl_vq_idx(mvdev, idx))
2359 		return mvdev->cvq.ready;
2360 
2361 	return ndev->vqs[idx].ready;
2362 }
2363 
2364 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2365 				  const struct vdpa_vq_state *state)
2366 {
2367 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2368 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2369 	struct mlx5_vdpa_virtqueue *mvq;
2370 
2371 	if (!is_index_valid(mvdev, idx))
2372 		return -EINVAL;
2373 
2374 	if (is_ctrl_vq_idx(mvdev, idx)) {
2375 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2376 		return 0;
2377 	}
2378 
2379 	mvq = &ndev->vqs[idx];
2380 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2381 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2382 		return -EINVAL;
2383 	}
2384 
2385 	mvq->used_idx = state->split.avail_index;
2386 	mvq->avail_idx = state->split.avail_index;
2387 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2388 				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2389 	return 0;
2390 }
2391 
2392 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2393 {
2394 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2395 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2396 	struct mlx5_vdpa_virtqueue *mvq;
2397 	struct mlx5_virtq_attr attr;
2398 	int err;
2399 
2400 	if (!is_index_valid(mvdev, idx))
2401 		return -EINVAL;
2402 
2403 	if (is_ctrl_vq_idx(mvdev, idx)) {
2404 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2405 		return 0;
2406 	}
2407 
2408 	mvq = &ndev->vqs[idx];
2409 	/* If the virtq object was destroyed, use the value saved by
2410 	 * suspend_vq just before teardown. This caters for userspace
2411 	 * that cares about emulating the index after the vq is stopped.
2412 	 */
2413 	if (!mvq->initialized) {
2414 		/* Firmware returns a wrong value for the available index.
2415 		 * Since both values should be identical, we take the value of
2416 		 * used_idx which is reported correctly.
2417 		 */
2418 		state->split.avail_index = mvq->used_idx;
2419 		return 0;
2420 	}
2421 
2422 	err = query_virtqueue(ndev, mvq, &attr);
2423 	if (err) {
2424 		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2425 		return err;
2426 	}
2427 	state->split.avail_index = attr.used_index;
2428 	return 0;
2429 }
2430 
2431 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2432 {
2433 	return PAGE_SIZE;
2434 }
2435 
2436 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2437 {
2438 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2439 
2440 	if (is_ctrl_vq_idx(mvdev, idx))
2441 		return MLX5_VDPA_CVQ_GROUP;
2442 
2443 	return MLX5_VDPA_DATAVQ_GROUP;
2444 }
2445 
2446 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2447 {
2448 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2449 
2450 	if (is_ctrl_vq_idx(mvdev, idx))
2451 		return MLX5_VDPA_CVQ_GROUP;
2452 
2453 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2454 }
2455 
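/* Translate the device_features_bits_mask reported by firmware into the
 * corresponding VIRTIO_NET_F_* feature bits.
 */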
2456 static u64 mlx_to_virtio_features(u16 dev_features)
2457 {
2458 	u64 result = 0;
2459 
2460 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2461 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2462 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2463 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2464 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2465 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2466 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2467 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2468 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2469 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2470 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2471 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2472 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2473 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2474 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2475 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2476 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2477 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2478 
2479 	return result;
2480 }
2481 
2482 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2483 {
2484 	u64 mlx_vdpa_features = 0;
2485 	u16 dev_features;
2486 
2487 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2488 	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2489 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2490 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2491 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2492 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2493 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2494 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2495 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2496 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2497 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2498 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2499 
2500 	return mlx_vdpa_features;
2501 }
2502 
2503 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2504 {
2505 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2506 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2507 
2508 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2509 	return ndev->mvdev.mlx_features;
2510 }
2511 
2512 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2513 {
2514 	/* Minimum features to expect */
2515 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2516 		return -EOPNOTSUPP;
2517 
2518 	/* Double check the feature combination sent down by the driver.
2519 	 * Fail invalid combinations where a required (depended-upon) feature is absent.
2520 	 *
2521 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2522 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2523 	 * By failing the invalid features sent down by untrusted drivers,
2524 	 * we ensure the assumptions made by is_index_valid() and
2525 	 * is_ctrl_vq_idx() are not compromised.
2526 	 */
2527 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2528             BIT_ULL(VIRTIO_NET_F_MQ))
2529 		return -EINVAL;
2530 
2531 	return 0;
2532 }
2533 
2534 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2535 {
2536 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2537 	int err;
2538 	int i;
2539 
2540 	for (i = 0; i < mvdev->max_vqs; i++) {
2541 		err = setup_vq(ndev, &ndev->vqs[i]);
2542 		if (err)
2543 			goto err_vq;
2544 	}
2545 
2546 	return 0;
2547 
2548 err_vq:
2549 	for (--i; i >= 0; i--)
2550 		teardown_vq(ndev, &ndev->vqs[i]);
2551 
2552 	return err;
2553 }
2554 
2555 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2556 {
2557 	struct mlx5_vdpa_virtqueue *mvq;
2558 	int i;
2559 
2560 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2561 		mvq = &ndev->vqs[i];
2562 		if (!mvq->initialized)
2563 			continue;
2564 
2565 		teardown_vq(ndev, mvq);
2566 	}
2567 }
2568 
2569 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2570 {
2571 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2572 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2573 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2574 			mvdev->max_idx = mvdev->max_vqs;
2575 		} else {
2576 			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2577 			 * CVQ gets index 2.
2578 			 */
2579 			mvdev->max_idx = 2;
2580 		}
2581 	} else {
2582 		/* Two data virtqueues only: one for rx and one for tx */
2583 		mvdev->max_idx = 1;
2584 	}
2585 }
2586 
2587 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2588 {
2589 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2590 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2591 	int err;
2592 
2593 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2594 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2595 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2596 	if (vport)
2597 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2598 
2599 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2600 	if (err)
2601 		return 0;
2602 
2603 	return MLX5_GET(query_vport_state_out, out, state);
2604 }
2605 
2606 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2607 {
2608 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2609 	    VPORT_STATE_UP)
2610 		return true;
2611 
2612 	return false;
2613 }
2614 
2615 static void update_carrier(struct work_struct *work)
2616 {
2617 	struct mlx5_vdpa_wq_ent *wqent;
2618 	struct mlx5_vdpa_dev *mvdev;
2619 	struct mlx5_vdpa_net *ndev;
2620 
2621 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2622 	mvdev = wqent->mvdev;
2623 	ndev = to_mlx5_vdpa_ndev(mvdev);
2624 	if (get_link_state(mvdev))
2625 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2626 	else
2627 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2628 
2629 	if (ndev->config_cb.callback)
2630 		ndev->config_cb.callback(ndev->config_cb.private);
2631 
2632 	kfree(wqent);
2633 }
2634 
2635 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2636 {
2637 	struct mlx5_vdpa_wq_ent *wqent;
2638 
2639 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2640 	if (!wqent)
2641 		return -ENOMEM;
2642 
2643 	wqent->mvdev = &ndev->mvdev;
2644 	INIT_WORK(&wqent->work, update_carrier);
2645 	queue_work(ndev->mvdev.wq, &wqent->work);
2646 	return 0;
2647 }
2648 
2649 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2650 {
2651 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2652 	struct mlx5_eqe *eqe = param;
2653 	int ret = NOTIFY_DONE;
2654 
2655 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2656 		switch (eqe->sub_type) {
2657 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2658 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2659 			if (queue_link_work(ndev))
2660 				return NOTIFY_DONE;
2661 
2662 			ret = NOTIFY_OK;
2663 			break;
2664 		default:
2665 			return NOTIFY_DONE;
2666 		}
2667 		return ret;
2668 	}
2669 	return ret;
2670 }
2671 
2672 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2673 {
2674 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2675 		return;
2676 
2677 	ndev->nb.notifier_call = event_handler;
2678 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2679 	ndev->nb_registered = true;
2680 	queue_link_work(ndev);
2681 }
2682 
2683 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2684 {
2685 	if (!ndev->nb_registered)
2686 		return;
2687 
2688 	ndev->nb_registered = false;
2689 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2690 	if (ndev->mvdev.wq)
2691 		flush_workqueue(ndev->mvdev.wq);
2692 }
2693 
2694 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2695 {
2696 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2697 }
2698 
2699 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2700 {
2701 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2702 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2703 	int err;
2704 
2705 	print_features(mvdev, features, true);
2706 
2707 	err = verify_driver_features(mvdev, features);
2708 	if (err)
2709 		return err;
2710 
2711 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2712 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2713 		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2714 	else
2715 		ndev->rqt_size = 1;
2716 
2717 	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2718 	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2719 	 *
2720 	 * Multiqueue is disabled by default.
2721 	 * The driver enables multiqueue by sending a command using class
2722 	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2723 	 * operation, as follows: ...
2724 	 */
2725 	ndev->cur_num_vqs = 2;
2726 
2727 	update_cvq_info(mvdev);
2728 	return err;
2729 }
2730 
2731 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2732 {
2733 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2734 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2735 
2736 	ndev->config_cb = *cb;
2737 }
2738 
2739 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2740 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2741 {
2742 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2743 }
2744 
2745 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2746 {
2747 	return VIRTIO_ID_NET;
2748 }
2749 
2750 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2751 {
2752 	return PCI_VENDOR_ID_MELLANOX;
2753 }
2754 
2755 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2756 {
2757 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2758 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2759 
2760 	print_status(mvdev, ndev->mvdev.status, false);
2761 	return ndev->mvdev.status;
2762 }
2763 
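/* Snapshot a virtqueue's state (indices, ring addresses, size, mapping) into
 * its restore_info so the queue can be recreated after the driver resources
 * are torn down, e.g. across a memory map change.
 */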
2764 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2765 {
2766 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2767 	struct mlx5_virtq_attr attr = {};
2768 	int err;
2769 
2770 	if (mvq->initialized) {
2771 		err = query_virtqueue(ndev, mvq, &attr);
2772 		if (err)
2773 			return err;
2774 	}
2775 
2776 	ri->avail_index = attr.available_index;
2777 	ri->used_index = attr.used_index;
2778 	ri->ready = mvq->ready;
2779 	ri->num_ent = mvq->num_ent;
2780 	ri->desc_addr = mvq->desc_addr;
2781 	ri->device_addr = mvq->device_addr;
2782 	ri->driver_addr = mvq->driver_addr;
2783 	ri->map = mvq->map;
2784 	ri->restore = true;
2785 	return 0;
2786 }
2787 
2788 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2789 {
2790 	int i;
2791 
2792 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2793 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2794 		save_channel_info(ndev, &ndev->vqs[i]);
2795 	}
2796 	return 0;
2797 }
2798 
2799 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2800 {
2801 	int i;
2802 
2803 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2804 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2805 }
2806 
2807 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2808 {
2809 	struct mlx5_vdpa_virtqueue *mvq;
2810 	struct mlx5_vq_restore_info *ri;
2811 	int i;
2812 
2813 	mlx5_clear_vqs(ndev);
2814 	init_mvqs(ndev);
2815 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2816 		mvq = &ndev->vqs[i];
2817 		ri = &mvq->ri;
2818 		if (!ri->restore)
2819 			continue;
2820 
2821 		mvq->avail_idx = ri->avail_index;
2822 		mvq->used_idx = ri->used_index;
2823 		mvq->ready = ri->ready;
2824 		mvq->num_ent = ri->num_ent;
2825 		mvq->desc_addr = ri->desc_addr;
2826 		mvq->device_addr = ri->device_addr;
2827 		mvq->driver_addr = ri->driver_addr;
2828 		mvq->map = ri->map;
2829 	}
2830 }
2831 
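/* Apply a new memory mapping for the given ASID. Devices that are not
 * resumable must tear down and re-create all driver resources around the MR
 * update; resumable devices only need their virtqueues suspended, marked for
 * MKEY modification and then resumed.
 */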
2832 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2833 				struct mlx5_vdpa_mr *new_mr,
2834 				unsigned int asid)
2835 {
2836 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2837 	bool teardown = !is_resumable(ndev);
2838 	int err;
2839 
2840 	suspend_vqs(ndev);
2841 	if (teardown) {
2842 		err = save_channels_info(ndev);
2843 		if (err)
2844 			return err;
2845 
2846 		teardown_driver(ndev);
2847 	}
2848 
2849 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
2850 
2851 	for (int i = 0; i < ndev->cur_num_vqs; i++)
2852 		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
2853 						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
2854 
2855 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2856 		return 0;
2857 
2858 	if (teardown) {
2859 		restore_channels_info(ndev);
2860 		err = setup_driver(mvdev);
2861 		if (err)
2862 			return err;
2863 	}
2864 
2865 	resume_vqs(ndev);
2866 
2867 	return 0;
2868 }
2869 
2870 /* reslock must be held for this function */
2871 static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2872 {
2873 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2874 	int err;
2875 
2876 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2877 
2878 	if (ndev->setup) {
2879 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2880 		err = 0;
2881 		goto out;
2882 	}
2883 	mlx5_vdpa_add_debugfs(ndev);
2884 
2885 	err = read_umem_params(ndev);
2886 	if (err)
2887 		goto err_setup;
2888 
2889 	err = setup_virtqueues(mvdev);
2890 	if (err) {
2891 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2892 		goto err_setup;
2893 	}
2894 
2895 	err = create_rqt(ndev);
2896 	if (err) {
2897 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2898 		goto err_rqt;
2899 	}
2900 
2901 	err = create_tir(ndev);
2902 	if (err) {
2903 		mlx5_vdpa_warn(mvdev, "create_tir\n");
2904 		goto err_tir;
2905 	}
2906 
2907 	err = setup_steering(ndev);
2908 	if (err) {
2909 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2910 		goto err_fwd;
2911 	}
2912 	ndev->setup = true;
2913 
2914 	return 0;
2915 
2916 err_fwd:
2917 	destroy_tir(ndev);
2918 err_tir:
2919 	destroy_rqt(ndev);
2920 err_rqt:
2921 	teardown_virtqueues(ndev);
2922 err_setup:
2923 	mlx5_vdpa_remove_debugfs(ndev);
2924 out:
2925 	return err;
2926 }
2927 
2928 /* reslock must be held for this function */
2929 static void teardown_driver(struct mlx5_vdpa_net *ndev)
2930 {
2931 
2932 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2933 
2934 	if (!ndev->setup)
2935 		return;
2936 
2937 	mlx5_vdpa_remove_debugfs(ndev);
2938 	teardown_steering(ndev);
2939 	destroy_tir(ndev);
2940 	destroy_rqt(ndev);
2941 	teardown_virtqueues(ndev);
2942 	ndev->setup = false;
2943 }
2944 
2945 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2946 {
2947 	int i;
2948 
2949 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2950 		ndev->vqs[i].ready = false;
2951 		ndev->vqs[i].modified_fields = 0;
2952 	}
2953 
2954 	ndev->mvdev.cvq.ready = false;
2955 }
2956 
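/* (Re)initialize the control VQ vringh from the ring addresses programmed by
 * the driver, preserving last_avail_idx across the re-initialization.
 */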
2957 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2958 {
2959 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2960 	int err = 0;
2961 
2962 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2963 		u16 idx = cvq->vring.last_avail_idx;
2964 
2965 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2966 					MLX5_CVQ_MAX_ENT, false,
2967 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2968 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2969 					(struct vring_used *)(uintptr_t)cvq->device_addr);
2970 
2971 		if (!err)
2972 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2973 	}
2974 	return err;
2975 }
2976 
2977 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2978 {
2979 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2980 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2981 	int err;
2982 
2983 	print_status(mvdev, status, true);
2984 
2985 	down_write(&ndev->reslock);
2986 
2987 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2988 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2989 			err = setup_cvq_vring(mvdev);
2990 			if (err) {
2991 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2992 				goto err_setup;
2993 			}
2994 			register_link_notifier(ndev);
2995 			err = setup_driver(mvdev);
2996 			if (err) {
2997 				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2998 				goto err_driver;
2999 			}
3000 		} else {
3001 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3002 			goto err_clear;
3003 		}
3004 	}
3005 
3006 	ndev->mvdev.status = status;
3007 	up_write(&ndev->reslock);
3008 	return;
3009 
3010 err_driver:
3011 	unregister_link_notifier(ndev);
3012 err_setup:
3013 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3014 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3015 err_clear:
3016 	up_write(&ndev->reslock);
3017 }
3018 
3019 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3020 {
3021 	int i;
3022 
3023 	/* By default, all groups are mapped to ASID 0 */
3024 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3025 		mvdev->group2asid[i] = 0;
3026 }
3027 
3028 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3029 {
3030 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3031 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3032 
3033 	print_status(mvdev, 0, true);
3034 	mlx5_vdpa_info(mvdev, "performing device reset\n");
3035 
3036 	down_write(&ndev->reslock);
3037 	unregister_link_notifier(ndev);
3038 	teardown_driver(ndev);
3039 	clear_vqs_ready(ndev);
3040 	if (flags & VDPA_RESET_F_CLEAN_MAP)
3041 		mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3042 	ndev->mvdev.status = 0;
3043 	ndev->mvdev.suspended = false;
3044 	ndev->cur_num_vqs = 0;
3045 	ndev->mvdev.cvq.received_desc = 0;
3046 	ndev->mvdev.cvq.completed_desc = 0;
3047 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3048 	ndev->mvdev.actual_features = 0;
3049 	init_group_to_asid_map(mvdev);
3050 	++mvdev->generation;
3051 
3052 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3053 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3054 		if (mlx5_vdpa_create_dma_mr(mvdev))
3055 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
3056 	}
3057 	up_write(&ndev->reslock);
3058 
3059 	return 0;
3060 }
3061 
3062 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3063 {
3064 	return mlx5_vdpa_compat_reset(vdev, 0);
3065 }
3066 
3067 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3068 {
3069 	return sizeof(struct virtio_net_config);
3070 }
3071 
3072 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3073 				 unsigned int len)
3074 {
3075 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3076 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3077 
3078 	if (offset + len <= sizeof(struct virtio_net_config))
3079 		memcpy(buf, (u8 *)&ndev->config + offset, len);
3080 }
3081 
3082 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3083 				 unsigned int len)
3084 {
3085 	/* not supported */
3086 }
3087 
3088 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3089 {
3090 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3091 
3092 	return mvdev->generation;
3093 }
3094 
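/* Build (or clear, for an empty iotlb) the MR for the given ASID. If an MR
 * already exists for that ASID, the change-map path is taken, which may
 * require tearing down and re-creating the driver resources.
 */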
3095 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3096 			unsigned int asid)
3097 {
3098 	struct mlx5_vdpa_mr *new_mr;
3099 	int err;
3100 
3101 	if (asid >= MLX5_VDPA_NUM_AS)
3102 		return -EINVAL;
3103 
3104 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3105 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3106 		if (IS_ERR(new_mr)) {
3107 			err = PTR_ERR(new_mr);
3108 			mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err);
3109 			return err;
3110 		}
3111 	} else {
3112 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3113 		new_mr = NULL;
3114 	}
3115 
3116 	if (!mvdev->mr[asid]) {
3117 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3118 	} else {
3119 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3120 		if (err) {
3121 			mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err);
3122 			goto out_err;
3123 		}
3124 	}
3125 
3126 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3127 
3128 out_err:
3129 	mlx5_vdpa_put_mr(mvdev, new_mr);
3130 	return err;
3131 }
3132 
3133 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3134 			     struct vhost_iotlb *iotlb)
3135 {
3136 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3137 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3138 	int err;
3139 
3140 	down_write(&ndev->reslock);
3141 	err = set_map_data(mvdev, iotlb, asid);
3142 	up_write(&ndev->reslock);
3143 	return err;
3144 }
3145 
3146 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3147 {
3148 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3149 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3150 	int err;
3151 
3152 	down_write(&ndev->reslock);
3153 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3154 	up_write(&ndev->reslock);
3155 	return err;
3156 }
3157 
3158 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3159 {
3160 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3161 
3162 	if (is_ctrl_vq_idx(mvdev, idx))
3163 		return &vdev->dev;
3164 
3165 	return mvdev->vdev.dma_dev;
3166 }
3167 
3168 static void free_irqs(struct mlx5_vdpa_net *ndev)
3169 {
3170 	struct mlx5_vdpa_irq_pool_entry *ent;
3171 	int i;
3172 
3173 	if (!msix_mode_supported(&ndev->mvdev))
3174 		return;
3175 
3176 	if (!ndev->irqp.entries)
3177 		return;
3178 
3179 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3180 		ent = ndev->irqp.entries + i;
3181 		if (ent->map.virq)
3182 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3183 	}
3184 	kfree(ndev->irqp.entries);
3185 }
3186 
3187 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3188 {
3189 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3190 	struct mlx5_core_dev *pfmdev;
3191 	struct mlx5_vdpa_net *ndev;
3192 
3193 	ndev = to_mlx5_vdpa_ndev(mvdev);
3194 
3195 	free_resources(ndev);
3196 	mlx5_vdpa_destroy_mr_resources(mvdev);
3197 	if (!is_zero_ether_addr(ndev->config.mac)) {
3198 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3199 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3200 	}
3201 	mlx5_vdpa_free_resources(&ndev->mvdev);
3202 	free_irqs(ndev);
3203 	kfree(ndev->event_cbs);
3204 	kfree(ndev->vqs);
3205 }
3206 
3207 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3208 {
3209 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3210 	struct vdpa_notification_area ret = {};
3211 	struct mlx5_vdpa_net *ndev;
3212 	phys_addr_t addr;
3213 
3214 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3215 		return ret;
3216 
3217 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
3218 	 * notification to avoid the risk of mapping pages that contain the BARs
3219 	 * of more than one SF.
3220 	 */
3221 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3222 		return ret;
3223 
3224 	ndev = to_mlx5_vdpa_ndev(mvdev);
3225 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3226 	ret.addr = addr;
3227 	ret.size = PAGE_SIZE;
3228 	return ret;
3229 }
3230 
3231 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3232 {
3233 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3234 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3235 	struct mlx5_vdpa_virtqueue *mvq;
3236 
3237 	if (!is_index_valid(mvdev, idx))
3238 		return -EINVAL;
3239 
3240 	if (is_ctrl_vq_idx(mvdev, idx))
3241 		return -EOPNOTSUPP;
3242 
3243 	mvq = &ndev->vqs[idx];
3244 	if (!mvq->map.virq)
3245 		return -EOPNOTSUPP;
3246 
3247 	return mvq->map.virq;
3248 }
3249 
3250 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3251 {
3252 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3253 
3254 	return mvdev->actual_features;
3255 }
3256 
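/* Query the virtio_q_counters object of a virtqueue for the number of
 * received and completed descriptors. Only valid while the queue is in the
 * RDY state.
 */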
3257 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3258 			     u64 *received_desc, u64 *completed_desc)
3259 {
3260 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3261 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3262 	void *cmd_hdr;
3263 	void *ctx;
3264 	int err;
3265 
3266 	if (!counters_supported(&ndev->mvdev))
3267 		return -EOPNOTSUPP;
3268 
3269 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3270 		return -EAGAIN;
3271 
3272 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3273 
3274 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3275 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3276 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3277 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3278 
3279 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3280 	if (err)
3281 		return err;
3282 
3283 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3284 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3285 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3286 	return 0;
3287 }
3288 
3289 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3290 					 struct sk_buff *msg,
3291 					 struct netlink_ext_ack *extack)
3292 {
3293 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3294 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3295 	struct mlx5_vdpa_virtqueue *mvq;
3296 	struct mlx5_control_vq *cvq;
3297 	u64 received_desc;
3298 	u64 completed_desc;
3299 	int err = 0;
3300 
3301 	down_read(&ndev->reslock);
3302 	if (!is_index_valid(mvdev, idx)) {
3303 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3304 		err = -EINVAL;
3305 		goto out_err;
3306 	}
3307 
3308 	if (idx == ctrl_vq_idx(mvdev)) {
3309 		cvq = &mvdev->cvq;
3310 		received_desc = cvq->received_desc;
3311 		completed_desc = cvq->completed_desc;
3312 		goto out;
3313 	}
3314 
3315 	mvq = &ndev->vqs[idx];
3316 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3317 	if (err) {
3318 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3319 		goto out_err;
3320 	}
3321 
3322 out:
3323 	err = -EMSGSIZE;
3324 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3325 		goto out_err;
3326 
3327 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3328 			      VDPA_ATTR_PAD))
3329 		goto out_err;
3330 
3331 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3332 		goto out_err;
3333 
3334 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3335 			      VDPA_ATTR_PAD))
3336 		goto out_err;
3337 
3338 	err = 0;
3339 out_err:
3340 	up_read(&ndev->reslock);
3341 	return err;
3342 }
3343 
3344 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3345 {
3346 	struct mlx5_control_vq *cvq;
3347 
3348 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3349 		return;
3350 
3351 	cvq = &mvdev->cvq;
3352 	cvq->ready = false;
3353 }
3354 
3355 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3356 {
3357 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3358 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3359 	struct mlx5_vdpa_virtqueue *mvq;
3360 	int i;
3361 
3362 	mlx5_vdpa_info(mvdev, "suspending device\n");
3363 
3364 	down_write(&ndev->reslock);
3365 	unregister_link_notifier(ndev);
3366 	for (i = 0; i < ndev->cur_num_vqs; i++) {
3367 		mvq = &ndev->vqs[i];
3368 		suspend_vq(ndev, mvq);
3369 	}
3370 	mlx5_vdpa_cvq_suspend(mvdev);
3371 	mvdev->suspended = true;
3372 	up_write(&ndev->reslock);
3373 	return 0;
3374 }
3375 
3376 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3377 {
3378 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3379 	struct mlx5_vdpa_net *ndev;
3380 
3381 	ndev = to_mlx5_vdpa_ndev(mvdev);
3382 
3383 	mlx5_vdpa_info(mvdev, "resuming device\n");
3384 
3385 	down_write(&ndev->reslock);
3386 	mvdev->suspended = false;
3387 	resume_vqs(ndev);
3388 	register_link_notifier(ndev);
3389 	up_write(&ndev->reslock);
3390 	return 0;
3391 }
3392 
3393 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3394 			       unsigned int asid)
3395 {
3396 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3397 	int err = 0;
3398 
3399 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3400 		return -EINVAL;
3401 
3402 	mvdev->group2asid[group] = asid;
3403 
3404 	mutex_lock(&mvdev->mr_mtx);
3405 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid])
3406 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid);
3407 	mutex_unlock(&mvdev->mr_mtx);
3408 
3409 	return err;
3410 }
3411 
3412 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3413 	.set_vq_address = mlx5_vdpa_set_vq_address,
3414 	.set_vq_num = mlx5_vdpa_set_vq_num,
3415 	.kick_vq = mlx5_vdpa_kick_vq,
3416 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3417 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3418 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3419 	.set_vq_state = mlx5_vdpa_set_vq_state,
3420 	.get_vq_state = mlx5_vdpa_get_vq_state,
3421 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3422 	.get_vq_notification = mlx5_get_vq_notification,
3423 	.get_vq_irq = mlx5_get_vq_irq,
3424 	.get_vq_align = mlx5_vdpa_get_vq_align,
3425 	.get_vq_group = mlx5_vdpa_get_vq_group,
3426 	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3427 	.get_device_features = mlx5_vdpa_get_device_features,
3428 	.get_backend_features = mlx5_vdpa_get_backend_features,
3429 	.set_driver_features = mlx5_vdpa_set_driver_features,
3430 	.get_driver_features = mlx5_vdpa_get_driver_features,
3431 	.set_config_cb = mlx5_vdpa_set_config_cb,
3432 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3433 	.get_device_id = mlx5_vdpa_get_device_id,
3434 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3435 	.get_status = mlx5_vdpa_get_status,
3436 	.set_status = mlx5_vdpa_set_status,
3437 	.reset = mlx5_vdpa_reset,
3438 	.compat_reset = mlx5_vdpa_compat_reset,
3439 	.get_config_size = mlx5_vdpa_get_config_size,
3440 	.get_config = mlx5_vdpa_get_config,
3441 	.set_config = mlx5_vdpa_set_config,
3442 	.get_generation = mlx5_vdpa_get_generation,
3443 	.set_map = mlx5_vdpa_set_map,
3444 	.reset_map = mlx5_vdpa_reset_map,
3445 	.set_group_asid = mlx5_set_group_asid,
3446 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3447 	.free = mlx5_vdpa_free,
3448 	.suspend = mlx5_vdpa_suspend,
3449 	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
3450 };
3451 
3452 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3453 {
3454 	u16 hw_mtu;
3455 	int err;
3456 
3457 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3458 	if (err)
3459 		return err;
3460 
3461 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3462 	return 0;
3463 }
3464 
3465 static int alloc_resources(struct mlx5_vdpa_net *ndev)
3466 {
3467 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3468 	int err;
3469 
3470 	if (res->valid) {
3471 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3472 		return -EEXIST;
3473 	}
3474 
3475 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3476 	if (err)
3477 		return err;
3478 
3479 	err = create_tis(ndev);
3480 	if (err)
3481 		goto err_tis;
3482 
3483 	res->valid = true;
3484 
3485 	return 0;
3486 
3487 err_tis:
3488 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3489 	return err;
3490 }
3491 
3492 static void free_resources(struct mlx5_vdpa_net *ndev)
3493 {
3494 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3495 
3496 	if (!res->valid)
3497 		return;
3498 
3499 	destroy_tis(ndev);
3500 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3501 	res->valid = false;
3502 }
3503 
3504 static void init_mvqs(struct mlx5_vdpa_net *ndev)
3505 {
3506 	struct mlx5_vdpa_virtqueue *mvq;
3507 	int i;
3508 
3509 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3510 		mvq = &ndev->vqs[i];
3511 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3512 		mvq->index = i;
3513 		mvq->ndev = ndev;
3514 		mvq->fwqp.fw = true;
3515 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3516 	}
3517 	for (; i < ndev->mvdev.max_vqs; i++) {
3518 		mvq = &ndev->vqs[i];
3519 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3520 		mvq->index = i;
3521 		mvq->ndev = ndev;
3522 	}
3523 }
3524 
3525 struct mlx5_vdpa_mgmtdev {
3526 	struct vdpa_mgmt_dev mgtdev;
3527 	struct mlx5_adev *madev;
3528 	struct mlx5_vdpa_net *ndev;
3529 	struct vdpa_config_ops vdpa_ops;
3530 };
3531 
3532 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3533 {
3534 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3535 	void *in;
3536 	int err;
3537 
3538 	in = kvzalloc(inlen, GFP_KERNEL);
3539 	if (!in)
3540 		return -ENOMEM;
3541 
3542 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3543 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3544 		 mtu + MLX5V_ETH_HARD_MTU);
3545 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3546 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3547 
3548 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3549 
3550 	kvfree(in);
3551 	return err;
3552 }
3553 
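/* Best-effort pre-allocation of one dynamic MSI-X vector per virtqueue,
 * done only when MSI-X mode is supported and a PCI device is present.
 * Allocation stops quietly at the first failure.
 */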
3554 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3555 {
3556 	struct mlx5_vdpa_irq_pool_entry *ent;
3557 	int i;
3558 
3559 	if (!msix_mode_supported(&ndev->mvdev))
3560 		return;
3561 
3562 	if (!ndev->mvdev.mdev->pdev)
3563 		return;
3564 
3565 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3566 	if (!ndev->irqp.entries)
3567 		return;
3568 
3569 
3570 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3571 		ent = ndev->irqp.entries + i;
3572 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3573 			 dev_name(&ndev->mvdev.vdev.dev), i);
3574 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3575 		if (!ent->map.virq)
3576 			return;
3577 
3578 		ndev->irqp.num_ent++;
3579 	}
3580 }
3581 
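/*
 * Management op for "vdpa dev add": validate the requested provisioning
 * attributes against device capabilities, allocate and initialize the vdpa
 * net device (virtqueues, IRQs, config space MAC/MTU/MQ fields), set up the
 * DMA MR and networking resources, and register the device with max_vqs
 * data virtqueues plus one control virtqueue.
 */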
3582 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3583 			     const struct vdpa_dev_set_config *add_config)
3584 {
3585 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3586 	struct virtio_net_config *config;
3587 	struct mlx5_core_dev *pfmdev;
3588 	struct mlx5_vdpa_dev *mvdev;
3589 	struct mlx5_vdpa_net *ndev;
3590 	struct mlx5_core_dev *mdev;
3591 	u64 device_features;
3592 	u32 max_vqs;
3593 	u16 mtu;
3594 	int err;
3595 
3596 	if (mgtdev->ndev)
3597 		return -ENOSPC;
3598 
3599 	mdev = mgtdev->madev->mdev;
3600 	device_features = mgtdev->mgtdev.supported_features;
3601 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3602 		if (add_config->device_features & ~device_features) {
3603 			dev_warn(mdev->device,
3604 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3605 				 add_config->device_features, device_features);
3606 			return -EINVAL;
3607 		}
3608 		device_features &= add_config->device_features;
3609 	} else {
3610 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3611 	}
3612 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3613 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3614 		dev_warn(mdev->device,
3615 			 "Must provision minimum features 0x%llx for this device\n",
3616 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3617 		return -EOPNOTSUPP;
3618 	}
3619 
3620 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3621 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3622 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3623 		return -EOPNOTSUPP;
3624 	}
3625 
3626 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3627 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3628 	if (max_vqs < 2) {
3629 		dev_warn(mdev->device,
3630 			 "%u virtqueues are supported. At least 2 are required\n",
3631 			 max_vqs);
3632 		return -EAGAIN;
3633 	}
3634 
3635 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3636 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3637 			return -EINVAL;
3638 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3639 	} else {
3640 		max_vqs = 2;
3641 	}
3642 
3643 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3644 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3645 	if (IS_ERR(ndev))
3646 		return PTR_ERR(ndev);
3647 
3648 	ndev->mvdev.max_vqs = max_vqs;
3649 	mvdev = &ndev->mvdev;
3650 	mvdev->mdev = mdev;
3651 
3652 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3653 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3654 	if (!ndev->vqs || !ndev->event_cbs) {
3655 		err = -ENOMEM;
3656 		goto err_alloc;
3657 	}
3658 
3659 	init_mvqs(ndev);
3660 	allocate_irqs(ndev);
3661 	init_rwsem(&ndev->reslock);
3662 	config = &ndev->config;
3663 
3664 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3665 		err = config_func_mtu(mdev, add_config->net.mtu);
3666 		if (err)
3667 			goto err_alloc;
3668 	}
3669 
3670 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3671 		err = query_mtu(mdev, &mtu);
3672 		if (err)
3673 			goto err_alloc;
3674 
3675 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3676 	}
3677 
3678 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3679 		if (get_link_state(mvdev))
3680 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3681 		else
3682 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3683 	}
3684 
3685 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3686 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3687 	/* Don't bother setting a mac address in config if _F_MAC is not going to be provisioned */
3688 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3689 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3690 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3691 		if (err)
3692 			goto err_alloc;
3693 	}
3694 
3695 	if (!is_zero_ether_addr(config->mac)) {
3696 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3697 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3698 		if (err)
3699 			goto err_alloc;
3700 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3701 		/*
3702 		 * We used to clear _F_MAC feature bit if seeing
3703 		 * zero mac address when device features are not
3704 		 * specifically provisioned. Keep the behaviour
3705 		 * so old scripts do not break.
3706 		 */
3707 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3708 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3709 		/* Don't provision zero mac address for _F_MAC */
3710 		mlx5_vdpa_warn(&ndev->mvdev,
3711 			       "No mac address provisioned?\n");
3712 		err = -EINVAL;
3713 		goto err_alloc;
3714 	}
3715 
3716 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3717 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3718 
3719 	ndev->mvdev.mlx_features = device_features;
3720 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3721 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3722 	if (err)
3723 		goto err_mpfs;
3724 
3725 	INIT_LIST_HEAD(&mvdev->mr_list_head);
3726 
3727 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3728 		err = mlx5_vdpa_create_dma_mr(mvdev);
3729 		if (err)
3730 			goto err_res;
3731 	}
3732 
3733 	err = alloc_resources(ndev);
3734 	if (err)
3735 		goto err_mr;
3736 
3737 	ndev->cvq_ent.mvdev = mvdev;
3738 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3739 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3740 	if (!mvdev->wq) {
3741 		err = -ENOMEM;
3742 		goto err_res2;
3743 	}
3744 
3745 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3746 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3747 	if (err)
3748 		goto err_reg;
3749 
3750 	mgtdev->ndev = ndev;
3751 	return 0;
3752 
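	/* Error unwind: undo the setup steps in reverse order. */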
3753 err_reg:
3754 	destroy_workqueue(mvdev->wq);
3755 err_res2:
3756 	free_resources(ndev);
3757 err_mr:
3758 	mlx5_vdpa_destroy_mr_resources(mvdev);
3759 err_res:
3760 	mlx5_vdpa_free_resources(&ndev->mvdev);
3761 err_mpfs:
3762 	if (!is_zero_ether_addr(config->mac))
3763 		mlx5_mpfs_del_mac(pfmdev, config->mac);
3764 err_alloc:
3765 	put_device(&mvdev->vdev.dev);
3766 	return err;
3767 }
3768 
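/*
 * Management op for "vdpa dev del": stop link state notifications,
 * unregister the vdpa device and drain/destroy its workqueue. The device
 * memory itself is released through the ->free callback.
 */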
3769 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3770 {
3771 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3772 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3773 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3774 	struct workqueue_struct *wq;
3775 
3776 	unregister_link_notifier(ndev);
3777 	_vdpa_unregister_device(dev);
3778 	wq = mvdev->wq;
3779 	mvdev->wq = NULL;
3780 	destroy_workqueue(wq);
3781 	mgtdev->ndev = NULL;
3782 }
3783 
3784 static const struct vdpa_mgmtdev_ops mdev_ops = {
3785 	.dev_add = mlx5_vdpa_dev_add,
3786 	.dev_del = mlx5_vdpa_dev_del,
3787 };
3788 
3789 static struct virtio_device_id id_table[] = {
3790 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3791 	{ 0 },
3792 };
3793 
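/*
 * Auxiliary driver probe: set up a vdpa management device for this mlx5
 * function, take a private copy of the config ops (clearing callbacks the
 * device does not support) and register it with the vdpa core.
 */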
3794 static int mlx5v_probe(struct auxiliary_device *adev,
3795 		       const struct auxiliary_device_id *id)
3797 {
3798 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3799 	struct mlx5_core_dev *mdev = madev->mdev;
3800 	struct mlx5_vdpa_mgmtdev *mgtdev;
3801 	int err;
3802 
3803 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3804 	if (!mgtdev)
3805 		return -ENOMEM;
3806 
3807 	mgtdev->mgtdev.ops = &mdev_ops;
3808 	mgtdev->mgtdev.device = mdev->device;
3809 	mgtdev->mgtdev.id_table = id_table;
3810 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3811 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3812 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3813 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3814 	mgtdev->mgtdev.max_supported_vqs =
3815 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3816 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3817 	mgtdev->madev = madev;
3818 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
3819 
3820 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
3821 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
3822 
3823 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
3824 		mgtdev->vdpa_ops.resume = NULL;
3825 
3826 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3827 	if (err)
3828 		goto reg_err;
3829 
3830 	auxiliary_set_drvdata(adev, mgtdev);
3831 
3832 	return 0;
3833 
3834 reg_err:
3835 	kfree(mgtdev);
3836 	return err;
3837 }
3838 
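/* Auxiliary driver remove: unregister the management device and free it. */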
3839 static void mlx5v_remove(struct auxiliary_device *adev)
3840 {
3841 	struct mlx5_vdpa_mgmtdev *mgtdev;
3842 
3843 	mgtdev = auxiliary_get_drvdata(adev);
3844 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3845 	kfree(mgtdev);
3846 }
3847 
3848 static const struct auxiliary_device_id mlx5v_id_table[] = {
3849 	{ .name = MLX5_ADEV_NAME ".vnet", },
3850 	{},
3851 };
3852 
3853 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3854 
3855 static struct auxiliary_driver mlx5v_driver = {
3856 	.name = "vnet",
3857 	.probe = mlx5v_probe,
3858 	.remove = mlx5v_remove,
3859 	.id_table = mlx5v_id_table,
3860 };
3861 
3862 module_auxiliary_driver(mlx5v_driver);
3863