xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision d0309c054362a235077327b46f727bc48878a3bc)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
52  * 5.1.6.5.5 "Device operation in multiqueue mode":
53  *
54  * Multiqueue is disabled by default.
55  * The driver enables multiqueue by sending a command using class
56  * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
57  * operation, as follows: ...
58  */
59 #define MLX5V_DEFAULT_VQ_COUNT 2
60 
61 #define MLX5V_DEFAULT_VQ_SIZE 256
62 
63 struct mlx5_vdpa_cq_buf {
64 	struct mlx5_frag_buf_ctrl fbc;
65 	struct mlx5_frag_buf frag_buf;
66 	int cqe_size;
67 	int nent;
68 };
69 
70 struct mlx5_vdpa_cq {
71 	struct mlx5_core_cq mcq;
72 	struct mlx5_vdpa_cq_buf buf;
73 	struct mlx5_db db;
74 	int cqe;
75 };
76 
77 struct mlx5_vdpa_umem {
78 	struct mlx5_frag_buf_ctrl fbc;
79 	struct mlx5_frag_buf frag_buf;
80 	int size;
81 	u32 id;
82 };
83 
84 struct mlx5_vdpa_qp {
85 	struct mlx5_core_qp mqp;
86 	struct mlx5_frag_buf frag_buf;
87 	struct mlx5_db db;
88 	u16 head;
89 	bool fw;
90 };
91 
92 struct mlx5_vq_restore_info {
93 	u32 num_ent;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u16 avail_index;
98 	u16 used_index;
99 	struct msi_map map;
100 	bool ready;
101 	bool restore;
102 };
103 
104 struct mlx5_vdpa_virtqueue {
105 	bool ready;
106 	u64 desc_addr;
107 	u64 device_addr;
108 	u64 driver_addr;
109 	u32 num_ent;
110 
111 	/* Resources for implementing the notification channel from the device
112 	 * to the driver. fwqp is the firmware end of an RC connection; the
113 	 * other end is vqqp used by the driver. cq is where completions are
114 	 * reported.
115 	 */
116 	struct mlx5_vdpa_cq cq;
117 	struct mlx5_vdpa_qp fwqp;
118 	struct mlx5_vdpa_qp vqqp;
119 
120 	/* umem resources are required for the virtqueue operation. Their use
121 	 * is internal to the device, and they must be provided by the driver.
122 	 */
123 	struct mlx5_vdpa_umem umem1;
124 	struct mlx5_vdpa_umem umem2;
125 	struct mlx5_vdpa_umem umem3;
126 
127 	u32 counter_set_id;
128 	bool initialized;
129 	int index;
130 	u32 virtq_id;
131 	struct mlx5_vdpa_net *ndev;
132 	u16 avail_idx;
133 	u16 used_idx;
134 	int fw_state;
135 
136 	u64 modified_fields;
137 
138 	struct mlx5_vdpa_mr *vq_mr;
139 	struct mlx5_vdpa_mr *desc_mr;
140 
141 	struct msi_map map;
142 
143 	/* keep last in the struct */
144 	struct mlx5_vq_restore_info ri;
145 };
146 
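/* A virtqueue index is valid only within the currently negotiated layout:
 * without VIRTIO_NET_F_MQ there is a single RX/TX pair (plus the control VQ
 * at index 2 when VIRTIO_NET_F_CTRL_VQ is negotiated); with MQ any index up
 * to max_idx is accepted.
 */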
147 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
148 {
149 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
150 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
151 			return idx < 2;
152 		else
153 			return idx < 3;
154 	}
155 
156 	return idx <= mvdev->max_idx;
157 }
158 
159 static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
160 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
161 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
162 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
163 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
164 
165 static bool mlx5_vdpa_debug;
166 
167 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
168 	do {                                                                                       \
169 		if (features & BIT_ULL(_feature))                                                  \
170 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
171 	} while (0)
172 
173 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
174 	do {                                                                                       \
175 		if (status & (_status))                                                            \
176 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
177 	} while (0)
178 
179 /* TODO: cross-endian support */
180 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
181 {
182 	return virtio_legacy_is_little_endian() ||
183 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
184 }
185 
186 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
187 {
188 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
189 }
190 
191 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
192 {
193 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
194 }
195 
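/* The control VQ always follows the data VQs: index 2 when MQ is not
 * negotiated (one RX/TX pair), mvdev->max_vqs otherwise.
 */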
196 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
197 {
198 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
199 		return 2;
200 
201 	return mvdev->max_vqs;
202 }
203 
204 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
205 {
206 	return idx == ctrl_vq_idx(mvdev);
207 }
208 
209 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
210 {
211 	if (status & ~VALID_STATUS_MASK)
212 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
213 			       status & ~VALID_STATUS_MASK);
214 
215 	if (!mlx5_vdpa_debug)
216 		return;
217 
218 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
219 	if (set && !status) {
220 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
221 		return;
222 	}
223 
224 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
225 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
226 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
227 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
228 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
229 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
230 }
231 
232 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
233 {
234 	if (features & ~VALID_FEATURES_MASK)
235 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
236 			       features & ~VALID_FEATURES_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
242 	if (!features)
243 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
244 
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
279 }
280 
281 static int create_tis(struct mlx5_vdpa_net *ndev)
282 {
283 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
284 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
285 	void *tisc;
286 	int err;
287 
288 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
289 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
290 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
291 	if (err)
292 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
293 
294 	return err;
295 }
296 
297 static void destroy_tis(struct mlx5_vdpa_net *ndev)
298 {
299 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
300 }
301 
302 #define MLX5_VDPA_CQE_SIZE 64
303 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
304 
305 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
306 {
307 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
308 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
309 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
310 	int err;
311 
312 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
313 				       ndev->mvdev.mdev->priv.numa_node);
314 	if (err)
315 		return err;
316 
317 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
318 
319 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
320 	buf->nent = nent;
321 
322 	return 0;
323 }
324 
325 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
326 {
327 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
328 
329 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
330 					ndev->mvdev.mdev->priv.numa_node);
331 }
332 
333 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
334 {
335 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
336 }
337 
338 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
339 {
340 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
341 }
342 
343 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
344 {
345 	struct mlx5_cqe64 *cqe64;
346 	void *cqe;
347 	int i;
348 
349 	for (i = 0; i < buf->nent; i++) {
350 		cqe = get_cqe(vcq, i);
351 		cqe64 = cqe;
352 		cqe64->op_own = MLX5_CQE_INVALID << 4;
353 	}
354 }
355 
356 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
357 {
358 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
359 
360 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
361 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
362 		return cqe64;
363 
364 	return NULL;
365 }
366 
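/* Advance the receive head of the driver-owned QP and publish it in the
 * doorbell record so the device sees the newly posted receive credits.
 */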
367 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
368 {
369 	vqp->head += n;
370 	vqp->db.db[0] = cpu_to_be32(vqp->head);
371 }
372 
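/* Fill the QP context for one end of the notification channel. The
 * firmware-owned QP only needs a zero-length RQ; the driver-owned QP is a
 * receive-only RC QP bound to the virtqueue's CQ.
 */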
373 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
374 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
375 {
376 	struct mlx5_vdpa_qp *vqp;
377 	__be64 *pas;
378 	void *qpc;
379 
380 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
381 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
382 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
383 	if (vqp->fw) {
384 		/* The firmware QP is allocated by the driver on behalf of the
385 		 * firmware, so most parameters can be skipped; firmware chooses them.
386 		 */
387 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
388 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
389 		MLX5_SET(qpc, qpc, no_sq, 1);
390 		return;
391 	}
392 
393 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
394 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
395 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
396 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
397 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
398 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
399 	MLX5_SET(qpc, qpc, no_sq, 1);
400 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
401 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
402 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
403 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
404 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
405 }
406 
407 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
408 {
409 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
410 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
411 					ndev->mvdev.mdev->priv.numa_node);
412 }
413 
414 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
415 {
416 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
417 }
418 
419 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
420 		     struct mlx5_vdpa_qp *vqp)
421 {
422 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
423 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
424 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
425 	void *qpc;
426 	void *in;
427 	int err;
428 
429 	if (!vqp->fw) {
430 		vqp = &mvq->vqqp;
431 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
432 		if (err)
433 			return err;
434 
435 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
436 		if (err)
437 			goto err_db;
438 		inlen += vqp->frag_buf.npages * sizeof(__be64);
439 	}
440 
441 	in = kzalloc(inlen, GFP_KERNEL);
442 	if (!in) {
443 		err = -ENOMEM;
444 		goto err_kzalloc;
445 	}
446 
447 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
448 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
449 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
450 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
451 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
452 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
453 	if (!vqp->fw)
454 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
455 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
456 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
457 	kfree(in);
458 	if (err)
459 		goto err_kzalloc;
460 
461 	vqp->mqp.uid = ndev->mvdev.res.uid;
462 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
463 
464 	if (!vqp->fw)
465 		rx_post(vqp, mvq->num_ent);
466 
467 	return 0;
468 
469 err_kzalloc:
470 	if (!vqp->fw)
471 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 err_db:
473 	if (!vqp->fw)
474 		rq_buf_free(ndev, vqp);
475 
476 	return err;
477 }
478 
479 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
480 {
481 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
482 
483 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
484 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
485 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
486 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
487 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
488 	if (!vqp->fw) {
489 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
490 		rq_buf_free(ndev, vqp);
491 	}
492 }
493 
494 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
495 {
496 	return get_sw_cqe(cq, cq->mcq.cons_index);
497 }
498 
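/* Consume a single CQE if one is owned by software; -EAGAIN means the CQ is
 * empty.
 */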
499 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
500 {
501 	struct mlx5_cqe64 *cqe64;
502 
503 	cqe64 = next_cqe_sw(vcq);
504 	if (!cqe64)
505 		return -EAGAIN;
506 
507 	vcq->mcq.cons_index++;
508 	return 0;
509 }
510 
511 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
512 {
513 	struct mlx5_vdpa_net *ndev = mvq->ndev;
514 	struct vdpa_callback *event_cb;
515 
516 	event_cb = &ndev->event_cbs[mvq->index];
517 	mlx5_cq_set_ci(&mvq->cq.mcq);
518 
519 	/* make sure the CQ consumer update is visible to the hardware before updating
520 	 * RX doorbell record.
521 	 */
522 	dma_wmb();
523 	rx_post(&mvq->vqqp, num);
524 	if (event_cb->callback)
525 		event_cb->callback(event_cb->private);
526 }
527 
528 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
529 {
530 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
531 	struct mlx5_vdpa_net *ndev = mvq->ndev;
532 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
533 	int num = 0;
534 
535 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
536 		num++;
537 		if (num > mvq->num_ent / 2) {
538 			/* If completions keep coming while we poll, we want to
539 			 * let the hardware know that we consumed them by
540 			 * updating the doorbell record.  We also let vdpa core
541 			 * know about this so it passes it on to the virtio
542 			 * driver in the guest.
543 			 */
544 			mlx5_vdpa_handle_completions(mvq, num);
545 			num = 0;
546 		}
547 	}
548 
549 	if (num)
550 		mlx5_vdpa_handle_completions(mvq, num);
551 
552 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
553 }
554 
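/* Create the completion queue backing the notification channel of VQ @idx:
 * allocate the doorbell and fragment buffer, mark all CQEs invalid, create
 * the CQ on completion vector 0 and arm it.
 */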
555 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
556 {
557 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
558 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
559 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
560 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
561 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
562 	__be64 *pas;
563 	int inlen;
564 	void *cqc;
565 	void *in;
566 	int err;
567 	int eqn;
568 
569 	err = mlx5_db_alloc(mdev, &vcq->db);
570 	if (err)
571 		return err;
572 
573 	vcq->mcq.set_ci_db = vcq->db.db;
574 	vcq->mcq.arm_db = vcq->db.db + 1;
575 	vcq->mcq.cqe_sz = 64;
576 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
577 	vcq->cqe = num_ent;
578 
579 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
580 	if (err)
581 		goto err_db;
582 
583 	cq_frag_buf_init(vcq, &vcq->buf);
584 
585 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
586 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
587 	in = kzalloc(inlen, GFP_KERNEL);
588 	if (!in) {
589 		err = -ENOMEM;
590 		goto err_vzalloc;
591 	}
592 
593 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
594 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
595 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
596 
597 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
598 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
599 
600 	/* Use vector 0 by default. Consider adding code to choose least used
601 	 * vector.
602 	 */
603 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
604 	if (err)
605 		goto err_vec;
606 
607 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
608 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
609 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
610 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
611 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
612 
613 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
614 	if (err)
615 		goto err_vec;
616 
617 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
618 	kfree(in);
619 	return 0;
620 
621 err_vec:
622 	kfree(in);
623 err_vzalloc:
624 	cq_frag_buf_free(ndev, &vcq->buf);
625 err_db:
626 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
627 	return err;
628 }
629 
630 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
631 {
632 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
633 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
634 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
635 
636 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
637 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
638 		return;
639 	}
640 	cq_frag_buf_free(ndev, &vcq->buf);
641 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
642 }
643 
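/* Cache the umem_{1,2,3} buffer parameters from the VDPA emulation HCA
 * capabilities; set_umem_size() later sizes each umem as
 * param_a * queue_size + param_b.
 */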
644 static int read_umem_params(struct mlx5_vdpa_net *ndev)
645 {
646 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
647 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
648 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
649 	int out_size;
650 	void *caps;
651 	void *out;
652 	int err;
653 
654 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
655 	out = kzalloc(out_size, GFP_KERNEL);
656 	if (!out)
657 		return -ENOMEM;
658 
659 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
660 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
661 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
662 	if (err) {
663 		mlx5_vdpa_warn(&ndev->mvdev,
664 			"Failed reading vdpa umem capabilities with err %d\n", err);
665 		goto out;
666 	}
667 
668 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
669 
670 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
671 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
672 
673 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
674 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
675 
676 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
677 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
678 
679 out:
680 	kfree(out);
681 	return 0;
682 }
683 
684 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
685 			  struct mlx5_vdpa_umem **umemp)
686 {
687 	u32 p_a;
688 	u32 p_b;
689 
690 	switch (num) {
691 	case 1:
692 		p_a = ndev->umem_1_buffer_param_a;
693 		p_b = ndev->umem_1_buffer_param_b;
694 		*umemp = &mvq->umem1;
695 		break;
696 	case 2:
697 		p_a = ndev->umem_2_buffer_param_a;
698 		p_b = ndev->umem_2_buffer_param_b;
699 		*umemp = &mvq->umem2;
700 		break;
701 	case 3:
702 		p_a = ndev->umem_3_buffer_param_a;
703 		p_b = ndev->umem_3_buffer_param_b;
704 		*umemp = &mvq->umem3;
705 		break;
706 	}
707 
708 	(*umemp)->size = p_a * mvq->num_ent + p_b;
709 }
710 
711 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
712 {
713 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
714 }
715 
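/* Allocate the backing pages for umem @num of this virtqueue and register
 * them with firmware via CREATE_UMEM; the returned umem id is referenced
 * when the virtqueue object is created.
 */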
716 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
717 {
718 	int inlen;
719 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
720 	void *um;
721 	void *in;
722 	int err;
723 	__be64 *pas;
724 	struct mlx5_vdpa_umem *umem;
725 
726 	set_umem_size(ndev, mvq, num, &umem);
727 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
728 	if (err)
729 		return err;
730 
731 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
732 
733 	in = kzalloc(inlen, GFP_KERNEL);
734 	if (!in) {
735 		err = -ENOMEM;
736 		goto err_in;
737 	}
738 
739 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
740 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
741 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
742 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
743 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
744 
745 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
746 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
747 
748 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
749 	if (err) {
750 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
751 		goto err_cmd;
752 	}
753 
754 	kfree(in);
755 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
756 
757 	return 0;
758 
759 err_cmd:
760 	kfree(in);
761 err_in:
762 	umem_frag_buf_free(ndev, umem);
763 	return err;
764 }
765 
766 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
767 {
768 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
769 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
770 	struct mlx5_vdpa_umem *umem;
771 
772 	switch (num) {
773 	case 1:
774 		umem = &mvq->umem1;
775 		break;
776 	case 2:
777 		umem = &mvq->umem2;
778 		break;
779 	case 3:
780 		umem = &mvq->umem3;
781 		break;
782 	}
783 
784 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
785 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
786 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
787 		return;
788 
789 	umem_frag_buf_free(ndev, umem);
790 }
791 
792 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
793 {
794 	int num;
795 	int err;
796 
797 	for (num = 1; num <= 3; num++) {
798 		err = create_umem(ndev, mvq, num);
799 		if (err)
800 			goto err_umem;
801 	}
802 	return 0;
803 
804 err_umem:
805 	for (num--; num > 0; num--)
806 		umem_destroy(ndev, mvq, num);
807 
808 	return err;
809 }
810 
811 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
812 {
813 	int num;
814 
815 	for (num = 3; num > 0; num--)
816 		umem_destroy(ndev, mvq, num);
817 }
818 
819 static int get_queue_type(struct mlx5_vdpa_net *ndev)
820 {
821 	u32 type_mask;
822 
823 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
824 
825 	/* prefer split queue */
826 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
827 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
828 
829 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
830 
831 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
832 }
833 
834 static bool vq_is_tx(u16 idx)
835 {
836 	return idx % 2;
837 }
838 
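/* Bit positions of the virtio-net features inside the 16-bit
 * queue_feature_bit_mask fields of the virtqueue object; see get_features().
 */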
839 enum {
840 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
841 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
842 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
843 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
844 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
845 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
846 	MLX5_VIRTIO_NET_F_CSUM = 10,
847 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
848 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
849 };
850 
851 static u16 get_features(u64 features)
852 {
853 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
854 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
855 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
856 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
857 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
858 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
859 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
860 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
861 }
862 
863 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
864 {
865 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
866 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
867 }
868 
869 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
870 {
871 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
872 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
873 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
874 }
875 
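/* Create the VIRTIO_NET_Q object for @mvq. When @filled is true the ring
 * addresses, avail/used indices and mkeys are programmed now; otherwise the
 * existing mkeys are only flagged in modified_fields so that a later modify
 * to the ready state picks them up.
 */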
876 static int create_virtqueue(struct mlx5_vdpa_net *ndev,
877 			    struct mlx5_vdpa_virtqueue *mvq,
878 			    bool filled)
879 {
880 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
881 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
882 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
883 	struct mlx5_vdpa_mr *vq_mr;
884 	struct mlx5_vdpa_mr *vq_desc_mr;
885 	u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
886 	void *obj_context;
887 	u16 mlx_features;
888 	void *cmd_hdr;
889 	void *vq_ctx;
890 	void *in;
891 	int err;
892 
893 	err = umems_create(ndev, mvq);
894 	if (err)
895 		return err;
896 
897 	in = kzalloc(inlen, GFP_KERNEL);
898 	if (!in) {
899 		err = -ENOMEM;
900 		goto err_alloc;
901 	}
902 
903 	mlx_features = get_features(features);
904 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
905 
906 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
907 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
908 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
909 
910 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
911 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
912 		 mlx_features >> 3);
913 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
914 		 mlx_features & 7);
915 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
916 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
917 
918 	if (vq_is_tx(mvq->index))
919 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
920 
921 	if (mvq->map.virq) {
922 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
923 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
924 	} else {
925 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
926 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
927 	}
928 
929 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
930 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
931 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
932 		 !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
933 
934 	if (filled) {
935 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
936 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
937 
938 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
939 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
940 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
941 
942 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
943 		if (vq_mr)
944 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
945 
946 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
947 		if (vq_desc_mr &&
948 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
949 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
950 	} else {
951 		/* If there is no mr update, make sure the existing mkeys are set
952 		 * when the virtqueue is later modified to the ready state.
953 		 */
954 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
955 		if (vq_mr)
956 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
957 
958 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
959 		if (vq_desc_mr)
960 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
961 	}
962 
963 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
964 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
965 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
966 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
967 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
968 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
969 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
970 	if (counters_supported(&ndev->mvdev))
971 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
972 
973 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
974 	if (err)
975 		goto err_cmd;
976 
977 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
978 	kfree(in);
979 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
980 
981 	if (filled) {
982 		mlx5_vdpa_get_mr(mvdev, vq_mr);
983 		mvq->vq_mr = vq_mr;
984 
985 		if (vq_desc_mr &&
986 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
987 			mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
988 			mvq->desc_mr = vq_desc_mr;
989 		}
990 	}
991 
992 	return 0;
993 
994 err_cmd:
995 	kfree(in);
996 err_alloc:
997 	umems_destroy(ndev, mvq);
998 	return err;
999 }
1000 
1001 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1002 {
1003 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
1004 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
1005 
1006 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
1007 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1008 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
1009 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
1010 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
1011 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1012 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
1013 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
1014 		return;
1015 	}
1016 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
1017 	umems_destroy(ndev, mvq);
1018 
1019 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
1020 	mvq->vq_mr = NULL;
1021 
1022 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
1023 	mvq->desc_mr = NULL;
1024 }
1025 
1026 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1027 {
1028 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
1029 }
1030 
1031 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1032 {
1033 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1034 }
1035 
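/* Build the in/out mailboxes for the QP state transition command @cmd. On
 * allocation failure both pointers are set to NULL; the caller releases the
 * buffers with free_inout().
 */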
1036 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1037 			int *outlen, u32 qpn, u32 rqpn)
1038 {
1039 	void *qpc;
1040 	void *pp;
1041 
1042 	switch (cmd) {
1043 	case MLX5_CMD_OP_2RST_QP:
1044 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1045 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1046 		*in = kzalloc(*inlen, GFP_KERNEL);
1047 		*out = kzalloc(*outlen, GFP_KERNEL);
1048 		if (!*in || !*out)
1049 			goto outerr;
1050 
1051 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1052 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1053 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1054 		break;
1055 	case MLX5_CMD_OP_RST2INIT_QP:
1056 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1057 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1058 		*in = kzalloc(*inlen, GFP_KERNEL);
1059 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1060 		if (!*in || !*out)
1061 			goto outerr;
1062 
1063 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1064 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1065 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1066 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1067 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1068 		MLX5_SET(qpc, qpc, rwe, 1);
1069 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1070 		MLX5_SET(ads, pp, vhca_port_num, 1);
1071 		break;
1072 	case MLX5_CMD_OP_INIT2RTR_QP:
1073 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1074 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1075 		*in = kzalloc(*inlen, GFP_KERNEL);
1076 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1077 		if (!*in || !*out)
1078 			goto outerr;
1079 
1080 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1081 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1082 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1083 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1084 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1085 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1086 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1087 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1088 		MLX5_SET(ads, pp, fl, 1);
1089 		break;
1090 	case MLX5_CMD_OP_RTR2RTS_QP:
1091 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1092 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1093 		*in = kzalloc(*inlen, GFP_KERNEL);
1094 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1095 		if (!*in || !*out)
1096 			goto outerr;
1097 
1098 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1099 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1100 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1101 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1102 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1103 		MLX5_SET(ads, pp, ack_timeout, 14);
1104 		MLX5_SET(qpc, qpc, retry_count, 7);
1105 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1106 		break;
1107 	default:
1108 		goto outerr_nullify;
1109 	}
1110 
1111 	return;
1112 
1113 outerr:
1114 	kfree(*in);
1115 	kfree(*out);
1116 outerr_nullify:
1117 	*in = NULL;
1118 	*out = NULL;
1119 }
1120 
1121 static void free_inout(void *in, void *out)
1122 {
1123 	kfree(in);
1124 	kfree(out);
1125 }
1126 
1127 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1128  * firmware. The fw argument indicates whether the QP being modified is the one used
1129  * by firmware.
1130  */
1131 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1132 {
1133 	int outlen;
1134 	int inlen;
1135 	void *out;
1136 	void *in;
1137 	int err;
1138 
1139 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1140 	if (!in || !out)
1141 		return -ENOMEM;
1142 
1143 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1144 	free_inout(in, out);
1145 	return err;
1146 }
1147 
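/* Walk both QPs of the notification channel through RESET -> INIT -> RTR,
 * then move the firmware QP to RTS so it can post notifications towards the
 * driver QP.
 */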
1148 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1149 {
1150 	int err;
1151 
1152 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1153 	if (err)
1154 		return err;
1155 
1156 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1157 	if (err)
1158 		return err;
1159 
1160 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1161 	if (err)
1162 		return err;
1163 
1164 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1165 	if (err)
1166 		return err;
1167 
1168 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1169 	if (err)
1170 		return err;
1171 
1172 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1173 	if (err)
1174 		return err;
1175 
1176 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1177 }
1178 
1179 struct mlx5_virtq_attr {
1180 	u8 state;
1181 	u16 available_index;
1182 	u16 used_index;
1183 };
1184 
1185 struct mlx5_virtqueue_query_mem {
1186 	u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
1187 	u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
1188 };
1189 
1190 struct mlx5_virtqueue_modify_mem {
1191 	u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
1192 	u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
1193 };
1194 
1195 static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1196 				     struct mlx5_vdpa_virtqueue *mvq,
1197 				     struct mlx5_virtqueue_query_mem *cmd)
1198 {
1199 	void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1200 
1201 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1202 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1203 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1204 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1205 }
1206 
1207 static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
1208 				struct mlx5_virtqueue_query_mem *cmd,
1209 				struct mlx5_virtq_attr *attr)
1210 {
1211 	void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
1212 
1213 	memset(attr, 0, sizeof(*attr));
1214 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1215 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1216 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1217 }
1218 
1219 static int query_virtqueues(struct mlx5_vdpa_net *ndev,
1220 			    int start_vq,
1221 			    int num_vqs,
1222 			    struct mlx5_virtq_attr *attrs)
1223 {
1224 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1225 	struct mlx5_virtqueue_query_mem *cmd_mem;
1226 	struct mlx5_vdpa_async_cmd *cmds;
1227 	int err = 0;
1228 
1229 	WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n",
1230 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1231 
1232 	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
1233 	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
1234 	if (!cmds || !cmd_mem) {
1235 		err = -ENOMEM;
1236 		goto done;
1237 	}
1238 
1239 	for (int i = 0; i < num_vqs; i++) {
1240 		cmds[i].in = &cmd_mem[i].in;
1241 		cmds[i].inlen = sizeof(cmd_mem[i].in);
1242 		cmds[i].out = &cmd_mem[i].out;
1243 		cmds[i].outlen = sizeof(cmd_mem[i].out);
1244 		fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
1245 	}
1246 
1247 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1248 	if (err) {
1249 		mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n",
1250 			      start_vq, start_vq + num_vqs, err);
1251 		goto done;
1252 	}
1253 
1254 	for (int i = 0; i < num_vqs; i++) {
1255 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1256 		int vq_idx = start_vq + i;
1257 
1258 		if (cmd->err) {
1259 			mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, cmd->err);
1260 			if (!err)
1261 				err = cmd->err;
1262 			continue;
1263 		}
1264 
1265 		query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]);
1266 	}
1267 
1268 done:
1269 	kvfree(cmd_mem);
1270 	kvfree(cmds);
1271 	return err;
1272 }
1273 
1274 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1275 {
1276 	return ndev->mvdev.vdev.config->resume;
1277 }
1278 
1279 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1280 {
1281 	switch (oldstate) {
1282 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1283 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1284 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1285 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1286 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1287 		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1288 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1289 	default:
1290 		return false;
1291 	}
1292 }
1293 
1294 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1295 {
1296 	/* Only state is always modifiable */
1297 	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1298 		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1299 		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1300 
1301 	return true;
1302 }
1303 
1304 static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1305 				      struct mlx5_vdpa_virtqueue *mvq,
1306 				      int state,
1307 				      struct mlx5_virtqueue_modify_mem *cmd)
1308 {
1309 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1310 	struct mlx5_vdpa_mr *desc_mr = NULL;
1311 	struct mlx5_vdpa_mr *vq_mr = NULL;
1312 	void *obj_context;
1313 	void *cmd_hdr;
1314 	void *vq_ctx;
1315 
1316 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1317 
1318 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1319 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1320 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1321 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1322 
1323 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context);
1324 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1325 
1326 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1327 		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1328 
1329 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1330 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1331 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1332 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1333 	}
1334 
1335 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1336 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1337 
1338 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1339 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1340 
1341 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
1342 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
1343 			!!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
1344 
1345 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
1346 		u16 mlx_features = get_features(ndev->mvdev.actual_features);
1347 
1348 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
1349 			 mlx_features >> 3);
1350 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
1351 			 mlx_features & 7);
1352 	}
1353 
1354 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1355 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1356 
1357 		if (vq_mr)
1358 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1359 		else
1360 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1361 	}
1362 
1363 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1364 		desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1365 
1366 		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1367 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1368 		else
1369 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1370 	}
1371 
1372 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1373 }
1374 
1375 static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev,
1376 				 struct mlx5_vdpa_virtqueue *mvq,
1377 				 int state)
1378 {
1379 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1380 
1381 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1382 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP];
1383 		struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid];
1384 
1385 		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1386 		mlx5_vdpa_get_mr(mvdev, vq_mr);
1387 		mvq->vq_mr = vq_mr;
1388 	}
1389 
1390 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1391 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP];
1392 		struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid];
1393 
1394 		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1395 		mlx5_vdpa_get_mr(mvdev, desc_mr);
1396 		mvq->desc_mr = desc_mr;
1397 	}
1398 
1399 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1400 		mvq->fw_state = state;
1401 
1402 	mvq->modified_fields = 0;
1403 }
1404 
1405 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1406 {
1407 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1408 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1409 	void *cmd_hdr;
1410 	int err;
1411 
1412 	if (!counters_supported(&ndev->mvdev))
1413 		return 0;
1414 
1415 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1416 
1417 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1418 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1419 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1420 
1421 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1422 	if (err)
1423 		return err;
1424 
1425 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1426 
1427 	return 0;
1428 }
1429 
1430 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1431 {
1432 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1433 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1434 
1435 	if (!counters_supported(&ndev->mvdev))
1436 		return;
1437 
1438 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1439 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1440 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1441 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1442 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1443 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1444 }
1445 
1446 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1447 {
1448 	struct vdpa_callback *cb = priv;
1449 
1450 	if (cb->callback)
1451 		return cb->callback(cb->private);
1452 
1453 	return IRQ_HANDLED;
1454 }
1455 
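/* Take a free entry from the device MSI-X pool and request its IRQ for this
 * virtqueue. On failure mvq->map is left unset and the virtqueue keeps using
 * QP-based event notification (see create_virtqueue()).
 */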
1456 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1457 			 struct mlx5_vdpa_virtqueue *mvq)
1458 {
1459 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1460 	struct mlx5_vdpa_irq_pool_entry *ent;
1461 	int err;
1462 	int i;
1463 
1464 	for (i = 0; i < irqp->num_ent; i++) {
1465 		ent = &irqp->entries[i];
1466 		if (!ent->used) {
1467 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1468 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1469 			ent->dev_id = &ndev->event_cbs[mvq->index];
1470 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1471 					  ent->name, ent->dev_id);
1472 			if (err)
1473 				return;
1474 
1475 			ent->used = true;
1476 			mvq->map = ent->map;
1477 			return;
1478 		}
1479 	}
1480 }
1481 
1482 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1483 			   struct mlx5_vdpa_virtqueue *mvq)
1484 {
1485 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1486 	int i;
1487 
1488 	for (i = 0; i < irqp->num_ent; i++)
1489 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1490 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1491 			irqp->entries[i].used = false;
1492 			return;
1493 		}
1494 }
1495 
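/* Instantiate all per-virtqueue resources: CQ, the two QPs of the
 * notification channel, the counter set, an MSI-X vector if available and
 * the virtqueue object itself. A virtqueue already marked ready is resumed
 * right away.
 */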
1496 static int setup_vq(struct mlx5_vdpa_net *ndev,
1497 		    struct mlx5_vdpa_virtqueue *mvq,
1498 		    bool filled)
1499 {
1500 	u16 idx = mvq->index;
1501 	int err;
1502 
1503 	if (mvq->initialized)
1504 		return 0;
1505 
1506 	err = cq_create(ndev, idx, mvq->num_ent);
1507 	if (err)
1508 		return err;
1509 
1510 	err = qp_create(ndev, mvq, &mvq->fwqp);
1511 	if (err)
1512 		goto err_fwqp;
1513 
1514 	err = qp_create(ndev, mvq, &mvq->vqqp);
1515 	if (err)
1516 		goto err_vqqp;
1517 
1518 	err = connect_qps(ndev, mvq);
1519 	if (err)
1520 		goto err_connect;
1521 
1522 	err = counter_set_alloc(ndev, mvq);
1523 	if (err)
1524 		goto err_connect;
1525 
1526 	alloc_vector(ndev, mvq);
1527 	err = create_virtqueue(ndev, mvq, filled);
1528 	if (err)
1529 		goto err_vq;
1530 
1531 	mvq->initialized = true;
1532 
1533 	if (mvq->ready) {
1534 		err = resume_vq(ndev, mvq);
1535 		if (err)
1536 			goto err_modify;
1537 	}
1538 
1539 	return 0;
1540 
1541 err_modify:
1542 	destroy_virtqueue(ndev, mvq);
1543 err_vq:
1544 	dealloc_vector(ndev, mvq);
1545 	counter_set_dealloc(ndev, mvq);
1546 err_connect:
1547 	qp_destroy(ndev, &mvq->vqqp);
1548 err_vqqp:
1549 	qp_destroy(ndev, &mvq->fwqp);
1550 err_fwqp:
1551 	cq_destroy(ndev, idx);
1552 	return err;
1553 }
1554 
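/* Batch-modify the virtqueue range [start_vq, start_vq + num_vqs) towards the
 * given firmware state. The modify commands are filled per VQ and issued
 * asynchronously in one shot; per-command errors are reported but the
 * remaining VQs are still processed.
 */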
1555 static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state)
1556 {
1557 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1558 	struct mlx5_virtqueue_modify_mem *cmd_mem;
1559 	struct mlx5_vdpa_async_cmd *cmds;
1560 	int err = 0;
1561 
1562 	WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n",
1563 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1564 
1565 	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
1566 	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
1567 	if (!cmds || !cmd_mem) {
1568 		err = -ENOMEM;
1569 		goto done;
1570 	}
1571 
1572 	for (int i = 0; i < num_vqs; i++) {
1573 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1574 		struct mlx5_vdpa_virtqueue *mvq;
1575 		int vq_idx = start_vq + i;
1576 
1577 		mvq = &ndev->vqs[vq_idx];
1578 
1579 		if (!modifiable_virtqueue_fields(mvq)) {
1580 			err = -EINVAL;
1581 			goto done;
1582 		}
1583 
1584 		if (mvq->fw_state != state) {
1585 			if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1586 				err = -EINVAL;
1587 				goto done;
1588 			}
1589 
1590 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1591 		}
1592 
1593 		cmd->in = &cmd_mem[i].in;
1594 		cmd->inlen = sizeof(cmd_mem[i].in);
1595 		cmd->out = &cmd_mem[i].out;
1596 		cmd->outlen = sizeof(cmd_mem[i].out);
1597 		fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]);
1598 	}
1599 
1600 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1601 	if (err) {
1602 		mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n",
1603 			      start_vq, start_vq + num_vqs);
1604 		goto done;
1605 	}
1606 
1607 	for (int i = 0; i < num_vqs; i++) {
1608 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1609 		struct mlx5_vdpa_virtqueue *mvq;
1610 		int vq_idx = start_vq + i;
1611 
1612 		mvq = &ndev->vqs[vq_idx];
1613 
1614 		if (cmd->err) {
1615 			mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n",
1616 				      vq_idx, mvq->fw_state, state, err);
1617 			if (!err)
1618 				err = cmd->err;
1619 			continue;
1620 		}
1621 
1622 		modify_virtqueue_end(ndev, mvq, state);
1623 	}
1624 
1625 done:
1626 	kvfree(cmd_mem);
1627 	kvfree(cmds);
1628 	return err;
1629 }
1630 
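/* Suspend a range of virtqueues and snapshot their available/used indices so
 * they can still be reported after the VQ objects are destroyed.
 */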
1631 static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1632 {
1633 	struct mlx5_vdpa_virtqueue *mvq;
1634 	struct mlx5_virtq_attr *attrs;
1635 	int vq_idx, i;
1636 	int err;
1637 
1638 	if (start_vq >= ndev->cur_num_vqs)
1639 		return -EINVAL;
1640 
1641 	mvq = &ndev->vqs[start_vq];
1642 	if (!mvq->initialized)
1643 		return 0;
1644 
1645 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1646 		return 0;
1647 
1648 	err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
1649 	if (err)
1650 		return err;
1651 
1652 	attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL);
1653 	if (!attrs)
1654 		return -ENOMEM;
1655 
1656 	err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
1657 	if (err)
1658 		goto done;
1659 
1660 	for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
1661 		mvq = &ndev->vqs[vq_idx];
1662 		mvq->avail_idx = attrs[i].available_index;
1663 		mvq->used_idx = attrs[i].used_index;
1664 	}
1665 
1666 done:
1667 	kfree(attrs);
1668 	return err;
1669 }
1670 
1671 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1672 {
1673 	return suspend_vqs(ndev, mvq->index, 1);
1674 }
1675 
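/* Drive a range of virtqueues to the RDY state. VQs in INIT state need their
 * fields modified first (see the FW quirk comment in the INIT case below);
 * suspended VQs can only be resumed on devices that support resume.
 */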
1676 static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1677 {
1678 	struct mlx5_vdpa_virtqueue *mvq;
1679 	int err;
1680 
1681 	if (start_vq >= ndev->mvdev.max_vqs)
1682 		return -EINVAL;
1683 
1684 	mvq = &ndev->vqs[start_vq];
1685 	if (!mvq->initialized)
1686 		return 0;
1687 
1688 	if (mvq->index >= ndev->cur_num_vqs)
1689 		return 0;
1690 
1691 	switch (mvq->fw_state) {
1692 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1693 		/* Due to a FW quirk we need to modify the VQ fields first then change state.
1694 		 * This should be fixed soon. After that, a single command can be used.
1695 		 */
1696 		err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state);
1697 		if (err)
1698 			return err;
1699 		break;
1700 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1701 		if (!is_resumable(ndev)) {
1702 			mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
1703 			return -EINVAL;
1704 		}
1705 		break;
1706 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1707 		return 0;
1708 	default:
1709 		mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n",
1710 			       mvq->index, mvq->fw_state);
1711 		return -EINVAL;
1712 	}
1713 
1714 	return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1715 }
1716 
1717 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1718 {
1719 	return resume_vqs(ndev, mvq->index, 1);
1720 }
1721 
1722 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1723 {
1724 	if (!mvq->initialized)
1725 		return;
1726 
1727 	suspend_vq(ndev, mvq);
1728 	mvq->modified_fields = 0;
1729 	destroy_virtqueue(ndev, mvq);
1730 	dealloc_vector(ndev, mvq);
1731 	counter_set_dealloc(ndev, mvq);
1732 	qp_destroy(ndev, &mvq->vqqp);
1733 	qp_destroy(ndev, &mvq->fwqp);
1734 	cq_destroy(ndev, mvq->index);
1735 	mvq->initialized = false;
1736 }
1737 
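/* Create the RQT (receive queue table) that spreads received traffic across
 * the currently enabled receive virtqueues (the even-indexed VQs).
 */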
1738 static int create_rqt(struct mlx5_vdpa_net *ndev)
1739 {
1740 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1741 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1742 	__be32 *list;
1743 	void *rqtc;
1744 	int inlen;
1745 	void *in;
1746 	int i, j;
1747 	int err;
1748 
1749 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1750 	in = kzalloc(inlen, GFP_KERNEL);
1751 	if (!in)
1752 		return -ENOMEM;
1753 
1754 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1755 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1756 
1757 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1758 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1759 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1760 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1761 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1762 
1763 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1764 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1765 	kfree(in);
1766 	if (err)
1767 		return err;
1768 
1769 	return 0;
1770 }
1771 
1772 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1773 
1774 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1775 {
1776 	int act_sz = roundup_pow_of_two(num / 2);
1777 	__be32 *list;
1778 	void *rqtc;
1779 	int inlen;
1780 	void *in;
1781 	int i, j;
1782 	int err;
1783 
1784 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1785 	in = kzalloc(inlen, GFP_KERNEL);
1786 	if (!in)
1787 		return -ENOMEM;
1788 
1789 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1790 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1791 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1792 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1793 
1794 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1795 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1796 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1797 
1798 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1799 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1800 	kfree(in);
1801 	if (err)
1802 		return err;
1803 
1804 	return 0;
1805 }
1806 
1807 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1808 {
1809 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1810 }
1811 
1812 static int create_tir(struct mlx5_vdpa_net *ndev)
1813 {
1814 #define HASH_IP_L4PORTS                                                                            \
1815 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1816 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1817 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1818 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1819 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1820 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1821 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1822 	void *rss_key;
1823 	void *outer;
1824 	void *tirc;
1825 	void *in;
1826 	int err;
1827 
1828 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1829 	if (!in)
1830 		return -ENOMEM;
1831 
1832 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1833 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1834 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1835 
1836 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1837 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1838 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1839 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1840 
1841 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1842 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1843 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1844 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1845 
1846 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1847 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1848 
1849 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1850 	kfree(in);
1851 	if (err)
1852 		return err;
1853 
1854 	mlx5_vdpa_add_tirn(ndev);
1855 	return err;
1856 }
1857 
1858 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1859 {
1860 	mlx5_vdpa_remove_tirn(ndev);
1861 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1862 }
1863 
1864 #define MAX_STEERING_ENT 0x8000
1865 #define MAX_STEERING_GROUPS 2
1866 
1867 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1868        #define NUM_DESTS 2
1869 #else
1870        #define NUM_DESTS 1
1871 #endif
1872 
1873 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1874 				 struct macvlan_node *node,
1875 				 struct mlx5_flow_act *flow_act,
1876 				 struct mlx5_flow_destination *dests)
1877 {
1878 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1879 	int err;
1880 
1881 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1882 	if (IS_ERR(node->ucast_counter.counter))
1883 		return PTR_ERR(node->ucast_counter.counter);
1884 
1885 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1886 	if (IS_ERR(node->mcast_counter.counter)) {
1887 		err = PTR_ERR(node->mcast_counter.counter);
1888 		goto err_mcast_counter;
1889 	}
1890 
1891 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1892 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1893 	return 0;
1894 
1895 err_mcast_counter:
1896 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1897 	return err;
1898 #else
1899 	return 0;
1900 #endif
1901 }
1902 
1903 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1904 				     struct macvlan_node *node)
1905 {
1906 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1907 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1908 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1909 #endif
1910 }
1911 
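/* Install the unicast and multicast steering rules for a MAC/VLAN pair,
 * directing matching traffic to the TIR. With steering debug enabled, a flow
 * counter is attached to each rule as a second destination.
 */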
1912 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1913 					struct macvlan_node *node)
1914 {
1915 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1916 	struct mlx5_flow_act flow_act = {};
1917 	struct mlx5_flow_spec *spec;
1918 	void *headers_c;
1919 	void *headers_v;
1920 	u8 *dmac_c;
1921 	u8 *dmac_v;
1922 	int err;
1923 	u16 vid;
1924 
1925 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1926 	if (!spec)
1927 		return -ENOMEM;
1928 
1929 	vid = key2vid(node->macvlan);
1930 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1931 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1932 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1933 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1934 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1935 	eth_broadcast_addr(dmac_c);
1936 	ether_addr_copy(dmac_v, mac);
1937 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1938 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1939 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1940 	}
1941 	if (node->tagged) {
1942 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1943 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1944 	}
1945 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1946 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1947 	dests[0].tir_num = ndev->res.tirn;
1948 	err = add_steering_counters(ndev, node, &flow_act, dests);
1949 	if (err)
1950 		goto out_free;
1951 
1952 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1953 	dests[1].counter = node->ucast_counter.counter;
1954 #endif
1955 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1956 	if (IS_ERR(node->ucast_rule)) {
1957 		err = PTR_ERR(node->ucast_rule);
1958 		goto err_ucast;
1959 	}
1960 
1961 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1962 	dests[1].counter = node->mcast_counter.counter;
1963 #endif
1964 
1965 	memset(dmac_c, 0, ETH_ALEN);
1966 	memset(dmac_v, 0, ETH_ALEN);
1967 	dmac_c[0] = 1;
1968 	dmac_v[0] = 1;
1969 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1970 	if (IS_ERR(node->mcast_rule)) {
1971 		err = PTR_ERR(node->mcast_rule);
1972 		goto err_mcast;
1973 	}
1974 	kvfree(spec);
1975 	mlx5_vdpa_add_rx_counters(ndev, node);
1976 	return 0;
1977 
1978 err_mcast:
1979 	mlx5_del_flow_rules(node->ucast_rule);
1980 err_ucast:
1981 	remove_steering_counters(ndev, node);
1982 out_free:
1983 	kvfree(spec);
1984 	return err;
1985 }
1986 
1987 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1988 					 struct macvlan_node *node)
1989 {
1990 	mlx5_vdpa_remove_rx_counters(ndev, node);
1991 	mlx5_del_flow_rules(node->ucast_rule);
1992 	mlx5_del_flow_rules(node->mcast_rule);
1993 }
1994 
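/* Pack a MAC/VLAN pair into the 64-bit key used by the macvlan hash table:
 * VLAN id (or MLX5V_UNTAGGED) in bits 63:48, MAC address in bits 47:0.
 */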
1995 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1996 {
1997 	u64 val;
1998 
1999 	if (!tagged)
2000 		vlan = MLX5V_UNTAGGED;
2001 
2002 	val = (u64)vlan << 48 |
2003 	      (u64)mac[0] << 40 |
2004 	      (u64)mac[1] << 32 |
2005 	      (u64)mac[2] << 24 |
2006 	      (u64)mac[3] << 16 |
2007 	      (u64)mac[4] << 8 |
2008 	      (u64)mac[5];
2009 
2010 	return val;
2011 }
2012 
2013 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
2014 {
2015 	struct macvlan_node *pos;
2016 	u32 idx;
2017 
2018 	idx = hash_64(value, 8); /* TODO: revisit the hardcoded 8-bit hash width */
2019 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
2020 		if (pos->macvlan == value)
2021 			return pos;
2022 	}
2023 	return NULL;
2024 }
2025 
2026 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
2027 {
2028 	struct macvlan_node *ptr;
2029 	u64 val;
2030 	u32 idx;
2031 	int err;
2032 
2033 	val = search_val(mac, vid, tagged);
2034 	if (mac_vlan_lookup(ndev, val))
2035 		return -EEXIST;
2036 
2037 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
2038 	if (!ptr)
2039 		return -ENOMEM;
2040 
2041 	ptr->tagged = tagged;
2042 	ptr->macvlan = val;
2043 	ptr->ndev = ndev;
2044 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
2045 	if (err)
2046 		goto err_add;
2047 
2048 	idx = hash_64(val, 8);
2049 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
2050 	return 0;
2051 
2052 err_add:
2053 	kfree(ptr);
2054 	return err;
2055 }
2056 
2057 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
2058 {
2059 	struct macvlan_node *ptr;
2060 
2061 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
2062 	if (!ptr)
2063 		return;
2064 
2065 	hlist_del(&ptr->hlist);
2066 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
2067 	remove_steering_counters(ndev, ptr);
2068 	kfree(ptr);
2069 }
2070 
2071 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
2072 {
2073 	struct macvlan_node *pos;
2074 	struct hlist_node *n;
2075 	int i;
2076 
2077 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
2078 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
2079 			hlist_del(&pos->hlist);
2080 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
2081 			remove_steering_counters(ndev, pos);
2082 			kfree(pos);
2083 		}
2084 	}
2085 }
2086 
2087 static int setup_steering(struct mlx5_vdpa_net *ndev)
2088 {
2089 	struct mlx5_flow_table_attr ft_attr = {};
2090 	struct mlx5_flow_namespace *ns;
2091 	int err;
2092 
2093 	ft_attr.max_fte = MAX_STEERING_ENT;
2094 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
2095 
2096 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
2097 	if (!ns) {
2098 		mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n");
2099 		return -EOPNOTSUPP;
2100 	}
2101 
2102 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
2103 	if (IS_ERR(ndev->rxft)) {
2104 		mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n");
2105 		return PTR_ERR(ndev->rxft);
2106 	}
2107 	mlx5_vdpa_add_rx_flow_table(ndev);
2108 
2109 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
2110 	if (err)
2111 		goto err_add;
2112 
2113 	return 0;
2114 
2115 err_add:
2116 	mlx5_vdpa_remove_rx_flow_table(ndev);
2117 	mlx5_destroy_flow_table(ndev->rxft);
2118 	return err;
2119 }
2120 
2121 static void teardown_steering(struct mlx5_vdpa_net *ndev)
2122 {
2123 	clear_mac_vlan_table(ndev);
2124 	mlx5_vdpa_remove_rx_flow_table(ndev);
2125 	mlx5_destroy_flow_table(ndev->rxft);
2126 }
2127 
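/* Handle VIRTIO_NET_CTRL_MAC commands from the control VQ. For MAC_ADDR_SET
 * this updates the MPFS entry and recreates the steering rules, rolling back
 * to the previous MAC if any step fails.
 */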
2128 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2129 {
2130 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2131 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2132 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2133 	struct mlx5_core_dev *pfmdev;
2134 	size_t read;
2135 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
2136 
2137 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2138 	switch (cmd) {
2139 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
2140 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
2141 		if (read != ETH_ALEN)
2142 			break;
2143 
2144 		if (!memcmp(ndev->config.mac, mac, 6)) {
2145 			status = VIRTIO_NET_OK;
2146 			break;
2147 		}
2148 
2149 		if (is_zero_ether_addr(mac))
2150 			break;
2151 
2152 		if (!is_zero_ether_addr(ndev->config.mac)) {
2153 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2154 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
2155 					       ndev->config.mac);
2156 				break;
2157 			}
2158 		}
2159 
2160 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
2161 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2162 				       mac);
2163 			break;
2164 		}
2165 
2166 		/* Back up the original MAC address so that it can be restored if
2167 		 * adding the forward rules fails.
2168 		 */
2169 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
2170 
2171 		memcpy(ndev->config.mac, mac, ETH_ALEN);
2172 
2173 		/* Recreate the flow table entry so that packets can be forwarded back.
2174 		 */
2175 		mac_vlan_del(ndev, mac_back, 0, false);
2176 
2177 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2178 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2179 
2180 			/* This path is unlikely to be taken, but double-check anyway. */
2181 			if (is_zero_ether_addr(mac_back)) {
2182 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2183 				break;
2184 			}
2185 
2186 			/* Try to restore the original MAC address to the MPFS table, and
2187 			 * try to restore the forward rule entry.
2188 			 */
2189 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2190 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2191 					       ndev->config.mac);
2192 			}
2193 
2194 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2195 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2196 					       mac_back);
2197 			}
2198 
2199 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2200 
2201 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2202 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2203 
2204 			break;
2205 		}
2206 
2207 		status = VIRTIO_NET_OK;
2208 		break;
2209 
2210 	default:
2211 		break;
2212 	}
2213 
2214 	return status;
2215 }
2216 
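/* Change the number of active queue pairs. When shrinking, the RQT is updated
 * first and the excess VQs are suspended or torn down; when growing, the new
 * VQs are set up and resumed before the RQT is expanded.
 */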
2217 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2218 {
2219 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2220 	int cur_vqs = ndev->cur_num_vqs;
2221 	int new_vqs = newqps * 2;
2222 	int err;
2223 	int i;
2224 
2225 	if (cur_vqs > new_vqs) {
2226 		err = modify_rqt(ndev, new_vqs);
2227 		if (err)
2228 			return err;
2229 
2230 		if (is_resumable(ndev)) {
2231 			suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
2232 		} else {
2233 			for (i = new_vqs; i < cur_vqs; i++)
2234 				teardown_vq(ndev, &ndev->vqs[i]);
2235 		}
2236 
2237 		ndev->cur_num_vqs = new_vqs;
2238 	} else {
2239 		ndev->cur_num_vqs = new_vqs;
2240 
2241 		for (i = cur_vqs; i < new_vqs; i++) {
2242 			err = setup_vq(ndev, &ndev->vqs[i], false);
2243 			if (err)
2244 				goto clean_added;
2245 		}
2246 
2247 		err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
2248 		if (err)
2249 			goto clean_added;
2250 
2251 		err = modify_rqt(ndev, new_vqs);
2252 		if (err)
2253 			goto clean_added;
2254 	}
2255 	return 0;
2256 
2257 clean_added:
2258 	for (--i; i >= cur_vqs; --i)
2259 		teardown_vq(ndev, &ndev->vqs[i]);
2260 
2261 	ndev->cur_num_vqs = cur_vqs;
2262 
2263 	return err;
2264 }
2265 
2266 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2267 {
2268 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2269 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2270 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2271 	struct virtio_net_ctrl_mq mq;
2272 	size_t read;
2273 	u16 newqps;
2274 
2275 	switch (cmd) {
2276 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2277 		/* This mq feature check aligns with pre-existing userspace
2278 		 * implementation.
2279 		 *
2280 		 * Without it, an untrusted driver could fake a multiqueue config
2281 		 * request down to a non-mq device that may cause kernel to
2282 		 * panic due to uninitialized resources for extra vqs. Even with
2283 		 * a well behaving guest driver, it is not expected to allow
2284 		 * changing the number of vqs on a non-mq device.
2285 		 */
2286 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2287 			break;
2288 
2289 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2290 		if (read != sizeof(mq))
2291 			break;
2292 
2293 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2294 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2295 		    newqps > ndev->rqt_size)
2296 			break;
2297 
2298 		if (ndev->cur_num_vqs == 2 * newqps) {
2299 			status = VIRTIO_NET_OK;
2300 			break;
2301 		}
2302 
2303 		if (!change_num_qps(mvdev, newqps))
2304 			status = VIRTIO_NET_OK;
2305 
2306 		break;
2307 	default:
2308 		break;
2309 	}
2310 
2311 	return status;
2312 }
2313 
2314 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2315 {
2316 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2317 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2318 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2319 	__virtio16 vlan;
2320 	size_t read;
2321 	u16 id;
2322 
2323 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2324 		return status;
2325 
2326 	switch (cmd) {
2327 	case VIRTIO_NET_CTRL_VLAN_ADD:
2328 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2329 		if (read != sizeof(vlan))
2330 			break;
2331 
2332 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2333 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2334 			break;
2335 
2336 		status = VIRTIO_NET_OK;
2337 		break;
2338 	case VIRTIO_NET_CTRL_VLAN_DEL:
2339 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2340 		if (read != sizeof(vlan))
2341 			break;
2342 
2343 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2344 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2345 		status = VIRTIO_NET_OK;
2346 		break;
2347 	default:
2348 		break;
2349 	}
2350 
2351 	return status;
2352 }
2353 
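/* Work item that processes one control VQ descriptor per invocation and
 * requeues itself while more descriptors may be pending, writing the command
 * status back to the driver.
 */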
2354 static void mlx5_cvq_kick_handler(struct work_struct *work)
2355 {
2356 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2357 	struct virtio_net_ctrl_hdr ctrl;
2358 	struct mlx5_vdpa_wq_ent *wqent;
2359 	struct mlx5_vdpa_dev *mvdev;
2360 	struct mlx5_control_vq *cvq;
2361 	struct mlx5_vdpa_net *ndev;
2362 	size_t read, write;
2363 	int err;
2364 
2365 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2366 	mvdev = wqent->mvdev;
2367 	ndev = to_mlx5_vdpa_ndev(mvdev);
2368 	cvq = &mvdev->cvq;
2369 
2370 	down_write(&ndev->reslock);
2371 
2372 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2373 		goto out;
2374 
2375 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2376 		goto out;
2377 
2378 	if (!cvq->ready)
2379 		goto out;
2380 
2381 	while (true) {
2382 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2383 					   GFP_ATOMIC);
2384 		if (err <= 0)
2385 			break;
2386 
2387 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2388 		if (read != sizeof(ctrl))
2389 			break;
2390 
2391 		cvq->received_desc++;
2392 		switch (ctrl.class) {
2393 		case VIRTIO_NET_CTRL_MAC:
2394 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2395 			break;
2396 		case VIRTIO_NET_CTRL_MQ:
2397 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2398 			break;
2399 		case VIRTIO_NET_CTRL_VLAN:
2400 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2401 			break;
2402 		default:
2403 			break;
2404 		}
2405 
2406 		/* Make sure data is written before advancing index */
2407 		smp_wmb();
2408 
2409 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2410 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2411 		vringh_kiov_cleanup(&cvq->riov);
2412 		vringh_kiov_cleanup(&cvq->wiov);
2413 
2414 		if (vringh_need_notify_iotlb(&cvq->vring))
2415 			vringh_notify(&cvq->vring);
2416 
2417 		cvq->completed_desc++;
2418 		queue_work(mvdev->wq, &wqent->work);
2419 		break;
2420 	}
2421 
2422 out:
2423 	up_write(&ndev->reslock);
2424 }
2425 
2426 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2427 {
2428 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2429 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2430 	struct mlx5_vdpa_virtqueue *mvq;
2431 
2432 	if (!is_index_valid(mvdev, idx))
2433 		return;
2434 
2435 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2436 		if (!mvdev->wq || !mvdev->cvq.ready)
2437 			return;
2438 
2439 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2440 		return;
2441 	}
2442 
2443 	mvq = &ndev->vqs[idx];
2444 	if (unlikely(!mvq->ready))
2445 		return;
2446 
2447 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2448 }
2449 
2450 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2451 				    u64 driver_area, u64 device_area)
2452 {
2453 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2454 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2455 	struct mlx5_vdpa_virtqueue *mvq;
2456 
2457 	if (!is_index_valid(mvdev, idx))
2458 		return -EINVAL;
2459 
2460 	if (is_ctrl_vq_idx(mvdev, idx)) {
2461 		mvdev->cvq.desc_addr = desc_area;
2462 		mvdev->cvq.device_addr = device_area;
2463 		mvdev->cvq.driver_addr = driver_area;
2464 		return 0;
2465 	}
2466 
2467 	mvq = &ndev->vqs[idx];
2468 	mvq->desc_addr = desc_area;
2469 	mvq->device_addr = device_area;
2470 	mvq->driver_addr = driver_area;
2471 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2472 	return 0;
2473 }
2474 
2475 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2476 {
2477 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2478 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2479 	struct mlx5_vdpa_virtqueue *mvq;
2480 
2481 	if (!is_index_valid(mvdev, idx))
2482 		return;
2483 
2484 	if (is_ctrl_vq_idx(mvdev, idx)) {
2485 		struct mlx5_control_vq *cvq = &mvdev->cvq;
2486 
2487 		cvq->vring.vring.num = num;
2488 		return;
2489 	}
2490 
2491 	mvq = &ndev->vqs[idx];
2492 	ndev->needs_teardown |= num != mvq->num_ent;
2493 	mvq->num_ent = num;
2494 }
2495 
2496 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2497 {
2498 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2499 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2500 
2501 	ndev->event_cbs[idx] = *cb;
2502 	if (is_ctrl_vq_idx(mvdev, idx))
2503 		mvdev->cvq.event_cb = *cb;
2504 }
2505 
2506 static void mlx5_cvq_notify(struct vringh *vring)
2507 {
2508 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2509 
2510 	if (!cvq->event_cb.callback)
2511 		return;
2512 
2513 	cvq->event_cb.callback(cvq->event_cb.private);
2514 }
2515 
2516 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2517 {
2518 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2519 
2520 	cvq->ready = ready;
2521 	if (!ready)
2522 		return;
2523 
2524 	cvq->vring.notify = mlx5_cvq_notify;
2525 }
2526 
2527 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2528 {
2529 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2530 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2531 	struct mlx5_vdpa_virtqueue *mvq;
2532 
2533 	if (!mvdev->actual_features)
2534 		return;
2535 
2536 	if (!is_index_valid(mvdev, idx))
2537 		return;
2538 
2539 	if (is_ctrl_vq_idx(mvdev, idx)) {
2540 		set_cvq_ready(mvdev, ready);
2541 		return;
2542 	}
2543 
2544 	mvq = &ndev->vqs[idx];
2545 	if (!ready) {
2546 		suspend_vq(ndev, mvq);
2547 	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
2548 		if (resume_vq(ndev, mvq))
2549 			ready = false;
2550 	}
2551 
2552 	mvq->ready = ready;
2553 }
2554 
2555 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2556 {
2557 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2558 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2559 
2560 	if (!is_index_valid(mvdev, idx))
2561 		return false;
2562 
2563 	if (is_ctrl_vq_idx(mvdev, idx))
2564 		return mvdev->cvq.ready;
2565 
2566 	return ndev->vqs[idx].ready;
2567 }
2568 
2569 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2570 				  const struct vdpa_vq_state *state)
2571 {
2572 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2573 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2574 	struct mlx5_vdpa_virtqueue *mvq;
2575 
2576 	if (!is_index_valid(mvdev, idx))
2577 		return -EINVAL;
2578 
2579 	if (is_ctrl_vq_idx(mvdev, idx)) {
2580 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2581 		return 0;
2582 	}
2583 
2584 	mvq = &ndev->vqs[idx];
2585 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2586 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2587 		return -EINVAL;
2588 	}
2589 
2590 	mvq->used_idx = state->split.avail_index;
2591 	mvq->avail_idx = state->split.avail_index;
2592 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2593 				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2594 	return 0;
2595 }
2596 
2597 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2598 {
2599 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2600 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2601 	struct mlx5_vdpa_virtqueue *mvq;
2602 	struct mlx5_virtq_attr attr;
2603 	int err;
2604 
2605 	if (!is_index_valid(mvdev, idx))
2606 		return -EINVAL;
2607 
2608 	if (is_ctrl_vq_idx(mvdev, idx)) {
2609 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2610 		return 0;
2611 	}
2612 
2613 	mvq = &ndev->vqs[idx];
2614 	/* If the virtq object was destroyed, use the value saved at
2615 	 * the last minute of suspend_vq. This caters for userspace
2616 	 * that cares about emulating the index after vq is stopped.
2617 	 */
2618 	if (!mvq->initialized) {
2619 		/* Firmware returns a wrong value for the available index.
2620 		 * Since both values should be identical, we take the value of
2621 		 * used_idx which is reported correctly.
2622 		 */
2623 		state->split.avail_index = mvq->used_idx;
2624 		return 0;
2625 	}
2626 
2627 	err = query_virtqueues(ndev, mvq->index, 1, &attr);
2628 	if (err) {
2629 		mlx5_vdpa_err(mvdev, "failed to query virtqueue\n");
2630 		return err;
2631 	}
2632 	state->split.avail_index = attr.used_index;
2633 	return 0;
2634 }
2635 
2636 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2637 {
2638 	return PAGE_SIZE;
2639 }
2640 
2641 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2642 {
2643 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2644 
2645 	if (is_ctrl_vq_idx(mvdev, idx))
2646 		return MLX5_VDPA_CVQ_GROUP;
2647 
2648 	return MLX5_VDPA_DATAVQ_GROUP;
2649 }
2650 
2651 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2652 {
2653 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2654 
2655 	if (is_ctrl_vq_idx(mvdev, idx))
2656 		return MLX5_VDPA_CVQ_GROUP;
2657 
2658 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2659 }
2660 
2661 static u64 mlx_to_vritio_features(u16 dev_features)
2662 {
2663 	u64 result = 0;
2664 
2665 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2666 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2667 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2668 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2669 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2670 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2671 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2672 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2673 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2674 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2675 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2676 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2677 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2678 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2679 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2680 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2681 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2682 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2683 
2684 	return result;
2685 }
2686 
2687 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2688 {
2689 	u64 mlx_vdpa_features = 0;
2690 	u16 dev_features;
2691 
2692 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2693 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2694 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2695 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2696 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2697 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2698 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2699 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2700 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2701 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2702 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2703 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2704 
2705 	return mlx_vdpa_features;
2706 }
2707 
2708 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2709 {
2710 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2711 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2712 
2713 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2714 	return ndev->mvdev.mlx_features;
2715 }
2716 
2717 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2718 {
2719 	/* Minimum features to expect */
2720 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2721 		return -EOPNOTSUPP;
2722 
2723 	/* Double check features combination sent down by the driver.
2724 	 * Fail invalid features due to absence of the depended feature.
2725 	 *
2726 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2727 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2728 	 * By failing the invalid features sent down by untrusted drivers,
2729 	 * we're assured the assumption made upon is_index_valid() and
2730 	 * is_ctrl_vq_idx() will not be compromised.
2731 	 */
2732 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2733             BIT_ULL(VIRTIO_NET_F_MQ))
2734 		return -EINVAL;
2735 
2736 	return 0;
2737 }
2738 
2739 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2740 {
2741 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2742 	int err;
2743 	int i;
2744 
2745 	for (i = 0; i < mvdev->max_vqs; i++) {
2746 		err = setup_vq(ndev, &ndev->vqs[i], filled);
2747 		if (err)
2748 			goto err_vq;
2749 	}
2750 
2751 	return 0;
2752 
2753 err_vq:
2754 	for (--i; i >= 0; i--)
2755 		teardown_vq(ndev, &ndev->vqs[i]);
2756 
2757 	return err;
2758 }
2759 
2760 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2761 {
2762 	int i;
2763 
2764 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2765 		teardown_vq(ndev, &ndev->vqs[i]);
2766 }
2767 
2768 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2769 {
2770 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2771 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2772 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2773 			mvdev->max_idx = mvdev->max_vqs;
2774 		} else {
2775 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
2776 			 * CVQ gets index 2
2777 			 */
2778 			mvdev->max_idx = 2;
2779 		}
2780 	} else {
2781 		/* Two data virtqueues only: one for rx and one for tx */
2782 		mvdev->max_idx = 1;
2783 	}
2784 }
2785 
2786 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2787 {
2788 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2789 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2790 	int err;
2791 
2792 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2793 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2794 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2795 	if (vport)
2796 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2797 
2798 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2799 	if (err)
2800 		return 0;
2801 
2802 	return MLX5_GET(query_vport_state_out, out, state);
2803 }
2804 
2805 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2806 {
2807 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2808 	    VPORT_STATE_UP)
2809 		return true;
2810 
2811 	return false;
2812 }
2813 
2814 static void update_carrier(struct work_struct *work)
2815 {
2816 	struct mlx5_vdpa_wq_ent *wqent;
2817 	struct mlx5_vdpa_dev *mvdev;
2818 	struct mlx5_vdpa_net *ndev;
2819 
2820 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2821 	mvdev = wqent->mvdev;
2822 	ndev = to_mlx5_vdpa_ndev(mvdev);
2823 	if (get_link_state(mvdev))
2824 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2825 	else
2826 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2827 
2828 	if (ndev->config_cb.callback)
2829 		ndev->config_cb.callback(ndev->config_cb.private);
2830 
2831 	kfree(wqent);
2832 }
2833 
2834 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2835 {
2836 	struct mlx5_vdpa_wq_ent *wqent;
2837 
2838 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2839 	if (!wqent)
2840 		return -ENOMEM;
2841 
2842 	wqent->mvdev = &ndev->mvdev;
2843 	INIT_WORK(&wqent->work, update_carrier);
2844 	queue_work(ndev->mvdev.wq, &wqent->work);
2845 	return 0;
2846 }
2847 
2848 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2849 {
2850 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2851 	struct mlx5_eqe *eqe = param;
2852 	int ret = NOTIFY_DONE;
2853 
2854 	if (ndev->mvdev.suspended)
2855 		return NOTIFY_DONE;
2856 
2857 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2858 		switch (eqe->sub_type) {
2859 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2860 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2861 			if (queue_link_work(ndev))
2862 				return NOTIFY_DONE;
2863 
2864 			ret = NOTIFY_OK;
2865 			break;
2866 		default:
2867 			return NOTIFY_DONE;
2868 		}
2869 		return ret;
2870 	}
2871 	return ret;
2872 }
2873 
2874 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2875 {
2876 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2877 		return;
2878 
2879 	ndev->nb.notifier_call = event_handler;
2880 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2881 	ndev->nb_registered = true;
2882 	queue_link_work(ndev);
2883 }
2884 
2885 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2886 {
2887 	if (!ndev->nb_registered)
2888 		return;
2889 
2890 	ndev->nb_registered = false;
2891 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2892 	if (ndev->mvdev.wq)
2893 		flush_workqueue(ndev->mvdev.wq);
2894 }
2895 
2896 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2897 {
2898 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2899 }
2900 
2901 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2902 {
2903 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2904 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2905 	u64 old_features = mvdev->actual_features;
2906 	u64 diff_features;
2907 	int err;
2908 
2909 	print_features(mvdev, features, true);
2910 
2911 	err = verify_driver_features(mvdev, features);
2912 	if (err)
2913 		return err;
2914 
2915 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2916 
2917 	/* Interested in changes of vq features only. */
2918 	if (get_features(old_features) != get_features(mvdev->actual_features)) {
2919 		for (int i = 0; i < mvdev->max_vqs; ++i) {
2920 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2921 
2922 			mvq->modified_fields |= (
2923 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION |
2924 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES
2925 			);
2926 		}
2927 	}
2928 
2929 	/* When below features diverge from initial device features, VQs need a full teardown. */
2930 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \
2931 			     BIT_ULL(VIRTIO_NET_F_CSUM) | \
2932 			     BIT_ULL(VIRTIO_F_VERSION_1))
2933 
2934 	diff_features = mvdev->mlx_features ^ mvdev->actual_features;
2935 	ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK);
2936 
2937 	update_cvq_info(mvdev);
2938 	return err;
2939 }
2940 
2941 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2942 {
2943 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2944 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2945 
2946 	ndev->config_cb = *cb;
2947 }
2948 
2949 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2950 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2951 {
2952 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2953 }
2954 
2955 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2956 {
2957 	return VIRTIO_ID_NET;
2958 }
2959 
2960 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2961 {
2962 	return PCI_VENDOR_ID_MELLANOX;
2963 }
2964 
2965 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2966 {
2967 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2968 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2969 
2970 	print_status(mvdev, ndev->mvdev.status, false);
2971 	return ndev->mvdev.status;
2972 }
2973 
2974 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2975 {
2976 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2977 	struct mlx5_virtq_attr attr = {};
2978 	int err;
2979 
2980 	if (mvq->initialized) {
2981 		err = query_virtqueues(ndev, mvq->index, 1, &attr);
2982 		if (err)
2983 			return err;
2984 	}
2985 
2986 	ri->avail_index = attr.available_index;
2987 	ri->used_index = attr.used_index;
2988 	ri->ready = mvq->ready;
2989 	ri->num_ent = mvq->num_ent;
2990 	ri->desc_addr = mvq->desc_addr;
2991 	ri->device_addr = mvq->device_addr;
2992 	ri->driver_addr = mvq->driver_addr;
2993 	ri->map = mvq->map;
2994 	ri->restore = true;
2995 	return 0;
2996 }
2997 
2998 static int save_channels_info(struct mlx5_vdpa_net *ndev)
2999 {
3000 	int i;
3001 
3002 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3003 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
3004 		save_channel_info(ndev, &ndev->vqs[i]);
3005 	}
3006 	return 0;
3007 }
3008 
3009 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
3010 {
3011 	int i;
3012 
3013 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
3014 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3015 }
3016 
3017 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
3018 {
3019 	struct mlx5_vdpa_virtqueue *mvq;
3020 	struct mlx5_vq_restore_info *ri;
3021 	int i;
3022 
3023 	mlx5_clear_vqs(ndev);
3024 	mvqs_set_defaults(ndev);
3025 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3026 		mvq = &ndev->vqs[i];
3027 		ri = &mvq->ri;
3028 		if (!ri->restore)
3029 			continue;
3030 
3031 		mvq->avail_idx = ri->avail_index;
3032 		mvq->used_idx = ri->used_index;
3033 		mvq->ready = ri->ready;
3034 		mvq->num_ent = ri->num_ent;
3035 		mvq->desc_addr = ri->desc_addr;
3036 		mvq->device_addr = ri->device_addr;
3037 		mvq->driver_addr = ri->driver_addr;
3038 		mvq->map = ri->map;
3039 	}
3040 }
3041 
3042 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
3043 				struct mlx5_vdpa_mr *new_mr,
3044 				unsigned int asid)
3045 {
3046 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3047 	bool teardown = !is_resumable(ndev);
3048 	int err;
3049 
3050 	suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3051 	if (teardown) {
3052 		err = save_channels_info(ndev);
3053 		if (err)
3054 			return err;
3055 
3056 		teardown_vq_resources(ndev);
3057 	}
3058 
3059 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3060 
3061 	for (int i = 0; i < mvdev->max_vqs; i++)
3062 		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
3063 						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
3064 
3065 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
3066 		return 0;
3067 
3068 	if (teardown) {
3069 		restore_channels_info(ndev);
3070 		err = setup_vq_resources(ndev, true);
3071 		if (err)
3072 			return err;
3073 	}
3074 
3075 	resume_vqs(ndev, 0, ndev->cur_num_vqs);
3076 
3077 	return 0;
3078 }
3079 
3080 /* reslock must be held for this function */
3081 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled)
3082 {
3083 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
3084 	int err;
3085 
3086 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3087 
3088 	if (ndev->setup) {
3089 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
3090 		err = 0;
3091 		goto out;
3092 	}
3093 	mlx5_vdpa_add_debugfs(ndev);
3094 
3095 	err = read_umem_params(ndev);
3096 	if (err)
3097 		goto err_setup;
3098 
3099 	err = setup_virtqueues(mvdev, filled);
3100 	if (err) {
3101 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
3102 		goto err_setup;
3103 	}
3104 
3105 	err = create_rqt(ndev);
3106 	if (err) {
3107 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
3108 		goto err_rqt;
3109 	}
3110 
3111 	err = create_tir(ndev);
3112 	if (err) {
3113 		mlx5_vdpa_warn(mvdev, "create_tir\n");
3114 		goto err_tir;
3115 	}
3116 
3117 	err = setup_steering(ndev);
3118 	if (err) {
3119 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
3120 		goto err_fwd;
3121 	}
3122 	ndev->setup = true;
3123 
3124 	return 0;
3125 
3126 err_fwd:
3127 	destroy_tir(ndev);
3128 err_tir:
3129 	destroy_rqt(ndev);
3130 err_rqt:
3131 	teardown_virtqueues(ndev);
3132 err_setup:
3133 	mlx5_vdpa_remove_debugfs(ndev);
3134 out:
3135 	return err;
3136 }
3137 
3138 /* reslock must be held for this function */
3139 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev)
3140 {
3141 
3142 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3143 
3144 	if (!ndev->setup)
3145 		return;
3146 
3147 	mlx5_vdpa_remove_debugfs(ndev);
3148 	teardown_steering(ndev);
3149 	destroy_tir(ndev);
3150 	destroy_rqt(ndev);
3151 	teardown_virtqueues(ndev);
3152 	ndev->setup = false;
3153 	ndev->needs_teardown = false;
3154 }
3155 
3156 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
3157 {
3158 	struct mlx5_control_vq *cvq = &mvdev->cvq;
3159 	int err = 0;
3160 
3161 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
3162 		u16 idx = cvq->vring.last_avail_idx;
3163 
3164 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
3165 					cvq->vring.vring.num, false,
3166 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
3167 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
3168 					(struct vring_used *)(uintptr_t)cvq->device_addr);
3169 
3170 		if (!err)
3171 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
3172 	}
3173 	return err;
3174 }
3175 
3176 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
3177 {
3178 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3179 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3180 	int err;
3181 
3182 	print_status(mvdev, status, true);
3183 
3184 	down_write(&ndev->reslock);
3185 
3186 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
3187 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
3188 			err = setup_cvq_vring(mvdev);
3189 			if (err) {
3190 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
3191 				goto err_setup;
3192 			}
3193 			register_link_notifier(ndev);
3194 
3195 			if (ndev->needs_teardown)
3196 				teardown_vq_resources(ndev);
3197 
3198 			if (ndev->setup) {
3199 				err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3200 				if (err) {
3201 					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
3202 					goto err_driver;
3203 				}
3204 			} else {
3205 				err = setup_vq_resources(ndev, true);
3206 				if (err) {
3207 					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
3208 					goto err_driver;
3209 				}
3210 			}
3211 		} else {
3212 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3213 			goto err_clear;
3214 		}
3215 	}
3216 
3217 	ndev->mvdev.status = status;
3218 	up_write(&ndev->reslock);
3219 	return;
3220 
3221 err_driver:
3222 	unregister_link_notifier(ndev);
3223 err_setup:
3224 	mlx5_vdpa_clean_mrs(&ndev->mvdev);
3225 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3226 err_clear:
3227 	up_write(&ndev->reslock);
3228 }
3229 
3230 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3231 {
3232 	int i;
3233 
3234 	/* default mapping all groups are mapped to asid 0 */
3235 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3236 		mvdev->mres.group2asid[i] = 0;
3237 }
3238 
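/* Decide whether a device reset must also tear down the virtqueue objects:
 * needed once the driver reached DRIVER_OK, once VQ 0 left the INIT state, or
 * when pending modifications touch state, addresses or ring indices.
 */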
3239 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev)
3240 {
3241 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3242 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0];
3243 
3244 	if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
3245 		return true;
3246 
3247 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
3248 		return true;
3249 
3250 	return mvq->modified_fields & (
3251 		MLX5_VIRTQ_MODIFY_MASK_STATE |
3252 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS |
3253 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
3254 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX
3255 	);
3256 }
3257 
3258 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3259 {
3260 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3261 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3262 	bool vq_reset;
3263 
3264 	print_status(mvdev, 0, true);
3265 	mlx5_vdpa_info(mvdev, "performing device reset\n");
3266 
3267 	down_write(&ndev->reslock);
3268 	unregister_link_notifier(ndev);
3269 	vq_reset = needs_vqs_reset(mvdev);
3270 	if (vq_reset) {
3271 		teardown_vq_resources(ndev);
3272 		mvqs_set_defaults(ndev);
3273 	}
3274 
3275 	if (flags & VDPA_RESET_F_CLEAN_MAP)
3276 		mlx5_vdpa_clean_mrs(&ndev->mvdev);
3277 	ndev->mvdev.status = 0;
3278 	ndev->mvdev.suspended = false;
3279 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3280 	ndev->mvdev.cvq.ready = false;
3281 	ndev->mvdev.cvq.received_desc = 0;
3282 	ndev->mvdev.cvq.completed_desc = 0;
3283 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3284 	ndev->mvdev.actual_features = 0;
3285 	init_group_to_asid_map(mvdev);
3286 	++mvdev->generation;
3287 
3288 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3289 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3290 		if (mlx5_vdpa_create_dma_mr(mvdev))
3291 			mlx5_vdpa_err(mvdev, "create MR failed\n");
3292 	}
3293 	if (vq_reset)
3294 		setup_vq_resources(ndev, false);
3295 	up_write(&ndev->reslock);
3296 
3297 	return 0;
3298 }
3299 
3300 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3301 {
3302 	return mlx5_vdpa_compat_reset(vdev, 0);
3303 }
3304 
3305 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3306 {
3307 	return sizeof(struct virtio_net_config);
3308 }
3309 
3310 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3311 				 unsigned int len)
3312 {
3313 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3314 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3315 
3316 	if (offset + len <= sizeof(struct virtio_net_config))
3317 		memcpy(buf, (u8 *)&ndev->config + offset, len);
3318 }
3319 
3320 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3321 				 unsigned int len)
3322 {
3323 	/* not supported */
3324 }
3325 
3326 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3327 {
3328 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3329 
3330 	return mvdev->generation;
3331 }
3332 
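/*
 * Create a new MR for the incoming iotlb (or clear the existing one if
 * the iotlb is empty), install it for the given address space and
 * refresh the control VQ iotlb.
 */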
3333 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3334 			unsigned int asid)
3335 {
3336 	struct mlx5_vdpa_mr *new_mr;
3337 	int err;
3338 
3339 	if (asid >= MLX5_VDPA_NUM_AS)
3340 		return -EINVAL;
3341 
3342 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3343 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3344 		if (IS_ERR(new_mr)) {
3345 			err = PTR_ERR(new_mr);
3346 			mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err);
3347 			return err;
3348 		}
3349 	} else {
3350 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3351 		new_mr = NULL;
3352 	}
3353 
3354 	if (!mvdev->mres.mr[asid]) {
3355 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3356 	} else {
3357 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3358 		if (err) {
3359 			mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err);
3360 			goto out_err;
3361 		}
3362 	}
3363 
3364 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3365 
3366 out_err:
3367 	mlx5_vdpa_put_mr(mvdev, new_mr);
3368 	return err;
3369 }
3370 
3371 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3372 			     struct vhost_iotlb *iotlb)
3373 {
3374 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3375 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3376 	int err = -EINVAL;
3377 
3378 	down_write(&ndev->reslock);
3379 	err = set_map_data(mvdev, iotlb, asid);
3380 	up_write(&ndev->reslock);
3381 	return err;
3382 }
3383 
3384 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3385 {
3386 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3387 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3388 	int err;
3389 
3390 	down_write(&ndev->reslock);
3391 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3392 	up_write(&ndev->reslock);
3393 	return err;
3394 }
3395 
3396 static union virtio_map mlx5_get_vq_map(struct vdpa_device *vdev, u16 idx)
3397 {
3398 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3399 	union virtio_map map;
3400 
3401 	if (is_ctrl_vq_idx(mvdev, idx))
3402 		map.dma_dev = &vdev->dev;
3403 	else
3404 		map.dma_dev = mvdev->vdev.vmap.dma_dev;
3405 
3406 	return map;
3407 }
3408 
3409 static void free_irqs(struct mlx5_vdpa_net *ndev)
3410 {
3411 	struct mlx5_vdpa_irq_pool_entry *ent;
3412 	int i;
3413 
3414 	if (!msix_mode_supported(&ndev->mvdev))
3415 		return;
3416 
3417 	if (!ndev->irqp.entries)
3418 		return;
3419 
3420 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3421 		ent = ndev->irqp.entries + i;
3422 		if (ent->map.virq)
3423 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3424 	}
3425 	kfree(ndev->irqp.entries);
3426 }
3427 
3428 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3429 {
3430 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3431 	struct mlx5_core_dev *pfmdev;
3432 	struct mlx5_vdpa_net *ndev;
3433 
3434 	ndev = to_mlx5_vdpa_ndev(mvdev);
3435 
3436 	/* Functions called here should be able to work with
3437 	 * uninitialized resources.
3438 	 */
3439 	free_fixed_resources(ndev);
3440 	mlx5_vdpa_clean_mrs(mvdev);
3441 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3442 	if (!is_zero_ether_addr(ndev->config.mac)) {
3443 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3444 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3445 	}
3446 	mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
3447 	mlx5_vdpa_free_resources(&ndev->mvdev);
3448 	free_irqs(ndev);
3449 	kfree(ndev->event_cbs);
3450 	kfree(ndev->vqs);
3451 }
3452 
3453 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3454 {
3455 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3456 	struct vdpa_notification_area ret = {};
3457 	struct mlx5_vdpa_net *ndev;
3458 	phys_addr_t addr;
3459 
3460 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3461 		return ret;
3462 
3463 	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3464 	 * notification, to avoid the risk of mapping pages that contain the
3465 	 * BARs of more than one SF.
3466 	 */
3467 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3468 		return ret;
3469 
3470 	ndev = to_mlx5_vdpa_ndev(mvdev);
3471 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3472 	ret.addr = addr;
3473 	ret.size = PAGE_SIZE;
3474 	return ret;
3475 }
3476 
3477 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3478 {
3479 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3480 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3481 	struct mlx5_vdpa_virtqueue *mvq;
3482 
3483 	if (!is_index_valid(mvdev, idx))
3484 		return -EINVAL;
3485 
3486 	if (is_ctrl_vq_idx(mvdev, idx))
3487 		return -EOPNOTSUPP;
3488 
3489 	mvq = &ndev->vqs[idx];
3490 	if (!mvq->map.virq)
3491 		return -EOPNOTSUPP;
3492 
3493 	return mvq->map.virq;
3494 }
3495 
3496 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3497 {
3498 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3499 
3500 	return mvdev->actual_features;
3501 }
3502 
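/*
 * Query the virtio_q_counters object attached to a virtqueue for its
 * received/completed descriptor counters. Only valid while the queue
 * is in the RDY state.
 */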
3503 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3504 			     u64 *received_desc, u64 *completed_desc)
3505 {
3506 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3507 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3508 	void *cmd_hdr;
3509 	void *ctx;
3510 	int err;
3511 
3512 	if (!counters_supported(&ndev->mvdev))
3513 		return -EOPNOTSUPP;
3514 
3515 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3516 		return -EAGAIN;
3517 
3518 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3519 
3520 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3521 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3522 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3523 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3524 
3525 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3526 	if (err)
3527 		return err;
3528 
3529 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3530 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3531 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3532 	return 0;
3533 }
3534 
3535 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3536 					 struct sk_buff *msg,
3537 					 struct netlink_ext_ack *extack)
3538 {
3539 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3540 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3541 	struct mlx5_vdpa_virtqueue *mvq;
3542 	struct mlx5_control_vq *cvq;
3543 	u64 received_desc;
3544 	u64 completed_desc;
3545 	int err = 0;
3546 
3547 	down_read(&ndev->reslock);
3548 	if (!is_index_valid(mvdev, idx)) {
3549 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3550 		err = -EINVAL;
3551 		goto out_err;
3552 	}
3553 
3554 	if (idx == ctrl_vq_idx(mvdev)) {
3555 		cvq = &mvdev->cvq;
3556 		received_desc = cvq->received_desc;
3557 		completed_desc = cvq->completed_desc;
3558 		goto out;
3559 	}
3560 
3561 	mvq = &ndev->vqs[idx];
3562 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3563 	if (err) {
3564 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3565 		goto out_err;
3566 	}
3567 
3568 out:
3569 	err = -EMSGSIZE;
3570 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3571 		goto out_err;
3572 
3573 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3574 			      VDPA_ATTR_PAD))
3575 		goto out_err;
3576 
3577 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3578 		goto out_err;
3579 
3580 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3581 			      VDPA_ATTR_PAD))
3582 		goto out_err;
3583 
3584 	err = 0;
3585 out_err:
3586 	up_read(&ndev->reslock);
3587 	return err;
3588 }
3589 
3590 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3591 {
3592 	struct mlx5_control_vq *cvq;
3593 
3594 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3595 		return;
3596 
3597 	cvq = &mvdev->cvq;
3598 	cvq->ready = false;
3599 }
3600 
3601 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3602 {
3603 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3604 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3605 	int err;
3606 
3607 	mlx5_vdpa_info(mvdev, "suspending device\n");
3608 
3609 	down_write(&ndev->reslock);
3610 	err = suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3611 	mlx5_vdpa_cvq_suspend(mvdev);
3612 	mvdev->suspended = true;
3613 	up_write(&ndev->reslock);
3614 
3615 	return err;
3616 }
3617 
3618 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3619 {
3620 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3621 	struct mlx5_vdpa_net *ndev;
3622 	int err;
3623 
3624 	ndev = to_mlx5_vdpa_ndev(mvdev);
3625 
3626 	mlx5_vdpa_info(mvdev, "resuming device\n");
3627 
3628 	down_write(&ndev->reslock);
3629 	mvdev->suspended = false;
3630 	err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3631 	queue_link_work(ndev);
3632 	up_write(&ndev->reslock);
3633 
3634 	return err;
3635 }
3636 
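/*
 * Bind a virtqueue group to an address space. If the control VQ group
 * is rebound to an ASID that already has an MR, refresh the control VQ
 * iotlb from that MR.
 */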
3637 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3638 			       unsigned int asid)
3639 {
3640 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3641 	int err = 0;
3642 
3643 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3644 		return -EINVAL;
3645 
3646 	mvdev->mres.group2asid[group] = asid;
3647 
3648 	mutex_lock(&mvdev->mres.lock);
3649 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid])
3650 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid);
3651 	mutex_unlock(&mvdev->mres.lock);
3652 
3653 	return err;
3654 }
3655 
3656 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3657 	.set_vq_address = mlx5_vdpa_set_vq_address,
3658 	.set_vq_num = mlx5_vdpa_set_vq_num,
3659 	.kick_vq = mlx5_vdpa_kick_vq,
3660 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3661 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3662 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3663 	.set_vq_state = mlx5_vdpa_set_vq_state,
3664 	.get_vq_state = mlx5_vdpa_get_vq_state,
3665 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3666 	.get_vq_notification = mlx5_get_vq_notification,
3667 	.get_vq_irq = mlx5_get_vq_irq,
3668 	.get_vq_align = mlx5_vdpa_get_vq_align,
3669 	.get_vq_group = mlx5_vdpa_get_vq_group,
3670 	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3671 	.get_device_features = mlx5_vdpa_get_device_features,
3672 	.get_backend_features = mlx5_vdpa_get_backend_features,
3673 	.set_driver_features = mlx5_vdpa_set_driver_features,
3674 	.get_driver_features = mlx5_vdpa_get_driver_features,
3675 	.set_config_cb = mlx5_vdpa_set_config_cb,
3676 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3677 	.get_device_id = mlx5_vdpa_get_device_id,
3678 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3679 	.get_status = mlx5_vdpa_get_status,
3680 	.set_status = mlx5_vdpa_set_status,
3681 	.reset = mlx5_vdpa_reset,
3682 	.compat_reset = mlx5_vdpa_compat_reset,
3683 	.get_config_size = mlx5_vdpa_get_config_size,
3684 	.get_config = mlx5_vdpa_get_config,
3685 	.set_config = mlx5_vdpa_set_config,
3686 	.get_generation = mlx5_vdpa_get_generation,
3687 	.set_map = mlx5_vdpa_set_map,
3688 	.reset_map = mlx5_vdpa_reset_map,
3689 	.set_group_asid = mlx5_set_group_asid,
3690 	.get_vq_map = mlx5_get_vq_map,
3691 	.free = mlx5_vdpa_free,
3692 	.suspend = mlx5_vdpa_suspend,
3693 	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
3694 };
3695 
3696 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3697 {
3698 	u16 hw_mtu;
3699 	int err;
3700 
3701 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3702 	if (err)
3703 		return err;
3704 
3705 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3706 	return 0;
3707 }
3708 
3709 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev)
3710 {
3711 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3712 	int err;
3713 
3714 	if (res->valid) {
3715 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3716 		return -EEXIST;
3717 	}
3718 
3719 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3720 	if (err)
3721 		return err;
3722 
3723 	err = create_tis(ndev);
3724 	if (err)
3725 		goto err_tis;
3726 
3727 	res->valid = true;
3728 
3729 	return 0;
3730 
3731 err_tis:
3732 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3733 	return err;
3734 }
3735 
3736 static void free_fixed_resources(struct mlx5_vdpa_net *ndev)
3737 {
3738 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3739 
3740 	if (!res->valid)
3741 		return;
3742 
3743 	destroy_tis(ndev);
3744 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3745 	res->valid = false;
3746 }
3747 
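/*
 * Restore every virtqueue struct to its default state. Only the fields
 * preceding the restore information (ri) are cleared.
 */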
3748 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev)
3749 {
3750 	struct mlx5_vdpa_virtqueue *mvq;
3751 	int i;
3752 
3753 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3754 		mvq = &ndev->vqs[i];
3755 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3756 		mvq->index = i;
3757 		mvq->ndev = ndev;
3758 		mvq->fwqp.fw = true;
3759 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3760 		mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE;
3761 	}
3762 }
3763 
3764 struct mlx5_vdpa_mgmtdev {
3765 	struct vdpa_mgmt_dev mgtdev;
3766 	struct mlx5_adev *madev;
3767 	struct mlx5_vdpa_net *ndev;
3768 	struct vdpa_config_ops vdpa_ops;
3769 };
3770 
3771 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3772 {
3773 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3774 	void *in;
3775 	int err;
3776 
3777 	in = kvzalloc(inlen, GFP_KERNEL);
3778 	if (!in)
3779 		return -ENOMEM;
3780 
3781 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3782 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3783 		 mtu + MLX5V_ETH_HARD_MTU);
3784 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3785 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3786 
3787 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3788 
3789 	kvfree(in);
3790 	return err;
3791 }
3792 
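/*
 * Best-effort pre-allocation of one MSI-X vector per data virtqueue.
 * Allocation stops at the first failure; only the successfully
 * allocated vectors are accounted in num_ent.
 */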
3793 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3794 {
3795 	struct mlx5_vdpa_irq_pool_entry *ent;
3796 	int i;
3797 
3798 	if (!msix_mode_supported(&ndev->mvdev))
3799 		return;
3800 
3801 	if (!ndev->mvdev.mdev->pdev)
3802 		return;
3803 
3804 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3805 	if (!ndev->irqp.entries)
3806 		return;
3807 
3808 
3809 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3810 		ent = ndev->irqp.entries + i;
3811 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3812 			 dev_name(&ndev->mvdev.vdev.dev), i);
3813 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3814 		if (!ent->map.virq)
3815 			return;
3816 
3817 		ndev->irqp.num_ent++;
3818 	}
3819 }
3820 
3821 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3822 			     const struct vdpa_dev_set_config *add_config)
3823 {
3824 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3825 	struct virtio_net_config *config;
3826 	struct mlx5_core_dev *pfmdev;
3827 	struct mlx5_vdpa_dev *mvdev;
3828 	struct mlx5_vdpa_net *ndev;
3829 	struct mlx5_core_dev *mdev;
3830 	u64 device_features;
3831 	u32 max_vqs;
3832 	u16 mtu;
3833 	int err;
3834 
3835 	if (mgtdev->ndev)
3836 		return -ENOSPC;
3837 
3838 	mdev = mgtdev->madev->mdev;
3839 	device_features = mgtdev->mgtdev.supported_features;
3840 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3841 		if (add_config->device_features & ~device_features) {
3842 			dev_warn(mdev->device,
3843 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3844 				 add_config->device_features, device_features);
3845 			return -EINVAL;
3846 		}
3847 		device_features &= add_config->device_features;
3848 	} else {
3849 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3850 	}
3851 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3852 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3853 		dev_warn(mdev->device,
3854 			 "Must provision minimum features 0x%llx for this device",
3855 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3856 		return -EOPNOTSUPP;
3857 	}
3858 
3859 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3860 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3861 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3862 		return -EOPNOTSUPP;
3863 	}
3864 
3865 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3866 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3867 	if (max_vqs < 2) {
3868 		dev_warn(mdev->device,
3869 			 "%d virtqueues are supported. At least 2 are required\n",
3870 			 max_vqs);
3871 		return -EAGAIN;
3872 	}
3873 
3874 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3875 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3876 			return -EINVAL;
3877 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3878 	} else {
3879 		max_vqs = 2;
3880 	}
3881 
3882 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3883 				 NULL, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3884 	if (IS_ERR(ndev))
3885 		return PTR_ERR(ndev);
3886 
3887 	ndev->mvdev.max_vqs = max_vqs;
3888 	mvdev = &ndev->mvdev;
3889 	mvdev->mdev = mdev;
3890 	/* cpu_to_mlx5vdpa16() below depends on this flag */
3891 	mvdev->actual_features =
3892 			(device_features & BIT_ULL(VIRTIO_F_VERSION_1));
3893 
3894 	mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
3895 
3896 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3897 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3898 	if (!ndev->vqs || !ndev->event_cbs) {
3899 		err = -ENOMEM;
3900 		goto err_alloc;
3901 	}
3902 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3903 
3904 	mvqs_set_defaults(ndev);
3905 	allocate_irqs(ndev);
3906 	init_rwsem(&ndev->reslock);
3907 	config = &ndev->config;
3908 
3909 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3910 		err = config_func_mtu(mdev, add_config->net.mtu);
3911 		if (err)
3912 			goto err_alloc;
3913 	}
3914 
3915 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3916 		err = query_mtu(mdev, &mtu);
3917 		if (err)
3918 			goto err_alloc;
3919 
3920 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3921 	}
3922 
3923 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3924 		if (get_link_state(mvdev))
3925 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3926 		else
3927 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3928 	}
3929 
3930 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3931 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3932 	/* Don't bother setting the mac address in the config if we are not going to provision _F_MAC */
3933 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3934 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3935 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3936 		if (err)
3937 			goto err_alloc;
3938 	}
3939 
3940 	if (!is_zero_ether_addr(config->mac)) {
3941 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3942 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3943 		if (err)
3944 			goto err_alloc;
3945 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3946 		/*
3947 		 * We used to clear the _F_MAC feature bit when a zero
3948 		 * mac address was seen and device features were not
3949 		 * explicitly provisioned. Keep that behaviour so old
3950 		 * scripts do not break.
3951 		 */
3952 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3953 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3954 		/* Don't provision zero mac address for _F_MAC */
3955 		mlx5_vdpa_warn(&ndev->mvdev,
3956 			       "No mac address provisioned?\n");
3957 		err = -EINVAL;
3958 		goto err_alloc;
3959 	}
3960 
3961 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
3962 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3963 		ndev->rqt_size = max_vqs / 2;
3964 	} else {
3965 		ndev->rqt_size = 1;
3966 	}
3967 
3968 	ndev->mvdev.mlx_features = device_features;
3969 	mvdev->vdev.vmap.dma_dev = &mdev->pdev->dev;
3970 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3971 	if (err)
3972 		goto err_alloc;
3973 
3974 	err = mlx5_vdpa_init_mr_resources(mvdev);
3975 	if (err)
3976 		goto err_alloc;
3977 
3978 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3979 		err = mlx5_vdpa_create_dma_mr(mvdev);
3980 		if (err)
3981 			goto err_alloc;
3982 	}
3983 
3984 	err = alloc_fixed_resources(ndev);
3985 	if (err)
3986 		goto err_alloc;
3987 
3988 	ndev->cvq_ent.mvdev = mvdev;
3989 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3990 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3991 	if (!mvdev->wq) {
3992 		err = -ENOMEM;
3993 		goto err_alloc;
3994 	}
3995 
3996 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3997 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3998 	if (err)
3999 		goto err_reg;
4000 
4001 	mgtdev->ndev = ndev;
4002 
4003 	/* For virtio-vdpa, the device was set up during device registration. */
4004 	if (ndev->setup)
4005 		return 0;
4006 
4007 	down_write(&ndev->reslock);
4008 	err = setup_vq_resources(ndev, false);
4009 	up_write(&ndev->reslock);
4010 	if (err)
4011 		goto err_setup_vq_res;
4012 
4013 	return 0;
4014 
4015 err_setup_vq_res:
4016 	_vdpa_unregister_device(&mvdev->vdev);
4017 err_reg:
4018 	destroy_workqueue(mvdev->wq);
4019 err_alloc:
4020 	put_device(&mvdev->vdev.dev);
4021 	return err;
4022 }
4023 
4024 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
4025 {
4026 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
4027 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
4028 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
4029 	struct workqueue_struct *wq;
4030 
4031 	unregister_link_notifier(ndev);
4032 	_vdpa_unregister_device(dev);
4033 
4034 	down_write(&ndev->reslock);
4035 	teardown_vq_resources(ndev);
4036 	up_write(&ndev->reslock);
4037 
4038 	wq = mvdev->wq;
4039 	mvdev->wq = NULL;
4040 	destroy_workqueue(wq);
4041 	mgtdev->ndev = NULL;
4042 }
4043 
4044 static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
4045 			      const struct vdpa_dev_set_config *add_config)
4046 {
4047 	struct virtio_net_config *config;
4048 	struct mlx5_core_dev *pfmdev;
4049 	struct mlx5_vdpa_dev *mvdev;
4050 	struct mlx5_vdpa_net *ndev;
4051 	struct mlx5_core_dev *mdev;
4052 	int err = -EOPNOTSUPP;
4053 
4054 	mvdev = to_mvdev(dev);
4055 	ndev = to_mlx5_vdpa_ndev(mvdev);
4056 	mdev = mvdev->mdev;
4057 	config = &ndev->config;
4058 
4059 	down_write(&ndev->reslock);
4060 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
4061 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
4062 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
4063 		if (!err)
4064 			ether_addr_copy(config->mac, add_config->net.mac);
4065 	}
4066 
4067 	up_write(&ndev->reslock);
4068 	return err;
4069 }
4070 
4071 static const struct vdpa_mgmtdev_ops mdev_ops = {
4072 	.dev_add = mlx5_vdpa_dev_add,
4073 	.dev_del = mlx5_vdpa_dev_del,
4074 	.dev_set_attr = mlx5_vdpa_set_attr,
4075 };
4076 
4077 static struct virtio_device_id id_table[] = {
4078 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
4079 	{ 0 },
4080 };
4081 
4082 static int mlx5v_probe(struct auxiliary_device *adev,
4083 		       const struct auxiliary_device_id *id)
4084 
4085 {
4086 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
4087 	struct mlx5_core_dev *mdev = madev->mdev;
4088 	struct mlx5_vdpa_mgmtdev *mgtdev;
4089 	int err;
4090 
4091 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
4092 	if (!mgtdev)
4093 		return -ENOMEM;
4094 
4095 	mgtdev->mgtdev.ops = &mdev_ops;
4096 	mgtdev->mgtdev.device = mdev->device;
4097 	mgtdev->mgtdev.id_table = id_table;
4098 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
4099 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
4100 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
4101 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
4102 	mgtdev->mgtdev.max_supported_vqs =
4103 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
4104 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
4105 	mgtdev->madev = madev;
4106 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
4107 
4108 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
4109 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
4110 
4111 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
4112 		mgtdev->vdpa_ops.resume = NULL;
4113 
4114 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
4115 	if (err)
4116 		goto reg_err;
4117 
4118 	auxiliary_set_drvdata(adev, mgtdev);
4119 
4120 	return 0;
4121 
4122 reg_err:
4123 	kfree(mgtdev);
4124 	return err;
4125 }
4126 
4127 static void mlx5v_remove(struct auxiliary_device *adev)
4128 {
4129 	struct mlx5_vdpa_mgmtdev *mgtdev;
4130 
4131 	mgtdev = auxiliary_get_drvdata(adev);
4132 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
4133 	kfree(mgtdev);
4134 }
4135 
4136 static const struct auxiliary_device_id mlx5v_id_table[] = {
4137 	{ .name = MLX5_ADEV_NAME ".vnet", },
4138 	{},
4139 };
4140 
4141 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
4142 
4143 static struct auxiliary_driver mlx5v_driver = {
4144 	.name = "vnet",
4145 	.probe = mlx5v_probe,
4146 	.remove = mlx5v_remove,
4147 	.id_table = mlx5v_id_table,
4148 };
4149 
4150 module_auxiliary_driver(mlx5v_driver);
4151