xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 189f164e573e18d9f8876dbd3ad8fcbe11f93037)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
52  * 5.1.6.5.5 "Device operation in multiqueue mode":
53  *
54  * Multiqueue is disabled by default.
55  * The driver enables multiqueue by sending a command using class
56  * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
57  * operation, as follows: ...
58  */
59 #define MLX5V_DEFAULT_VQ_COUNT 2
60 
61 #define MLX5V_DEFAULT_VQ_SIZE 256
62 
63 struct mlx5_vdpa_cq_buf {
64 	struct mlx5_frag_buf_ctrl fbc;
65 	struct mlx5_frag_buf frag_buf;
66 	int cqe_size;
67 	int nent;
68 };
69 
70 struct mlx5_vdpa_cq {
71 	struct mlx5_core_cq mcq;
72 	struct mlx5_vdpa_cq_buf buf;
73 	struct mlx5_db db;
74 	int cqe;
75 };
76 
77 struct mlx5_vdpa_umem {
78 	struct mlx5_frag_buf_ctrl fbc;
79 	struct mlx5_frag_buf frag_buf;
80 	int size;
81 	u32 id;
82 };
83 
84 struct mlx5_vdpa_qp {
85 	struct mlx5_core_qp mqp;
86 	struct mlx5_frag_buf frag_buf;
87 	struct mlx5_db db;
88 	u16 head;
89 	bool fw;
90 };
91 
92 struct mlx5_vq_restore_info {
93 	u32 num_ent;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u16 avail_index;
98 	u16 used_index;
99 	struct msi_map map;
100 	bool ready;
101 	bool restore;
102 };
103 
104 struct mlx5_vdpa_virtqueue {
105 	bool ready;
106 	u64 desc_addr;
107 	u64 device_addr;
108 	u64 driver_addr;
109 	u32 num_ent;
110 
111 	/* Resources for implementing the notification channel from the device
112 	 * to the driver. fwqp is the firmware end of an RC connection; the
113 	 * other end is vqqp used by the driver. cq is where completions are
114 	 * reported.
115 	 */
116 	struct mlx5_vdpa_cq cq;
117 	struct mlx5_vdpa_qp fwqp;
118 	struct mlx5_vdpa_qp vqqp;
119 
120 	/* umem resources are required for the virtqueue operation. Their use
121 	 * is internal and they must be provided by the driver.
122 	 */
123 	struct mlx5_vdpa_umem umem1;
124 	struct mlx5_vdpa_umem umem2;
125 	struct mlx5_vdpa_umem umem3;
126 
127 	u32 counter_set_id;
128 	bool initialized;
129 	int index;
130 	u32 virtq_id;
131 	struct mlx5_vdpa_net *ndev;
132 	u16 avail_idx;
133 	u16 used_idx;
134 	int fw_state;
135 
136 	u64 modified_fields;
137 
138 	struct mlx5_vdpa_mr *vq_mr;
139 	struct mlx5_vdpa_mr *desc_mr;
140 
141 	struct msi_map map;
142 
143 	/* keep last in the struct */
144 	struct mlx5_vq_restore_info ri;
145 };
146 
147 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
148 {
149 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
150 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
151 			return idx < 2;
152 		else
153 			return idx < 3;
154 	}
155 
156 	return idx <= mvdev->max_idx;
157 }
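/* Illustrative example (values assumed, not taken from the driver): the
 * virtio-net VQ index space is rx/tx pairs followed by the control VQ. With
 * VIRTIO_NET_F_MQ and VIRTIO_NET_F_CTRL_VQ negotiated and 16 data VQs,
 * indices 0..15 are data VQs, ctrl_vq_idx() below returns 16 and max_idx is
 * presumably 16 as well, so idx <= max_idx accepts all of them. Without MQ,
 * only idx < 2 (no control VQ) or idx < 3 (with control VQ) pass, matching
 * the checks above.
 */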
158 
159 static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
160 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
161 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
162 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
163 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
164 
165 static bool mlx5_vdpa_debug;
166 
167 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
168 	do {                                                                                       \
169 		if (features & BIT_ULL(_feature))                                                  \
170 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
171 	} while (0)
172 
173 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
174 	do {                                                                                       \
175 		if (status & (_status))                                                            \
176 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
177 	} while (0)
178 
179 /* TODO: cross-endian support */
180 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
181 {
182 	return virtio_legacy_is_little_endian() ||
183 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
184 }
185 
186 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
187 {
188 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
189 }
190 
191 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
192 {
193 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
194 }
195 
196 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
197 {
198 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
199 		return 2;
200 
201 	return mvdev->max_vqs;
202 }
203 
204 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
205 {
206 	return idx == ctrl_vq_idx(mvdev);
207 }
208 
209 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
210 {
211 	if (status & ~VALID_STATUS_MASK)
212 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
213 			       status & ~VALID_STATUS_MASK);
214 
215 	if (!mlx5_vdpa_debug)
216 		return;
217 
218 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
219 	if (set && !status) {
220 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
221 		return;
222 	}
223 
224 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
225 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
226 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
227 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
228 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
229 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
230 }
231 
232 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
233 {
234 	if (features & ~VALID_FEATURES_MASK)
235 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
236 			       features & ~VALID_FEATURES_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
242 	if (!features)
243 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
244 
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
279 }
280 
281 static int create_tis(struct mlx5_vdpa_net *ndev)
282 {
283 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
284 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
285 	void *tisc;
286 	int err;
287 
288 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
289 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
290 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
291 	if (err)
292 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
293 
294 	return err;
295 }
296 
297 static void destroy_tis(struct mlx5_vdpa_net *ndev)
298 {
299 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
300 }
301 
302 #define MLX5_VDPA_CQE_SIZE 64
303 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
304 
305 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
306 {
307 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
308 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
309 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
310 	int err;
311 
312 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
313 				       ndev->mvdev.mdev->priv.numa_node);
314 	if (err)
315 		return err;
316 
317 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
318 
319 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
320 	buf->nent = nent;
321 
322 	return 0;
323 }
324 
325 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
326 {
327 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
328 
329 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
330 					ndev->mvdev.mdev->priv.numa_node);
331 }
332 
333 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
334 {
335 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
336 }
337 
338 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
339 {
340 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
341 }
342 
343 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
344 {
345 	struct mlx5_cqe64 *cqe64;
346 	void *cqe;
347 	int i;
348 
349 	for (i = 0; i < buf->nent; i++) {
350 		cqe = get_cqe(vcq, i);
351 		cqe64 = cqe;
352 		cqe64->op_own = MLX5_CQE_INVALID << 4;
353 	}
354 }
355 
356 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
357 {
358 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
359 
360 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
361 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
362 		return cqe64;
363 
364 	return NULL;
365 }
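/* A sketch of the ownership check above, assuming a power-of-two ring of
 * cq->cqe entries: (n & (cq->cqe - 1)) selects the slot and (n & cq->cqe)
 * flips on every lap around the ring. A CQE belongs to software only when
 * its owner bit matches the parity of the current lap; e.g. with
 * cq->cqe == 256 and cons_index == 300, slot 44 is inspected and its owner
 * bit is expected to be 1 (second lap).
 */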
366 
367 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
368 {
369 	vqp->head += n;
370 	vqp->db.db[0] = cpu_to_be32(vqp->head);
371 }
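/* rx_post() above is the whole receive-side doorbell protocol for the driver
 * QP: head is advanced by the number of entries made available and its new
 * value is written big-endian to doorbell record slot 0, telling the HW how
 * many receive WQEs it may consume. The counter grows monotonically; the
 * actual ring slot is typically derived from it modulo the queue size.
 */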
372 
373 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
374 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
375 {
376 	struct mlx5_vdpa_qp *vqp;
377 	__be64 *pas;
378 	void *qpc;
379 
380 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
381 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
382 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
383 	if (vqp->fw) {
384 		/* The firmware QP is allocated by the driver for the firmware's use,
385 		 * so we can skip some of the params as they will be chosen by the firmware.
386 		 */
387 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
388 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
389 		MLX5_SET(qpc, qpc, no_sq, 1);
390 		return;
391 	}
392 
393 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
394 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
395 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
396 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
397 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
398 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
399 	MLX5_SET(qpc, qpc, no_sq, 1);
400 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
401 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
402 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
403 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
404 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
405 }
406 
407 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
408 {
409 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
410 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
411 					ndev->mvdev.mdev->priv.numa_node);
412 }
413 
414 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
415 {
416 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
417 }
418 
419 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
420 		     struct mlx5_vdpa_qp *vqp)
421 {
422 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
423 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
424 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
425 	void *qpc;
426 	void *in;
427 	int err;
428 
429 	if (!vqp->fw) {
430 		vqp = &mvq->vqqp;
431 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
432 		if (err)
433 			return err;
434 
435 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
436 		if (err)
437 			goto err_db;
438 		inlen += vqp->frag_buf.npages * sizeof(__be64);
439 	}
440 
441 	in = kzalloc(inlen, GFP_KERNEL);
442 	if (!in) {
443 		err = -ENOMEM;
444 		goto err_kzalloc;
445 	}
446 
447 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
448 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
449 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
450 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
451 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
452 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
453 	if (!vqp->fw)
454 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
455 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
456 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
457 	kfree(in);
458 	if (err)
459 		goto err_kzalloc;
460 
461 	vqp->mqp.uid = ndev->mvdev.res.uid;
462 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
463 
464 	if (!vqp->fw)
465 		rx_post(vqp, mvq->num_ent);
466 
467 	return 0;
468 
469 err_kzalloc:
470 	if (!vqp->fw)
471 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 err_db:
473 	if (!vqp->fw)
474 		rq_buf_free(ndev, vqp);
475 
476 	return err;
477 }
478 
479 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
480 {
481 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
482 
483 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
484 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
485 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
486 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
487 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
488 	if (!vqp->fw) {
489 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
490 		rq_buf_free(ndev, vqp);
491 	}
492 }
493 
494 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
495 {
496 	return get_sw_cqe(cq, cq->mcq.cons_index);
497 }
498 
499 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
500 {
501 	struct mlx5_cqe64 *cqe64;
502 
503 	cqe64 = next_cqe_sw(vcq);
504 	if (!cqe64)
505 		return -EAGAIN;
506 
507 	vcq->mcq.cons_index++;
508 	return 0;
509 }
510 
511 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
512 {
513 	struct mlx5_vdpa_net *ndev = mvq->ndev;
514 	struct vdpa_callback *event_cb;
515 
516 	event_cb = &ndev->event_cbs[mvq->index];
517 	mlx5_cq_set_ci(&mvq->cq.mcq);
518 
519 	/* make sure the CQ consumer update is visible to the hardware before
520 	 * updating the RX doorbell record.
521 	 */
522 	dma_wmb();
523 	rx_post(&mvq->vqqp, num);
524 	if (event_cb->callback)
525 		event_cb->callback(event_cb->private);
526 }
527 
528 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
529 {
530 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
531 	struct mlx5_vdpa_net *ndev = mvq->ndev;
532 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
533 	int num = 0;
534 
535 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
536 		num++;
537 		if (num > mvq->num_ent / 2) {
538 			/* If completions keep coming while we poll, we want to
539 			 * let the hardware know that we consumed them by
540 			 * updating the doorbell record.  We also let the vdpa core
541 			 * know about this so it passes it on to the virtio driver
542 			 * in the guest.
543 			 */
544 			mlx5_vdpa_handle_completions(mvq, num);
545 			num = 0;
546 		}
547 	}
548 
549 	if (num)
550 		mlx5_vdpa_handle_completions(mvq, num);
551 
552 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
553 }
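/* Note on the polling loop above: doorbell and callback updates are batched.
 * With num_ent == 256, completions are acknowledged in chunks of at most 129
 * (num_ent / 2 + 1), which bounds how long the RQ runs without fresh buffers
 * while still amortizing doorbell writes. The final mlx5_cq_arm() re-arms
 * the CQ so the next completion raises a new event.
 */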
554 
555 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
556 {
557 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
558 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
559 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
560 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
561 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
562 	__be64 *pas;
563 	int inlen;
564 	void *cqc;
565 	void *in;
566 	int err;
567 	int eqn;
568 
569 	err = mlx5_db_alloc(mdev, &vcq->db);
570 	if (err)
571 		return err;
572 
573 	vcq->mcq.set_ci_db = vcq->db.db;
574 	vcq->mcq.arm_db = vcq->db.db + 1;
575 	vcq->mcq.cqe_sz = 64;
576 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
577 	vcq->cqe = num_ent;
578 
579 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
580 	if (err)
581 		goto err_db;
582 
583 	cq_frag_buf_init(vcq, &vcq->buf);
584 
585 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
586 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
587 	in = kzalloc(inlen, GFP_KERNEL);
588 	if (!in) {
589 		err = -ENOMEM;
590 		goto err_vzalloc;
591 	}
592 
593 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
594 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
595 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
596 
597 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
598 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
599 
600 	/* Use vector 0 by default. Consider adding code to choose least used
601 	 * vector.
602 	 */
603 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
604 	if (err)
605 		goto err_vec;
606 
607 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
608 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
609 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
610 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
611 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
612 
613 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
614 	if (err)
615 		goto err_vec;
616 
617 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
618 	kfree(in);
619 	return 0;
620 
621 err_vec:
622 	kfree(in);
623 err_vzalloc:
624 	cq_frag_buf_free(ndev, &vcq->buf);
625 err_db:
626 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
627 	return err;
628 }
629 
630 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
631 {
632 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
633 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
634 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
635 
636 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
637 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
638 		return;
639 	}
640 	cq_frag_buf_free(ndev, &vcq->buf);
641 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
642 }
643 
644 static int read_umem_params(struct mlx5_vdpa_net *ndev)
645 {
646 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
647 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
648 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
649 	int out_size;
650 	void *caps;
651 	void *out;
652 	int err;
653 
654 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
655 	out = kzalloc(out_size, GFP_KERNEL);
656 	if (!out)
657 		return -ENOMEM;
658 
659 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
660 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
661 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
662 	if (err) {
663 		mlx5_vdpa_warn(&ndev->mvdev,
664 			"Failed reading vdpa umem capabilities with err %d\n", err);
665 		goto out;
666 	}
667 
668 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
669 
670 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
671 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
672 
673 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
674 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
675 
676 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
677 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
678 
679 out:
680 	kfree(out);
681 	return err;
682 }
683 
684 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
685 			  struct mlx5_vdpa_umem **umemp)
686 {
687 	u32 p_a;
688 	u32 p_b;
689 
690 	switch (num) {
691 	case 1:
692 		p_a = ndev->umem_1_buffer_param_a;
693 		p_b = ndev->umem_1_buffer_param_b;
694 		*umemp = &mvq->umem1;
695 		break;
696 	case 2:
697 		p_a = ndev->umem_2_buffer_param_a;
698 		p_b = ndev->umem_2_buffer_param_b;
699 		*umemp = &mvq->umem2;
700 		break;
701 	case 3:
702 		p_a = ndev->umem_3_buffer_param_a;
703 		p_b = ndev->umem_3_buffer_param_b;
704 		*umemp = &mvq->umem3;
705 		break;
706 	}
707 
708 	(*umemp)->size = p_a * mvq->num_ent + p_b;
709 }
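/* The umem sizing above is linear in the queue size:
 *
 *   size = param_a * num_ent + param_b
 *
 * where param_a/param_b come from the firmware caps read in
 * read_umem_params(). As an assumed example, param_a == 128, param_b == 4096
 * and a 256-entry VQ give 128 * 256 + 4096 = 36864 bytes; the real parameter
 * values are device- and firmware-specific.
 */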
710 
711 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
712 {
713 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
714 }
715 
716 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
717 {
718 	int inlen;
719 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
720 	void *um;
721 	void *in;
722 	int err;
723 	__be64 *pas;
724 	struct mlx5_vdpa_umem *umem;
725 
726 	set_umem_size(ndev, mvq, num, &umem);
727 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
728 	if (err)
729 		return err;
730 
731 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
732 
733 	in = kzalloc(inlen, GFP_KERNEL);
734 	if (!in) {
735 		err = -ENOMEM;
736 		goto err_in;
737 	}
738 
739 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
740 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
741 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
742 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
743 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
744 
745 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
746 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
747 
748 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
749 	if (err) {
750 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
751 		goto err_cmd;
752 	}
753 
754 	kfree(in);
755 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
756 
757 	return 0;
758 
759 err_cmd:
760 	kfree(in);
761 err_in:
762 	umem_frag_buf_free(ndev, umem);
763 	return err;
764 }
765 
766 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
767 {
768 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
769 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
770 	struct mlx5_vdpa_umem *umem;
771 
772 	switch (num) {
773 	case 1:
774 		umem = &mvq->umem1;
775 		break;
776 	case 2:
777 		umem = &mvq->umem2;
778 		break;
779 	case 3:
780 		umem = &mvq->umem3;
781 		break;
782 	}
783 
784 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
785 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
786 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
787 		return;
788 
789 	umem_frag_buf_free(ndev, umem);
790 }
791 
792 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
793 {
794 	int num;
795 	int err;
796 
797 	for (num = 1; num <= 3; num++) {
798 		err = create_umem(ndev, mvq, num);
799 		if (err)
800 			goto err_umem;
801 	}
802 	return 0;
803 
804 err_umem:
805 	for (num--; num > 0; num--)
806 		umem_destroy(ndev, mvq, num);
807 
808 	return err;
809 }
810 
811 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
812 {
813 	int num;
814 
815 	for (num = 3; num > 0; num--)
816 		umem_destroy(ndev, mvq, num);
817 }
818 
819 static int get_queue_type(struct mlx5_vdpa_net *ndev)
820 {
821 	u32 type_mask;
822 
823 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
824 
825 	/* prefer split queue */
826 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
827 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
828 
829 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
830 
831 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
832 }
833 
834 static bool vq_is_tx(u16 idx)
835 {
836 	return idx % 2;
837 }
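/* Per the virtio-net layout, data VQs alternate receiveq/transmitq: even
 * indices (0, 2, 4, ...) are RX queues and odd indices (1, 3, 5, ...) are TX
 * queues, i.e. VQ pair n is (rx = 2n, tx = 2n + 1).
 */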
838 
839 enum {
840 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
841 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
842 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
843 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
844 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
845 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
846 	MLX5_VIRTIO_NET_F_CSUM = 10,
847 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
848 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
849 };
850 
851 static u16 get_features(u64 features)
852 {
853 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
854 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
855 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
856 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
857 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
858 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
859 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
860 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
861 }
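/* Example of the translation above (illustrative): a guest negotiating
 * VIRTIO_NET_F_CSUM (virtio bit 0) and VIRTIO_NET_F_GUEST_CSUM (virtio bit 1)
 * yields BIT(MLX5_VIRTIO_NET_F_CSUM) | BIT(MLX5_VIRTIO_NET_F_GUEST_CSUM) =
 * BIT(10) | BIT(9) = 0x600. create_virtqueue() later splits this value into
 * the queue_feature_bit_mask_12_3 and queue_feature_bit_mask_2_0 fields.
 */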
862 
863 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
864 {
865 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
866 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
867 }
868 
869 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
870 {
871 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
872 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
873 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
874 }
875 
876 static int create_virtqueue(struct mlx5_vdpa_net *ndev,
877 			    struct mlx5_vdpa_virtqueue *mvq,
878 			    bool filled)
879 {
880 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
881 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
882 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
883 	struct mlx5_vdpa_mr *vq_mr;
884 	struct mlx5_vdpa_mr *vq_desc_mr;
885 	u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
886 	void *obj_context;
887 	u16 mlx_features;
888 	void *cmd_hdr;
889 	void *vq_ctx;
890 	void *in;
891 	int err;
892 
893 	err = umems_create(ndev, mvq);
894 	if (err)
895 		return err;
896 
897 	in = kzalloc(inlen, GFP_KERNEL);
898 	if (!in) {
899 		err = -ENOMEM;
900 		goto err_alloc;
901 	}
902 
903 	mlx_features = get_features(features);
904 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
905 
906 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
907 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
908 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
909 
910 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
911 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
912 		 mlx_features >> 3);
913 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
914 		 mlx_features & 7);
915 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
916 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
917 
918 	if (vq_is_tx(mvq->index))
919 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
920 
921 	if (mvq->map.virq) {
922 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
923 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
924 	} else {
925 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
926 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
927 	}
928 
929 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
930 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
931 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
932 		 !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
933 
934 	if (filled) {
935 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
936 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
937 
938 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
939 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
940 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
941 
942 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
943 		if (vq_mr)
944 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
945 
946 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
947 		if (vq_desc_mr &&
948 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
949 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
950 	} else {
951 		/* If there is no mr update now, make sure the existing mkeys are
952 		 * programmed when the queue is later modified to ready.
953 		 */
954 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
955 		if (vq_mr)
956 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
957 
958 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
959 		if (vq_desc_mr)
960 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
961 	}
962 
963 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
964 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
965 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
966 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
967 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
968 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
969 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
970 	if (counters_supported(&ndev->mvdev))
971 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
972 
973 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
974 	if (err)
975 		goto err_cmd;
976 
977 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
978 	kfree(in);
979 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
980 
981 	if (filled) {
982 		mlx5_vdpa_get_mr(mvdev, vq_mr);
983 		mvq->vq_mr = vq_mr;
984 
985 		if (vq_desc_mr &&
986 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
987 			mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
988 			mvq->desc_mr = vq_desc_mr;
989 		}
990 	}
991 
992 	return 0;
993 
994 err_cmd:
995 	kfree(in);
996 err_alloc:
997 	umems_destroy(ndev, mvq);
998 	return err;
999 }
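/* A rough summary of create_virtqueue() (assumptions noted): the firmware
 * object is created with three driver-provided umems sized by
 * set_umem_size(), an event channel (the MSI-X vector if one was allocated,
 * otherwise the fwqp QP) and, when 'filled' is set, the ring addresses,
 * indices and mkeys of the current memory mappings. When 'filled' is not
 * set, the mkey updates are deferred to a later modify via modified_fields.
 */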
1000 
1001 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1002 {
1003 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
1004 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
1005 
1006 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
1007 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1008 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
1009 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
1010 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
1011 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1012 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
1013 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
1014 		return;
1015 	}
1016 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
1017 	umems_destroy(ndev, mvq);
1018 
1019 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
1020 	mvq->vq_mr = NULL;
1021 
1022 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
1023 	mvq->desc_mr = NULL;
1024 }
1025 
1026 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1027 {
1028 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
1029 }
1030 
1031 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1032 {
1033 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1034 }
1035 
1036 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1037 			int *outlen, u32 qpn, u32 rqpn)
1038 {
1039 	void *qpc;
1040 	void *pp;
1041 
1042 	switch (cmd) {
1043 	case MLX5_CMD_OP_2RST_QP:
1044 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1045 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1046 		*in = kzalloc(*inlen, GFP_KERNEL);
1047 		*out = kzalloc(*outlen, GFP_KERNEL);
1048 		if (!*in || !*out)
1049 			goto outerr;
1050 
1051 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1052 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1053 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1054 		break;
1055 	case MLX5_CMD_OP_RST2INIT_QP:
1056 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1057 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1058 		*in = kzalloc(*inlen, GFP_KERNEL);
1059 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1060 		if (!*in || !*out)
1061 			goto outerr;
1062 
1063 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1064 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1065 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1066 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1067 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1068 		MLX5_SET(qpc, qpc, rwe, 1);
1069 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1070 		MLX5_SET(ads, pp, vhca_port_num, 1);
1071 		break;
1072 	case MLX5_CMD_OP_INIT2RTR_QP:
1073 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1074 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1075 		*in = kzalloc(*inlen, GFP_KERNEL);
1076 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1077 		if (!*in || !*out)
1078 			goto outerr;
1079 
1080 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1081 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1082 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1083 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1084 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1085 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1086 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1087 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1088 		MLX5_SET(ads, pp, fl, 1);
1089 		break;
1090 	case MLX5_CMD_OP_RTR2RTS_QP:
1091 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1092 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1093 		*in = kzalloc(*inlen, GFP_KERNEL);
1094 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1095 		if (!*in || !*out)
1096 			goto outerr;
1097 
1098 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1099 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1100 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1101 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1102 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1103 		MLX5_SET(ads, pp, ack_timeout, 14);
1104 		MLX5_SET(qpc, qpc, retry_count, 7);
1105 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1106 		break;
1107 	default:
1108 		goto outerr_nullify;
1109 	}
1110 
1111 	return;
1112 
1113 outerr:
1114 	kfree(*in);
1115 	kfree(*out);
1116 outerr_nullify:
1117 	*in = NULL;
1118 	*out = NULL;
1119 }
1120 
1121 static void free_inout(void *in, void *out)
1122 {
1123 	kfree(in);
1124 	kfree(out);
1125 }
1126 
1127 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1128  * firmware. The fw argument indicates whether the QP being modified is the
1129  * one used by firmware.
1130  */
1131 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1132 {
1133 	int outlen;
1134 	int inlen;
1135 	void *out;
1136 	void *in;
1137 	int err;
1138 
1139 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1140 	if (!in || !out)
1141 		return -ENOMEM;
1142 
1143 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1144 	free_inout(in, out);
1145 	return err;
1146 }
1147 
1148 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1149 {
1150 	int err;
1151 
1152 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1153 	if (err)
1154 		return err;
1155 
1156 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1157 	if (err)
1158 		return err;
1159 
1160 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1161 	if (err)
1162 		return err;
1163 
1164 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1165 	if (err)
1166 		return err;
1167 
1168 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1169 	if (err)
1170 		return err;
1171 
1172 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1173 	if (err)
1174 		return err;
1175 
1176 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1177 }
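/* connect_qps() walks both QPs through the standard RC state machine:
 *
 *   RST -> INIT -> RTR -> RTS
 *
 * Each transition is applied to the firmware QP first, then to the driver
 * QP. Only the firmware QP is taken all the way to RTS: it is the sender on
 * this notification channel, while the driver QP only receives and can stay
 * in RTR.
 */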
1178 
1179 struct mlx5_virtq_attr {
1180 	u8 state;
1181 	u16 available_index;
1182 	u16 used_index;
1183 };
1184 
1185 struct mlx5_virtqueue_query_mem {
1186 	u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
1187 	u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
1188 };
1189 
1190 struct mlx5_virtqueue_modify_mem {
1191 	u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
1192 	u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
1193 };
1194 
1195 static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1196 				     struct mlx5_vdpa_virtqueue *mvq,
1197 				     struct mlx5_virtqueue_query_mem *cmd)
1198 {
1199 	void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1200 
1201 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1202 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1203 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1204 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1205 }
1206 
1207 static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
1208 				struct mlx5_virtqueue_query_mem *cmd,
1209 				struct mlx5_virtq_attr *attr)
1210 {
1211 	void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
1212 
1213 	memset(attr, 0, sizeof(*attr));
1214 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1215 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1216 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1217 }
1218 
1219 static int query_virtqueues(struct mlx5_vdpa_net *ndev,
1220 			    int start_vq,
1221 			    int num_vqs,
1222 			    struct mlx5_virtq_attr *attrs)
1223 {
1224 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1225 	struct mlx5_virtqueue_query_mem *cmd_mem;
1226 	struct mlx5_vdpa_async_cmd *cmds;
1227 	int err = 0;
1228 
1229 	WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n",
1230 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1231 
1232 	cmds = kvzalloc_objs(*cmds, num_vqs);
1233 	cmd_mem = kvzalloc_objs(*cmd_mem, num_vqs);
1234 	if (!cmds || !cmd_mem) {
1235 		err = -ENOMEM;
1236 		goto done;
1237 	}
1238 
1239 	for (int i = 0; i < num_vqs; i++) {
1240 		cmds[i].in = &cmd_mem[i].in;
1241 		cmds[i].inlen = sizeof(cmd_mem[i].in);
1242 		cmds[i].out = &cmd_mem[i].out;
1243 		cmds[i].outlen = sizeof(cmd_mem[i].out);
1244 		fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
1245 	}
1246 
1247 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1248 	if (err) {
1249 		mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n",
1250 			      start_vq, start_vq + num_vqs, err);
1251 		goto done;
1252 	}
1253 
1254 	for (int i = 0; i < num_vqs; i++) {
1255 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1256 		int vq_idx = start_vq + i;
1257 
1258 		if (cmd->err) {
1259 			mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, cmd->err);
1260 			if (!err)
1261 				err = cmd->err;
1262 			continue;
1263 		}
1264 
1265 		query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]);
1266 	}
1267 
1268 done:
1269 	kvfree(cmd_mem);
1270 	kvfree(cmds);
1271 	return err;
1272 }
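/* Usage sketch for the batched query above (hypothetical caller, for
 * illustration only): the caller supplies one mlx5_virtq_attr per VQ and all
 * queries go out as a single burst of async commands, e.g.
 *
 *   struct mlx5_virtq_attr attrs[2];
 *
 *   err = query_virtqueues(ndev, 0, 2, attrs);
 *
 * which snapshots hw_available_index/hw_used_index of VQs 0 and 1 in one
 * round trip to firmware instead of two serialized ones.
 */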
1273 
1274 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1275 {
1276 	return ndev->mvdev.vdev.config->resume;
1277 }
1278 
1279 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1280 {
1281 	switch (oldstate) {
1282 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1283 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1284 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1285 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1286 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1287 		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1288 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1289 	default:
1290 		return false;
1291 	}
1292 }
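/* The transitions accepted above form this state diagram (ERR is terminal):
 *
 *   INIT --> RDY --> SUSPEND --> RDY   (last edge only if resumable)
 *
 * Anything else, including any transition out of ERR, is rejected, so
 * callers such as modify_virtqueues() fail with -EINVAL instead of issuing a
 * modify command the firmware would refuse.
 */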
1293 
1294 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1295 {
1296 	/* Only state is always modifiable */
1297 	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1298 		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1299 		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1300 
1301 	return true;
1302 }
1303 
1304 static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1305 				      struct mlx5_vdpa_virtqueue *mvq,
1306 				      int state,
1307 				      struct mlx5_virtqueue_modify_mem *cmd)
1308 {
1309 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1310 	struct mlx5_vdpa_mr *desc_mr = NULL;
1311 	struct mlx5_vdpa_mr *vq_mr = NULL;
1312 	void *obj_context;
1313 	void *cmd_hdr;
1314 	void *vq_ctx;
1315 
1316 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1317 
1318 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1319 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1320 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1321 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1322 
1323 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context);
1324 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1325 
1326 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1327 		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1328 
1329 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1330 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1331 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1332 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1333 	}
1334 
1335 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1336 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1337 
1338 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1339 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1340 
1341 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
1342 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
1343 			!!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
1344 
1345 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
1346 		u16 mlx_features = get_features(ndev->mvdev.actual_features);
1347 
1348 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
1349 			 mlx_features >> 3);
1350 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
1351 			 mlx_features & 7);
1352 	}
1353 
1354 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1355 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1356 
1357 		if (vq_mr)
1358 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1359 		else
1360 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1361 	}
1362 
1363 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1364 		desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1365 
1366 		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1367 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1368 		else
1369 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1370 	}
1371 
1372 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1373 }
1374 
1375 static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev,
1376 				 struct mlx5_vdpa_virtqueue *mvq,
1377 				 int state)
1378 {
1379 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1380 
1381 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1382 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP];
1383 		struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid];
1384 
1385 		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1386 		mlx5_vdpa_get_mr(mvdev, vq_mr);
1387 		mvq->vq_mr = vq_mr;
1388 	}
1389 
1390 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1391 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP];
1392 		struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid];
1393 
1394 		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1395 		mlx5_vdpa_get_mr(mvdev, desc_mr);
1396 		mvq->desc_mr = desc_mr;
1397 	}
1398 
1399 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1400 		mvq->fw_state = state;
1401 
1402 	mvq->modified_fields = 0;
1403 }
1404 
1405 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1406 {
1407 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1408 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1409 	void *cmd_hdr;
1410 	int err;
1411 
1412 	if (!counters_supported(&ndev->mvdev))
1413 		return 0;
1414 
1415 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1416 
1417 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1418 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1419 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1420 
1421 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1422 	if (err)
1423 		return err;
1424 
1425 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1426 
1427 	return 0;
1428 }
1429 
1430 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1431 {
1432 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1433 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1434 
1435 	if (!counters_supported(&ndev->mvdev))
1436 		return;
1437 
1438 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1439 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1440 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1441 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1442 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1443 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1444 }
1445 
1446 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1447 {
1448 	struct vdpa_callback *cb = priv;
1449 
1450 	if (cb->callback)
1451 		return cb->callback(cb->private);
1452 
1453 	return IRQ_HANDLED;
1454 }
1455 
1456 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1457 			 struct mlx5_vdpa_virtqueue *mvq)
1458 {
1459 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1460 	struct mlx5_vdpa_irq_pool_entry *ent;
1461 	int err;
1462 	int i;
1463 
1464 	for (i = 0; i < irqp->num_ent; i++) {
1465 		ent = &irqp->entries[i];
1466 		if (!ent->used) {
1467 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1468 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1469 			ent->dev_id = &ndev->event_cbs[mvq->index];
1470 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1471 					  ent->name, ent->dev_id);
1472 			if (err)
1473 				return;
1474 
1475 			ent->used = true;
1476 			mvq->map = ent->map;
1477 			return;
1478 		}
1479 	}
1480 }
1481 
1482 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1483 			   struct mlx5_vdpa_virtqueue *mvq)
1484 {
1485 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1486 	int i;
1487 
1488 	for (i = 0; i < irqp->num_ent; i++)
1489 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1490 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1491 			irqp->entries[i].used = false;
1492 			return;
1493 		}
1494 }
1495 
1496 static int setup_vq(struct mlx5_vdpa_net *ndev,
1497 		    struct mlx5_vdpa_virtqueue *mvq,
1498 		    bool filled)
1499 {
1500 	u16 idx = mvq->index;
1501 	int err;
1502 
1503 	if (mvq->initialized)
1504 		return 0;
1505 
1506 	err = cq_create(ndev, idx, mvq->num_ent);
1507 	if (err)
1508 		return err;
1509 
1510 	err = qp_create(ndev, mvq, &mvq->fwqp);
1511 	if (err)
1512 		goto err_fwqp;
1513 
1514 	err = qp_create(ndev, mvq, &mvq->vqqp);
1515 	if (err)
1516 		goto err_vqqp;
1517 
1518 	err = connect_qps(ndev, mvq);
1519 	if (err)
1520 		goto err_connect;
1521 
1522 	err = counter_set_alloc(ndev, mvq);
1523 	if (err)
1524 		goto err_connect;
1525 
1526 	alloc_vector(ndev, mvq);
1527 	err = create_virtqueue(ndev, mvq, filled);
1528 	if (err)
1529 		goto err_vq;
1530 
1531 	mvq->initialized = true;
1532 
1533 	if (mvq->ready) {
1534 		err = resume_vq(ndev, mvq);
1535 		if (err)
1536 			goto err_modify;
1537 	}
1538 
1539 	return 0;
1540 
1541 err_modify:
1542 	destroy_virtqueue(ndev, mvq);
1543 err_vq:
1544 	dealloc_vector(ndev, mvq);
1545 	counter_set_dealloc(ndev, mvq);
1546 err_connect:
1547 	qp_destroy(ndev, &mvq->vqqp);
1548 err_vqqp:
1549 	qp_destroy(ndev, &mvq->fwqp);
1550 err_fwqp:
1551 	cq_destroy(ndev, idx);
1552 	return err;
1553 }
1554 
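/*
 * Batch-modify a contiguous range of VQs to a new firmware state. One
 * async command per VQ is prepared and the whole batch is issued through
 * mlx5_vdpa_exec_async_cmds(), so the range is transitioned in parallel
 * rather than serially. Per-VQ failures are logged and the first error
 * encountered is returned.
 */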
1555 static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state)
1556 {
1557 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1558 	struct mlx5_virtqueue_modify_mem *cmd_mem;
1559 	struct mlx5_vdpa_async_cmd *cmds;
1560 	int err = 0;
1561 
1562 	WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n",
1563 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1564 
1565 	cmds = kvzalloc_objs(*cmds, num_vqs);
1566 	cmd_mem = kvzalloc_objs(*cmd_mem, num_vqs);
1567 	if (!cmds || !cmd_mem) {
1568 		err = -ENOMEM;
1569 		goto done;
1570 	}
1571 
1572 	for (int i = 0; i < num_vqs; i++) {
1573 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1574 		struct mlx5_vdpa_virtqueue *mvq;
1575 		int vq_idx = start_vq + i;
1576 
1577 		mvq = &ndev->vqs[vq_idx];
1578 
1579 		if (!modifiable_virtqueue_fields(mvq)) {
1580 			err = -EINVAL;
1581 			goto done;
1582 		}
1583 
1584 		if (mvq->fw_state != state) {
1585 			if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1586 				err = -EINVAL;
1587 				goto done;
1588 			}
1589 
1590 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1591 		}
1592 
1593 		cmd->in = &cmd_mem[i].in;
1594 		cmd->inlen = sizeof(cmd_mem[i].in);
1595 		cmd->out = &cmd_mem[i].out;
1596 		cmd->outlen = sizeof(cmd_mem[i].out);
1597 		fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]);
1598 	}
1599 
1600 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1601 	if (err) {
1602 		mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n",
1603 			      start_vq, start_vq + num_vqs);
1604 		goto done;
1605 	}
1606 
1607 	for (int i = 0; i < num_vqs; i++) {
1608 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1609 		struct mlx5_vdpa_virtqueue *mvq;
1610 		int vq_idx = start_vq + i;
1611 
1612 		mvq = &ndev->vqs[vq_idx];
1613 
1614 		if (cmd->err) {
1615 			mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n",
1616 				      vq_idx, mvq->fw_state, state, cmd->err);
1617 			if (!err)
1618 				err = cmd->err;
1619 			continue;
1620 		}
1621 
1622 		modify_virtqueue_end(ndev, mvq, state);
1623 	}
1624 
1625 done:
1626 	kvfree(cmd_mem);
1627 	kvfree(cmds);
1628 	return err;
1629 }
1630 
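/*
 * Suspend a range of VQs, then query the suspended queues so that the
 * available/used indices can be restored on resume or reported to
 * userspace for a stopped VQ. Note that the initialized/RDY checks are
 * performed on the first VQ of the range only.
 */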
1631 static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1632 {
1633 	struct mlx5_vdpa_virtqueue *mvq;
1634 	struct mlx5_virtq_attr *attrs;
1635 	int vq_idx, i;
1636 	int err;
1637 
1638 	if (start_vq >= ndev->cur_num_vqs)
1639 		return -EINVAL;
1640 
1641 	mvq = &ndev->vqs[start_vq];
1642 	if (!mvq->initialized)
1643 		return 0;
1644 
1645 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1646 		return 0;
1647 
1648 	err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
1649 	if (err)
1650 		return err;
1651 
1652 	attrs = kzalloc_objs(struct mlx5_virtq_attr, num_vqs);
1653 	if (!attrs)
1654 		return -ENOMEM;
1655 
1656 	err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
1657 	if (err)
1658 		goto done;
1659 
1660 	for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
1661 		mvq = &ndev->vqs[vq_idx];
1662 		mvq->avail_idx = attrs[i].available_index;
1663 		mvq->used_idx = attrs[i].used_index;
1664 	}
1665 
1666 done:
1667 	kfree(attrs);
1668 	return err;
1669 }
1670 
1671 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1672 {
1673 	return suspend_vqs(ndev, mvq->index, 1);
1674 }
1675 
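/*
 * Resuming is state dependent: queues in INIT have their pending fields
 * flushed first (see the FW quirk comment below), queues in SUSPEND
 * require the resumable capability, and queues already in RDY are a
 * no-op; any other state is rejected.
 */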
1676 static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1677 {
1678 	struct mlx5_vdpa_virtqueue *mvq;
1679 	int err;
1680 
1681 	if (start_vq >= ndev->mvdev.max_vqs)
1682 		return -EINVAL;
1683 
1684 	mvq = &ndev->vqs[start_vq];
1685 	if (!mvq->initialized)
1686 		return 0;
1687 
1688 	if (mvq->index >= ndev->cur_num_vqs)
1689 		return 0;
1690 
1691 	switch (mvq->fw_state) {
1692 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1693 		/* Due to a FW quirk we need to modify the VQ fields first, then change
1694 		 * state. This should be fixed soon; after that, a single command can be used.
1695 		 */
1696 		err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state);
1697 		if (err)
1698 			return err;
1699 		break;
1700 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1701 		if (!is_resumable(ndev)) {
1702 			mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
1703 			return -EINVAL;
1704 		}
1705 		break;
1706 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1707 		return 0;
1708 	default:
1709 		mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n",
1710 			       mvq->index, mvq->fw_state);
1711 		return -EINVAL;
1712 	}
1713 
1714 	return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1715 }
1716 
1717 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1718 {
1719 	return resume_vqs(ndev, mvq->index, 1);
1720 }
1721 
1722 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1723 {
1724 	if (!mvq->initialized)
1725 		return;
1726 
1727 	suspend_vq(ndev, mvq);
1728 	mvq->modified_fields = 0;
1729 	destroy_virtqueue(ndev, mvq);
1730 	dealloc_vector(ndev, mvq);
1731 	counter_set_dealloc(ndev, mvq);
1732 	qp_destroy(ndev, &mvq->vqqp);
1733 	qp_destroy(ndev, &mvq->fwqp);
1734 	cq_destroy(ndev, mvq->index);
1735 	mvq->initialized = false;
1736 }
1737 
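/*
 * The RQT lists only the receive virtqueues (even indices, hence the
 * j += 2 stride) and its actual size is rounded up to a power of two,
 * wrapping over the currently active VQs to fill the extra slots.
 */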
1738 static int create_rqt(struct mlx5_vdpa_net *ndev)
1739 {
1740 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1741 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1742 	__be32 *list;
1743 	void *rqtc;
1744 	int inlen;
1745 	void *in;
1746 	int i, j;
1747 	int err;
1748 
1749 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1750 	in = kzalloc(inlen, GFP_KERNEL);
1751 	if (!in)
1752 		return -ENOMEM;
1753 
1754 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1755 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1756 
1757 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1758 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1759 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1760 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1761 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1762 
1763 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1764 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1765 	kfree(in);
1766 	if (err)
1767 		return err;
1768 
1769 	return 0;
1770 }
1771 
1772 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1773 
1774 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1775 {
1776 	int act_sz = roundup_pow_of_two(num / 2);
1777 	__be32 *list;
1778 	void *rqtc;
1779 	int inlen;
1780 	void *in;
1781 	int i, j;
1782 	int err;
1783 
1784 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1785 	in = kzalloc(inlen, GFP_KERNEL);
1786 	if (!in)
1787 		return -ENOMEM;
1788 
1789 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1790 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1791 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1792 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1793 
1794 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1795 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1796 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1797 
1798 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1799 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1800 	kfree(in);
1801 	if (err)
1802 		return err;
1803 
1804 	return 0;
1805 }
1806 
1807 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1808 {
1809 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1810 }
1811 
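/*
 * The TIR spreads received traffic over the RQT using a symmetric
 * Toeplitz hash of the outer IPv4/TCP 4-tuple. The hard-coded key
 * appears to be the widely used default Toeplitz RSS key rather than a
 * per-device secret.
 */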
1812 static int create_tir(struct mlx5_vdpa_net *ndev)
1813 {
1814 #define HASH_IP_L4PORTS                                                                            \
1815 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1816 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1817 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1818 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1819 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1820 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1821 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1822 	void *rss_key;
1823 	void *outer;
1824 	void *tirc;
1825 	void *in;
1826 	int err;
1827 
1828 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1829 	if (!in)
1830 		return -ENOMEM;
1831 
1832 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1833 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1834 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1835 
1836 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1837 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1838 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1839 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1840 
1841 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1842 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1843 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1844 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1845 
1846 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1847 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1848 
1849 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1850 	kfree(in);
1851 	if (err)
1852 		return err;
1853 
1854 	mlx5_vdpa_add_tirn(ndev);
1855 	return err;
1856 }
1857 
1858 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1859 {
1860 	mlx5_vdpa_remove_tirn(ndev);
1861 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1862 }
1863 
1864 #define MAX_STEERING_ENT 0x8000
1865 #define MAX_STEERING_GROUPS 2
1866 
1867 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1868        #define NUM_DESTS 2
1869 #else
1870        #define NUM_DESTS 1
1871 #endif
1872 
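/*
 * With CONFIG_MLX5_VDPA_STEERING_DEBUG, each steering rule gets a second
 * flow destination backed by a flow counter so that unicast/multicast
 * rule hits can be inspected; otherwise this is a no-op and NUM_DESTS
 * stays 1.
 */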
1873 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1874 				 struct macvlan_node *node,
1875 				 struct mlx5_flow_act *flow_act,
1876 				 struct mlx5_flow_destination *dests)
1877 {
1878 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1879 	int err;
1880 
1881 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1882 	if (IS_ERR(node->ucast_counter.counter))
1883 		return PTR_ERR(node->ucast_counter.counter);
1884 
1885 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1886 	if (IS_ERR(node->mcast_counter.counter)) {
1887 		err = PTR_ERR(node->mcast_counter.counter);
1888 		goto err_mcast_counter;
1889 	}
1890 
1891 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1892 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1893 	return 0;
1894 
1895 err_mcast_counter:
1896 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1897 	return err;
1898 #else
1899 	return 0;
1900 #endif
1901 }
1902 
1903 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1904 				     struct macvlan_node *node)
1905 {
1906 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1907 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1908 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1909 #endif
1910 }
1911 
1912 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1913 					struct macvlan_node *node)
1914 {
1915 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1916 	struct mlx5_flow_act flow_act = {};
1917 	struct mlx5_flow_spec *spec;
1918 	void *headers_c;
1919 	void *headers_v;
1920 	u8 *dmac_c;
1921 	u8 *dmac_v;
1922 	int err;
1923 	u16 vid;
1924 
1925 	spec = kvzalloc_obj(*spec);
1926 	if (!spec)
1927 		return -ENOMEM;
1928 
1929 	vid = key2vid(node->macvlan);
1930 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1931 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1932 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1933 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1934 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1935 	eth_broadcast_addr(dmac_c);
1936 	ether_addr_copy(dmac_v, mac);
1937 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1938 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1939 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1940 	}
1941 	if (node->tagged) {
1942 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1943 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1944 	}
1945 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1946 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1947 	dests[0].tir_num = ndev->res.tirn;
1948 	err = add_steering_counters(ndev, node, &flow_act, dests);
1949 	if (err)
1950 		goto out_free;
1951 
1952 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1953 	dests[1].counter = node->ucast_counter.counter;
1954 #endif
1955 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1956 	if (IS_ERR(node->ucast_rule)) {
1957 		err = PTR_ERR(node->ucast_rule);
1958 		goto err_ucast;
1959 	}
1960 
1961 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1962 	dests[1].counter = node->mcast_counter.counter;
1963 #endif
1964 
1965 	memset(dmac_c, 0, ETH_ALEN);
1966 	memset(dmac_v, 0, ETH_ALEN);
1967 	dmac_c[0] = 1;
1968 	dmac_v[0] = 1;
1969 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1970 	if (IS_ERR(node->mcast_rule)) {
1971 		err = PTR_ERR(node->mcast_rule);
1972 		goto err_mcast;
1973 	}
1974 	kvfree(spec);
1975 	mlx5_vdpa_add_rx_counters(ndev, node);
1976 	return 0;
1977 
1978 err_mcast:
1979 	mlx5_del_flow_rules(node->ucast_rule);
1980 err_ucast:
1981 	remove_steering_counters(ndev, node);
1982 out_free:
1983 	kvfree(spec);
1984 	return err;
1985 }
1986 
1987 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1988 					 struct macvlan_node *node)
1989 {
1990 	mlx5_vdpa_remove_rx_counters(ndev, node);
1991 	mlx5_del_flow_rules(node->ucast_rule);
1992 	mlx5_del_flow_rules(node->mcast_rule);
1993 }
1994 
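/*
 * Pack a 64-bit MAC/VLAN steering key: bits 63..48 hold the VLAN id (or
 * MLX5V_UNTAGGED for untagged traffic) and bits 47..0 hold the MAC. For
 * example, untagged traffic for aa:bb:cc:dd:ee:ff is keyed as
 * 0x1000aabbccddeeff.
 */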
1995 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1996 {
1997 	u64 val;
1998 
1999 	if (!tagged)
2000 		vlan = MLX5V_UNTAGGED;
2001 
2002 	val = (u64)vlan << 48 |
2003 	      (u64)mac[0] << 40 |
2004 	      (u64)mac[1] << 32 |
2005 	      (u64)mac[2] << 24 |
2006 	      (u64)mac[3] << 16 |
2007 	      (u64)mac[4] << 8 |
2008 	      (u64)mac[5];
2009 
2010 	return val;
2011 }
2012 
2013 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
2014 {
2015 	struct macvlan_node *pos;
2016 	u32 idx;
2017 
2018 	idx = hash_64(value, 8); /* tbd: 8 == log2 of the macvlan hash table size */
2019 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
2020 		if (pos->macvlan == value)
2021 			return pos;
2022 	}
2023 	return NULL;
2024 }
2025 
2026 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
2027 {
2028 	struct macvlan_node *ptr;
2029 	u64 val;
2030 	u32 idx;
2031 	int err;
2032 
2033 	val = search_val(mac, vid, tagged);
2034 	if (mac_vlan_lookup(ndev, val))
2035 		return -EEXIST;
2036 
2037 	ptr = kzalloc_obj(*ptr);
2038 	if (!ptr)
2039 		return -ENOMEM;
2040 
2041 	ptr->tagged = tagged;
2042 	ptr->macvlan = val;
2043 	ptr->ndev = ndev;
2044 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
2045 	if (err)
2046 		goto err_add;
2047 
2048 	idx = hash_64(val, 8);
2049 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
2050 	return 0;
2051 
2052 err_add:
2053 	kfree(ptr);
2054 	return err;
2055 }
2056 
2057 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
2058 {
2059 	struct macvlan_node *ptr;
2060 
2061 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
2062 	if (!ptr)
2063 		return;
2064 
2065 	hlist_del(&ptr->hlist);
2066 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
2067 	remove_steering_counters(ndev, ptr);
2068 	kfree(ptr);
2069 }
2070 
2071 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
2072 {
2073 	struct macvlan_node *pos;
2074 	struct hlist_node *n;
2075 	int i;
2076 
2077 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
2078 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
2079 			hlist_del(&pos->hlist);
2080 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
2081 			remove_steering_counters(ndev, pos);
2082 			kfree(pos);
2083 		}
2084 	}
2085 }
2086 
2087 static int setup_steering(struct mlx5_vdpa_net *ndev)
2088 {
2089 	struct mlx5_flow_table_attr ft_attr = {};
2090 	struct mlx5_flow_namespace *ns;
2091 	int err;
2092 
2093 	ft_attr.max_fte = MAX_STEERING_ENT;
2094 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
2095 
2096 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
2097 	if (!ns) {
2098 		mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n");
2099 		return -EOPNOTSUPP;
2100 	}
2101 
2102 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
2103 	if (IS_ERR(ndev->rxft)) {
2104 		mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n");
2105 		return PTR_ERR(ndev->rxft);
2106 	}
2107 	mlx5_vdpa_add_rx_flow_table(ndev);
2108 
2109 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
2110 	if (err)
2111 		goto err_add;
2112 
2113 	return 0;
2114 
2115 err_add:
2116 	mlx5_vdpa_remove_rx_flow_table(ndev);
2117 	mlx5_destroy_flow_table(ndev->rxft);
2118 	return err;
2119 }
2120 
2121 static void teardown_steering(struct mlx5_vdpa_net *ndev)
2122 {
2123 	clear_mac_vlan_table(ndev);
2124 	mlx5_vdpa_remove_rx_flow_table(ndev);
2125 	mlx5_destroy_flow_table(ndev->rxft);
2126 }
2127 
2128 static int mlx5_vdpa_change_mac(struct mlx5_vdpa_net *ndev,
2129 				struct mlx5_core_dev *pfmdev,
2130 				const u8 *new_mac)
2131 {
2132 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
2133 	u8 old_mac[ETH_ALEN];
2134 
2135 	if (is_zero_ether_addr(new_mac))
2136 		return -EINVAL;
2137 
2138 	if (!is_zero_ether_addr(ndev->config.mac)) {
2139 		if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2140 			mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
2141 				       ndev->config.mac);
2142 			return -EIO;
2143 		}
2144 	}
2145 
2146 	if (mlx5_mpfs_add_mac(pfmdev, (u8 *)new_mac)) {
2147 		mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2148 			       new_mac);
2149 		return -EIO;
2150 	}
2151 
2152 	/* Back up the original MAC address so that it can be restored if adding
2153 	 * the forward rules fails.
2154 	 */
2155 	ether_addr_copy(old_mac, ndev->config.mac);
2156 
2157 	ether_addr_copy(ndev->config.mac, new_mac);
2158 
2159 	/* Recreate the flow table entry so that packets addressed to the new
2160 	 * MAC address can be forwarded back. */
2161 	mac_vlan_del(ndev, old_mac, 0, false);
2162 
2163 	if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2164 		mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2165 
2166 		/* Although we are unlikely to get here, double check anyway */
2167 		if (is_zero_ether_addr(old_mac)) {
2168 			mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2169 			return -EIO;
2170 		}
2171 
2172 		/* Try to restore the original MAC address to the MPFS table, and
2173 		 * try to restore the forward rule entry.
2174 		 */
2175 		if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2176 			mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2177 				       ndev->config.mac);
2178 		}
2179 
2180 		if (mlx5_mpfs_add_mac(pfmdev, old_mac)) {
2181 			mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2182 				       old_mac);
2183 		}
2184 
2185 		ether_addr_copy(ndev->config.mac, old_mac);
2186 
2187 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2188 			mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2189 
2190 		return -EIO;
2191 	}
2192 
2193 	return 0;
2194 }
2195 
2196 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2197 {
2198 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2199 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2200 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2201 	struct mlx5_core_dev *pfmdev;
2202 	size_t read;
2203 	u8 mac[ETH_ALEN];
2204 
2205 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2206 	switch (cmd) {
2207 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
2208 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov,
2209 					     (void *)mac, ETH_ALEN);
2210 		if (read != ETH_ALEN)
2211 			break;
2212 
2213 		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
2214 			status = VIRTIO_NET_OK;
2215 			break;
2216 		}
2217 		status = mlx5_vdpa_change_mac(ndev, pfmdev, mac) ? VIRTIO_NET_ERR :
2218 								       VIRTIO_NET_OK;
2219 		break;
2220 
2221 	default:
2222 		break;
2223 	}
2224 
2225 	return status;
2226 }
2227 
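/*
 * When shrinking, the RQT is narrowed first so that no RX traffic is
 * steered to queues about to go away, then the excess VQs are suspended
 * (resumable HW) or torn down. When growing, the new VQs are set up and
 * resumed before the RQT is widened; clean_added rolls back partially
 * created queues on failure.
 */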
2228 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2229 {
2230 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2231 	int cur_vqs = ndev->cur_num_vqs;
2232 	int new_vqs = newqps * 2;
2233 	int err;
2234 	int i;
2235 
2236 	if (cur_vqs > new_vqs) {
2237 		err = modify_rqt(ndev, new_vqs);
2238 		if (err)
2239 			return err;
2240 
2241 		if (is_resumable(ndev)) {
2242 			suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
2243 		} else {
2244 			for (i = new_vqs; i < cur_vqs; i++)
2245 				teardown_vq(ndev, &ndev->vqs[i]);
2246 		}
2247 
2248 		ndev->cur_num_vqs = new_vqs;
2249 	} else {
2250 		ndev->cur_num_vqs = new_vqs;
2251 
2252 		for (i = cur_vqs; i < new_vqs; i++) {
2253 			err = setup_vq(ndev, &ndev->vqs[i], false);
2254 			if (err)
2255 				goto clean_added;
2256 		}
2257 
2258 		err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
2259 		if (err)
2260 			goto clean_added;
2261 
2262 		err = modify_rqt(ndev, new_vqs);
2263 		if (err)
2264 			goto clean_added;
2265 	}
2266 	return 0;
2267 
2268 clean_added:
2269 	for (--i; i >= cur_vqs; --i)
2270 		teardown_vq(ndev, &ndev->vqs[i]);
2271 
2272 	ndev->cur_num_vqs = cur_vqs;
2273 
2274 	return err;
2275 }
2276 
2277 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2278 {
2279 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2280 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2281 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2282 	struct virtio_net_ctrl_mq mq;
2283 	size_t read;
2284 	u16 newqps;
2285 
2286 	switch (cmd) {
2287 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2288 		/* This mq feature check aligns with the pre-existing userspace
2289 		 * implementation.
2290 		 *
2291 		 * Without it, an untrusted driver could fake a multiqueue config
2292 		 * request down to a non-mq device, which may cause a kernel panic
2293 		 * due to uninitialized resources for the extra vqs. Even with a
2294 		 * well-behaving guest driver, changing the number of vqs on a
2295 		 * non-mq device should not be allowed.
2296 		 */
2297 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2298 			break;
2299 
2300 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2301 		if (read != sizeof(mq))
2302 			break;
2303 
2304 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2305 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2306 		    newqps > ndev->rqt_size)
2307 			break;
2308 
2309 		if (ndev->cur_num_vqs == 2 * newqps) {
2310 			status = VIRTIO_NET_OK;
2311 			break;
2312 		}
2313 
2314 		if (!change_num_qps(mvdev, newqps))
2315 			status = VIRTIO_NET_OK;
2316 
2317 		break;
2318 	default:
2319 		break;
2320 	}
2321 
2322 	return status;
2323 }
2324 
2325 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2326 {
2327 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2328 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2329 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2330 	__virtio16 vlan;
2331 	size_t read;
2332 	u16 id;
2333 
2334 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2335 		return status;
2336 
2337 	switch (cmd) {
2338 	case VIRTIO_NET_CTRL_VLAN_ADD:
2339 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2340 		if (read != sizeof(vlan))
2341 			break;
2342 
2343 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2344 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2345 			break;
2346 
2347 		status = VIRTIO_NET_OK;
2348 		break;
2349 	case VIRTIO_NET_CTRL_VLAN_DEL:
2350 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2351 		if (read != sizeof(vlan))
2352 			break;
2353 
2354 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2355 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2356 		status = VIRTIO_NET_OK;
2357 		break;
2358 	default:
2359 		break;
2360 	}
2361 
2362 	return status;
2363 }
2364 
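/*
 * Control VQ commands are served from a workqueue while holding reslock
 * for writing. Only one descriptor is completed per invocation and the
 * work item requeues itself, presumably to bound how long the lock is
 * held while a guest keeps the CVQ busy.
 */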
2365 static void mlx5_cvq_kick_handler(struct work_struct *work)
2366 {
2367 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2368 	struct virtio_net_ctrl_hdr ctrl;
2369 	struct mlx5_vdpa_wq_ent *wqent;
2370 	struct mlx5_vdpa_dev *mvdev;
2371 	struct mlx5_control_vq *cvq;
2372 	struct mlx5_vdpa_net *ndev;
2373 	size_t read, write;
2374 	int err;
2375 
2376 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2377 	mvdev = wqent->mvdev;
2378 	ndev = to_mlx5_vdpa_ndev(mvdev);
2379 	cvq = &mvdev->cvq;
2380 
2381 	down_write(&ndev->reslock);
2382 
2383 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2384 		goto out;
2385 
2386 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2387 		goto out;
2388 
2389 	if (!cvq->ready)
2390 		goto out;
2391 
2392 	while (true) {
2393 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2394 					   GFP_ATOMIC);
2395 		if (err <= 0)
2396 			break;
2397 
2398 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2399 		if (read != sizeof(ctrl))
2400 			break;
2401 
2402 		cvq->received_desc++;
2403 		switch (ctrl.class) {
2404 		case VIRTIO_NET_CTRL_MAC:
2405 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2406 			break;
2407 		case VIRTIO_NET_CTRL_MQ:
2408 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2409 			break;
2410 		case VIRTIO_NET_CTRL_VLAN:
2411 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2412 			break;
2413 		default:
2414 			break;
2415 		}
2416 
2417 		/* Make sure data is written before advancing index */
2418 		smp_wmb();
2419 
2420 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2421 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2422 		vringh_kiov_cleanup(&cvq->riov);
2423 		vringh_kiov_cleanup(&cvq->wiov);
2424 
2425 		if (vringh_need_notify_iotlb(&cvq->vring))
2426 			vringh_notify(&cvq->vring);
2427 
2428 		cvq->completed_desc++;
2429 		queue_work(mvdev->wq, &wqent->work);
2430 		break;
2431 	}
2432 
2433 out:
2434 	up_write(&ndev->reslock);
2435 }
2436 
2437 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2438 {
2439 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2440 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2441 	struct mlx5_vdpa_virtqueue *mvq;
2442 
2443 	if (!is_index_valid(mvdev, idx))
2444 		return;
2445 
2446 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2447 		if (!mvdev->wq || !mvdev->cvq.ready)
2448 			return;
2449 
2450 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2451 		return;
2452 	}
2453 
2454 	mvq = &ndev->vqs[idx];
2455 	if (unlikely(!mvq->ready))
2456 		return;
2457 
2458 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2459 }
2460 
2461 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2462 				    u64 driver_area, u64 device_area)
2463 {
2464 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2465 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2466 	struct mlx5_vdpa_virtqueue *mvq;
2467 
2468 	if (!is_index_valid(mvdev, idx))
2469 		return -EINVAL;
2470 
2471 	if (is_ctrl_vq_idx(mvdev, idx)) {
2472 		mvdev->cvq.desc_addr = desc_area;
2473 		mvdev->cvq.device_addr = device_area;
2474 		mvdev->cvq.driver_addr = driver_area;
2475 		return 0;
2476 	}
2477 
2478 	mvq = &ndev->vqs[idx];
2479 	mvq->desc_addr = desc_area;
2480 	mvq->device_addr = device_area;
2481 	mvq->driver_addr = driver_area;
2482 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2483 	return 0;
2484 }
2485 
2486 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2487 {
2488 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2489 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2490 	struct mlx5_vdpa_virtqueue *mvq;
2491 
2492 	if (!is_index_valid(mvdev, idx))
2493 		return;
2494 
2495 	if (is_ctrl_vq_idx(mvdev, idx)) {
2496 		struct mlx5_control_vq *cvq = &mvdev->cvq;
2497 
2498 		cvq->vring.vring.num = num;
2499 		return;
2500 	}
2501 
2502 	mvq = &ndev->vqs[idx];
2503 	ndev->needs_teardown |= num != mvq->num_ent;
2504 	mvq->num_ent = num;
2505 }
2506 
2507 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2508 {
2509 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2510 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2511 
2512 	ndev->event_cbs[idx] = *cb;
2513 	if (is_ctrl_vq_idx(mvdev, idx))
2514 		mvdev->cvq.event_cb = *cb;
2515 }
2516 
2517 static void mlx5_cvq_notify(struct vringh *vring)
2518 {
2519 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2520 
2521 	if (!cvq->event_cb.callback)
2522 		return;
2523 
2524 	cvq->event_cb.callback(cvq->event_cb.private);
2525 }
2526 
2527 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2528 {
2529 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2530 
2531 	cvq->ready = ready;
2532 	if (!ready)
2533 		return;
2534 
2535 	cvq->vring.notify = mlx5_cvq_notify;
2536 }
2537 
2538 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2539 {
2540 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2541 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2542 	struct mlx5_vdpa_virtqueue *mvq;
2543 
2544 	if (!mvdev->actual_features)
2545 		return;
2546 
2547 	if (!is_index_valid(mvdev, idx))
2548 		return;
2549 
2550 	if (is_ctrl_vq_idx(mvdev, idx)) {
2551 		set_cvq_ready(mvdev, ready);
2552 		return;
2553 	}
2554 
2555 	mvq = &ndev->vqs[idx];
2556 	if (!ready) {
2557 		suspend_vq(ndev, mvq);
2558 	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
2559 		if (resume_vq(ndev, mvq))
2560 			ready = false;
2561 	}
2562 
2563 	mvq->ready = ready;
2564 }
2565 
2566 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2567 {
2568 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2569 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2570 
2571 	if (!is_index_valid(mvdev, idx))
2572 		return false;
2573 
2574 	if (is_ctrl_vq_idx(mvdev, idx))
2575 		return mvdev->cvq.ready;
2576 
2577 	return ndev->vqs[idx].ready;
2578 }
2579 
2580 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2581 				  const struct vdpa_vq_state *state)
2582 {
2583 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2584 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2585 	struct mlx5_vdpa_virtqueue *mvq;
2586 
2587 	if (!is_index_valid(mvdev, idx))
2588 		return -EINVAL;
2589 
2590 	if (is_ctrl_vq_idx(mvdev, idx)) {
2591 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2592 		return 0;
2593 	}
2594 
2595 	mvq = &ndev->vqs[idx];
2596 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2597 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2598 		return -EINVAL;
2599 	}
2600 
2601 	mvq->used_idx = state->split.avail_index;
2602 	mvq->avail_idx = state->split.avail_index;
2603 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2604 				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2605 	return 0;
2606 }
2607 
2608 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2609 {
2610 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2611 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2612 	struct mlx5_vdpa_virtqueue *mvq;
2613 	struct mlx5_virtq_attr attr;
2614 	int err;
2615 
2616 	if (!is_index_valid(mvdev, idx))
2617 		return -EINVAL;
2618 
2619 	if (is_ctrl_vq_idx(mvdev, idx)) {
2620 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2621 		return 0;
2622 	}
2623 
2624 	mvq = &ndev->vqs[idx];
2625 	/* If the virtq object was destroyed, use the value saved at
2626 	 * the last minute of suspend_vq. This caters for userspace
2627 	 * that cares about emulating the index after vq is stopped.
2628 	 */
2629 	if (!mvq->initialized) {
2630 		/* Firmware returns a wrong value for the available index.
2631 		 * Since both values should be identical, we take the value of
2632 		 * used_idx which is reported correctly.
2633 		 */
2634 		state->split.avail_index = mvq->used_idx;
2635 		return 0;
2636 	}
2637 
2638 	err = query_virtqueues(ndev, mvq->index, 1, &attr);
2639 	if (err) {
2640 		mlx5_vdpa_err(mvdev, "failed to query virtqueue\n");
2641 		return err;
2642 	}
2643 	state->split.avail_index = attr.used_index;
2644 	return 0;
2645 }
2646 
2647 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2648 {
2649 	return PAGE_SIZE;
2650 }
2651 
2652 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2653 {
2654 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2655 
2656 	if (is_ctrl_vq_idx(mvdev, idx))
2657 		return MLX5_VDPA_CVQ_GROUP;
2658 
2659 	return MLX5_VDPA_DATAVQ_GROUP;
2660 }
2661 
2662 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2663 {
2664 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2665 
2666 	if (is_ctrl_vq_idx(mvdev, idx))
2667 		return MLX5_VDPA_CVQ_GROUP;
2668 
2669 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2670 }
2671 
2672 static u64 mlx_to_vritio_features(u16 dev_features)
2673 {
2674 	u64 result = 0;
2675 
2676 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2677 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2678 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2679 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2680 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2681 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2682 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2683 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2684 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2685 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2686 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2687 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2688 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2689 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2690 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2691 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2692 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2693 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2694 
2695 	return result;
2696 }
2697 
2698 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2699 {
2700 	u64 mlx_vdpa_features = 0;
2701 	u16 dev_features;
2702 
2703 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2704 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2705 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2706 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2707 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2708 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2709 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2710 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2711 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2712 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2713 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2714 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2715 
2716 	return mlx_vdpa_features;
2717 }
2718 
2719 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2720 {
2721 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2722 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2723 
2724 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2725 	return ndev->mvdev.mlx_features;
2726 }
2727 
2728 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2729 {
2730 	/* Minimum features to expect */
2731 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2732 		return -EOPNOTSUPP;
2733 
2734 	/* Double check the feature combination sent down by the driver.
2735 	 * Fail invalid features due to the absence of a required feature.
2736 	 *
2737 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2738 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2739 	 * By failing the invalid features sent down by untrusted drivers,
2740 	 * we ensure the assumptions made by is_index_valid() and
2741 	 * is_ctrl_vq_idx() are not compromised.
2742 	 */
2743 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2744 	    BIT_ULL(VIRTIO_NET_F_MQ))
2745 		return -EINVAL;
2746 
2747 	return 0;
2748 }
2749 
2750 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2751 {
2752 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2753 	int err;
2754 	int i;
2755 
2756 	for (i = 0; i < mvdev->max_vqs; i++) {
2757 		err = setup_vq(ndev, &ndev->vqs[i], filled);
2758 		if (err)
2759 			goto err_vq;
2760 	}
2761 
2762 	return 0;
2763 
2764 err_vq:
2765 	for (--i; i >= 0; i--)
2766 		teardown_vq(ndev, &ndev->vqs[i]);
2767 
2768 	return err;
2769 }
2770 
2771 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2772 {
2773 	int i;
2774 
2775 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2776 		teardown_vq(ndev, &ndev->vqs[i]);
2777 }
2778 
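/*
 * max_idx is the highest usable VQ index: max_vqs when MQ places the CVQ
 * right after all data VQs, 2 when only the CVQ feature is negotiated
 * (data VQs at 0 and 1, CVQ at 2), and 1 for a plain two-queue device.
 */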
2779 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2780 {
2781 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2782 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2783 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2784 			mvdev->max_idx = mvdev->max_vqs;
2785 		} else {
2786 			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2787 			 * CVQ gets index 2.
2788 			 */
2789 			mvdev->max_idx = 2;
2790 		}
2791 	} else {
2792 		/* Two data virtqueues only: one for rx and one for tx */
2793 		mvdev->max_idx = 1;
2794 	}
2795 }
2796 
2797 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2798 {
2799 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2800 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2801 	int err;
2802 
2803 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2804 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2805 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2806 	if (vport)
2807 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2808 
2809 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2810 	if (err)
2811 		return 0;
2812 
2813 	return MLX5_GET(query_vport_state_out, out, state);
2814 }
2815 
2816 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2817 {
2818 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2819 	    VPORT_STATE_UP)
2820 		return true;
2821 
2822 	return false;
2823 }
2824 
2825 static void update_carrier(struct work_struct *work)
2826 {
2827 	struct mlx5_vdpa_wq_ent *wqent;
2828 	struct mlx5_vdpa_dev *mvdev;
2829 	struct mlx5_vdpa_net *ndev;
2830 
2831 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2832 	mvdev = wqent->mvdev;
2833 	ndev = to_mlx5_vdpa_ndev(mvdev);
2834 	if (get_link_state(mvdev))
2835 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2836 	else
2837 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2838 
2839 	if (ndev->config_cb.callback)
2840 		ndev->config_cb.callback(ndev->config_cb.private);
2841 
2842 	kfree(wqent);
2843 }
2844 
2845 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2846 {
2847 	struct mlx5_vdpa_wq_ent *wqent;
2848 
2849 	wqent = kzalloc_obj(*wqent, GFP_ATOMIC);
2850 	if (!wqent)
2851 		return -ENOMEM;
2852 
2853 	wqent->mvdev = &ndev->mvdev;
2854 	INIT_WORK(&wqent->work, update_carrier);
2855 	queue_work(ndev->mvdev.wq, &wqent->work);
2856 	return 0;
2857 }
2858 
2859 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2860 {
2861 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2862 	struct mlx5_eqe *eqe = param;
2863 	int ret = NOTIFY_DONE;
2864 
2865 	if (ndev->mvdev.suspended)
2866 		return NOTIFY_DONE;
2867 
2868 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2869 		switch (eqe->sub_type) {
2870 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2871 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2872 			if (queue_link_work(ndev))
2873 				return NOTIFY_DONE;
2874 
2875 			ret = NOTIFY_OK;
2876 			break;
2877 		default:
2878 			return NOTIFY_DONE;
2879 		}
2880 		return ret;
2881 	}
2882 	return ret;
2883 }
2884 
2885 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2886 {
2887 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2888 		return;
2889 
2890 	ndev->nb.notifier_call = event_handler;
2891 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2892 	ndev->nb_registered = true;
2893 	queue_link_work(ndev);
2894 }
2895 
2896 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2897 {
2898 	if (!ndev->nb_registered)
2899 		return;
2900 
2901 	ndev->nb_registered = false;
2902 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2903 	if (ndev->mvdev.wq)
2904 		flush_workqueue(ndev->mvdev.wq);
2905 }
2906 
2907 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2908 {
2909 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2910 }
2911 
2912 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2913 {
2914 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2915 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2916 	u64 old_features = mvdev->actual_features;
2917 	u64 diff_features;
2918 	int err;
2919 
2920 	print_features(mvdev, features, true);
2921 
2922 	err = verify_driver_features(mvdev, features);
2923 	if (err)
2924 		return err;
2925 
2926 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2927 
2928 	/* Interested in changes of vq features only. */
2929 	if (get_features(old_features) != get_features(mvdev->actual_features)) {
2930 		for (int i = 0; i < mvdev->max_vqs; ++i) {
2931 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2932 
2933 			mvq->modified_fields |= (
2934 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION |
2935 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES
2936 			);
2937 		}
2938 	}
2939 
2940 	/* When below features diverge from initial device features, VQs need a full teardown. */
2941 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \
2942 			     BIT_ULL(VIRTIO_NET_F_CSUM) | \
2943 			     BIT_ULL(VIRTIO_F_VERSION_1))
2944 
2945 	diff_features = mvdev->mlx_features ^ mvdev->actual_features;
2946 	ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK);
2947 
2948 	update_cvq_info(mvdev);
2949 	return err;
2950 }
2951 
2952 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2953 {
2954 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2955 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2956 
2957 	ndev->config_cb = *cb;
2958 }
2959 
2960 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
2961 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2962 {
2963 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2964 }
2965 
2966 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2967 {
2968 	return VIRTIO_ID_NET;
2969 }
2970 
2971 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2972 {
2973 	return PCI_VENDOR_ID_MELLANOX;
2974 }
2975 
2976 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2977 {
2978 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2979 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2980 
2981 	print_status(mvdev, ndev->mvdev.status, false);
2982 	return ndev->mvdev.status;
2983 }
2984 
2985 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2986 {
2987 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2988 	struct mlx5_virtq_attr attr = {};
2989 	int err;
2990 
2991 	if (mvq->initialized) {
2992 		err = query_virtqueues(ndev, mvq->index, 1, &attr);
2993 		if (err)
2994 			return err;
2995 	}
2996 
2997 	ri->avail_index = attr.available_index;
2998 	ri->used_index = attr.used_index;
2999 	ri->ready = mvq->ready;
3000 	ri->num_ent = mvq->num_ent;
3001 	ri->desc_addr = mvq->desc_addr;
3002 	ri->device_addr = mvq->device_addr;
3003 	ri->driver_addr = mvq->driver_addr;
3004 	ri->map = mvq->map;
3005 	ri->restore = true;
3006 	return 0;
3007 }
3008 
3009 static int save_channels_info(struct mlx5_vdpa_net *ndev)
3010 {
3011 	int i;
3012 
3013 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3014 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
3015 		save_channel_info(ndev, &ndev->vqs[i]);
3016 	}
3017 	return 0;
3018 }
3019 
3020 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
3021 {
3022 	int i;
3023 
3024 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
3025 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3026 }
3027 
3028 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
3029 {
3030 	struct mlx5_vdpa_virtqueue *mvq;
3031 	struct mlx5_vq_restore_info *ri;
3032 	int i;
3033 
3034 	mlx5_clear_vqs(ndev);
3035 	mvqs_set_defaults(ndev);
3036 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3037 		mvq = &ndev->vqs[i];
3038 		ri = &mvq->ri;
3039 		if (!ri->restore)
3040 			continue;
3041 
3042 		mvq->avail_idx = ri->avail_index;
3043 		mvq->used_idx = ri->used_index;
3044 		mvq->ready = ri->ready;
3045 		mvq->num_ent = ri->num_ent;
3046 		mvq->desc_addr = ri->desc_addr;
3047 		mvq->device_addr = ri->device_addr;
3048 		mvq->driver_addr = ri->driver_addr;
3049 		mvq->map = ri->map;
3050 	}
3051 }
3052 
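/*
 * Changing a memory map suspends all VQs and flags their MKEY fields for
 * modification. Hardware that cannot resume a suspended VQ must instead
 * save the VQ state, destroy and recreate all VQ resources around the MR
 * update, and restore the saved state afterwards.
 */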
3053 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
3054 				struct mlx5_vdpa_mr *new_mr,
3055 				unsigned int asid)
3056 {
3057 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3058 	bool teardown = !is_resumable(ndev);
3059 	int err;
3060 
3061 	suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3062 	if (teardown) {
3063 		err = save_channels_info(ndev);
3064 		if (err)
3065 			return err;
3066 
3067 		teardown_vq_resources(ndev);
3068 	}
3069 
3070 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3071 
3072 	for (int i = 0; i < mvdev->max_vqs; i++)
3073 		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
3074 						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
3075 
3076 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
3077 		return 0;
3078 
3079 	if (teardown) {
3080 		restore_channels_info(ndev);
3081 		err = setup_vq_resources(ndev, true);
3082 		if (err)
3083 			return err;
3084 	}
3085 
3086 	resume_vqs(ndev, 0, ndev->cur_num_vqs);
3087 
3088 	return 0;
3089 }
3090 
3091 /* reslock must be held for this function */
3092 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled)
3093 {
3094 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
3095 	int err;
3096 
3097 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3098 
3099 	if (ndev->setup) {
3100 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
3101 		err = 0;
3102 		goto out;
3103 	}
3104 	mlx5_vdpa_add_debugfs(ndev);
3105 
3106 	err = read_umem_params(ndev);
3107 	if (err)
3108 		goto err_setup;
3109 
3110 	err = setup_virtqueues(mvdev, filled);
3111 	if (err) {
3112 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
3113 		goto err_setup;
3114 	}
3115 
3116 	err = create_rqt(ndev);
3117 	if (err) {
3118 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
3119 		goto err_rqt;
3120 	}
3121 
3122 	err = create_tir(ndev);
3123 	if (err) {
3124 		mlx5_vdpa_warn(mvdev, "create_tir\n");
3125 		goto err_tir;
3126 	}
3127 
3128 	err = setup_steering(ndev);
3129 	if (err) {
3130 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
3131 		goto err_fwd;
3132 	}
3133 	ndev->setup = true;
3134 
3135 	return 0;
3136 
3137 err_fwd:
3138 	destroy_tir(ndev);
3139 err_tir:
3140 	destroy_rqt(ndev);
3141 err_rqt:
3142 	teardown_virtqueues(ndev);
3143 err_setup:
3144 	mlx5_vdpa_remove_debugfs(ndev);
3145 out:
3146 	return err;
3147 }
3148 
3149 /* reslock must be held for this function */
3150 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev)
3151 {
3152 
3153 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3154 
3155 	if (!ndev->setup)
3156 		return;
3157 
3158 	mlx5_vdpa_remove_debugfs(ndev);
3159 	teardown_steering(ndev);
3160 	destroy_tir(ndev);
3161 	destroy_rqt(ndev);
3162 	teardown_virtqueues(ndev);
3163 	ndev->setup = false;
3164 	ndev->needs_teardown = false;
3165 }
3166 
3167 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
3168 {
3169 	struct mlx5_control_vq *cvq = &mvdev->cvq;
3170 	int err = 0;
3171 
3172 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
3173 		u16 idx = cvq->vring.last_avail_idx;
3174 
3175 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
3176 					cvq->vring.vring.num, false,
3177 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
3178 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
3179 					(struct vring_used *)(uintptr_t)cvq->device_addr);
3180 
3181 		if (!err)
3182 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
3183 	}
3184 	return err;
3185 }
3186 
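/*
 * Only the DRIVER_OK transition is acted upon here: setting it wires up
 * the CVQ vring, registers the link notifier and either resumes existing
 * VQs or creates them from scratch; clearing DRIVER_OK without a reset
 * is unexpected and is refused.
 */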
3187 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
3188 {
3189 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3190 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3191 	int err;
3192 
3193 	print_status(mvdev, status, true);
3194 
3195 	down_write(&ndev->reslock);
3196 
3197 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
3198 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
3199 			err = setup_cvq_vring(mvdev);
3200 			if (err) {
3201 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
3202 				goto err_setup;
3203 			}
3204 			register_link_notifier(ndev);
3205 
3206 			if (ndev->needs_teardown)
3207 				teardown_vq_resources(ndev);
3208 
3209 			if (ndev->setup) {
3210 				err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3211 				if (err) {
3212 					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
3213 					goto err_driver;
3214 				}
3215 			} else {
3216 				err = setup_vq_resources(ndev, true);
3217 				if (err) {
3218 					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
3219 					goto err_driver;
3220 				}
3221 			}
3222 		} else {
3223 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3224 			goto err_clear;
3225 		}
3226 	}
3227 
3228 	ndev->mvdev.status = status;
3229 	up_write(&ndev->reslock);
3230 	return;
3231 
3232 err_driver:
3233 	unregister_link_notifier(ndev);
3234 err_setup:
3235 	mlx5_vdpa_clean_mrs(&ndev->mvdev);
3236 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3237 err_clear:
3238 	up_write(&ndev->reslock);
3239 }
3240 
3241 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3242 {
3243 	int i;
3244 
3245 	/* By default, all groups are mapped to ASID 0 */
3246 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3247 		mvdev->mres.group2asid[i] = 0;
3248 }
3249 
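/*
 * A compat reset may skip the expensive VQ teardown when the device
 * never reached DRIVER_OK, the first VQ is still in INIT state and no
 * state/address/index modifications are pending; vqs[0] appears to act
 * as representative for all queues here.
 */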
3250 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev)
3251 {
3252 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3253 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0];
3254 
3255 	if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
3256 		return true;
3257 
3258 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
3259 		return true;
3260 
3261 	return mvq->modified_fields & (
3262 		MLX5_VIRTQ_MODIFY_MASK_STATE |
3263 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS |
3264 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
3265 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX
3266 	);
3267 }
3268 
3269 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3270 {
3271 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3272 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3273 	bool vq_reset;
3274 
3275 	print_status(mvdev, 0, true);
3276 	mlx5_vdpa_info(mvdev, "performing device reset\n");
3277 
3278 	down_write(&ndev->reslock);
3279 	unregister_link_notifier(ndev);
3280 	vq_reset = needs_vqs_reset(mvdev);
3281 	if (vq_reset) {
3282 		teardown_vq_resources(ndev);
3283 		mvqs_set_defaults(ndev);
3284 	}
3285 
3286 	if (flags & VDPA_RESET_F_CLEAN_MAP)
3287 		mlx5_vdpa_clean_mrs(&ndev->mvdev);
3288 	ndev->mvdev.status = 0;
3289 	ndev->mvdev.suspended = false;
3290 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3291 	ndev->mvdev.cvq.ready = false;
3292 	ndev->mvdev.cvq.received_desc = 0;
3293 	ndev->mvdev.cvq.completed_desc = 0;
3294 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3295 	ndev->mvdev.actual_features = 0;
3296 	init_group_to_asid_map(mvdev);
3297 	++mvdev->generation;
3298 
3299 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3300 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3301 		if (mlx5_vdpa_create_dma_mr(mvdev))
3302 			mlx5_vdpa_err(mvdev, "create MR failed\n");
3303 	}
3304 	if (vq_reset)
3305 		setup_vq_resources(ndev, false);
3306 	up_write(&ndev->reslock);
3307 
3308 	return 0;
3309 }
3310 
3311 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3312 {
3313 	return mlx5_vdpa_compat_reset(vdev, 0);
3314 }
3315 
3316 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3317 {
3318 	return sizeof(struct virtio_net_config);
3319 }
3320 
3321 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3322 				 unsigned int len)
3323 {
3324 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3325 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3326 
3327 	if (offset + len <= sizeof(struct virtio_net_config))
3328 		memcpy(buf, (u8 *)&ndev->config + offset, len);
3329 }
3330 
3331 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3332 				 unsigned int len)
3333 {
3334 	/* not supported */
3335 }
3336 
3337 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3338 {
3339 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3340 
3341 	return mvdev->generation;
3342 }
3343 
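/* Install the translations in @iotlb for address space @asid: build a new
 * memory region (NULL for an empty iotlb, which clears the previous one),
 * then either set it directly or switch over with mlx5_vdpa_change_map(),
 * and finally propagate the iotlb to the control VQ.
 */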
3344 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3345 			unsigned int asid)
3346 {
3347 	struct mlx5_vdpa_mr *new_mr;
3348 	int err;
3349 
3350 	if (asid >= MLX5_VDPA_NUM_AS)
3351 		return -EINVAL;
3352 
3353 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3354 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3355 		if (IS_ERR(new_mr)) {
3356 			err = PTR_ERR(new_mr);
3357 			mlx5_vdpa_err(mvdev, "create map failed (%d)\n", err);
3358 			return err;
3359 		}
3360 	} else {
3361 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3362 		new_mr = NULL;
3363 	}
3364 
3365 	if (!mvdev->mres.mr[asid]) {
3366 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3367 	} else {
3368 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3369 		if (err) {
3370 			mlx5_vdpa_err(mvdev, "change map failed (%d)\n", err);
3371 			goto out_err;
3372 		}
3373 	}
3374 
3375 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3376 
3377 out_err:
3378 	mlx5_vdpa_put_mr(mvdev, new_mr);
3379 	return err;
3380 }
3381 
3382 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3383 			     struct vhost_iotlb *iotlb)
3384 {
3385 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3386 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3387 	int err;
3388 
3389 	down_write(&ndev->reslock);
3390 	err = set_map_data(mvdev, iotlb, asid);
3391 	up_write(&ndev->reslock);
3392 	return err;
3393 }
3394 
3395 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3396 {
3397 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3398 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3399 	int err;
3400 
3401 	down_write(&ndev->reslock);
3402 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3403 	up_write(&ndev->reslock);
3404 	return err;
3405 }
3406 
3407 static union virtio_map mlx5_get_vq_map(struct vdpa_device *vdev, u16 idx)
3408 {
3409 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3410 	union virtio_map map;
3411 
3412 	if (is_ctrl_vq_idx(mvdev, idx))
3413 		map.dma_dev = &vdev->dev;
3414 	else
3415 		map.dma_dev = mvdev->vdev.vmap.dma_dev;
3416 
3417 	return map;
3418 }
3419 
3420 static void free_irqs(struct mlx5_vdpa_net *ndev)
3421 {
3422 	struct mlx5_vdpa_irq_pool_entry *ent;
3423 	int i;
3424 
3425 	if (!msix_mode_supported(&ndev->mvdev))
3426 		return;
3427 
3428 	if (!ndev->irqp.entries)
3429 		return;
3430 
3431 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3432 		ent = ndev->irqp.entries + i;
3433 		if (ent->map.virq)
3434 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3435 	}
3436 	kfree(ndev->irqp.entries);
3437 }
3438 
3439 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3440 {
3441 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3442 	struct mlx5_core_dev *pfmdev;
3443 	struct mlx5_vdpa_net *ndev;
3444 
3445 	ndev = to_mlx5_vdpa_ndev(mvdev);
3446 
3447 	/* Functions called here should be able to work with
3448 	 * uninitialized resources.
3449 	 */
3450 	free_fixed_resources(ndev);
3451 	mlx5_vdpa_clean_mrs(mvdev);
3452 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3453 	if (!is_zero_ether_addr(ndev->config.mac)) {
3454 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3455 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3456 	}
3457 	mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
3458 	mlx5_vdpa_free_resources(&ndev->mvdev);
3459 	free_irqs(ndev);
3460 	kfree(ndev->event_cbs);
3461 	kfree(ndev->vqs);
3462 }
3463 
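/* Expose the doorbell page so userspace can map it and kick the hardware
 * directly, bypassing the kernel. Not available for the control VQ, which
 * is emulated in software.
 */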
3464 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3465 {
3466 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3467 	struct vdpa_notification_area ret = {};
3468 	struct mlx5_vdpa_net *ndev;
3469 	phys_addr_t addr;
3470 
3471 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3472 		return ret;
3473 
3474 	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3475 	 * notification, to avoid the risk of mapping pages that contain the
3476 	 * BARs of more than one SF.
3477 	 */
3478 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3479 		return ret;
3480 
3481 	ndev = to_mlx5_vdpa_ndev(mvdev);
3482 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3483 	ret.addr = addr;
3484 	ret.size = PAGE_SIZE;
3485 	return ret;
3486 }
3487 
3488 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3489 {
3490 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3491 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3492 	struct mlx5_vdpa_virtqueue *mvq;
3493 
3494 	if (!is_index_valid(mvdev, idx))
3495 		return -EINVAL;
3496 
3497 	if (is_ctrl_vq_idx(mvdev, idx))
3498 		return -EOPNOTSUPP;
3499 
3500 	mvq = &ndev->vqs[idx];
3501 	if (!mvq->map.virq)
3502 		return -EOPNOTSUPP;
3503 
3504 	return mvq->map.virq;
3505 }
3506 
3507 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3508 {
3509 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3510 
3511 	return mvdev->actual_features;
3512 }
3513 
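/* Query the firmware virtio_q_counters object attached to @mvq. Counters
 * can only be read while the VQ object is in the RDY state, hence -EAGAIN
 * otherwise.
 */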
3514 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3515 			     u64 *received_desc, u64 *completed_desc)
3516 {
3517 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3518 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3519 	void *cmd_hdr;
3520 	void *ctx;
3521 	int err;
3522 
3523 	if (!counters_supported(&ndev->mvdev))
3524 		return -EOPNOTSUPP;
3525 
3526 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3527 		return -EAGAIN;
3528 
3529 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3530 
3531 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3532 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3533 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3534 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3535 
3536 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3537 	if (err)
3538 		return err;
3539 
3540 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3541 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3542 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3543 	return 0;
3544 }
3545 
3546 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3547 					 struct sk_buff *msg,
3548 					 struct netlink_ext_ack *extack)
3549 {
3550 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3551 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3552 	struct mlx5_vdpa_virtqueue *mvq;
3553 	struct mlx5_control_vq *cvq;
3554 	u64 received_desc;
3555 	u64 completed_desc;
3556 	int err = 0;
3557 
3558 	down_read(&ndev->reslock);
3559 	if (!is_index_valid(mvdev, idx)) {
3560 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3561 		err = -EINVAL;
3562 		goto out_err;
3563 	}
3564 
3565 	if (idx == ctrl_vq_idx(mvdev)) {
3566 		cvq = &mvdev->cvq;
3567 		received_desc = cvq->received_desc;
3568 		completed_desc = cvq->completed_desc;
3569 		goto out;
3570 	}
3571 
3572 	mvq = &ndev->vqs[idx];
3573 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3574 	if (err) {
3575 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3576 		goto out_err;
3577 	}
3578 
3579 out:
3580 	err = -EMSGSIZE;
3581 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3582 		goto out_err;
3583 
3584 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3585 			      VDPA_ATTR_PAD))
3586 		goto out_err;
3587 
3588 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3589 		goto out_err;
3590 
3591 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3592 			      VDPA_ATTR_PAD))
3593 		goto out_err;
3594 
3595 	err = 0;
3596 out_err:
3597 	up_read(&ndev->reslock);
3598 	return err;
3599 }
3600 
3601 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3602 {
3603 	struct mlx5_control_vq *cvq;
3604 
3605 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3606 		return;
3607 
3608 	cvq = &mvdev->cvq;
3609 	cvq->ready = false;
3610 }
3611 
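/* Suspend: stop all data VQs and mark the control VQ not ready so no new
 * commands are processed. Device state is preserved so the device can be
 * resumed later.
 */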
3612 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3613 {
3614 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3615 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3616 	int err;
3617 
3618 	mlx5_vdpa_info(mvdev, "suspending device\n");
3619 
3620 	down_write(&ndev->reslock);
3621 	err = suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3622 	mlx5_vdpa_cvq_suspend(mvdev);
3623 	mvdev->suspended = true;
3624 	up_write(&ndev->reslock);
3625 
3626 	return err;
3627 }
3628 
3629 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3630 {
3631 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3632 	struct mlx5_vdpa_net *ndev;
3633 	int err;
3634 
3635 	ndev = to_mlx5_vdpa_ndev(mvdev);
3636 
3637 	mlx5_vdpa_info(mvdev, "resuming device\n");
3638 
3639 	down_write(&ndev->reslock);
3640 	mvdev->suspended = false;
3641 	err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3642 	queue_link_work(ndev);
3643 	up_write(&ndev->reslock);
3644 
3645 	return err;
3646 }
3647 
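/* Record the group -> ASID binding. If the control VQ group is re-bound to
 * an ASID that already has a memory region, replay that region's iotlb into
 * the control VQ so the software emulation keeps translating correctly.
 */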
3648 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3649 			       unsigned int asid)
3650 {
3651 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3652 	int err = 0;
3653 
3654 	mvdev->mres.group2asid[group] = asid;
3655 
3656 	mutex_lock(&mvdev->mres.lock);
3657 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid])
3658 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid);
3659 	mutex_unlock(&mvdev->mres.lock);
3660 
3661 	return err;
3662 }
3663 
3664 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3665 	.set_vq_address = mlx5_vdpa_set_vq_address,
3666 	.set_vq_num = mlx5_vdpa_set_vq_num,
3667 	.kick_vq = mlx5_vdpa_kick_vq,
3668 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3669 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3670 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3671 	.set_vq_state = mlx5_vdpa_set_vq_state,
3672 	.get_vq_state = mlx5_vdpa_get_vq_state,
3673 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3674 	.get_vq_notification = mlx5_get_vq_notification,
3675 	.get_vq_irq = mlx5_get_vq_irq,
3676 	.get_vq_align = mlx5_vdpa_get_vq_align,
3677 	.get_vq_group = mlx5_vdpa_get_vq_group,
3678 	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3679 	.get_device_features = mlx5_vdpa_get_device_features,
3680 	.get_backend_features = mlx5_vdpa_get_backend_features,
3681 	.set_driver_features = mlx5_vdpa_set_driver_features,
3682 	.get_driver_features = mlx5_vdpa_get_driver_features,
3683 	.set_config_cb = mlx5_vdpa_set_config_cb,
3684 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3685 	.get_device_id = mlx5_vdpa_get_device_id,
3686 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3687 	.get_status = mlx5_vdpa_get_status,
3688 	.set_status = mlx5_vdpa_set_status,
3689 	.reset = mlx5_vdpa_reset,
3690 	.compat_reset = mlx5_vdpa_compat_reset,
3691 	.get_config_size = mlx5_vdpa_get_config_size,
3692 	.get_config = mlx5_vdpa_get_config,
3693 	.set_config = mlx5_vdpa_set_config,
3694 	.get_generation = mlx5_vdpa_get_generation,
3695 	.set_map = mlx5_vdpa_set_map,
3696 	.reset_map = mlx5_vdpa_reset_map,
3697 	.set_group_asid = mlx5_set_group_asid,
3698 	.get_vq_map = mlx5_get_vq_map,
3699 	.free = mlx5_vdpa_free,
3700 	.suspend = mlx5_vdpa_suspend,
3701 	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
3702 };
3703 
3704 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3705 {
3706 	u16 hw_mtu;
3707 	int err;
3708 
3709 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3710 	if (err)
3711 		return err;
3712 
3713 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3714 	return 0;
3715 }
3716 
3717 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev)
3718 {
3719 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3720 	int err;
3721 
3722 	if (res->valid) {
3723 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3724 		return -EEXIST;
3725 	}
3726 
3727 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3728 	if (err)
3729 		return err;
3730 
3731 	err = create_tis(ndev);
3732 	if (err)
3733 		goto err_tis;
3734 
3735 	res->valid = true;
3736 
3737 	return 0;
3738 
3739 err_tis:
3740 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3741 	return err;
3742 }
3743 
3744 static void free_fixed_resources(struct mlx5_vdpa_net *ndev)
3745 {
3746 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3747 
3748 	if (!res->valid)
3749 		return;
3750 
3751 	destroy_tis(ndev);
3752 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3753 	res->valid = false;
3754 }
3755 
3756 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev)
3757 {
3758 	struct mlx5_vdpa_virtqueue *mvq;
3759 	int i;
3760 
3761 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3762 		mvq = &ndev->vqs[i];
3763 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3764 		mvq->index = i;
3765 		mvq->ndev = ndev;
3766 		mvq->fwqp.fw = true;
3767 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3768 		mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE;
3769 	}
3770 }
3771 
3772 struct mlx5_vdpa_mgmtdev {
3773 	struct vdpa_mgmt_dev mgtdev;
3774 	struct mlx5_adev *madev;
3775 	struct mlx5_vdpa_net *ndev;
3776 	struct vdpa_config_ops vdpa_ops;
3777 };
3778 
3779 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3780 {
3781 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3782 	void *in;
3783 	int err;
3784 
3785 	in = kvzalloc(inlen, GFP_KERNEL);
3786 	if (!in)
3787 		return -ENOMEM;
3788 
3789 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3790 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3791 		 mtu + MLX5V_ETH_HARD_MTU);
3792 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3793 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3794 
3795 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3796 
3797 	kvfree(in);
3798 	return err;
3799 }
3800 
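/* Best-effort pre-allocation of one dynamic MSI-X vector per virtqueue;
 * these vectors back mlx5_get_vq_irq(). Allocation stops at the first
 * failure, and VQs without a vector simply fall back to the non-bypass
 * notification path.
 */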
3801 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3802 {
3803 	struct mlx5_vdpa_irq_pool_entry *ent;
3804 	int i;
3805 
3806 	if (!msix_mode_supported(&ndev->mvdev))
3807 		return;
3808 
3809 	if (!ndev->mvdev.mdev->pdev)
3810 		return;
3811 
3812 	ndev->irqp.entries = kzalloc_objs(*ndev->irqp.entries,
3813 					  ndev->mvdev.max_vqs);
3814 	if (!ndev->irqp.entries)
3815 		return;
3816 
3817 
3818 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3819 		ent = ndev->irqp.entries + i;
3820 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3821 			 dev_name(&ndev->mvdev.vdev.dev), i);
3822 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3823 		if (!ent->map.virq)
3824 			return;
3825 
3826 		ndev->irqp.num_ent++;
3827 	}
3828 }
3829 
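/* Management-device .dev_add callback: validates the provisioned feature
 * set, sizes the virtqueue array from device caps and the max_vqp attribute,
 * fills the virtio-net config space (mtu/mac/status/max_virtqueue_pairs) and
 * registers the vdpa device. A mgmtdev hosts at most one device at a time.
 *
 * For illustration, devices are typically created from userspace with the
 * iproute2 vdpa tool; the device name and PCI address below are
 * hypothetical:
 *
 *   vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.1 mac 00:11:22:33:44:55 max_vqp 4
 */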
3830 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3831 			     const struct vdpa_dev_set_config *add_config)
3832 {
3833 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3834 	struct virtio_net_config *config;
3835 	struct mlx5_core_dev *pfmdev;
3836 	struct mlx5_vdpa_dev *mvdev;
3837 	struct mlx5_vdpa_net *ndev;
3838 	struct mlx5_core_dev *mdev;
3839 	u64 device_features;
3840 	u32 max_vqs;
3841 	u16 mtu;
3842 	int err;
3843 
3844 	if (mgtdev->ndev)
3845 		return -ENOSPC;
3846 
3847 	mdev = mgtdev->madev->mdev;
3848 	device_features = mgtdev->mgtdev.supported_features;
3849 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3850 		if (add_config->device_features & ~device_features) {
3851 			dev_warn(mdev->device,
3852 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3853 				 add_config->device_features, device_features);
3854 			return -EINVAL;
3855 		}
3856 		device_features &= add_config->device_features;
3857 	} else {
3858 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3859 	}
3860 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3861 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3862 		dev_warn(mdev->device,
3863 			 "Must provision minimum features 0x%llx for this device\n",
3864 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3865 		return -EOPNOTSUPP;
3866 	}
3867 
3868 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3869 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3870 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3871 		return -EOPNOTSUPP;
3872 	}
3873 
3874 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3875 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3876 	if (max_vqs < 2) {
3877 		dev_warn(mdev->device,
3878 			 "%d virtqueues are supported. At least 2 are required\n",
3879 			 max_vqs);
3880 		return -EAGAIN;
3881 	}
3882 
3883 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3884 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3885 			return -EINVAL;
3886 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3887 	} else {
3888 		max_vqs = 2;
3889 	}
3890 
3891 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3892 				 NULL, MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3893 	if (IS_ERR(ndev))
3894 		return PTR_ERR(ndev);
3895 
3896 	ndev->mvdev.max_vqs = max_vqs;
3897 	mvdev = &ndev->mvdev;
3898 	mvdev->mdev = mdev;
3899 	/* cpu_to_mlx5vdpa16() below depends on this flag */
3900 	mvdev->actual_features =
3901 			(device_features & BIT_ULL(VIRTIO_F_VERSION_1));
3902 
3903 	mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
3904 
3905 	ndev->vqs = kzalloc_objs(*ndev->vqs, max_vqs);
3906 	ndev->event_cbs = kzalloc_objs(*ndev->event_cbs, max_vqs + 1);
3907 	if (!ndev->vqs || !ndev->event_cbs) {
3908 		err = -ENOMEM;
3909 		goto err_alloc;
3910 	}
3911 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3912 
3913 	mvqs_set_defaults(ndev);
3914 	allocate_irqs(ndev);
3915 	init_rwsem(&ndev->reslock);
3916 	config = &ndev->config;
3917 
3918 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3919 		err = config_func_mtu(mdev, add_config->net.mtu);
3920 		if (err)
3921 			goto err_alloc;
3922 	}
3923 
3924 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3925 		err = query_mtu(mdev, &mtu);
3926 		if (err)
3927 			goto err_alloc;
3928 
3929 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3930 	}
3931 
3932 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3933 		if (get_link_state(mvdev))
3934 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3935 		else
3936 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3937 	}
3938 
3939 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3940 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3941 	/* Don't bother setting a mac address in the config if we are not going to provision _F_MAC */
3942 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3943 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3944 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3945 		if (err)
3946 			goto err_alloc;
3947 	}
3948 
3949 	if (!is_zero_ether_addr(config->mac)) {
3950 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3951 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3952 		if (err)
3953 			goto err_alloc;
3954 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3955 		/*
3956 		 * We used to clear the _F_MAC feature bit when a
3957 		 * zero mac address was seen and device features were
3958 		 * not explicitly provisioned. Keep that behaviour so
3959 		 * that old scripts do not break.
3960 		 */
3961 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3962 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3963 		/* Don't provision a zero mac address for _F_MAC. */
3964 		mlx5_vdpa_warn(&ndev->mvdev,
3965 			       "No mac address provisioned?\n");
3966 		err = -EINVAL;
3967 		goto err_alloc;
3968 	}
3969 
3970 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
3971 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3972 		ndev->rqt_size = max_vqs / 2;
3973 	} else {
3974 		ndev->rqt_size = 1;
3975 	}
3976 
3977 	ndev->mvdev.mlx_features = device_features;
3978 	mvdev->vdev.vmap.dma_dev = &mdev->pdev->dev;
3979 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3980 	if (err)
3981 		goto err_alloc;
3982 
3983 	err = mlx5_vdpa_init_mr_resources(mvdev);
3984 	if (err)
3985 		goto err_alloc;
3986 
3987 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3988 		err = mlx5_vdpa_create_dma_mr(mvdev);
3989 		if (err)
3990 			goto err_alloc;
3991 	}
3992 
3993 	err = alloc_fixed_resources(ndev);
3994 	if (err)
3995 		goto err_alloc;
3996 
3997 	ndev->cvq_ent.mvdev = mvdev;
3998 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3999 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
4000 	if (!mvdev->wq) {
4001 		err = -ENOMEM;
4002 		goto err_alloc;
4003 	}
4004 
4005 	mvdev->vdev.mdev = &mgtdev->mgtdev;
4006 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
4007 	if (err)
4008 		goto err_reg;
4009 
4010 	mgtdev->ndev = ndev;
4011 
4012 	/* For virtio-vdpa, the device was set up during device registration. */
4013 	if (ndev->setup)
4014 		return 0;
4015 
4016 	down_write(&ndev->reslock);
4017 	err = setup_vq_resources(ndev, false);
4018 	up_write(&ndev->reslock);
4019 	if (err)
4020 		goto err_setup_vq_res;
4021 
4022 	return 0;
4023 
4024 err_setup_vq_res:
4025 	_vdpa_unregister_device(&mvdev->vdev);
4026 err_reg:
4027 	destroy_workqueue(mvdev->wq);
4028 err_alloc:
4029 	put_device(&mvdev->vdev.dev);
4030 	return err;
4031 }
4032 
4033 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
4034 {
4035 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
4036 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
4037 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
4038 	struct workqueue_struct *wq;
4039 
4040 	unregister_link_notifier(ndev);
4041 	_vdpa_unregister_device(dev);
4042 
4043 	down_write(&ndev->reslock);
4044 	teardown_vq_resources(ndev);
4045 	up_write(&ndev->reslock);
4046 
4047 	wq = mvdev->wq;
4048 	mvdev->wq = NULL;
4049 	destroy_workqueue(wq);
4050 	mgtdev->ndev = NULL;
4051 }
4052 
4053 static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
4054 			      const struct vdpa_dev_set_config *add_config)
4055 {
4056 	struct mlx5_core_dev *pfmdev;
4057 	struct mlx5_vdpa_dev *mvdev;
4058 	struct mlx5_vdpa_net *ndev;
4059 	struct mlx5_core_dev *mdev;
4060 	int err = -EOPNOTSUPP;
4061 
4062 	mvdev = to_mvdev(dev);
4063 	ndev = to_mlx5_vdpa_ndev(mvdev);
4064 	mdev = mvdev->mdev;
4065 
4066 	down_write(&ndev->reslock);
4067 
4068 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
4069 		if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
4070 			ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
4071 		} else {
4072 			mlx5_vdpa_warn(mvdev, "device running, skip updating MAC\n");
4073 			err = -EBUSY;
4074 			goto out;
4075 		}
4076 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
4077 		err = mlx5_vdpa_change_mac(ndev, pfmdev,
4078 					   (u8 *)add_config->net.mac);
4079 	}
4080 
4081 out:
4082 	up_write(&ndev->reslock);
4083 	return err;
4084 }
4085 
4086 static const struct vdpa_mgmtdev_ops mdev_ops = {
4087 	.dev_add = mlx5_vdpa_dev_add,
4088 	.dev_del = mlx5_vdpa_dev_del,
4089 	.dev_set_attr = mlx5_vdpa_set_attr,
4090 };
4091 
4092 static struct virtio_device_id id_table[] = {
4093 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
4094 	{ 0 },
4095 };
4096 
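/* Auxiliary bus probe: set up the vdpa management device, mask out optional
 * ops the hardware does not support (get_vq_desc_group, resume) and register
 * it with the vdpa core.
 */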
4097 static int mlx5v_probe(struct auxiliary_device *adev,
4098 		       const struct auxiliary_device_id *id)
4099 
4100 {
4101 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
4102 	struct mlx5_core_dev *mdev = madev->mdev;
4103 	struct mlx5_vdpa_mgmtdev *mgtdev;
4104 	int err;
4105 
4106 	mgtdev = kzalloc_obj(*mgtdev);
4107 	if (!mgtdev)
4108 		return -ENOMEM;
4109 
4110 	mgtdev->mgtdev.ops = &mdev_ops;
4111 	mgtdev->mgtdev.device = mdev->device;
4112 	mgtdev->mgtdev.id_table = id_table;
4113 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
4114 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
4115 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
4116 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
4117 	mgtdev->mgtdev.max_supported_vqs =
4118 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
4119 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
4120 	mgtdev->madev = madev;
4121 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
4122 
4123 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
4124 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
4125 
4126 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
4127 		mgtdev->vdpa_ops.resume = NULL;
4128 
4129 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
4130 	if (err)
4131 		goto reg_err;
4132 
4133 	auxiliary_set_drvdata(adev, mgtdev);
4134 
4135 	return 0;
4136 
4137 reg_err:
4138 	kfree(mgtdev);
4139 	return err;
4140 }
4141 
4142 static void mlx5v_remove(struct auxiliary_device *adev)
4143 {
4144 	struct mlx5_vdpa_mgmtdev *mgtdev;
4145 
4146 	mgtdev = auxiliary_get_drvdata(adev);
4147 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
4148 	kfree(mgtdev);
4149 }
4150 
4151 static const struct auxiliary_device_id mlx5v_id_table[] = {
4152 	{ .name = MLX5_ADEV_NAME ".vnet", },
4153 	{},
4154 };
4155 
4156 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
4157 
4158 static struct auxiliary_driver mlx5v_driver = {
4159 	.name = "vnet",
4160 	.probe = mlx5v_probe,
4161 	.remove = mlx5v_remove,
4162 	.id_table = mlx5v_id_table,
4163 };
4164 
4165 module_auxiliary_driver(mlx5v_driver);
4166