xref: /linux/drivers/vdpa/mlx5/net/mlx5_vnet.c (revision 821c9e515db512904250e1d460109a1dc4c7ef6b)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/module.h>
5 #include <linux/vdpa.h>
6 #include <linux/vringh.h>
7 #include <uapi/linux/virtio_net.h>
8 #include <uapi/linux/virtio_ids.h>
9 #include <uapi/linux/vdpa.h>
10 #include <uapi/linux/vhost_types.h>
11 #include <linux/virtio_config.h>
12 #include <linux/auxiliary_bus.h>
13 #include <linux/mlx5/cq.h>
14 #include <linux/mlx5/qp.h>
15 #include <linux/mlx5/device.h>
16 #include <linux/mlx5/driver.h>
17 #include <linux/mlx5/vport.h>
18 #include <linux/mlx5/fs.h>
19 #include <linux/mlx5/mlx5_ifc_vdpa.h>
20 #include <linux/mlx5/mpfs.h>
21 #include "mlx5_vdpa.h"
22 #include "mlx5_vnet.h"
23 
24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
25 MODULE_DESCRIPTION("Mellanox VDPA driver");
26 MODULE_LICENSE("Dual BSD/GPL");
27 
28 #define VALID_FEATURES_MASK                                                                        \
29 	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
30 	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
31 	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
32 	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
33 	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
34 	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
35 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
36 	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
37 	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
38 	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
39 	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
40 	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
41 	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
42 
43 #define VALID_STATUS_MASK                                                                          \
44 	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
45 	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
46 
47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
48 
49 #define MLX5V_UNTAGGED 0x1000
50 
51 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
52  * 5.1.6.5.5 "Device operation in multiqueue mode":
53  *
54  * Multiqueue is disabled by default.
55  * The driver enables multiqueue by sending a command using class
56  * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
57  * operation, as follows: ...
58  */
59 #define MLX5V_DEFAULT_VQ_COUNT 2
60 
61 #define MLX5V_DEFAULT_VQ_SIZE 256
62 
63 struct mlx5_vdpa_cq_buf {
64 	struct mlx5_frag_buf_ctrl fbc;
65 	struct mlx5_frag_buf frag_buf;
66 	int cqe_size;
67 	int nent;
68 };
69 
70 struct mlx5_vdpa_cq {
71 	struct mlx5_core_cq mcq;
72 	struct mlx5_vdpa_cq_buf buf;
73 	struct mlx5_db db;
74 	int cqe;
75 };
76 
77 struct mlx5_vdpa_umem {
78 	struct mlx5_frag_buf_ctrl fbc;
79 	struct mlx5_frag_buf frag_buf;
80 	int size;
81 	u32 id;
82 };
83 
84 struct mlx5_vdpa_qp {
85 	struct mlx5_core_qp mqp;
86 	struct mlx5_frag_buf frag_buf;
87 	struct mlx5_db db;
88 	u16 head;
89 	bool fw;
90 };
91 
92 struct mlx5_vq_restore_info {
93 	u32 num_ent;
94 	u64 desc_addr;
95 	u64 device_addr;
96 	u64 driver_addr;
97 	u16 avail_index;
98 	u16 used_index;
99 	struct msi_map map;
100 	bool ready;
101 	bool restore;
102 };
103 
104 struct mlx5_vdpa_virtqueue {
105 	bool ready;
106 	u64 desc_addr;
107 	u64 device_addr;
108 	u64 driver_addr;
109 	u32 num_ent;
110 
111 	/* Resources for implementing the notification channel from the device
112 	 * to the driver. fwqp is the firmware end of an RC connection; the
113 	 * other end is vqqp used by the driver. cq is where completions are
114 	 * reported.
115 	 */
116 	struct mlx5_vdpa_cq cq;
117 	struct mlx5_vdpa_qp fwqp;
118 	struct mlx5_vdpa_qp vqqp;
119 
120 	/* umem resources are required for the virtqueue operation. They're use
121 	 * is internal and they must be provided by the driver.
122 	 */
123 	struct mlx5_vdpa_umem umem1;
124 	struct mlx5_vdpa_umem umem2;
125 	struct mlx5_vdpa_umem umem3;
126 
127 	u32 counter_set_id;
128 	bool initialized;
129 	int index;
130 	u32 virtq_id;
131 	struct mlx5_vdpa_net *ndev;
132 	u16 avail_idx;
133 	u16 used_idx;
134 	int fw_state;
135 
136 	u64 modified_fields;
137 
138 	struct mlx5_vdpa_mr *vq_mr;
139 	struct mlx5_vdpa_mr *desc_mr;
140 
141 	struct msi_map map;
142 
143 	/* keep last in the struct */
144 	struct mlx5_vq_restore_info ri;
145 };
146 
is_index_valid(struct mlx5_vdpa_dev * mvdev,u16 idx)147 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
148 {
149 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
150 		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
151 			return idx < 2;
152 		else
153 			return idx < 3;
154 	}
155 
156 	return idx <= mvdev->max_idx;
157 }
158 
159 static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
160 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
161 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
162 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
163 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
164 
165 static bool mlx5_vdpa_debug;
166 
167 #define MLX5_LOG_VIO_FLAG(_feature)                                                                \
168 	do {                                                                                       \
169 		if (features & BIT_ULL(_feature))                                                  \
170 			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
171 	} while (0)
172 
173 #define MLX5_LOG_VIO_STAT(_status)                                                                 \
174 	do {                                                                                       \
175 		if (status & (_status))                                                            \
176 			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
177 	} while (0)
178 
179 /* TODO: cross-endian support */
mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev * mvdev)180 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
181 {
182 	return virtio_legacy_is_little_endian() ||
183 		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
184 }
185 
mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev * mvdev,__virtio16 val)186 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
187 {
188 	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
189 }
190 
cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev * mvdev,u16 val)191 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
192 {
193 	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
194 }
195 
ctrl_vq_idx(struct mlx5_vdpa_dev * mvdev)196 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
197 {
198 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
199 		return 2;
200 
201 	return mvdev->max_vqs;
202 }
203 
is_ctrl_vq_idx(struct mlx5_vdpa_dev * mvdev,u16 idx)204 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
205 {
206 	return idx == ctrl_vq_idx(mvdev);
207 }
208 
print_status(struct mlx5_vdpa_dev * mvdev,u8 status,bool set)209 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
210 {
211 	if (status & ~VALID_STATUS_MASK)
212 		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
213 			       status & ~VALID_STATUS_MASK);
214 
215 	if (!mlx5_vdpa_debug)
216 		return;
217 
218 	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
219 	if (set && !status) {
220 		mlx5_vdpa_info(mvdev, "driver resets the device\n");
221 		return;
222 	}
223 
224 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
225 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
226 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
227 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
228 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
229 	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
230 }
231 
print_features(struct mlx5_vdpa_dev * mvdev,u64 features,bool set)232 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
233 {
234 	if (features & ~VALID_FEATURES_MASK)
235 		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
236 			       features & ~VALID_FEATURES_MASK);
237 
238 	if (!mlx5_vdpa_debug)
239 		return;
240 
241 	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
242 	if (!features)
243 		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
244 
245 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
246 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
247 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
248 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
249 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
250 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
251 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
252 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
253 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
254 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
255 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
256 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
257 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
258 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
259 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
260 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
261 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
262 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
263 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
264 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
265 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
266 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
267 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
268 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
269 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
270 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
271 	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
272 	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
273 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
274 	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
275 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
276 	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
277 	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
278 	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
279 }
280 
create_tis(struct mlx5_vdpa_net * ndev)281 static int create_tis(struct mlx5_vdpa_net *ndev)
282 {
283 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
284 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
285 	void *tisc;
286 	int err;
287 
288 	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
289 	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
290 	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
291 	if (err)
292 		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
293 
294 	return err;
295 }
296 
destroy_tis(struct mlx5_vdpa_net * ndev)297 static void destroy_tis(struct mlx5_vdpa_net *ndev)
298 {
299 	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
300 }
301 
302 #define MLX5_VDPA_CQE_SIZE 64
303 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
304 
cq_frag_buf_alloc(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_cq_buf * buf,int nent)305 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
306 {
307 	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
308 	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
309 	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
310 	int err;
311 
312 	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
313 				       ndev->mvdev.mdev->priv.numa_node);
314 	if (err)
315 		return err;
316 
317 	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
318 
319 	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
320 	buf->nent = nent;
321 
322 	return 0;
323 }
324 
umem_frag_buf_alloc(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_umem * umem,int size)325 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
326 {
327 	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
328 
329 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
330 					ndev->mvdev.mdev->priv.numa_node);
331 }
332 
cq_frag_buf_free(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_cq_buf * buf)333 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
334 {
335 	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
336 }
337 
get_cqe(struct mlx5_vdpa_cq * vcq,int n)338 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
339 {
340 	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
341 }
342 
cq_frag_buf_init(struct mlx5_vdpa_cq * vcq,struct mlx5_vdpa_cq_buf * buf)343 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
344 {
345 	struct mlx5_cqe64 *cqe64;
346 	void *cqe;
347 	int i;
348 
349 	for (i = 0; i < buf->nent; i++) {
350 		cqe = get_cqe(vcq, i);
351 		cqe64 = cqe;
352 		cqe64->op_own = MLX5_CQE_INVALID << 4;
353 	}
354 }
355 
get_sw_cqe(struct mlx5_vdpa_cq * cq,int n)356 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
357 {
358 	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
359 
360 	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
361 	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
362 		return cqe64;
363 
364 	return NULL;
365 }
366 
rx_post(struct mlx5_vdpa_qp * vqp,int n)367 static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
368 {
369 	vqp->head += n;
370 	vqp->db.db[0] = cpu_to_be32(vqp->head);
371 }
372 
qp_prepare(struct mlx5_vdpa_net * ndev,bool fw,void * in,struct mlx5_vdpa_virtqueue * mvq,u32 num_ent)373 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
374 		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
375 {
376 	struct mlx5_vdpa_qp *vqp;
377 	__be64 *pas;
378 	void *qpc;
379 
380 	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
381 	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
382 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
383 	if (vqp->fw) {
384 		/* Firmware QP is allocated by the driver for the firmware's
385 		 * use so we can skip part of the params as they will be chosen by firmware
386 		 */
387 		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
388 		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
389 		MLX5_SET(qpc, qpc, no_sq, 1);
390 		return;
391 	}
392 
393 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
394 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
395 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
396 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
397 	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
398 	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
399 	MLX5_SET(qpc, qpc, no_sq, 1);
400 	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
401 	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
402 	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
403 	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
404 	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
405 }
406 
rq_buf_alloc(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_qp * vqp,u32 num_ent)407 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
408 {
409 	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
410 					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
411 					ndev->mvdev.mdev->priv.numa_node);
412 }
413 
rq_buf_free(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_qp * vqp)414 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
415 {
416 	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
417 }
418 
qp_create(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,struct mlx5_vdpa_qp * vqp)419 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
420 		     struct mlx5_vdpa_qp *vqp)
421 {
422 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
423 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
424 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
425 	void *qpc;
426 	void *in;
427 	int err;
428 
429 	if (!vqp->fw) {
430 		vqp = &mvq->vqqp;
431 		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
432 		if (err)
433 			return err;
434 
435 		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
436 		if (err)
437 			goto err_db;
438 		inlen += vqp->frag_buf.npages * sizeof(__be64);
439 	}
440 
441 	in = kzalloc(inlen, GFP_KERNEL);
442 	if (!in) {
443 		err = -ENOMEM;
444 		goto err_kzalloc;
445 	}
446 
447 	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
448 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
449 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
450 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
451 	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
452 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
453 	if (!vqp->fw)
454 		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
455 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
456 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
457 	kfree(in);
458 	if (err)
459 		goto err_kzalloc;
460 
461 	vqp->mqp.uid = ndev->mvdev.res.uid;
462 	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
463 
464 	if (!vqp->fw)
465 		rx_post(vqp, mvq->num_ent);
466 
467 	return 0;
468 
469 err_kzalloc:
470 	if (!vqp->fw)
471 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
472 err_db:
473 	if (!vqp->fw)
474 		rq_buf_free(ndev, vqp);
475 
476 	return err;
477 }
478 
qp_destroy(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_qp * vqp)479 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
480 {
481 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
482 
483 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
484 	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
485 	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
486 	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
487 		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
488 	if (!vqp->fw) {
489 		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
490 		rq_buf_free(ndev, vqp);
491 	}
492 }
493 
next_cqe_sw(struct mlx5_vdpa_cq * cq)494 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
495 {
496 	return get_sw_cqe(cq, cq->mcq.cons_index);
497 }
498 
mlx5_vdpa_poll_one(struct mlx5_vdpa_cq * vcq)499 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
500 {
501 	struct mlx5_cqe64 *cqe64;
502 
503 	cqe64 = next_cqe_sw(vcq);
504 	if (!cqe64)
505 		return -EAGAIN;
506 
507 	vcq->mcq.cons_index++;
508 	return 0;
509 }
510 
mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue * mvq,int num)511 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
512 {
513 	struct mlx5_vdpa_net *ndev = mvq->ndev;
514 	struct vdpa_callback *event_cb;
515 
516 	event_cb = &ndev->event_cbs[mvq->index];
517 	mlx5_cq_set_ci(&mvq->cq.mcq);
518 
519 	/* make sure CQ cosumer update is visible to the hardware before updating
520 	 * RX doorbell record.
521 	 */
522 	dma_wmb();
523 	rx_post(&mvq->vqqp, num);
524 	if (event_cb->callback)
525 		event_cb->callback(event_cb->private);
526 }
527 
mlx5_vdpa_cq_comp(struct mlx5_core_cq * mcq,struct mlx5_eqe * eqe)528 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
529 {
530 	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
531 	struct mlx5_vdpa_net *ndev = mvq->ndev;
532 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
533 	int num = 0;
534 
535 	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
536 		num++;
537 		if (num > mvq->num_ent / 2) {
538 			/* If completions keep coming while we poll, we want to
539 			 * let the hardware know that we consumed them by
540 			 * updating the doorbell record.  We also let vdpa core
541 			 * know about this so it passes it on the virtio driver
542 			 * on the guest.
543 			 */
544 			mlx5_vdpa_handle_completions(mvq, num);
545 			num = 0;
546 		}
547 	}
548 
549 	if (num)
550 		mlx5_vdpa_handle_completions(mvq, num);
551 
552 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
553 }
554 
cq_create(struct mlx5_vdpa_net * ndev,u16 idx,u32 num_ent)555 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
556 {
557 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
558 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
559 	void __iomem *uar_page = ndev->mvdev.res.uar->map;
560 	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
561 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
562 	__be64 *pas;
563 	int inlen;
564 	void *cqc;
565 	void *in;
566 	int err;
567 	int eqn;
568 
569 	err = mlx5_db_alloc(mdev, &vcq->db);
570 	if (err)
571 		return err;
572 
573 	vcq->mcq.set_ci_db = vcq->db.db;
574 	vcq->mcq.arm_db = vcq->db.db + 1;
575 	vcq->mcq.cqe_sz = 64;
576 
577 	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
578 	if (err)
579 		goto err_db;
580 
581 	cq_frag_buf_init(vcq, &vcq->buf);
582 
583 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
584 		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
585 	in = kzalloc(inlen, GFP_KERNEL);
586 	if (!in) {
587 		err = -ENOMEM;
588 		goto err_vzalloc;
589 	}
590 
591 	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
592 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
593 	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
594 
595 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
596 	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
597 
598 	/* Use vector 0 by default. Consider adding code to choose least used
599 	 * vector.
600 	 */
601 	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
602 	if (err)
603 		goto err_vec;
604 
605 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
606 	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
607 	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
608 	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
609 	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
610 
611 	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
612 	if (err)
613 		goto err_vec;
614 
615 	vcq->mcq.comp = mlx5_vdpa_cq_comp;
616 	vcq->cqe = num_ent;
617 	vcq->mcq.set_ci_db = vcq->db.db;
618 	vcq->mcq.arm_db = vcq->db.db + 1;
619 	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
620 	kfree(in);
621 	return 0;
622 
623 err_vec:
624 	kfree(in);
625 err_vzalloc:
626 	cq_frag_buf_free(ndev, &vcq->buf);
627 err_db:
628 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
629 	return err;
630 }
631 
cq_destroy(struct mlx5_vdpa_net * ndev,u16 idx)632 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
633 {
634 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
635 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
636 	struct mlx5_vdpa_cq *vcq = &mvq->cq;
637 
638 	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
639 		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
640 		return;
641 	}
642 	cq_frag_buf_free(ndev, &vcq->buf);
643 	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
644 }
645 
read_umem_params(struct mlx5_vdpa_net * ndev)646 static int read_umem_params(struct mlx5_vdpa_net *ndev)
647 {
648 	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
649 	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
650 	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
651 	int out_size;
652 	void *caps;
653 	void *out;
654 	int err;
655 
656 	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
657 	out = kzalloc(out_size, GFP_KERNEL);
658 	if (!out)
659 		return -ENOMEM;
660 
661 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
662 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
663 	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
664 	if (err) {
665 		mlx5_vdpa_warn(&ndev->mvdev,
666 			"Failed reading vdpa umem capabilities with err %d\n", err);
667 		goto out;
668 	}
669 
670 	caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);
671 
672 	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
673 	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
674 
675 	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
676 	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
677 
678 	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
679 	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
680 
681 out:
682 	kfree(out);
683 	return 0;
684 }
685 
set_umem_size(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,int num,struct mlx5_vdpa_umem ** umemp)686 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
687 			  struct mlx5_vdpa_umem **umemp)
688 {
689 	u32 p_a;
690 	u32 p_b;
691 
692 	switch (num) {
693 	case 1:
694 		p_a = ndev->umem_1_buffer_param_a;
695 		p_b = ndev->umem_1_buffer_param_b;
696 		*umemp = &mvq->umem1;
697 		break;
698 	case 2:
699 		p_a = ndev->umem_2_buffer_param_a;
700 		p_b = ndev->umem_2_buffer_param_b;
701 		*umemp = &mvq->umem2;
702 		break;
703 	case 3:
704 		p_a = ndev->umem_3_buffer_param_a;
705 		p_b = ndev->umem_3_buffer_param_b;
706 		*umemp = &mvq->umem3;
707 		break;
708 	}
709 
710 	(*umemp)->size = p_a * mvq->num_ent + p_b;
711 }
712 
umem_frag_buf_free(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_umem * umem)713 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
714 {
715 	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
716 }
717 
create_umem(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,int num)718 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
719 {
720 	int inlen;
721 	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
722 	void *um;
723 	void *in;
724 	int err;
725 	__be64 *pas;
726 	struct mlx5_vdpa_umem *umem;
727 
728 	set_umem_size(ndev, mvq, num, &umem);
729 	err = umem_frag_buf_alloc(ndev, umem, umem->size);
730 	if (err)
731 		return err;
732 
733 	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
734 
735 	in = kzalloc(inlen, GFP_KERNEL);
736 	if (!in) {
737 		err = -ENOMEM;
738 		goto err_in;
739 	}
740 
741 	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
742 	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
743 	um = MLX5_ADDR_OF(create_umem_in, in, umem);
744 	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
745 	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
746 
747 	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
748 	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
749 
750 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
751 	if (err) {
752 		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
753 		goto err_cmd;
754 	}
755 
756 	kfree(in);
757 	umem->id = MLX5_GET(create_umem_out, out, umem_id);
758 
759 	return 0;
760 
761 err_cmd:
762 	kfree(in);
763 err_in:
764 	umem_frag_buf_free(ndev, umem);
765 	return err;
766 }
767 
umem_destroy(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,int num)768 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
769 {
770 	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
771 	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
772 	struct mlx5_vdpa_umem *umem;
773 
774 	switch (num) {
775 	case 1:
776 		umem = &mvq->umem1;
777 		break;
778 	case 2:
779 		umem = &mvq->umem2;
780 		break;
781 	case 3:
782 		umem = &mvq->umem3;
783 		break;
784 	}
785 
786 	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
787 	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
788 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
789 		return;
790 
791 	umem_frag_buf_free(ndev, umem);
792 }
793 
umems_create(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)794 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
795 {
796 	int num;
797 	int err;
798 
799 	for (num = 1; num <= 3; num++) {
800 		err = create_umem(ndev, mvq, num);
801 		if (err)
802 			goto err_umem;
803 	}
804 	return 0;
805 
806 err_umem:
807 	for (num--; num > 0; num--)
808 		umem_destroy(ndev, mvq, num);
809 
810 	return err;
811 }
812 
umems_destroy(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)813 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
814 {
815 	int num;
816 
817 	for (num = 3; num > 0; num--)
818 		umem_destroy(ndev, mvq, num);
819 }
820 
get_queue_type(struct mlx5_vdpa_net * ndev)821 static int get_queue_type(struct mlx5_vdpa_net *ndev)
822 {
823 	u32 type_mask;
824 
825 	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
826 
827 	/* prefer split queue */
828 	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
829 		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
830 
831 	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
832 
833 	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
834 }
835 
vq_is_tx(u16 idx)836 static bool vq_is_tx(u16 idx)
837 {
838 	return idx % 2;
839 }
840 
841 enum {
842 	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
843 	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
844 	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
845 	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
846 	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
847 	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
848 	MLX5_VIRTIO_NET_F_CSUM = 10,
849 	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
850 	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
851 };
852 
get_features(u64 features)853 static u16 get_features(u64 features)
854 {
855 	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
856 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
857 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
858 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
859 	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
860 	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
861 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
862 	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
863 }
864 
counters_supported(const struct mlx5_vdpa_dev * mvdev)865 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
866 {
867 	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
868 	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
869 }
870 
msix_mode_supported(struct mlx5_vdpa_dev * mvdev)871 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
872 {
873 	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
874 		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
875 		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
876 }
877 
create_virtqueue(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,bool filled)878 static int create_virtqueue(struct mlx5_vdpa_net *ndev,
879 			    struct mlx5_vdpa_virtqueue *mvq,
880 			    bool filled)
881 {
882 	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
883 	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
884 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
885 	struct mlx5_vdpa_mr *vq_mr;
886 	struct mlx5_vdpa_mr *vq_desc_mr;
887 	u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
888 	void *obj_context;
889 	u16 mlx_features;
890 	void *cmd_hdr;
891 	void *vq_ctx;
892 	void *in;
893 	int err;
894 
895 	err = umems_create(ndev, mvq);
896 	if (err)
897 		return err;
898 
899 	in = kzalloc(inlen, GFP_KERNEL);
900 	if (!in) {
901 		err = -ENOMEM;
902 		goto err_alloc;
903 	}
904 
905 	mlx_features = get_features(features);
906 	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
907 
908 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
909 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
910 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
911 
912 	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
913 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
914 		 mlx_features >> 3);
915 	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
916 		 mlx_features & 7);
917 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
918 	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
919 
920 	if (vq_is_tx(mvq->index))
921 		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
922 
923 	if (mvq->map.virq) {
924 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
925 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
926 	} else {
927 		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
928 		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
929 	}
930 
931 	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
932 	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
933 	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
934 		 !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
935 
936 	if (filled) {
937 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
938 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
939 
940 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
941 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
942 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
943 
944 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
945 		if (vq_mr)
946 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
947 
948 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
949 		if (vq_desc_mr &&
950 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
951 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
952 	} else {
953 		/* If there is no mr update, make sure that the existing ones are set
954 		 * modify to ready.
955 		 */
956 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
957 		if (vq_mr)
958 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
959 
960 		vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
961 		if (vq_desc_mr)
962 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
963 	}
964 
965 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
966 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
967 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
968 	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
969 	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
970 	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
971 	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
972 	if (counters_supported(&ndev->mvdev))
973 		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
974 
975 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
976 	if (err)
977 		goto err_cmd;
978 
979 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
980 	kfree(in);
981 	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
982 
983 	if (filled) {
984 		mlx5_vdpa_get_mr(mvdev, vq_mr);
985 		mvq->vq_mr = vq_mr;
986 
987 		if (vq_desc_mr &&
988 		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
989 			mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
990 			mvq->desc_mr = vq_desc_mr;
991 		}
992 	}
993 
994 	return 0;
995 
996 err_cmd:
997 	kfree(in);
998 err_alloc:
999 	umems_destroy(ndev, mvq);
1000 	return err;
1001 }
1002 
destroy_virtqueue(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1003 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1004 {
1005 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
1006 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
1007 
1008 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
1009 		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1010 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
1011 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
1012 	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
1013 		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1014 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
1015 		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
1016 		return;
1017 	}
1018 	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
1019 	umems_destroy(ndev, mvq);
1020 
1021 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
1022 	mvq->vq_mr = NULL;
1023 
1024 	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
1025 	mvq->desc_mr = NULL;
1026 }
1027 
get_rqpn(struct mlx5_vdpa_virtqueue * mvq,bool fw)1028 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1029 {
1030 	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
1031 }
1032 
get_qpn(struct mlx5_vdpa_virtqueue * mvq,bool fw)1033 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
1034 {
1035 	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
1036 }
1037 
alloc_inout(struct mlx5_vdpa_net * ndev,int cmd,void ** in,int * inlen,void ** out,int * outlen,u32 qpn,u32 rqpn)1038 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
1039 			int *outlen, u32 qpn, u32 rqpn)
1040 {
1041 	void *qpc;
1042 	void *pp;
1043 
1044 	switch (cmd) {
1045 	case MLX5_CMD_OP_2RST_QP:
1046 		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
1047 		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
1048 		*in = kzalloc(*inlen, GFP_KERNEL);
1049 		*out = kzalloc(*outlen, GFP_KERNEL);
1050 		if (!*in || !*out)
1051 			goto outerr;
1052 
1053 		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
1054 		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
1055 		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
1056 		break;
1057 	case MLX5_CMD_OP_RST2INIT_QP:
1058 		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
1059 		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
1060 		*in = kzalloc(*inlen, GFP_KERNEL);
1061 		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
1062 		if (!*in || !*out)
1063 			goto outerr;
1064 
1065 		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
1066 		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1067 		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1068 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1069 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1070 		MLX5_SET(qpc, qpc, rwe, 1);
1071 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1072 		MLX5_SET(ads, pp, vhca_port_num, 1);
1073 		break;
1074 	case MLX5_CMD_OP_INIT2RTR_QP:
1075 		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1076 		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1077 		*in = kzalloc(*inlen, GFP_KERNEL);
1078 		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
1079 		if (!*in || !*out)
1080 			goto outerr;
1081 
1082 		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1083 		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1084 		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
1085 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1086 		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1087 		MLX5_SET(qpc, qpc, log_msg_max, 30);
1088 		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1089 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1090 		MLX5_SET(ads, pp, fl, 1);
1091 		break;
1092 	case MLX5_CMD_OP_RTR2RTS_QP:
1093 		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1094 		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1095 		*in = kzalloc(*inlen, GFP_KERNEL);
1096 		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
1097 		if (!*in || !*out)
1098 			goto outerr;
1099 
1100 		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1101 		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1102 		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
1103 		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1104 		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1105 		MLX5_SET(ads, pp, ack_timeout, 14);
1106 		MLX5_SET(qpc, qpc, retry_count, 7);
1107 		MLX5_SET(qpc, qpc, rnr_retry, 7);
1108 		break;
1109 	default:
1110 		goto outerr_nullify;
1111 	}
1112 
1113 	return;
1114 
1115 outerr:
1116 	kfree(*in);
1117 	kfree(*out);
1118 outerr_nullify:
1119 	*in = NULL;
1120 	*out = NULL;
1121 }
1122 
free_inout(void * in,void * out)1123 static void free_inout(void *in, void *out)
1124 {
1125 	kfree(in);
1126 	kfree(out);
1127 }
1128 
1129 /* Two QPs are used by each virtqueue. One is used by the driver and one by
1130  * firmware. The fw argument indicates whether the subjected QP is the one used
1131  * by firmware.
1132  */
modify_qp(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,bool fw,int cmd)1133 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1134 {
1135 	int outlen;
1136 	int inlen;
1137 	void *out;
1138 	void *in;
1139 	int err;
1140 
1141 	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1142 	if (!in || !out)
1143 		return -ENOMEM;
1144 
1145 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1146 	free_inout(in, out);
1147 	return err;
1148 }
1149 
connect_qps(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1150 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1151 {
1152 	int err;
1153 
1154 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1155 	if (err)
1156 		return err;
1157 
1158 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1159 	if (err)
1160 		return err;
1161 
1162 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1163 	if (err)
1164 		return err;
1165 
1166 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1167 	if (err)
1168 		return err;
1169 
1170 	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1171 	if (err)
1172 		return err;
1173 
1174 	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1175 	if (err)
1176 		return err;
1177 
1178 	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1179 }
1180 
1181 struct mlx5_virtq_attr {
1182 	u8 state;
1183 	u16 available_index;
1184 	u16 used_index;
1185 };
1186 
1187 struct mlx5_virtqueue_query_mem {
1188 	u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
1189 	u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
1190 };
1191 
1192 struct mlx5_virtqueue_modify_mem {
1193 	u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
1194 	u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
1195 };
1196 
fill_query_virtqueue_cmd(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,struct mlx5_virtqueue_query_mem * cmd)1197 static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1198 				     struct mlx5_vdpa_virtqueue *mvq,
1199 				     struct mlx5_virtqueue_query_mem *cmd)
1200 {
1201 	void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1202 
1203 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1204 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1205 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1206 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1207 }
1208 
query_virtqueue_end(struct mlx5_vdpa_net * ndev,struct mlx5_virtqueue_query_mem * cmd,struct mlx5_virtq_attr * attr)1209 static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
1210 				struct mlx5_virtqueue_query_mem *cmd,
1211 				struct mlx5_virtq_attr *attr)
1212 {
1213 	void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
1214 
1215 	memset(attr, 0, sizeof(*attr));
1216 	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1217 	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1218 	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1219 }
1220 
query_virtqueues(struct mlx5_vdpa_net * ndev,int start_vq,int num_vqs,struct mlx5_virtq_attr * attrs)1221 static int query_virtqueues(struct mlx5_vdpa_net *ndev,
1222 			    int start_vq,
1223 			    int num_vqs,
1224 			    struct mlx5_virtq_attr *attrs)
1225 {
1226 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1227 	struct mlx5_virtqueue_query_mem *cmd_mem;
1228 	struct mlx5_vdpa_async_cmd *cmds;
1229 	int err = 0;
1230 
1231 	WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n",
1232 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1233 
1234 	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
1235 	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
1236 	if (!cmds || !cmd_mem) {
1237 		err = -ENOMEM;
1238 		goto done;
1239 	}
1240 
1241 	for (int i = 0; i < num_vqs; i++) {
1242 		cmds[i].in = &cmd_mem[i].in;
1243 		cmds[i].inlen = sizeof(cmd_mem[i].in);
1244 		cmds[i].out = &cmd_mem[i].out;
1245 		cmds[i].outlen = sizeof(cmd_mem[i].out);
1246 		fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
1247 	}
1248 
1249 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1250 	if (err) {
1251 		mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n",
1252 			      start_vq, start_vq + num_vqs, err);
1253 		goto done;
1254 	}
1255 
1256 	for (int i = 0; i < num_vqs; i++) {
1257 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1258 		int vq_idx = start_vq + i;
1259 
1260 		if (cmd->err) {
1261 			mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, err);
1262 			if (!err)
1263 				err = cmd->err;
1264 			continue;
1265 		}
1266 
1267 		query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]);
1268 	}
1269 
1270 done:
1271 	kvfree(cmd_mem);
1272 	kvfree(cmds);
1273 	return err;
1274 }
1275 
is_resumable(struct mlx5_vdpa_net * ndev)1276 static bool is_resumable(struct mlx5_vdpa_net *ndev)
1277 {
1278 	return ndev->mvdev.vdev.config->resume;
1279 }
1280 
is_valid_state_change(int oldstate,int newstate,bool resumable)1281 static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
1282 {
1283 	switch (oldstate) {
1284 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1285 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1286 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1287 		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1288 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1289 		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
1290 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1291 	default:
1292 		return false;
1293 	}
1294 }
1295 
modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue * mvq)1296 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
1297 {
1298 	/* Only state is always modifiable */
1299 	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
1300 		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
1301 		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1302 
1303 	return true;
1304 }
1305 
fill_modify_virtqueue_cmd(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,int state,struct mlx5_virtqueue_modify_mem * cmd)1306 static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
1307 				      struct mlx5_vdpa_virtqueue *mvq,
1308 				      int state,
1309 				      struct mlx5_virtqueue_modify_mem *cmd)
1310 {
1311 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1312 	struct mlx5_vdpa_mr *desc_mr = NULL;
1313 	struct mlx5_vdpa_mr *vq_mr = NULL;
1314 	void *obj_context;
1315 	void *cmd_hdr;
1316 	void *vq_ctx;
1317 
1318 	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
1319 
1320 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1321 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1322 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1323 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1324 
1325 	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context);
1326 	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
1327 
1328 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1329 		MLX5_SET(virtio_net_q_object, obj_context, state, state);
1330 
1331 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
1332 		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
1333 		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
1334 		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
1335 	}
1336 
1337 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
1338 		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
1339 
1340 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
1341 		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
1342 
1343 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
1344 		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
1345 			!!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
1346 
1347 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
1348 		u16 mlx_features = get_features(ndev->mvdev.actual_features);
1349 
1350 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
1351 			 mlx_features >> 3);
1352 		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
1353 			 mlx_features & 7);
1354 	}
1355 
1356 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1357 		vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
1358 
1359 		if (vq_mr)
1360 			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
1361 		else
1362 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
1363 	}
1364 
1365 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1366 		desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
1367 
1368 		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
1369 			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
1370 		else
1371 			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
1372 	}
1373 
1374 	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
1375 }
1376 
modify_virtqueue_end(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,int state)1377 static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev,
1378 				 struct mlx5_vdpa_virtqueue *mvq,
1379 				 int state)
1380 {
1381 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1382 
1383 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
1384 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP];
1385 		struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid];
1386 
1387 		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
1388 		mlx5_vdpa_get_mr(mvdev, vq_mr);
1389 		mvq->vq_mr = vq_mr;
1390 	}
1391 
1392 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
1393 		unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP];
1394 		struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid];
1395 
1396 		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
1397 		mlx5_vdpa_get_mr(mvdev, desc_mr);
1398 		mvq->desc_mr = desc_mr;
1399 	}
1400 
1401 	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
1402 		mvq->fw_state = state;
1403 
1404 	mvq->modified_fields = 0;
1405 }
1406 
counter_set_alloc(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1407 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1408 {
1409 	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1410 	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1411 	void *cmd_hdr;
1412 	int err;
1413 
1414 	if (!counters_supported(&ndev->mvdev))
1415 		return 0;
1416 
1417 	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1418 
1419 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1420 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1421 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1422 
1423 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1424 	if (err)
1425 		return err;
1426 
1427 	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1428 
1429 	return 0;
1430 }
1431 
counter_set_dealloc(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1432 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1433 {
1434 	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1435 	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1436 
1437 	if (!counters_supported(&ndev->mvdev))
1438 		return;
1439 
1440 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1441 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1442 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1443 	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1444 	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1445 		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1446 }
1447 
mlx5_vdpa_int_handler(int irq,void * priv)1448 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1449 {
1450 	struct vdpa_callback *cb = priv;
1451 
1452 	if (cb->callback)
1453 		return cb->callback(cb->private);
1454 
1455 	return IRQ_HANDLED;
1456 }
1457 
alloc_vector(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1458 static void alloc_vector(struct mlx5_vdpa_net *ndev,
1459 			 struct mlx5_vdpa_virtqueue *mvq)
1460 {
1461 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1462 	struct mlx5_vdpa_irq_pool_entry *ent;
1463 	int err;
1464 	int i;
1465 
1466 	for (i = 0; i < irqp->num_ent; i++) {
1467 		ent = &irqp->entries[i];
1468 		if (!ent->used) {
1469 			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1470 				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1471 			ent->dev_id = &ndev->event_cbs[mvq->index];
1472 			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1473 					  ent->name, ent->dev_id);
1474 			if (err)
1475 				return;
1476 
1477 			ent->used = true;
1478 			mvq->map = ent->map;
1479 			return;
1480 		}
1481 	}
1482 }
1483 
dealloc_vector(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1484 static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1485 			   struct mlx5_vdpa_virtqueue *mvq)
1486 {
1487 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1488 	int i;
1489 
1490 	for (i = 0; i < irqp->num_ent; i++)
1491 		if (mvq->map.virq == irqp->entries[i].map.virq) {
1492 			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1493 			irqp->entries[i].used = false;
1494 			return;
1495 		}
1496 }
1497 
setup_vq(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,bool filled)1498 static int setup_vq(struct mlx5_vdpa_net *ndev,
1499 		    struct mlx5_vdpa_virtqueue *mvq,
1500 		    bool filled)
1501 {
1502 	u16 idx = mvq->index;
1503 	int err;
1504 
1505 	if (mvq->initialized)
1506 		return 0;
1507 
1508 	err = cq_create(ndev, idx, mvq->num_ent);
1509 	if (err)
1510 		return err;
1511 
1512 	err = qp_create(ndev, mvq, &mvq->fwqp);
1513 	if (err)
1514 		goto err_fwqp;
1515 
1516 	err = qp_create(ndev, mvq, &mvq->vqqp);
1517 	if (err)
1518 		goto err_vqqp;
1519 
1520 	err = connect_qps(ndev, mvq);
1521 	if (err)
1522 		goto err_connect;
1523 
1524 	err = counter_set_alloc(ndev, mvq);
1525 	if (err)
1526 		goto err_connect;
1527 
1528 	alloc_vector(ndev, mvq);
1529 	err = create_virtqueue(ndev, mvq, filled);
1530 	if (err)
1531 		goto err_vq;
1532 
1533 	mvq->initialized = true;
1534 
1535 	if (mvq->ready) {
1536 		err = resume_vq(ndev, mvq);
1537 		if (err)
1538 			goto err_modify;
1539 	}
1540 
1541 	return 0;
1542 
1543 err_modify:
1544 	destroy_virtqueue(ndev, mvq);
1545 err_vq:
1546 	dealloc_vector(ndev, mvq);
1547 	counter_set_dealloc(ndev, mvq);
1548 err_connect:
1549 	qp_destroy(ndev, &mvq->vqqp);
1550 err_vqqp:
1551 	qp_destroy(ndev, &mvq->fwqp);
1552 err_fwqp:
1553 	cq_destroy(ndev, idx);
1554 	return err;
1555 }
1556 
modify_virtqueues(struct mlx5_vdpa_net * ndev,int start_vq,int num_vqs,int state)1557 static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state)
1558 {
1559 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
1560 	struct mlx5_virtqueue_modify_mem *cmd_mem;
1561 	struct mlx5_vdpa_async_cmd *cmds;
1562 	int err = 0;
1563 
1564 	WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n",
1565 	     start_vq, start_vq + num_vqs, mvdev->max_vqs);
1566 
1567 	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
1568 	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
1569 	if (!cmds || !cmd_mem) {
1570 		err = -ENOMEM;
1571 		goto done;
1572 	}
1573 
1574 	for (int i = 0; i < num_vqs; i++) {
1575 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1576 		struct mlx5_vdpa_virtqueue *mvq;
1577 		int vq_idx = start_vq + i;
1578 
1579 		mvq = &ndev->vqs[vq_idx];
1580 
1581 		if (!modifiable_virtqueue_fields(mvq)) {
1582 			err = -EINVAL;
1583 			goto done;
1584 		}
1585 
1586 		if (mvq->fw_state != state) {
1587 			if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
1588 				err = -EINVAL;
1589 				goto done;
1590 			}
1591 
1592 			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
1593 		}
1594 
1595 		cmd->in = &cmd_mem[i].in;
1596 		cmd->inlen = sizeof(cmd_mem[i].in);
1597 		cmd->out = &cmd_mem[i].out;
1598 		cmd->outlen = sizeof(cmd_mem[i].out);
1599 		fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]);
1600 	}
1601 
1602 	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
1603 	if (err) {
1604 		mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n",
1605 			      start_vq, start_vq + num_vqs);
1606 		goto done;
1607 	}
1608 
1609 	for (int i = 0; i < num_vqs; i++) {
1610 		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
1611 		struct mlx5_vdpa_virtqueue *mvq;
1612 		int vq_idx = start_vq + i;
1613 
1614 		mvq = &ndev->vqs[vq_idx];
1615 
1616 		if (cmd->err) {
1617 			mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n",
1618 				      vq_idx, mvq->fw_state, state, err);
1619 			if (!err)
1620 				err = cmd->err;
1621 			continue;
1622 		}
1623 
1624 		modify_virtqueue_end(ndev, mvq, state);
1625 	}
1626 
1627 done:
1628 	kvfree(cmd_mem);
1629 	kvfree(cmds);
1630 	return err;
1631 }
1632 
suspend_vqs(struct mlx5_vdpa_net * ndev,int start_vq,int num_vqs)1633 static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1634 {
1635 	struct mlx5_vdpa_virtqueue *mvq;
1636 	struct mlx5_virtq_attr *attrs;
1637 	int vq_idx, i;
1638 	int err;
1639 
1640 	if (start_vq >= ndev->cur_num_vqs)
1641 		return -EINVAL;
1642 
1643 	mvq = &ndev->vqs[start_vq];
1644 	if (!mvq->initialized)
1645 		return 0;
1646 
1647 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1648 		return 0;
1649 
1650 	err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
1651 	if (err)
1652 		return err;
1653 
1654 	attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL);
1655 	if (!attrs)
1656 		return -ENOMEM;
1657 
1658 	err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
1659 	if (err)
1660 		goto done;
1661 
1662 	for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
1663 		mvq = &ndev->vqs[vq_idx];
1664 		mvq->avail_idx = attrs[i].available_index;
1665 		mvq->used_idx = attrs[i].used_index;
1666 	}
1667 
1668 done:
1669 	kfree(attrs);
1670 	return err;
1671 }
1672 
suspend_vq(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1673 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1674 {
1675 	return suspend_vqs(ndev, mvq->index, 1);
1676 }
1677 
resume_vqs(struct mlx5_vdpa_net * ndev,int start_vq,int num_vqs)1678 static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
1679 {
1680 	struct mlx5_vdpa_virtqueue *mvq;
1681 	int err;
1682 
1683 	if (start_vq >= ndev->mvdev.max_vqs)
1684 		return -EINVAL;
1685 
1686 	mvq = &ndev->vqs[start_vq];
1687 	if (!mvq->initialized)
1688 		return 0;
1689 
1690 	if (mvq->index >= ndev->cur_num_vqs)
1691 		return 0;
1692 
1693 	switch (mvq->fw_state) {
1694 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1695 		/* Due to a FW quirk we need to modify the VQ fields first then change state.
1696 		 * This should be fixed soon. After that, a single command can be used.
1697 		 */
1698 		err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state);
1699 		if (err)
1700 			return err;
1701 		break;
1702 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1703 		if (!is_resumable(ndev)) {
1704 			mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
1705 			return -EINVAL;
1706 		}
1707 		break;
1708 	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1709 		return 0;
1710 	default:
1711 		mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n",
1712 			       mvq->index, mvq->fw_state);
1713 		return -EINVAL;
1714 	}
1715 
1716 	return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1717 }
1718 
resume_vq(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1719 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1720 {
1721 	return resume_vqs(ndev, mvq->index, 1);
1722 }
1723 
teardown_vq(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)1724 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1725 {
1726 	if (!mvq->initialized)
1727 		return;
1728 
1729 	suspend_vq(ndev, mvq);
1730 	mvq->modified_fields = 0;
1731 	destroy_virtqueue(ndev, mvq);
1732 	dealloc_vector(ndev, mvq);
1733 	counter_set_dealloc(ndev, mvq);
1734 	qp_destroy(ndev, &mvq->vqqp);
1735 	qp_destroy(ndev, &mvq->fwqp);
1736 	cq_destroy(ndev, mvq->index);
1737 	mvq->initialized = false;
1738 }
1739 
create_rqt(struct mlx5_vdpa_net * ndev)1740 static int create_rqt(struct mlx5_vdpa_net *ndev)
1741 {
1742 	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1743 	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1744 	__be32 *list;
1745 	void *rqtc;
1746 	int inlen;
1747 	void *in;
1748 	int i, j;
1749 	int err;
1750 
1751 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1752 	in = kzalloc(inlen, GFP_KERNEL);
1753 	if (!in)
1754 		return -ENOMEM;
1755 
1756 	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1757 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1758 
1759 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1760 	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1761 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1762 	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1763 		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1764 
1765 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1766 	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1767 	kfree(in);
1768 	if (err)
1769 		return err;
1770 
1771 	return 0;
1772 }
1773 
1774 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1775 
modify_rqt(struct mlx5_vdpa_net * ndev,int num)1776 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1777 {
1778 	int act_sz = roundup_pow_of_two(num / 2);
1779 	__be32 *list;
1780 	void *rqtc;
1781 	int inlen;
1782 	void *in;
1783 	int i, j;
1784 	int err;
1785 
1786 	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1787 	in = kzalloc(inlen, GFP_KERNEL);
1788 	if (!in)
1789 		return -ENOMEM;
1790 
1791 	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1792 	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1793 	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1794 	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1795 
1796 	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1797 	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1798 		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1799 
1800 	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1801 	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1802 	kfree(in);
1803 	if (err)
1804 		return err;
1805 
1806 	return 0;
1807 }
1808 
destroy_rqt(struct mlx5_vdpa_net * ndev)1809 static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1810 {
1811 	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1812 }
1813 
create_tir(struct mlx5_vdpa_net * ndev)1814 static int create_tir(struct mlx5_vdpa_net *ndev)
1815 {
1816 #define HASH_IP_L4PORTS                                                                            \
1817 	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1818 	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1819 	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1820 						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1821 						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1822 						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1823 						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1824 	void *rss_key;
1825 	void *outer;
1826 	void *tirc;
1827 	void *in;
1828 	int err;
1829 
1830 	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1831 	if (!in)
1832 		return -ENOMEM;
1833 
1834 	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1835 	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1836 	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1837 
1838 	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1839 	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1840 	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1841 	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1842 
1843 	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1844 	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1845 	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1846 	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1847 
1848 	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1849 	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1850 
1851 	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1852 	kfree(in);
1853 	if (err)
1854 		return err;
1855 
1856 	mlx5_vdpa_add_tirn(ndev);
1857 	return err;
1858 }
1859 
destroy_tir(struct mlx5_vdpa_net * ndev)1860 static void destroy_tir(struct mlx5_vdpa_net *ndev)
1861 {
1862 	mlx5_vdpa_remove_tirn(ndev);
1863 	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1864 }
1865 
1866 #define MAX_STEERING_ENT 0x8000
1867 #define MAX_STEERING_GROUPS 2
1868 
1869 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1870        #define NUM_DESTS 2
1871 #else
1872        #define NUM_DESTS 1
1873 #endif
1874 
add_steering_counters(struct mlx5_vdpa_net * ndev,struct macvlan_node * node,struct mlx5_flow_act * flow_act,struct mlx5_flow_destination * dests)1875 static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1876 				 struct macvlan_node *node,
1877 				 struct mlx5_flow_act *flow_act,
1878 				 struct mlx5_flow_destination *dests)
1879 {
1880 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1881 	int err;
1882 
1883 	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1884 	if (IS_ERR(node->ucast_counter.counter))
1885 		return PTR_ERR(node->ucast_counter.counter);
1886 
1887 	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1888 	if (IS_ERR(node->mcast_counter.counter)) {
1889 		err = PTR_ERR(node->mcast_counter.counter);
1890 		goto err_mcast_counter;
1891 	}
1892 
1893 	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1894 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1895 	return 0;
1896 
1897 err_mcast_counter:
1898 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1899 	return err;
1900 #else
1901 	return 0;
1902 #endif
1903 }
1904 
remove_steering_counters(struct mlx5_vdpa_net * ndev,struct macvlan_node * node)1905 static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1906 				     struct macvlan_node *node)
1907 {
1908 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1909 	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1910 	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1911 #endif
1912 }
1913 
mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net * ndev,u8 * mac,struct macvlan_node * node)1914 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1915 					struct macvlan_node *node)
1916 {
1917 	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1918 	struct mlx5_flow_act flow_act = {};
1919 	struct mlx5_flow_spec *spec;
1920 	void *headers_c;
1921 	void *headers_v;
1922 	u8 *dmac_c;
1923 	u8 *dmac_v;
1924 	int err;
1925 	u16 vid;
1926 
1927 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1928 	if (!spec)
1929 		return -ENOMEM;
1930 
1931 	vid = key2vid(node->macvlan);
1932 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1933 	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1934 	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1935 	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1936 	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1937 	eth_broadcast_addr(dmac_c);
1938 	ether_addr_copy(dmac_v, mac);
1939 	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1940 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1941 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1942 	}
1943 	if (node->tagged) {
1944 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1945 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1946 	}
1947 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1948 	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1949 	dests[0].tir_num = ndev->res.tirn;
1950 	err = add_steering_counters(ndev, node, &flow_act, dests);
1951 	if (err)
1952 		goto out_free;
1953 
1954 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1955 	dests[1].counter = node->ucast_counter.counter;
1956 #endif
1957 	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1958 	if (IS_ERR(node->ucast_rule)) {
1959 		err = PTR_ERR(node->ucast_rule);
1960 		goto err_ucast;
1961 	}
1962 
1963 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1964 	dests[1].counter = node->mcast_counter.counter;
1965 #endif
1966 
1967 	memset(dmac_c, 0, ETH_ALEN);
1968 	memset(dmac_v, 0, ETH_ALEN);
1969 	dmac_c[0] = 1;
1970 	dmac_v[0] = 1;
1971 	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1972 	if (IS_ERR(node->mcast_rule)) {
1973 		err = PTR_ERR(node->mcast_rule);
1974 		goto err_mcast;
1975 	}
1976 	kvfree(spec);
1977 	mlx5_vdpa_add_rx_counters(ndev, node);
1978 	return 0;
1979 
1980 err_mcast:
1981 	mlx5_del_flow_rules(node->ucast_rule);
1982 err_ucast:
1983 	remove_steering_counters(ndev, node);
1984 out_free:
1985 	kvfree(spec);
1986 	return err;
1987 }
1988 
mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net * ndev,struct macvlan_node * node)1989 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1990 					 struct macvlan_node *node)
1991 {
1992 	mlx5_vdpa_remove_rx_counters(ndev, node);
1993 	mlx5_del_flow_rules(node->ucast_rule);
1994 	mlx5_del_flow_rules(node->mcast_rule);
1995 }
1996 
search_val(u8 * mac,u16 vlan,bool tagged)1997 static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1998 {
1999 	u64 val;
2000 
2001 	if (!tagged)
2002 		vlan = MLX5V_UNTAGGED;
2003 
2004 	val = (u64)vlan << 48 |
2005 	      (u64)mac[0] << 40 |
2006 	      (u64)mac[1] << 32 |
2007 	      (u64)mac[2] << 24 |
2008 	      (u64)mac[3] << 16 |
2009 	      (u64)mac[4] << 8 |
2010 	      (u64)mac[5];
2011 
2012 	return val;
2013 }
2014 
mac_vlan_lookup(struct mlx5_vdpa_net * ndev,u64 value)2015 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
2016 {
2017 	struct macvlan_node *pos;
2018 	u32 idx;
2019 
2020 	idx = hash_64(value, 8); // tbd 8
2021 	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
2022 		if (pos->macvlan == value)
2023 			return pos;
2024 	}
2025 	return NULL;
2026 }
2027 
mac_vlan_add(struct mlx5_vdpa_net * ndev,u8 * mac,u16 vid,bool tagged)2028 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
2029 {
2030 	struct macvlan_node *ptr;
2031 	u64 val;
2032 	u32 idx;
2033 	int err;
2034 
2035 	val = search_val(mac, vid, tagged);
2036 	if (mac_vlan_lookup(ndev, val))
2037 		return -EEXIST;
2038 
2039 	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
2040 	if (!ptr)
2041 		return -ENOMEM;
2042 
2043 	ptr->tagged = tagged;
2044 	ptr->macvlan = val;
2045 	ptr->ndev = ndev;
2046 	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
2047 	if (err)
2048 		goto err_add;
2049 
2050 	idx = hash_64(val, 8);
2051 	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
2052 	return 0;
2053 
2054 err_add:
2055 	kfree(ptr);
2056 	return err;
2057 }
2058 
mac_vlan_del(struct mlx5_vdpa_net * ndev,u8 * mac,u16 vlan,bool tagged)2059 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
2060 {
2061 	struct macvlan_node *ptr;
2062 
2063 	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
2064 	if (!ptr)
2065 		return;
2066 
2067 	hlist_del(&ptr->hlist);
2068 	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
2069 	remove_steering_counters(ndev, ptr);
2070 	kfree(ptr);
2071 }
2072 
clear_mac_vlan_table(struct mlx5_vdpa_net * ndev)2073 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
2074 {
2075 	struct macvlan_node *pos;
2076 	struct hlist_node *n;
2077 	int i;
2078 
2079 	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
2080 		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
2081 			hlist_del(&pos->hlist);
2082 			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
2083 			remove_steering_counters(ndev, pos);
2084 			kfree(pos);
2085 		}
2086 	}
2087 }
2088 
setup_steering(struct mlx5_vdpa_net * ndev)2089 static int setup_steering(struct mlx5_vdpa_net *ndev)
2090 {
2091 	struct mlx5_flow_table_attr ft_attr = {};
2092 	struct mlx5_flow_namespace *ns;
2093 	int err;
2094 
2095 	ft_attr.max_fte = MAX_STEERING_ENT;
2096 	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
2097 
2098 	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
2099 	if (!ns) {
2100 		mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n");
2101 		return -EOPNOTSUPP;
2102 	}
2103 
2104 	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
2105 	if (IS_ERR(ndev->rxft)) {
2106 		mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n");
2107 		return PTR_ERR(ndev->rxft);
2108 	}
2109 	mlx5_vdpa_add_rx_flow_table(ndev);
2110 
2111 	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
2112 	if (err)
2113 		goto err_add;
2114 
2115 	return 0;
2116 
2117 err_add:
2118 	mlx5_vdpa_remove_rx_flow_table(ndev);
2119 	mlx5_destroy_flow_table(ndev->rxft);
2120 	return err;
2121 }
2122 
teardown_steering(struct mlx5_vdpa_net * ndev)2123 static void teardown_steering(struct mlx5_vdpa_net *ndev)
2124 {
2125 	clear_mac_vlan_table(ndev);
2126 	mlx5_vdpa_remove_rx_flow_table(ndev);
2127 	mlx5_destroy_flow_table(ndev->rxft);
2128 }
2129 
handle_ctrl_mac(struct mlx5_vdpa_dev * mvdev,u8 cmd)2130 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2131 {
2132 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2133 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2134 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2135 	struct mlx5_core_dev *pfmdev;
2136 	size_t read;
2137 	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
2138 
2139 	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2140 	switch (cmd) {
2141 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
2142 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
2143 		if (read != ETH_ALEN)
2144 			break;
2145 
2146 		if (!memcmp(ndev->config.mac, mac, 6)) {
2147 			status = VIRTIO_NET_OK;
2148 			break;
2149 		}
2150 
2151 		if (is_zero_ether_addr(mac))
2152 			break;
2153 
2154 		if (!is_zero_ether_addr(ndev->config.mac)) {
2155 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2156 				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
2157 					       ndev->config.mac);
2158 				break;
2159 			}
2160 		}
2161 
2162 		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
2163 			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2164 				       mac);
2165 			break;
2166 		}
2167 
2168 		/* backup the original mac address so that if failed to add the forward rules
2169 		 * we could restore it
2170 		 */
2171 		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
2172 
2173 		memcpy(ndev->config.mac, mac, ETH_ALEN);
2174 
2175 		/* Need recreate the flow table entry, so that the packet could forward back
2176 		 */
2177 		mac_vlan_del(ndev, mac_back, 0, false);
2178 
2179 		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2180 			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2181 
2182 			/* Although it hardly run here, we still need double check */
2183 			if (is_zero_ether_addr(mac_back)) {
2184 				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2185 				break;
2186 			}
2187 
2188 			/* Try to restore original mac address to MFPS table, and try to restore
2189 			 * the forward rule entry.
2190 			 */
2191 			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2192 				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2193 					       ndev->config.mac);
2194 			}
2195 
2196 			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2197 				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2198 					       mac_back);
2199 			}
2200 
2201 			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2202 
2203 			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2204 				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2205 
2206 			break;
2207 		}
2208 
2209 		status = VIRTIO_NET_OK;
2210 		break;
2211 
2212 	default:
2213 		break;
2214 	}
2215 
2216 	return status;
2217 }
2218 
change_num_qps(struct mlx5_vdpa_dev * mvdev,int newqps)2219 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2220 {
2221 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2222 	int cur_vqs = ndev->cur_num_vqs;
2223 	int new_vqs = newqps * 2;
2224 	int err;
2225 	int i;
2226 
2227 	if (cur_vqs > new_vqs) {
2228 		err = modify_rqt(ndev, new_vqs);
2229 		if (err)
2230 			return err;
2231 
2232 		if (is_resumable(ndev)) {
2233 			suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
2234 		} else {
2235 			for (i = new_vqs; i < cur_vqs; i++)
2236 				teardown_vq(ndev, &ndev->vqs[i]);
2237 		}
2238 
2239 		ndev->cur_num_vqs = new_vqs;
2240 	} else {
2241 		ndev->cur_num_vqs = new_vqs;
2242 
2243 		for (i = cur_vqs; i < new_vqs; i++) {
2244 			err = setup_vq(ndev, &ndev->vqs[i], false);
2245 			if (err)
2246 				goto clean_added;
2247 		}
2248 
2249 		err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
2250 		if (err)
2251 			goto clean_added;
2252 
2253 		err = modify_rqt(ndev, new_vqs);
2254 		if (err)
2255 			goto clean_added;
2256 	}
2257 	return 0;
2258 
2259 clean_added:
2260 	for (--i; i >= cur_vqs; --i)
2261 		teardown_vq(ndev, &ndev->vqs[i]);
2262 
2263 	ndev->cur_num_vqs = cur_vqs;
2264 
2265 	return err;
2266 }
2267 
handle_ctrl_mq(struct mlx5_vdpa_dev * mvdev,u8 cmd)2268 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2269 {
2270 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2271 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2272 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2273 	struct virtio_net_ctrl_mq mq;
2274 	size_t read;
2275 	u16 newqps;
2276 
2277 	switch (cmd) {
2278 	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2279 		/* This mq feature check aligns with pre-existing userspace
2280 		 * implementation.
2281 		 *
2282 		 * Without it, an untrusted driver could fake a multiqueue config
2283 		 * request down to a non-mq device that may cause kernel to
2284 		 * panic due to uninitialized resources for extra vqs. Even with
2285 		 * a well behaving guest driver, it is not expected to allow
2286 		 * changing the number of vqs on a non-mq device.
2287 		 */
2288 		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2289 			break;
2290 
2291 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2292 		if (read != sizeof(mq))
2293 			break;
2294 
2295 		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2296 		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2297 		    newqps > ndev->rqt_size)
2298 			break;
2299 
2300 		if (ndev->cur_num_vqs == 2 * newqps) {
2301 			status = VIRTIO_NET_OK;
2302 			break;
2303 		}
2304 
2305 		if (!change_num_qps(mvdev, newqps))
2306 			status = VIRTIO_NET_OK;
2307 
2308 		break;
2309 	default:
2310 		break;
2311 	}
2312 
2313 	return status;
2314 }
2315 
handle_ctrl_vlan(struct mlx5_vdpa_dev * mvdev,u8 cmd)2316 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2317 {
2318 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2319 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2320 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2321 	__virtio16 vlan;
2322 	size_t read;
2323 	u16 id;
2324 
2325 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2326 		return status;
2327 
2328 	switch (cmd) {
2329 	case VIRTIO_NET_CTRL_VLAN_ADD:
2330 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2331 		if (read != sizeof(vlan))
2332 			break;
2333 
2334 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2335 		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2336 			break;
2337 
2338 		status = VIRTIO_NET_OK;
2339 		break;
2340 	case VIRTIO_NET_CTRL_VLAN_DEL:
2341 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2342 		if (read != sizeof(vlan))
2343 			break;
2344 
2345 		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2346 		mac_vlan_del(ndev, ndev->config.mac, id, true);
2347 		status = VIRTIO_NET_OK;
2348 		break;
2349 	default:
2350 		break;
2351 	}
2352 
2353 	return status;
2354 }
2355 
mlx5_cvq_kick_handler(struct work_struct * work)2356 static void mlx5_cvq_kick_handler(struct work_struct *work)
2357 {
2358 	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2359 	struct virtio_net_ctrl_hdr ctrl;
2360 	struct mlx5_vdpa_wq_ent *wqent;
2361 	struct mlx5_vdpa_dev *mvdev;
2362 	struct mlx5_control_vq *cvq;
2363 	struct mlx5_vdpa_net *ndev;
2364 	size_t read, write;
2365 	int err;
2366 
2367 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2368 	mvdev = wqent->mvdev;
2369 	ndev = to_mlx5_vdpa_ndev(mvdev);
2370 	cvq = &mvdev->cvq;
2371 
2372 	down_write(&ndev->reslock);
2373 
2374 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2375 		goto out;
2376 
2377 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2378 		goto out;
2379 
2380 	if (!cvq->ready)
2381 		goto out;
2382 
2383 	while (true) {
2384 		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2385 					   GFP_ATOMIC);
2386 		if (err <= 0)
2387 			break;
2388 
2389 		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2390 		if (read != sizeof(ctrl))
2391 			break;
2392 
2393 		cvq->received_desc++;
2394 		switch (ctrl.class) {
2395 		case VIRTIO_NET_CTRL_MAC:
2396 			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2397 			break;
2398 		case VIRTIO_NET_CTRL_MQ:
2399 			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2400 			break;
2401 		case VIRTIO_NET_CTRL_VLAN:
2402 			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2403 			break;
2404 		default:
2405 			break;
2406 		}
2407 
2408 		/* Make sure data is written before advancing index */
2409 		smp_wmb();
2410 
2411 		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2412 		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2413 		vringh_kiov_cleanup(&cvq->riov);
2414 		vringh_kiov_cleanup(&cvq->wiov);
2415 
2416 		if (vringh_need_notify_iotlb(&cvq->vring))
2417 			vringh_notify(&cvq->vring);
2418 
2419 		cvq->completed_desc++;
2420 		queue_work(mvdev->wq, &wqent->work);
2421 		break;
2422 	}
2423 
2424 out:
2425 	up_write(&ndev->reslock);
2426 }
2427 
mlx5_vdpa_kick_vq(struct vdpa_device * vdev,u16 idx)2428 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2429 {
2430 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2431 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2432 	struct mlx5_vdpa_virtqueue *mvq;
2433 
2434 	if (!is_index_valid(mvdev, idx))
2435 		return;
2436 
2437 	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2438 		if (!mvdev->wq || !mvdev->cvq.ready)
2439 			return;
2440 
2441 		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2442 		return;
2443 	}
2444 
2445 	mvq = &ndev->vqs[idx];
2446 	if (unlikely(!mvq->ready))
2447 		return;
2448 
2449 	iowrite16(idx, ndev->mvdev.res.kick_addr);
2450 }
2451 
mlx5_vdpa_set_vq_address(struct vdpa_device * vdev,u16 idx,u64 desc_area,u64 driver_area,u64 device_area)2452 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2453 				    u64 driver_area, u64 device_area)
2454 {
2455 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2456 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2457 	struct mlx5_vdpa_virtqueue *mvq;
2458 
2459 	if (!is_index_valid(mvdev, idx))
2460 		return -EINVAL;
2461 
2462 	if (is_ctrl_vq_idx(mvdev, idx)) {
2463 		mvdev->cvq.desc_addr = desc_area;
2464 		mvdev->cvq.device_addr = device_area;
2465 		mvdev->cvq.driver_addr = driver_area;
2466 		return 0;
2467 	}
2468 
2469 	mvq = &ndev->vqs[idx];
2470 	mvq->desc_addr = desc_area;
2471 	mvq->device_addr = device_area;
2472 	mvq->driver_addr = driver_area;
2473 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS;
2474 	return 0;
2475 }
2476 
mlx5_vdpa_set_vq_num(struct vdpa_device * vdev,u16 idx,u32 num)2477 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2478 {
2479 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2480 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2481 	struct mlx5_vdpa_virtqueue *mvq;
2482 
2483 	if (!is_index_valid(mvdev, idx))
2484 		return;
2485 
2486         if (is_ctrl_vq_idx(mvdev, idx)) {
2487                 struct mlx5_control_vq *cvq = &mvdev->cvq;
2488 
2489                 cvq->vring.vring.num = num;
2490                 return;
2491         }
2492 
2493 	mvq = &ndev->vqs[idx];
2494 	ndev->needs_teardown |= num != mvq->num_ent;
2495 	mvq->num_ent = num;
2496 }
2497 
mlx5_vdpa_set_vq_cb(struct vdpa_device * vdev,u16 idx,struct vdpa_callback * cb)2498 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2499 {
2500 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2501 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2502 
2503 	ndev->event_cbs[idx] = *cb;
2504 	if (is_ctrl_vq_idx(mvdev, idx))
2505 		mvdev->cvq.event_cb = *cb;
2506 }
2507 
mlx5_cvq_notify(struct vringh * vring)2508 static void mlx5_cvq_notify(struct vringh *vring)
2509 {
2510 	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2511 
2512 	if (!cvq->event_cb.callback)
2513 		return;
2514 
2515 	cvq->event_cb.callback(cvq->event_cb.private);
2516 }
2517 
set_cvq_ready(struct mlx5_vdpa_dev * mvdev,bool ready)2518 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2519 {
2520 	struct mlx5_control_vq *cvq = &mvdev->cvq;
2521 
2522 	cvq->ready = ready;
2523 	if (!ready)
2524 		return;
2525 
2526 	cvq->vring.notify = mlx5_cvq_notify;
2527 }
2528 
mlx5_vdpa_set_vq_ready(struct vdpa_device * vdev,u16 idx,bool ready)2529 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2530 {
2531 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2532 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2533 	struct mlx5_vdpa_virtqueue *mvq;
2534 
2535 	if (!mvdev->actual_features)
2536 		return;
2537 
2538 	if (!is_index_valid(mvdev, idx))
2539 		return;
2540 
2541 	if (is_ctrl_vq_idx(mvdev, idx)) {
2542 		set_cvq_ready(mvdev, ready);
2543 		return;
2544 	}
2545 
2546 	mvq = &ndev->vqs[idx];
2547 	if (!ready) {
2548 		suspend_vq(ndev, mvq);
2549 	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
2550 		if (resume_vq(ndev, mvq))
2551 			ready = false;
2552 	}
2553 
2554 	mvq->ready = ready;
2555 }
2556 
mlx5_vdpa_get_vq_ready(struct vdpa_device * vdev,u16 idx)2557 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2558 {
2559 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2560 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2561 
2562 	if (!is_index_valid(mvdev, idx))
2563 		return false;
2564 
2565 	if (is_ctrl_vq_idx(mvdev, idx))
2566 		return mvdev->cvq.ready;
2567 
2568 	return ndev->vqs[idx].ready;
2569 }
2570 
mlx5_vdpa_set_vq_state(struct vdpa_device * vdev,u16 idx,const struct vdpa_vq_state * state)2571 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2572 				  const struct vdpa_vq_state *state)
2573 {
2574 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2575 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2576 	struct mlx5_vdpa_virtqueue *mvq;
2577 
2578 	if (!is_index_valid(mvdev, idx))
2579 		return -EINVAL;
2580 
2581 	if (is_ctrl_vq_idx(mvdev, idx)) {
2582 		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2583 		return 0;
2584 	}
2585 
2586 	mvq = &ndev->vqs[idx];
2587 	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2588 		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2589 		return -EINVAL;
2590 	}
2591 
2592 	mvq->used_idx = state->split.avail_index;
2593 	mvq->avail_idx = state->split.avail_index;
2594 	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
2595 				MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX;
2596 	return 0;
2597 }
2598 
mlx5_vdpa_get_vq_state(struct vdpa_device * vdev,u16 idx,struct vdpa_vq_state * state)2599 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2600 {
2601 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2602 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2603 	struct mlx5_vdpa_virtqueue *mvq;
2604 	struct mlx5_virtq_attr attr;
2605 	int err;
2606 
2607 	if (!is_index_valid(mvdev, idx))
2608 		return -EINVAL;
2609 
2610 	if (is_ctrl_vq_idx(mvdev, idx)) {
2611 		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2612 		return 0;
2613 	}
2614 
2615 	mvq = &ndev->vqs[idx];
2616 	/* If the virtq object was destroyed, use the value saved at
2617 	 * the last minute of suspend_vq. This caters for userspace
2618 	 * that cares about emulating the index after vq is stopped.
2619 	 */
2620 	if (!mvq->initialized) {
2621 		/* Firmware returns a wrong value for the available index.
2622 		 * Since both values should be identical, we take the value of
2623 		 * used_idx which is reported correctly.
2624 		 */
2625 		state->split.avail_index = mvq->used_idx;
2626 		return 0;
2627 	}
2628 
2629 	err = query_virtqueues(ndev, mvq->index, 1, &attr);
2630 	if (err) {
2631 		mlx5_vdpa_err(mvdev, "failed to query virtqueue\n");
2632 		return err;
2633 	}
2634 	state->split.avail_index = attr.used_index;
2635 	return 0;
2636 }
2637 
mlx5_vdpa_get_vq_align(struct vdpa_device * vdev)2638 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2639 {
2640 	return PAGE_SIZE;
2641 }
2642 
mlx5_vdpa_get_vq_group(struct vdpa_device * vdev,u16 idx)2643 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2644 {
2645 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2646 
2647 	if (is_ctrl_vq_idx(mvdev, idx))
2648 		return MLX5_VDPA_CVQ_GROUP;
2649 
2650 	return MLX5_VDPA_DATAVQ_GROUP;
2651 }
2652 
mlx5_vdpa_get_vq_desc_group(struct vdpa_device * vdev,u16 idx)2653 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx)
2654 {
2655 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2656 
2657 	if (is_ctrl_vq_idx(mvdev, idx))
2658 		return MLX5_VDPA_CVQ_GROUP;
2659 
2660 	return MLX5_VDPA_DATAVQ_DESC_GROUP;
2661 }
2662 
mlx_to_vritio_features(u16 dev_features)2663 static u64 mlx_to_vritio_features(u16 dev_features)
2664 {
2665 	u64 result = 0;
2666 
2667 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2668 		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2669 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2670 		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2671 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2672 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2673 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2674 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2675 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2676 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2677 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2678 		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2679 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2680 		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2681 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2682 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2683 	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2684 		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2685 
2686 	return result;
2687 }
2688 
get_supported_features(struct mlx5_core_dev * mdev)2689 static u64 get_supported_features(struct mlx5_core_dev *mdev)
2690 {
2691 	u64 mlx_vdpa_features = 0;
2692 	u16 dev_features;
2693 
2694 	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2695 	mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2696 	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2697 		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2698 	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2699 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2700 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2701 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2702 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2703 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2704 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2705 	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2706 
2707 	return mlx_vdpa_features;
2708 }
2709 
mlx5_vdpa_get_device_features(struct vdpa_device * vdev)2710 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2711 {
2712 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2713 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2714 
2715 	print_features(mvdev, ndev->mvdev.mlx_features, false);
2716 	return ndev->mvdev.mlx_features;
2717 }
2718 
verify_driver_features(struct mlx5_vdpa_dev * mvdev,u64 features)2719 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2720 {
2721 	/* Minimum features to expect */
2722 	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2723 		return -EOPNOTSUPP;
2724 
2725 	/* Double check features combination sent down by the driver.
2726 	 * Fail invalid features due to absence of the depended feature.
2727 	 *
2728 	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2729 	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2730 	 * By failing the invalid features sent down by untrusted drivers,
2731 	 * we're assured the assumption made upon is_index_valid() and
2732 	 * is_ctrl_vq_idx() will not be compromised.
2733 	 */
2734 	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2735             BIT_ULL(VIRTIO_NET_F_MQ))
2736 		return -EINVAL;
2737 
2738 	return 0;
2739 }
2740 
setup_virtqueues(struct mlx5_vdpa_dev * mvdev,bool filled)2741 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2742 {
2743 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2744 	int err;
2745 	int i;
2746 
2747 	for (i = 0; i < mvdev->max_vqs; i++) {
2748 		err = setup_vq(ndev, &ndev->vqs[i], filled);
2749 		if (err)
2750 			goto err_vq;
2751 	}
2752 
2753 	return 0;
2754 
2755 err_vq:
2756 	for (--i; i >= 0; i--)
2757 		teardown_vq(ndev, &ndev->vqs[i]);
2758 
2759 	return err;
2760 }
2761 
teardown_virtqueues(struct mlx5_vdpa_net * ndev)2762 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2763 {
2764 	int i;
2765 
2766 	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2767 		teardown_vq(ndev, &ndev->vqs[i]);
2768 }
2769 
update_cvq_info(struct mlx5_vdpa_dev * mvdev)2770 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2771 {
2772 	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2773 		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2774 			/* MQ supported. CVQ index is right above the last data virtqueue's */
2775 			mvdev->max_idx = mvdev->max_vqs;
2776 		} else {
2777 			/* Only CVQ supportted. data virtqueues occupy indices 0 and 1.
2778 			 * CVQ gets index 2
2779 			 */
2780 			mvdev->max_idx = 2;
2781 		}
2782 	} else {
2783 		/* Two data virtqueues only: one for rx and one for tx */
2784 		mvdev->max_idx = 1;
2785 	}
2786 }
2787 
query_vport_state(struct mlx5_core_dev * mdev,u8 opmod,u16 vport)2788 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2789 {
2790 	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2791 	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2792 	int err;
2793 
2794 	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2795 	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2796 	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2797 	if (vport)
2798 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2799 
2800 	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2801 	if (err)
2802 		return 0;
2803 
2804 	return MLX5_GET(query_vport_state_out, out, state);
2805 }
2806 
get_link_state(struct mlx5_vdpa_dev * mvdev)2807 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2808 {
2809 	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2810 	    VPORT_STATE_UP)
2811 		return true;
2812 
2813 	return false;
2814 }
2815 
update_carrier(struct work_struct * work)2816 static void update_carrier(struct work_struct *work)
2817 {
2818 	struct mlx5_vdpa_wq_ent *wqent;
2819 	struct mlx5_vdpa_dev *mvdev;
2820 	struct mlx5_vdpa_net *ndev;
2821 
2822 	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2823 	mvdev = wqent->mvdev;
2824 	ndev = to_mlx5_vdpa_ndev(mvdev);
2825 	if (get_link_state(mvdev))
2826 		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2827 	else
2828 		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2829 
2830 	if (ndev->config_cb.callback)
2831 		ndev->config_cb.callback(ndev->config_cb.private);
2832 
2833 	kfree(wqent);
2834 }
2835 
queue_link_work(struct mlx5_vdpa_net * ndev)2836 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2837 {
2838 	struct mlx5_vdpa_wq_ent *wqent;
2839 
2840 	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2841 	if (!wqent)
2842 		return -ENOMEM;
2843 
2844 	wqent->mvdev = &ndev->mvdev;
2845 	INIT_WORK(&wqent->work, update_carrier);
2846 	queue_work(ndev->mvdev.wq, &wqent->work);
2847 	return 0;
2848 }
2849 
event_handler(struct notifier_block * nb,unsigned long event,void * param)2850 static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2851 {
2852 	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2853 	struct mlx5_eqe *eqe = param;
2854 	int ret = NOTIFY_DONE;
2855 
2856 	if (ndev->mvdev.suspended)
2857 		return NOTIFY_DONE;
2858 
2859 	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2860 		switch (eqe->sub_type) {
2861 		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2862 		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2863 			if (queue_link_work(ndev))
2864 				return NOTIFY_DONE;
2865 
2866 			ret = NOTIFY_OK;
2867 			break;
2868 		default:
2869 			return NOTIFY_DONE;
2870 		}
2871 		return ret;
2872 	}
2873 	return ret;
2874 }
2875 
register_link_notifier(struct mlx5_vdpa_net * ndev)2876 static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2877 {
2878 	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2879 		return;
2880 
2881 	ndev->nb.notifier_call = event_handler;
2882 	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2883 	ndev->nb_registered = true;
2884 	queue_link_work(ndev);
2885 }
2886 
unregister_link_notifier(struct mlx5_vdpa_net * ndev)2887 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2888 {
2889 	if (!ndev->nb_registered)
2890 		return;
2891 
2892 	ndev->nb_registered = false;
2893 	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2894 	if (ndev->mvdev.wq)
2895 		flush_workqueue(ndev->mvdev.wq);
2896 }
2897 
mlx5_vdpa_get_backend_features(const struct vdpa_device * vdpa)2898 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa)
2899 {
2900 	return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK);
2901 }
2902 
mlx5_vdpa_set_driver_features(struct vdpa_device * vdev,u64 features)2903 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2904 {
2905 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2906 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2907 	u64 old_features = mvdev->actual_features;
2908 	u64 diff_features;
2909 	int err;
2910 
2911 	print_features(mvdev, features, true);
2912 
2913 	err = verify_driver_features(mvdev, features);
2914 	if (err)
2915 		return err;
2916 
2917 	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2918 
2919 	/* Interested in changes of vq features only. */
2920 	if (get_features(old_features) != get_features(mvdev->actual_features)) {
2921 		for (int i = 0; i < mvdev->max_vqs; ++i) {
2922 			struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2923 
2924 			mvq->modified_fields |= (
2925 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION |
2926 				MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES
2927 			);
2928 		}
2929 	}
2930 
2931 	/* When below features diverge from initial device features, VQs need a full teardown. */
2932 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \
2933 			     BIT_ULL(VIRTIO_NET_F_CSUM) | \
2934 			     BIT_ULL(VIRTIO_F_VERSION_1))
2935 
2936 	diff_features = mvdev->mlx_features ^ mvdev->actual_features;
2937 	ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK);
2938 
2939 	update_cvq_info(mvdev);
2940 	return err;
2941 }
2942 
mlx5_vdpa_set_config_cb(struct vdpa_device * vdev,struct vdpa_callback * cb)2943 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2944 {
2945 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2946 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2947 
2948 	ndev->config_cb = *cb;
2949 }
2950 
2951 #define MLX5_VDPA_MAX_VQ_ENTRIES 256
mlx5_vdpa_get_vq_num_max(struct vdpa_device * vdev)2952 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2953 {
2954 	return MLX5_VDPA_MAX_VQ_ENTRIES;
2955 }
2956 
mlx5_vdpa_get_device_id(struct vdpa_device * vdev)2957 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2958 {
2959 	return VIRTIO_ID_NET;
2960 }
2961 
mlx5_vdpa_get_vendor_id(struct vdpa_device * vdev)2962 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2963 {
2964 	return PCI_VENDOR_ID_MELLANOX;
2965 }
2966 
mlx5_vdpa_get_status(struct vdpa_device * vdev)2967 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2968 {
2969 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2970 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2971 
2972 	print_status(mvdev, ndev->mvdev.status, false);
2973 	return ndev->mvdev.status;
2974 }
2975 
save_channel_info(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq)2976 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2977 {
2978 	struct mlx5_vq_restore_info *ri = &mvq->ri;
2979 	struct mlx5_virtq_attr attr = {};
2980 	int err;
2981 
2982 	if (mvq->initialized) {
2983 		err = query_virtqueues(ndev, mvq->index, 1, &attr);
2984 		if (err)
2985 			return err;
2986 	}
2987 
2988 	ri->avail_index = attr.available_index;
2989 	ri->used_index = attr.used_index;
2990 	ri->ready = mvq->ready;
2991 	ri->num_ent = mvq->num_ent;
2992 	ri->desc_addr = mvq->desc_addr;
2993 	ri->device_addr = mvq->device_addr;
2994 	ri->driver_addr = mvq->driver_addr;
2995 	ri->map = mvq->map;
2996 	ri->restore = true;
2997 	return 0;
2998 }
2999 
save_channels_info(struct mlx5_vdpa_net * ndev)3000 static int save_channels_info(struct mlx5_vdpa_net *ndev)
3001 {
3002 	int i;
3003 
3004 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3005 		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
3006 		save_channel_info(ndev, &ndev->vqs[i]);
3007 	}
3008 	return 0;
3009 }
3010 
mlx5_clear_vqs(struct mlx5_vdpa_net * ndev)3011 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
3012 {
3013 	int i;
3014 
3015 	for (i = 0; i < ndev->mvdev.max_vqs; i++)
3016 		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3017 }
3018 
restore_channels_info(struct mlx5_vdpa_net * ndev)3019 static void restore_channels_info(struct mlx5_vdpa_net *ndev)
3020 {
3021 	struct mlx5_vdpa_virtqueue *mvq;
3022 	struct mlx5_vq_restore_info *ri;
3023 	int i;
3024 
3025 	mlx5_clear_vqs(ndev);
3026 	mvqs_set_defaults(ndev);
3027 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3028 		mvq = &ndev->vqs[i];
3029 		ri = &mvq->ri;
3030 		if (!ri->restore)
3031 			continue;
3032 
3033 		mvq->avail_idx = ri->avail_index;
3034 		mvq->used_idx = ri->used_index;
3035 		mvq->ready = ri->ready;
3036 		mvq->num_ent = ri->num_ent;
3037 		mvq->desc_addr = ri->desc_addr;
3038 		mvq->device_addr = ri->device_addr;
3039 		mvq->driver_addr = ri->driver_addr;
3040 		mvq->map = ri->map;
3041 	}
3042 }
3043 
mlx5_vdpa_change_map(struct mlx5_vdpa_dev * mvdev,struct mlx5_vdpa_mr * new_mr,unsigned int asid)3044 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
3045 				struct mlx5_vdpa_mr *new_mr,
3046 				unsigned int asid)
3047 {
3048 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3049 	bool teardown = !is_resumable(ndev);
3050 	int err;
3051 
3052 	suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3053 	if (teardown) {
3054 		err = save_channels_info(ndev);
3055 		if (err)
3056 			return err;
3057 
3058 		teardown_vq_resources(ndev);
3059 	}
3060 
3061 	mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3062 
3063 	for (int i = 0; i < mvdev->max_vqs; i++)
3064 		ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY |
3065 						MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
3066 
3067 	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
3068 		return 0;
3069 
3070 	if (teardown) {
3071 		restore_channels_info(ndev);
3072 		err = setup_vq_resources(ndev, true);
3073 		if (err)
3074 			return err;
3075 	}
3076 
3077 	resume_vqs(ndev, 0, ndev->cur_num_vqs);
3078 
3079 	return 0;
3080 }
3081 
3082 /* reslock must be held for this function */
setup_vq_resources(struct mlx5_vdpa_net * ndev,bool filled)3083 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled)
3084 {
3085 	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
3086 	int err;
3087 
3088 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3089 
3090 	if (ndev->setup) {
3091 		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
3092 		err = 0;
3093 		goto out;
3094 	}
3095 	mlx5_vdpa_add_debugfs(ndev);
3096 
3097 	err = read_umem_params(ndev);
3098 	if (err)
3099 		goto err_setup;
3100 
3101 	err = setup_virtqueues(mvdev, filled);
3102 	if (err) {
3103 		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
3104 		goto err_setup;
3105 	}
3106 
3107 	err = create_rqt(ndev);
3108 	if (err) {
3109 		mlx5_vdpa_warn(mvdev, "create_rqt\n");
3110 		goto err_rqt;
3111 	}
3112 
3113 	err = create_tir(ndev);
3114 	if (err) {
3115 		mlx5_vdpa_warn(mvdev, "create_tir\n");
3116 		goto err_tir;
3117 	}
3118 
3119 	err = setup_steering(ndev);
3120 	if (err) {
3121 		mlx5_vdpa_warn(mvdev, "setup_steering\n");
3122 		goto err_fwd;
3123 	}
3124 	ndev->setup = true;
3125 
3126 	return 0;
3127 
3128 err_fwd:
3129 	destroy_tir(ndev);
3130 err_tir:
3131 	destroy_rqt(ndev);
3132 err_rqt:
3133 	teardown_virtqueues(ndev);
3134 err_setup:
3135 	mlx5_vdpa_remove_debugfs(ndev);
3136 out:
3137 	return err;
3138 }
3139 
3140 /* reslock must be held for this function */
teardown_vq_resources(struct mlx5_vdpa_net * ndev)3141 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev)
3142 {
3143 
3144 	WARN_ON(!rwsem_is_locked(&ndev->reslock));
3145 
3146 	if (!ndev->setup)
3147 		return;
3148 
3149 	mlx5_vdpa_remove_debugfs(ndev);
3150 	teardown_steering(ndev);
3151 	destroy_tir(ndev);
3152 	destroy_rqt(ndev);
3153 	teardown_virtqueues(ndev);
3154 	ndev->setup = false;
3155 	ndev->needs_teardown = false;
3156 }
3157 
setup_cvq_vring(struct mlx5_vdpa_dev * mvdev)3158 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
3159 {
3160 	struct mlx5_control_vq *cvq = &mvdev->cvq;
3161 	int err = 0;
3162 
3163 	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
3164 		u16 idx = cvq->vring.last_avail_idx;
3165 
3166 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
3167 					cvq->vring.vring.num, false,
3168 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
3169 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
3170 					(struct vring_used *)(uintptr_t)cvq->device_addr);
3171 
3172 		if (!err)
3173 			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
3174 	}
3175 	return err;
3176 }
3177 
mlx5_vdpa_set_status(struct vdpa_device * vdev,u8 status)3178 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
3179 {
3180 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3181 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3182 	int err;
3183 
3184 	print_status(mvdev, status, true);
3185 
3186 	down_write(&ndev->reslock);
3187 
3188 	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
3189 		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
3190 			err = setup_cvq_vring(mvdev);
3191 			if (err) {
3192 				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
3193 				goto err_setup;
3194 			}
3195 			register_link_notifier(ndev);
3196 
3197 			if (ndev->needs_teardown)
3198 				teardown_vq_resources(ndev);
3199 
3200 			if (ndev->setup) {
3201 				err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3202 				if (err) {
3203 					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
3204 					goto err_driver;
3205 				}
3206 			} else {
3207 				err = setup_vq_resources(ndev, true);
3208 				if (err) {
3209 					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
3210 					goto err_driver;
3211 				}
3212 			}
3213 		} else {
3214 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
3215 			goto err_clear;
3216 		}
3217 	}
3218 
3219 	ndev->mvdev.status = status;
3220 	up_write(&ndev->reslock);
3221 	return;
3222 
3223 err_driver:
3224 	unregister_link_notifier(ndev);
3225 err_setup:
3226 	mlx5_vdpa_clean_mrs(&ndev->mvdev);
3227 	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
3228 err_clear:
3229 	up_write(&ndev->reslock);
3230 }
3231 
init_group_to_asid_map(struct mlx5_vdpa_dev * mvdev)3232 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
3233 {
3234 	int i;
3235 
3236 	/* default mapping all groups are mapped to asid 0 */
3237 	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
3238 		mvdev->mres.group2asid[i] = 0;
3239 }
3240 
needs_vqs_reset(const struct mlx5_vdpa_dev * mvdev)3241 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev)
3242 {
3243 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3244 	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0];
3245 
3246 	if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)
3247 		return true;
3248 
3249 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
3250 		return true;
3251 
3252 	return mvq->modified_fields & (
3253 		MLX5_VIRTQ_MODIFY_MASK_STATE |
3254 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS |
3255 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX |
3256 		MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX
3257 	);
3258 }
3259 
mlx5_vdpa_compat_reset(struct vdpa_device * vdev,u32 flags)3260 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
3261 {
3262 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3263 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3264 	bool vq_reset;
3265 
3266 	print_status(mvdev, 0, true);
3267 	mlx5_vdpa_info(mvdev, "performing device reset\n");
3268 
3269 	down_write(&ndev->reslock);
3270 	unregister_link_notifier(ndev);
3271 	vq_reset = needs_vqs_reset(mvdev);
3272 	if (vq_reset) {
3273 		teardown_vq_resources(ndev);
3274 		mvqs_set_defaults(ndev);
3275 	}
3276 
3277 	if (flags & VDPA_RESET_F_CLEAN_MAP)
3278 		mlx5_vdpa_clean_mrs(&ndev->mvdev);
3279 	ndev->mvdev.status = 0;
3280 	ndev->mvdev.suspended = false;
3281 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3282 	ndev->mvdev.cvq.ready = false;
3283 	ndev->mvdev.cvq.received_desc = 0;
3284 	ndev->mvdev.cvq.completed_desc = 0;
3285 	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
3286 	ndev->mvdev.actual_features = 0;
3287 	init_group_to_asid_map(mvdev);
3288 	++mvdev->generation;
3289 
3290 	if ((flags & VDPA_RESET_F_CLEAN_MAP) &&
3291 	    MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3292 		if (mlx5_vdpa_create_dma_mr(mvdev))
3293 			mlx5_vdpa_err(mvdev, "create MR failed\n");
3294 	}
3295 	if (vq_reset)
3296 		setup_vq_resources(ndev, false);
3297 	up_write(&ndev->reslock);
3298 
3299 	return 0;
3300 }
3301 
mlx5_vdpa_reset(struct vdpa_device * vdev)3302 static int mlx5_vdpa_reset(struct vdpa_device *vdev)
3303 {
3304 	return mlx5_vdpa_compat_reset(vdev, 0);
3305 }
3306 
mlx5_vdpa_get_config_size(struct vdpa_device * vdev)3307 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
3308 {
3309 	return sizeof(struct virtio_net_config);
3310 }
3311 
mlx5_vdpa_get_config(struct vdpa_device * vdev,unsigned int offset,void * buf,unsigned int len)3312 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
3313 				 unsigned int len)
3314 {
3315 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3316 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3317 
3318 	if (offset + len <= sizeof(struct virtio_net_config))
3319 		memcpy(buf, (u8 *)&ndev->config + offset, len);
3320 }
3321 
mlx5_vdpa_set_config(struct vdpa_device * vdev,unsigned int offset,const void * buf,unsigned int len)3322 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
3323 				 unsigned int len)
3324 {
3325 	/* not supported */
3326 }
3327 
mlx5_vdpa_get_generation(struct vdpa_device * vdev)3328 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
3329 {
3330 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3331 
3332 	return mvdev->generation;
3333 }
3334 
set_map_data(struct mlx5_vdpa_dev * mvdev,struct vhost_iotlb * iotlb,unsigned int asid)3335 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
3336 			unsigned int asid)
3337 {
3338 	struct mlx5_vdpa_mr *new_mr;
3339 	int err;
3340 
3341 	if (asid >= MLX5_VDPA_NUM_AS)
3342 		return -EINVAL;
3343 
3344 	if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) {
3345 		new_mr = mlx5_vdpa_create_mr(mvdev, iotlb);
3346 		if (IS_ERR(new_mr)) {
3347 			err = PTR_ERR(new_mr);
3348 			mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err);
3349 			return err;
3350 		}
3351 	} else {
3352 		/* Empty iotlbs don't have an mr but will clear the previous mr. */
3353 		new_mr = NULL;
3354 	}
3355 
3356 	if (!mvdev->mres.mr[asid]) {
3357 		mlx5_vdpa_update_mr(mvdev, new_mr, asid);
3358 	} else {
3359 		err = mlx5_vdpa_change_map(mvdev, new_mr, asid);
3360 		if (err) {
3361 			mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err);
3362 			goto out_err;
3363 		}
3364 	}
3365 
3366 	return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid);
3367 
3368 out_err:
3369 	mlx5_vdpa_put_mr(mvdev, new_mr);
3370 	return err;
3371 }
3372 
mlx5_vdpa_set_map(struct vdpa_device * vdev,unsigned int asid,struct vhost_iotlb * iotlb)3373 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
3374 			     struct vhost_iotlb *iotlb)
3375 {
3376 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3377 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3378 	int err = -EINVAL;
3379 
3380 	down_write(&ndev->reslock);
3381 	err = set_map_data(mvdev, iotlb, asid);
3382 	up_write(&ndev->reslock);
3383 	return err;
3384 }
3385 
mlx5_vdpa_reset_map(struct vdpa_device * vdev,unsigned int asid)3386 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
3387 {
3388 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3389 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3390 	int err;
3391 
3392 	down_write(&ndev->reslock);
3393 	err = mlx5_vdpa_reset_mr(mvdev, asid);
3394 	up_write(&ndev->reslock);
3395 	return err;
3396 }
3397 
mlx5_get_vq_dma_dev(struct vdpa_device * vdev,u16 idx)3398 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
3399 {
3400 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3401 
3402 	if (is_ctrl_vq_idx(mvdev, idx))
3403 		return &vdev->dev;
3404 
3405 	return mvdev->vdev.dma_dev;
3406 }
3407 
free_irqs(struct mlx5_vdpa_net * ndev)3408 static void free_irqs(struct mlx5_vdpa_net *ndev)
3409 {
3410 	struct mlx5_vdpa_irq_pool_entry *ent;
3411 	int i;
3412 
3413 	if (!msix_mode_supported(&ndev->mvdev))
3414 		return;
3415 
3416 	if (!ndev->irqp.entries)
3417 		return;
3418 
3419 	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
3420 		ent = ndev->irqp.entries + i;
3421 		if (ent->map.virq)
3422 			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
3423 	}
3424 	kfree(ndev->irqp.entries);
3425 }
3426 
mlx5_vdpa_free(struct vdpa_device * vdev)3427 static void mlx5_vdpa_free(struct vdpa_device *vdev)
3428 {
3429 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3430 	struct mlx5_core_dev *pfmdev;
3431 	struct mlx5_vdpa_net *ndev;
3432 
3433 	ndev = to_mlx5_vdpa_ndev(mvdev);
3434 
3435 	/* Functions called here should be able to work with
3436 	 * uninitialized resources.
3437 	 */
3438 	free_fixed_resources(ndev);
3439 	mlx5_vdpa_clean_mrs(mvdev);
3440 	mlx5_vdpa_destroy_mr_resources(&ndev->mvdev);
3441 	if (!is_zero_ether_addr(ndev->config.mac)) {
3442 		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3443 		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3444 	}
3445 	mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx);
3446 	mlx5_vdpa_free_resources(&ndev->mvdev);
3447 	free_irqs(ndev);
3448 	kfree(ndev->event_cbs);
3449 	kfree(ndev->vqs);
3450 }
3451 
mlx5_get_vq_notification(struct vdpa_device * vdev,u16 idx)3452 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3453 {
3454 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3455 	struct vdpa_notification_area ret = {};
3456 	struct mlx5_vdpa_net *ndev;
3457 	phys_addr_t addr;
3458 
3459 	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3460 		return ret;
3461 
3462 	/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
3463 	 * notification to avoid the risk of mapping pages that contain BAR of more
3464 	 * than one SF
3465 	 */
3466 	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3467 		return ret;
3468 
3469 	ndev = to_mlx5_vdpa_ndev(mvdev);
3470 	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3471 	ret.addr = addr;
3472 	ret.size = PAGE_SIZE;
3473 	return ret;
3474 }
3475 
mlx5_get_vq_irq(struct vdpa_device * vdev,u16 idx)3476 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3477 {
3478 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3479 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3480 	struct mlx5_vdpa_virtqueue *mvq;
3481 
3482 	if (!is_index_valid(mvdev, idx))
3483 		return -EINVAL;
3484 
3485 	if (is_ctrl_vq_idx(mvdev, idx))
3486 		return -EOPNOTSUPP;
3487 
3488 	mvq = &ndev->vqs[idx];
3489 	if (!mvq->map.virq)
3490 		return -EOPNOTSUPP;
3491 
3492 	return mvq->map.virq;
3493 }
3494 
mlx5_vdpa_get_driver_features(struct vdpa_device * vdev)3495 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3496 {
3497 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3498 
3499 	return mvdev->actual_features;
3500 }
3501 
counter_set_query(struct mlx5_vdpa_net * ndev,struct mlx5_vdpa_virtqueue * mvq,u64 * received_desc,u64 * completed_desc)3502 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3503 			     u64 *received_desc, u64 *completed_desc)
3504 {
3505 	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3506 	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3507 	void *cmd_hdr;
3508 	void *ctx;
3509 	int err;
3510 
3511 	if (!counters_supported(&ndev->mvdev))
3512 		return -EOPNOTSUPP;
3513 
3514 	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3515 		return -EAGAIN;
3516 
3517 	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3518 
3519 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3520 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3521 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3522 	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3523 
3524 	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3525 	if (err)
3526 		return err;
3527 
3528 	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3529 	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3530 	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3531 	return 0;
3532 }
3533 
mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device * vdev,u16 idx,struct sk_buff * msg,struct netlink_ext_ack * extack)3534 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3535 					 struct sk_buff *msg,
3536 					 struct netlink_ext_ack *extack)
3537 {
3538 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3539 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3540 	struct mlx5_vdpa_virtqueue *mvq;
3541 	struct mlx5_control_vq *cvq;
3542 	u64 received_desc;
3543 	u64 completed_desc;
3544 	int err = 0;
3545 
3546 	down_read(&ndev->reslock);
3547 	if (!is_index_valid(mvdev, idx)) {
3548 		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3549 		err = -EINVAL;
3550 		goto out_err;
3551 	}
3552 
3553 	if (idx == ctrl_vq_idx(mvdev)) {
3554 		cvq = &mvdev->cvq;
3555 		received_desc = cvq->received_desc;
3556 		completed_desc = cvq->completed_desc;
3557 		goto out;
3558 	}
3559 
3560 	mvq = &ndev->vqs[idx];
3561 	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3562 	if (err) {
3563 		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3564 		goto out_err;
3565 	}
3566 
3567 out:
3568 	err = -EMSGSIZE;
3569 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3570 		goto out_err;
3571 
3572 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3573 			      VDPA_ATTR_PAD))
3574 		goto out_err;
3575 
3576 	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3577 		goto out_err;
3578 
3579 	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3580 			      VDPA_ATTR_PAD))
3581 		goto out_err;
3582 
3583 	err = 0;
3584 out_err:
3585 	up_read(&ndev->reslock);
3586 	return err;
3587 }
3588 
mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev * mvdev)3589 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3590 {
3591 	struct mlx5_control_vq *cvq;
3592 
3593 	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3594 		return;
3595 
3596 	cvq = &mvdev->cvq;
3597 	cvq->ready = false;
3598 }
3599 
mlx5_vdpa_suspend(struct vdpa_device * vdev)3600 static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3601 {
3602 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3603 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3604 	int err;
3605 
3606 	mlx5_vdpa_info(mvdev, "suspending device\n");
3607 
3608 	down_write(&ndev->reslock);
3609 	err = suspend_vqs(ndev, 0, ndev->cur_num_vqs);
3610 	mlx5_vdpa_cvq_suspend(mvdev);
3611 	mvdev->suspended = true;
3612 	up_write(&ndev->reslock);
3613 
3614 	return err;
3615 }
3616 
mlx5_vdpa_resume(struct vdpa_device * vdev)3617 static int mlx5_vdpa_resume(struct vdpa_device *vdev)
3618 {
3619 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3620 	struct mlx5_vdpa_net *ndev;
3621 	int err;
3622 
3623 	ndev = to_mlx5_vdpa_ndev(mvdev);
3624 
3625 	mlx5_vdpa_info(mvdev, "resuming device\n");
3626 
3627 	down_write(&ndev->reslock);
3628 	mvdev->suspended = false;
3629 	err = resume_vqs(ndev, 0, ndev->cur_num_vqs);
3630 	queue_link_work(ndev);
3631 	up_write(&ndev->reslock);
3632 
3633 	return err;
3634 }
3635 
mlx5_set_group_asid(struct vdpa_device * vdev,u32 group,unsigned int asid)3636 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3637 			       unsigned int asid)
3638 {
3639 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3640 	int err = 0;
3641 
3642 	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3643 		return -EINVAL;
3644 
3645 	mvdev->mres.group2asid[group] = asid;
3646 
3647 	mutex_lock(&mvdev->mres.lock);
3648 	if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid])
3649 		err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid);
3650 	mutex_unlock(&mvdev->mres.lock);
3651 
3652 	return err;
3653 }
3654 
3655 static const struct vdpa_config_ops mlx5_vdpa_ops = {
3656 	.set_vq_address = mlx5_vdpa_set_vq_address,
3657 	.set_vq_num = mlx5_vdpa_set_vq_num,
3658 	.kick_vq = mlx5_vdpa_kick_vq,
3659 	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3660 	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3661 	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3662 	.set_vq_state = mlx5_vdpa_set_vq_state,
3663 	.get_vq_state = mlx5_vdpa_get_vq_state,
3664 	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3665 	.get_vq_notification = mlx5_get_vq_notification,
3666 	.get_vq_irq = mlx5_get_vq_irq,
3667 	.get_vq_align = mlx5_vdpa_get_vq_align,
3668 	.get_vq_group = mlx5_vdpa_get_vq_group,
3669 	.get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */
3670 	.get_device_features = mlx5_vdpa_get_device_features,
3671 	.get_backend_features = mlx5_vdpa_get_backend_features,
3672 	.set_driver_features = mlx5_vdpa_set_driver_features,
3673 	.get_driver_features = mlx5_vdpa_get_driver_features,
3674 	.set_config_cb = mlx5_vdpa_set_config_cb,
3675 	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3676 	.get_device_id = mlx5_vdpa_get_device_id,
3677 	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3678 	.get_status = mlx5_vdpa_get_status,
3679 	.set_status = mlx5_vdpa_set_status,
3680 	.reset = mlx5_vdpa_reset,
3681 	.compat_reset = mlx5_vdpa_compat_reset,
3682 	.get_config_size = mlx5_vdpa_get_config_size,
3683 	.get_config = mlx5_vdpa_get_config,
3684 	.set_config = mlx5_vdpa_set_config,
3685 	.get_generation = mlx5_vdpa_get_generation,
3686 	.set_map = mlx5_vdpa_set_map,
3687 	.reset_map = mlx5_vdpa_reset_map,
3688 	.set_group_asid = mlx5_set_group_asid,
3689 	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3690 	.free = mlx5_vdpa_free,
3691 	.suspend = mlx5_vdpa_suspend,
3692 	.resume = mlx5_vdpa_resume, /* Op disabled if not supported. */
3693 };
3694 
query_mtu(struct mlx5_core_dev * mdev,u16 * mtu)3695 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3696 {
3697 	u16 hw_mtu;
3698 	int err;
3699 
3700 	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3701 	if (err)
3702 		return err;
3703 
3704 	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3705 	return 0;
3706 }
3707 
alloc_fixed_resources(struct mlx5_vdpa_net * ndev)3708 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev)
3709 {
3710 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3711 	int err;
3712 
3713 	if (res->valid) {
3714 		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3715 		return -EEXIST;
3716 	}
3717 
3718 	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3719 	if (err)
3720 		return err;
3721 
3722 	err = create_tis(ndev);
3723 	if (err)
3724 		goto err_tis;
3725 
3726 	res->valid = true;
3727 
3728 	return 0;
3729 
3730 err_tis:
3731 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3732 	return err;
3733 }
3734 
free_fixed_resources(struct mlx5_vdpa_net * ndev)3735 static void free_fixed_resources(struct mlx5_vdpa_net *ndev)
3736 {
3737 	struct mlx5_vdpa_net_resources *res = &ndev->res;
3738 
3739 	if (!res->valid)
3740 		return;
3741 
3742 	destroy_tis(ndev);
3743 	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3744 	res->valid = false;
3745 }
3746 
mvqs_set_defaults(struct mlx5_vdpa_net * ndev)3747 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev)
3748 {
3749 	struct mlx5_vdpa_virtqueue *mvq;
3750 	int i;
3751 
3752 	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3753 		mvq = &ndev->vqs[i];
3754 		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3755 		mvq->index = i;
3756 		mvq->ndev = ndev;
3757 		mvq->fwqp.fw = true;
3758 		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3759 		mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE;
3760 	}
3761 }
3762 
3763 struct mlx5_vdpa_mgmtdev {
3764 	struct vdpa_mgmt_dev mgtdev;
3765 	struct mlx5_adev *madev;
3766 	struct mlx5_vdpa_net *ndev;
3767 	struct vdpa_config_ops vdpa_ops;
3768 };
3769 
config_func_mtu(struct mlx5_core_dev * mdev,u16 mtu)3770 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3771 {
3772 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3773 	void *in;
3774 	int err;
3775 
3776 	in = kvzalloc(inlen, GFP_KERNEL);
3777 	if (!in)
3778 		return -ENOMEM;
3779 
3780 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3781 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3782 		 mtu + MLX5V_ETH_HARD_MTU);
3783 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3784 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3785 
3786 	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3787 
3788 	kvfree(in);
3789 	return err;
3790 }
3791 
allocate_irqs(struct mlx5_vdpa_net * ndev)3792 static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3793 {
3794 	struct mlx5_vdpa_irq_pool_entry *ent;
3795 	int i;
3796 
3797 	if (!msix_mode_supported(&ndev->mvdev))
3798 		return;
3799 
3800 	if (!ndev->mvdev.mdev->pdev)
3801 		return;
3802 
3803 	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3804 	if (!ndev->irqp.entries)
3805 		return;
3806 
3807 
3808 	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3809 		ent = ndev->irqp.entries + i;
3810 		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3811 			 dev_name(&ndev->mvdev.vdev.dev), i);
3812 		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3813 		if (!ent->map.virq)
3814 			return;
3815 
3816 		ndev->irqp.num_ent++;
3817 	}
3818 }
3819 
mlx5_vdpa_dev_add(struct vdpa_mgmt_dev * v_mdev,const char * name,const struct vdpa_dev_set_config * add_config)3820 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3821 			     const struct vdpa_dev_set_config *add_config)
3822 {
3823 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3824 	struct virtio_net_config *config;
3825 	struct mlx5_core_dev *pfmdev;
3826 	struct mlx5_vdpa_dev *mvdev;
3827 	struct mlx5_vdpa_net *ndev;
3828 	struct mlx5_core_dev *mdev;
3829 	u64 device_features;
3830 	u32 max_vqs;
3831 	u16 mtu;
3832 	int err;
3833 
3834 	if (mgtdev->ndev)
3835 		return -ENOSPC;
3836 
3837 	mdev = mgtdev->madev->mdev;
3838 	device_features = mgtdev->mgtdev.supported_features;
3839 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3840 		if (add_config->device_features & ~device_features) {
3841 			dev_warn(mdev->device,
3842 				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3843 				 add_config->device_features, device_features);
3844 			return -EINVAL;
3845 		}
3846 		device_features &= add_config->device_features;
3847 	} else {
3848 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3849 	}
3850 	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3851 	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3852 		dev_warn(mdev->device,
3853 			 "Must provision minimum features 0x%llx for this device",
3854 			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3855 		return -EOPNOTSUPP;
3856 	}
3857 
3858 	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3859 	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3860 		dev_warn(mdev->device, "missing support for split virtqueues\n");
3861 		return -EOPNOTSUPP;
3862 	}
3863 
3864 	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3865 			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3866 	if (max_vqs < 2) {
3867 		dev_warn(mdev->device,
3868 			 "%d virtqueues are supported. At least 2 are required\n",
3869 			 max_vqs);
3870 		return -EAGAIN;
3871 	}
3872 
3873 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3874 		if (add_config->net.max_vq_pairs > max_vqs / 2)
3875 			return -EINVAL;
3876 		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3877 	} else {
3878 		max_vqs = 2;
3879 	}
3880 
3881 	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3882 				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3883 	if (IS_ERR(ndev))
3884 		return PTR_ERR(ndev);
3885 
3886 	ndev->mvdev.max_vqs = max_vqs;
3887 	mvdev = &ndev->mvdev;
3888 	mvdev->mdev = mdev;
3889 	/* cpu_to_mlx5vdpa16() below depends on this flag */
3890 	mvdev->actual_features =
3891 			(device_features & BIT_ULL(VIRTIO_F_VERSION_1));
3892 
3893 	mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);
3894 
3895 	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3896 	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3897 	if (!ndev->vqs || !ndev->event_cbs) {
3898 		err = -ENOMEM;
3899 		goto err_alloc;
3900 	}
3901 	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;
3902 
3903 	mvqs_set_defaults(ndev);
3904 	allocate_irqs(ndev);
3905 	init_rwsem(&ndev->reslock);
3906 	config = &ndev->config;
3907 
3908 	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3909 		err = config_func_mtu(mdev, add_config->net.mtu);
3910 		if (err)
3911 			goto err_alloc;
3912 	}
3913 
3914 	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3915 		err = query_mtu(mdev, &mtu);
3916 		if (err)
3917 			goto err_alloc;
3918 
3919 		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3920 	}
3921 
3922 	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3923 		if (get_link_state(mvdev))
3924 			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3925 		else
3926 			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3927 	}
3928 
3929 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3930 		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3931 	/* No bother setting mac address in config if not going to provision _F_MAC */
3932 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3933 		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3934 		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3935 		if (err)
3936 			goto err_alloc;
3937 	}
3938 
3939 	if (!is_zero_ether_addr(config->mac)) {
3940 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3941 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3942 		if (err)
3943 			goto err_alloc;
3944 	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3945 		/*
3946 		 * We used to clear _F_MAC feature bit if seeing
3947 		 * zero mac address when device features are not
3948 		 * specifically provisioned. Keep the behaviour
3949 		 * so old scripts do not break.
3950 		 */
3951 		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3952 	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3953 		/* Don't provision zero mac address for _F_MAC */
3954 		mlx5_vdpa_warn(&ndev->mvdev,
3955 			       "No mac address provisioned?\n");
3956 		err = -EINVAL;
3957 		goto err_alloc;
3958 	}
3959 
3960 	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
3961 		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3962 		ndev->rqt_size = max_vqs / 2;
3963 	} else {
3964 		ndev->rqt_size = 1;
3965 	}
3966 
3967 	ndev->mvdev.mlx_features = device_features;
3968 	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3969 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3970 	if (err)
3971 		goto err_alloc;
3972 
3973 	err = mlx5_vdpa_init_mr_resources(mvdev);
3974 	if (err)
3975 		goto err_alloc;
3976 
3977 	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3978 		err = mlx5_vdpa_create_dma_mr(mvdev);
3979 		if (err)
3980 			goto err_alloc;
3981 	}
3982 
3983 	err = alloc_fixed_resources(ndev);
3984 	if (err)
3985 		goto err_alloc;
3986 
3987 	ndev->cvq_ent.mvdev = mvdev;
3988 	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3989 	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3990 	if (!mvdev->wq) {
3991 		err = -ENOMEM;
3992 		goto err_alloc;
3993 	}
3994 
3995 	mvdev->vdev.mdev = &mgtdev->mgtdev;
3996 	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3997 	if (err)
3998 		goto err_reg;
3999 
4000 	mgtdev->ndev = ndev;
4001 
4002 	/* For virtio-vdpa, the device was set up during device register. */
4003 	if (ndev->setup)
4004 		return 0;
4005 
4006 	down_write(&ndev->reslock);
4007 	err = setup_vq_resources(ndev, false);
4008 	up_write(&ndev->reslock);
4009 	if (err)
4010 		goto err_setup_vq_res;
4011 
4012 	return 0;
4013 
4014 err_setup_vq_res:
4015 	_vdpa_unregister_device(&mvdev->vdev);
4016 err_reg:
4017 	destroy_workqueue(mvdev->wq);
4018 err_alloc:
4019 	put_device(&mvdev->vdev.dev);
4020 	return err;
4021 }
4022 
mlx5_vdpa_dev_del(struct vdpa_mgmt_dev * v_mdev,struct vdpa_device * dev)4023 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
4024 {
4025 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
4026 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
4027 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
4028 	struct workqueue_struct *wq;
4029 
4030 	unregister_link_notifier(ndev);
4031 	_vdpa_unregister_device(dev);
4032 
4033 	down_write(&ndev->reslock);
4034 	teardown_vq_resources(ndev);
4035 	up_write(&ndev->reslock);
4036 
4037 	wq = mvdev->wq;
4038 	mvdev->wq = NULL;
4039 	destroy_workqueue(wq);
4040 	mgtdev->ndev = NULL;
4041 }
4042 
mlx5_vdpa_set_attr(struct vdpa_mgmt_dev * v_mdev,struct vdpa_device * dev,const struct vdpa_dev_set_config * add_config)4043 static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
4044 			      const struct vdpa_dev_set_config *add_config)
4045 {
4046 	struct virtio_net_config *config;
4047 	struct mlx5_core_dev *pfmdev;
4048 	struct mlx5_vdpa_dev *mvdev;
4049 	struct mlx5_vdpa_net *ndev;
4050 	struct mlx5_core_dev *mdev;
4051 	int err = -EOPNOTSUPP;
4052 
4053 	mvdev = to_mvdev(dev);
4054 	ndev = to_mlx5_vdpa_ndev(mvdev);
4055 	mdev = mvdev->mdev;
4056 	config = &ndev->config;
4057 
4058 	down_write(&ndev->reslock);
4059 	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
4060 		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
4061 		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
4062 		if (!err)
4063 			ether_addr_copy(config->mac, add_config->net.mac);
4064 	}
4065 
4066 	up_write(&ndev->reslock);
4067 	return err;
4068 }
4069 
4070 static const struct vdpa_mgmtdev_ops mdev_ops = {
4071 	.dev_add = mlx5_vdpa_dev_add,
4072 	.dev_del = mlx5_vdpa_dev_del,
4073 	.dev_set_attr = mlx5_vdpa_set_attr,
4074 };
4075 
4076 static struct virtio_device_id id_table[] = {
4077 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
4078 	{ 0 },
4079 };
4080 
mlx5v_probe(struct auxiliary_device * adev,const struct auxiliary_device_id * id)4081 static int mlx5v_probe(struct auxiliary_device *adev,
4082 		       const struct auxiliary_device_id *id)
4083 
4084 {
4085 	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
4086 	struct mlx5_core_dev *mdev = madev->mdev;
4087 	struct mlx5_vdpa_mgmtdev *mgtdev;
4088 	int err;
4089 
4090 	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
4091 	if (!mgtdev)
4092 		return -ENOMEM;
4093 
4094 	mgtdev->mgtdev.ops = &mdev_ops;
4095 	mgtdev->mgtdev.device = mdev->device;
4096 	mgtdev->mgtdev.id_table = id_table;
4097 	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
4098 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
4099 					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
4100 					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
4101 	mgtdev->mgtdev.max_supported_vqs =
4102 		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
4103 	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
4104 	mgtdev->madev = madev;
4105 	mgtdev->vdpa_ops = mlx5_vdpa_ops;
4106 
4107 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
4108 		mgtdev->vdpa_ops.get_vq_desc_group = NULL;
4109 
4110 	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
4111 		mgtdev->vdpa_ops.resume = NULL;
4112 
4113 	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
4114 	if (err)
4115 		goto reg_err;
4116 
4117 	auxiliary_set_drvdata(adev, mgtdev);
4118 
4119 	return 0;
4120 
4121 reg_err:
4122 	kfree(mgtdev);
4123 	return err;
4124 }
4125 
mlx5v_remove(struct auxiliary_device * adev)4126 static void mlx5v_remove(struct auxiliary_device *adev)
4127 {
4128 	struct mlx5_vdpa_mgmtdev *mgtdev;
4129 
4130 	mgtdev = auxiliary_get_drvdata(adev);
4131 	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
4132 	kfree(mgtdev);
4133 }
4134 
4135 static const struct auxiliary_device_id mlx5v_id_table[] = {
4136 	{ .name = MLX5_ADEV_NAME ".vnet", },
4137 	{},
4138 };
4139 
4140 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
4141 
4142 static struct auxiliary_driver mlx5v_driver = {
4143 	.name = "vnet",
4144 	.probe = mlx5v_probe,
4145 	.remove = mlx5v_remove,
4146 	.id_table = mlx5v_id_table,
4147 };
4148 
4149 module_auxiliary_driver(mlx5v_driver);
4150