// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
 * 5.1.6.5.5 "Device operation in multiqueue mode":
 *
 * Multiqueue is disabled by default.
 * The driver enables multiqueue by sending a command using class
 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
 * operation, as follows: ...
 */
#define MLX5V_DEFAULT_VQ_COUNT 2

#define MLX5V_DEFAULT_VQ_SIZE 256

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	u64 modified_fields;

	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *desc_mr;

	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev
*mvdev) 197 { 198 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) 199 return 2; 200 201 return mvdev->max_vqs; 202 } 203 204 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx) 205 { 206 return idx == ctrl_vq_idx(mvdev); 207 } 208 209 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) 210 { 211 if (status & ~VALID_STATUS_MASK) 212 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", 213 status & ~VALID_STATUS_MASK); 214 215 if (!mlx5_vdpa_debug) 216 return; 217 218 mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); 219 if (set && !status) { 220 mlx5_vdpa_info(mvdev, "driver resets the device\n"); 221 return; 222 } 223 224 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); 225 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); 226 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); 227 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); 228 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); 229 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); 230 } 231 232 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) 233 { 234 if (features & ~VALID_FEATURES_MASK) 235 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", 236 features & ~VALID_FEATURES_MASK); 237 238 if (!mlx5_vdpa_debug) 239 return; 240 241 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads"); 242 if (!features) 243 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 244 245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 254 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 255 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 256 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 257 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 258 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 259 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 260 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 261 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 262 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 263 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 264 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 265 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 266 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 267 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 268 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 269 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 270 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 271 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 272 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 273 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 274 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 275 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 276 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 277 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 278 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 279 } 280 281 static int create_tis(struct mlx5_vdpa_net *ndev) 282 { 283 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 284 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 285 void *tisc; 286 int err; 287 288 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 289 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 290 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 291 if (err) 292 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 293 294 return err; 295 } 296 297 static void destroy_tis(struct mlx5_vdpa_net *ndev) 
298 { 299 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 300 } 301 302 #define MLX5_VDPA_CQE_SIZE 64 303 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 304 305 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 306 { 307 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 308 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 309 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 310 int err; 311 312 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 313 ndev->mvdev.mdev->priv.numa_node); 314 if (err) 315 return err; 316 317 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 318 319 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 320 buf->nent = nent; 321 322 return 0; 323 } 324 325 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 326 { 327 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 328 329 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 330 ndev->mvdev.mdev->priv.numa_node); 331 } 332 333 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 334 { 335 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 336 } 337 338 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 339 { 340 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 341 } 342 343 static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) 344 { 345 struct mlx5_cqe64 *cqe64; 346 void *cqe; 347 int i; 348 349 for (i = 0; i < buf->nent; i++) { 350 cqe = get_cqe(vcq, i); 351 cqe64 = cqe; 352 cqe64->op_own = MLX5_CQE_INVALID << 4; 353 } 354 } 355 356 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) 357 { 358 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); 359 360 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 361 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) 362 return cqe64; 363 364 return NULL; 365 } 366 367 static void rx_post(struct mlx5_vdpa_qp *vqp, int n) 368 { 369 vqp->head += n; 370 vqp->db.db[0] = cpu_to_be32(vqp->head); 371 } 372 373 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, 374 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) 375 { 376 struct mlx5_vdpa_qp *vqp; 377 __be64 *pas; 378 void *qpc; 379 380 vqp = fw ? 
&mvq->fwqp : &mvq->vqqp; 381 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); 382 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 383 if (vqp->fw) { 384 /* Firmware QP is allocated by the driver for the firmware's 385 * use so we can skip part of the params as they will be chosen by firmware 386 */ 387 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 388 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 389 MLX5_SET(qpc, qpc, no_sq, 1); 390 return; 391 } 392 393 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 394 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 395 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 396 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 397 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); 398 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 399 MLX5_SET(qpc, qpc, no_sq, 1); 400 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); 401 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); 402 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 403 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); 404 mlx5_fill_page_frag_array(&vqp->frag_buf, pas); 405 } 406 407 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) 408 { 409 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, 410 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, 411 ndev->mvdev.mdev->priv.numa_node); 412 } 413 414 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 415 { 416 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); 417 } 418 419 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 420 struct mlx5_vdpa_qp *vqp) 421 { 422 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 423 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 424 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 425 void *qpc; 426 void *in; 427 int err; 428 429 if (!vqp->fw) { 430 vqp = &mvq->vqqp; 431 err = rq_buf_alloc(ndev, vqp, mvq->num_ent); 432 if (err) 433 return err; 434 435 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); 436 if (err) 437 goto err_db; 438 inlen += vqp->frag_buf.npages * sizeof(__be64); 439 } 440 441 in = kzalloc(inlen, GFP_KERNEL); 442 if (!in) { 443 err = -ENOMEM; 444 goto err_kzalloc; 445 } 446 447 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); 448 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 449 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 450 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 451 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 452 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 453 if (!vqp->fw) 454 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); 455 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); 456 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); 457 kfree(in); 458 if (err) 459 goto err_kzalloc; 460 461 vqp->mqp.uid = ndev->mvdev.res.uid; 462 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); 463 464 if (!vqp->fw) 465 rx_post(vqp, mvq->num_ent); 466 467 return 0; 468 469 err_kzalloc: 470 if (!vqp->fw) 471 mlx5_db_free(ndev->mvdev.mdev, &vqp->db); 472 err_db: 473 if (!vqp->fw) 474 rq_buf_free(ndev, vqp); 475 476 return err; 477 } 478 479 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 480 { 481 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; 482 483 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 484 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); 485 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); 486 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) 487 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); 
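	/* Only the driver-side QP owns a doorbell record and an RQ buffer
	 * (allocated in qp_create()); release them as well.
	 */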
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
600 */ 601 err = mlx5_comp_eqn_get(mdev, 0, &eqn); 602 if (err) 603 goto err_vec; 604 605 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 606 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 607 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 608 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 609 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 610 611 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 612 if (err) 613 goto err_vec; 614 615 vcq->mcq.comp = mlx5_vdpa_cq_comp; 616 vcq->cqe = num_ent; 617 vcq->mcq.set_ci_db = vcq->db.db; 618 vcq->mcq.arm_db = vcq->db.db + 1; 619 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 620 kfree(in); 621 return 0; 622 623 err_vec: 624 kfree(in); 625 err_vzalloc: 626 cq_frag_buf_free(ndev, &vcq->buf); 627 err_db: 628 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 629 return err; 630 } 631 632 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 633 { 634 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 635 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 636 struct mlx5_vdpa_cq *vcq = &mvq->cq; 637 638 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 639 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 640 return; 641 } 642 cq_frag_buf_free(ndev, &vcq->buf); 643 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 644 } 645 646 static int read_umem_params(struct mlx5_vdpa_net *ndev) 647 { 648 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; 649 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); 650 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 651 int out_size; 652 void *caps; 653 void *out; 654 int err; 655 656 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); 657 out = kzalloc(out_size, GFP_KERNEL); 658 if (!out) 659 return -ENOMEM; 660 661 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 662 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 663 err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); 664 if (err) { 665 mlx5_vdpa_warn(&ndev->mvdev, 666 "Failed reading vdpa umem capabilities with err %d\n", err); 667 goto out; 668 } 669 670 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 671 672 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); 673 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); 674 675 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); 676 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); 677 678 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); 679 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); 680 681 out: 682 kfree(out); 683 return 0; 684 } 685 686 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 687 struct mlx5_vdpa_umem **umemp) 688 { 689 u32 p_a; 690 u32 p_b; 691 692 switch (num) { 693 case 1: 694 p_a = ndev->umem_1_buffer_param_a; 695 p_b = ndev->umem_1_buffer_param_b; 696 *umemp = &mvq->umem1; 697 break; 698 case 2: 699 p_a = ndev->umem_2_buffer_param_a; 700 p_b = ndev->umem_2_buffer_param_b; 701 *umemp = &mvq->umem2; 702 break; 703 case 3: 704 p_a = ndev->umem_3_buffer_param_a; 705 p_b = ndev->umem_3_buffer_param_b; 706 *umemp = &mvq->umem3; 707 break; 708 } 709 710 (*umemp)->size = p_a * mvq->num_ent + p_b; 711 } 712 713 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 714 { 715 
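	/* Release the fragment buffer that backs this umem. */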
mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 716 } 717 718 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 719 { 720 int inlen; 721 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 722 void *um; 723 void *in; 724 int err; 725 __be64 *pas; 726 struct mlx5_vdpa_umem *umem; 727 728 set_umem_size(ndev, mvq, num, &umem); 729 err = umem_frag_buf_alloc(ndev, umem, umem->size); 730 if (err) 731 return err; 732 733 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 734 735 in = kzalloc(inlen, GFP_KERNEL); 736 if (!in) { 737 err = -ENOMEM; 738 goto err_in; 739 } 740 741 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 742 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 743 um = MLX5_ADDR_OF(create_umem_in, in, umem); 744 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 745 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 746 747 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 748 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 749 750 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 751 if (err) { 752 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); 753 goto err_cmd; 754 } 755 756 kfree(in); 757 umem->id = MLX5_GET(create_umem_out, out, umem_id); 758 759 return 0; 760 761 err_cmd: 762 kfree(in); 763 err_in: 764 umem_frag_buf_free(ndev, umem); 765 return err; 766 } 767 768 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 769 { 770 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 771 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 772 struct mlx5_vdpa_umem *umem; 773 774 switch (num) { 775 case 1: 776 umem = &mvq->umem1; 777 break; 778 case 2: 779 umem = &mvq->umem2; 780 break; 781 case 3: 782 umem = &mvq->umem3; 783 break; 784 } 785 786 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); 787 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 788 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 789 return; 790 791 umem_frag_buf_free(ndev, umem); 792 } 793 794 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 795 { 796 int num; 797 int err; 798 799 for (num = 1; num <= 3; num++) { 800 err = create_umem(ndev, mvq, num); 801 if (err) 802 goto err_umem; 803 } 804 return 0; 805 806 err_umem: 807 for (num--; num > 0; num--) 808 umem_destroy(ndev, mvq, num); 809 810 return err; 811 } 812 813 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 814 { 815 int num; 816 817 for (num = 3; num > 0; num--) 818 umem_destroy(ndev, mvq, num); 819 } 820 821 static int get_queue_type(struct mlx5_vdpa_net *ndev) 822 { 823 u32 type_mask; 824 825 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 826 827 /* prefer split queue */ 828 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 829 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 830 831 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 832 833 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 834 } 835 836 static bool vq_is_tx(u16 idx) 837 { 838 return idx % 2; 839 } 840 841 enum { 842 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 843 MLX5_VIRTIO_NET_F_HOST_ECN = 4, 844 MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 845 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 846 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 847 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 848 MLX5_VIRTIO_NET_F_CSUM = 10, 849 
MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 850 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 851 }; 852 853 static u16 get_features(u64 features) 854 { 855 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 856 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 857 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 858 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 859 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 860 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 861 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 862 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 863 } 864 865 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 866 { 867 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 868 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 869 } 870 871 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) 872 { 873 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & 874 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && 875 pci_msix_can_alloc_dyn(mvdev->mdev->pdev); 876 } 877 878 static int create_virtqueue(struct mlx5_vdpa_net *ndev, 879 struct mlx5_vdpa_virtqueue *mvq, 880 bool filled) 881 { 882 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 883 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 884 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 885 struct mlx5_vdpa_mr *vq_mr; 886 struct mlx5_vdpa_mr *vq_desc_mr; 887 u64 features = filled ? mvdev->actual_features : mvdev->mlx_features; 888 void *obj_context; 889 u16 mlx_features; 890 void *cmd_hdr; 891 void *vq_ctx; 892 void *in; 893 int err; 894 895 err = umems_create(ndev, mvq); 896 if (err) 897 return err; 898 899 in = kzalloc(inlen, GFP_KERNEL); 900 if (!in) { 901 err = -ENOMEM; 902 goto err_alloc; 903 } 904 905 mlx_features = get_features(features); 906 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 907 908 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 909 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 910 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 911 912 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 913 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 914 mlx_features >> 3); 915 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 916 mlx_features & 7); 917 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 918 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 919 920 if (vq_is_tx(mvq->index)) 921 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 922 923 if (mvq->map.virq) { 924 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); 925 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); 926 } else { 927 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 928 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 929 } 930 931 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 932 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 933 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 934 !!(features & BIT_ULL(VIRTIO_F_VERSION_1))); 935 936 if (filled) { 937 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 938 
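	/* In the "filled" path the used index and the data/descriptor mkeys
	 * below are programmed as well, so the device resumes from the saved
	 * ring state with the current memory registrations.
	 */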
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 939 940 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 941 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 942 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 943 944 vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 945 if (vq_mr) 946 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); 947 948 vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 949 if (vq_desc_mr && 950 MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) 951 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); 952 } else { 953 /* If there is no mr update, make sure that the existing ones are set 954 * modify to ready. 955 */ 956 vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 957 if (vq_mr) 958 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; 959 960 vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 961 if (vq_desc_mr) 962 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 963 } 964 965 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 966 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 967 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 968 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 969 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 970 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 971 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 972 if (counters_supported(&ndev->mvdev)) 973 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 974 975 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 976 if (err) 977 goto err_cmd; 978 979 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; 980 kfree(in); 981 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 982 983 if (filled) { 984 mlx5_vdpa_get_mr(mvdev, vq_mr); 985 mvq->vq_mr = vq_mr; 986 987 if (vq_desc_mr && 988 MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { 989 mlx5_vdpa_get_mr(mvdev, vq_desc_mr); 990 mvq->desc_mr = vq_desc_mr; 991 } 992 } 993 994 return 0; 995 996 err_cmd: 997 kfree(in); 998 err_alloc: 999 umems_destroy(ndev, mvq); 1000 return err; 1001 } 1002 1003 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1004 { 1005 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 1006 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 1007 1008 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 1009 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1010 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 1011 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 1012 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 1013 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1014 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 1015 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 1016 return; 1017 } 1018 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 1019 umems_destroy(ndev, mvq); 1020 1021 mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr); 1022 mvq->vq_mr = NULL; 1023 1024 mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr); 1025 mvq->desc_mr = NULL; 1026 } 1027 1028 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 1029 { 1030 return fw ? 
mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 1031 } 1032 1033 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 1034 { 1035 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 1036 } 1037 1038 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 1039 int *outlen, u32 qpn, u32 rqpn) 1040 { 1041 void *qpc; 1042 void *pp; 1043 1044 switch (cmd) { 1045 case MLX5_CMD_OP_2RST_QP: 1046 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 1047 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 1048 *in = kzalloc(*inlen, GFP_KERNEL); 1049 *out = kzalloc(*outlen, GFP_KERNEL); 1050 if (!*in || !*out) 1051 goto outerr; 1052 1053 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 1054 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 1055 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 1056 break; 1057 case MLX5_CMD_OP_RST2INIT_QP: 1058 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 1059 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 1060 *in = kzalloc(*inlen, GFP_KERNEL); 1061 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 1062 if (!*in || !*out) 1063 goto outerr; 1064 1065 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 1066 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 1067 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 1068 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1069 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1070 MLX5_SET(qpc, qpc, rwe, 1); 1071 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1072 MLX5_SET(ads, pp, vhca_port_num, 1); 1073 break; 1074 case MLX5_CMD_OP_INIT2RTR_QP: 1075 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 1076 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 1077 *in = kzalloc(*inlen, GFP_KERNEL); 1078 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 1079 if (!*in || !*out) 1080 goto outerr; 1081 1082 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 1083 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 1084 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 1085 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1086 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 1087 MLX5_SET(qpc, qpc, log_msg_max, 30); 1088 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1089 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1090 MLX5_SET(ads, pp, fl, 1); 1091 break; 1092 case MLX5_CMD_OP_RTR2RTS_QP: 1093 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 1094 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 1095 *in = kzalloc(*inlen, GFP_KERNEL); 1096 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 1097 if (!*in || !*out) 1098 goto outerr; 1099 1100 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 1101 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 1102 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 1103 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1104 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1105 MLX5_SET(ads, pp, ack_timeout, 14); 1106 MLX5_SET(qpc, qpc, retry_count, 7); 1107 MLX5_SET(qpc, qpc, rnr_retry, 7); 1108 break; 1109 default: 1110 goto outerr_nullify; 1111 } 1112 1113 return; 1114 1115 outerr: 1116 kfree(*in); 1117 kfree(*out); 1118 outerr_nullify: 1119 *in = NULL; 1120 *out = NULL; 1121 } 1122 1123 static void free_inout(void *in, void *out) 1124 { 1125 kfree(in); 1126 kfree(out); 1127 } 1128 1129 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1130 * firmware. The fw argument indicates whether the subjected QP is the one used 1131 * by firmware. 
1132 */ 1133 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1134 { 1135 int outlen; 1136 int inlen; 1137 void *out; 1138 void *in; 1139 int err; 1140 1141 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1142 if (!in || !out) 1143 return -ENOMEM; 1144 1145 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1146 free_inout(in, out); 1147 return err; 1148 } 1149 1150 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1151 { 1152 int err; 1153 1154 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1155 if (err) 1156 return err; 1157 1158 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1159 if (err) 1160 return err; 1161 1162 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1163 if (err) 1164 return err; 1165 1166 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1167 if (err) 1168 return err; 1169 1170 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1171 if (err) 1172 return err; 1173 1174 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1175 if (err) 1176 return err; 1177 1178 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1179 } 1180 1181 struct mlx5_virtq_attr { 1182 u8 state; 1183 u16 available_index; 1184 u16 used_index; 1185 }; 1186 1187 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 1188 struct mlx5_virtq_attr *attr) 1189 { 1190 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); 1191 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; 1192 void *out; 1193 void *obj_context; 1194 void *cmd_hdr; 1195 int err; 1196 1197 out = kzalloc(outlen, GFP_KERNEL); 1198 if (!out) 1199 return -ENOMEM; 1200 1201 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1202 1203 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1204 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1205 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1206 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1207 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); 1208 if (err) 1209 goto err_cmd; 1210 1211 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); 1212 memset(attr, 0, sizeof(*attr)); 1213 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1214 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1215 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1216 kfree(out); 1217 return 0; 1218 1219 err_cmd: 1220 kfree(out); 1221 return err; 1222 } 1223 1224 static bool is_resumable(struct mlx5_vdpa_net *ndev) 1225 { 1226 return ndev->mvdev.vdev.config->resume; 1227 } 1228 1229 static bool is_valid_state_change(int oldstate, int newstate, bool resumable) 1230 { 1231 switch (oldstate) { 1232 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1233 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; 1234 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1235 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1236 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1237 return resumable ? 
newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false; 1238 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1239 default: 1240 return false; 1241 } 1242 } 1243 1244 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) 1245 { 1246 /* Only state is always modifiable */ 1247 if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) 1248 return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT || 1249 mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1250 1251 return true; 1252 } 1253 1254 static int modify_virtqueue(struct mlx5_vdpa_net *ndev, 1255 struct mlx5_vdpa_virtqueue *mvq, 1256 int state) 1257 { 1258 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1259 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1260 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 1261 struct mlx5_vdpa_mr *desc_mr = NULL; 1262 struct mlx5_vdpa_mr *vq_mr = NULL; 1263 bool state_change = false; 1264 void *obj_context; 1265 void *cmd_hdr; 1266 void *vq_ctx; 1267 void *in; 1268 int err; 1269 1270 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) 1271 return 0; 1272 1273 if (!modifiable_virtqueue_fields(mvq)) 1274 return -EINVAL; 1275 1276 in = kzalloc(inlen, GFP_KERNEL); 1277 if (!in) 1278 return -ENOMEM; 1279 1280 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1281 1282 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1283 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1284 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1285 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1286 1287 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1288 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 1289 1290 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) { 1291 if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { 1292 err = -EINVAL; 1293 goto done; 1294 } 1295 1296 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1297 state_change = true; 1298 } 1299 1300 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { 1301 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 1302 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 1303 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 1304 } 1305 1306 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX) 1307 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 1308 1309 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) 1310 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 1311 1312 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION) 1313 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1314 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 1315 1316 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) { 1317 u16 mlx_features = get_features(ndev->mvdev.actual_features); 1318 1319 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 1320 mlx_features >> 3); 1321 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 1322 mlx_features & 7); 1323 } 1324 1325 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { 1326 vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 1327 1328 if (vq_mr) 1329 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); 1330 else 1331 mvq->modified_fields &= 
~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; 1332 } 1333 1334 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { 1335 desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 1336 1337 if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) 1338 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); 1339 else 1340 mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 1341 } 1342 1343 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); 1344 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1345 if (err) 1346 goto done; 1347 1348 if (state_change) 1349 mvq->fw_state = state; 1350 1351 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { 1352 mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); 1353 mlx5_vdpa_get_mr(mvdev, vq_mr); 1354 mvq->vq_mr = vq_mr; 1355 } 1356 1357 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { 1358 mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); 1359 mlx5_vdpa_get_mr(mvdev, desc_mr); 1360 mvq->desc_mr = desc_mr; 1361 } 1362 1363 mvq->modified_fields = 0; 1364 1365 done: 1366 kfree(in); 1367 return err; 1368 } 1369 1370 static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev, 1371 struct mlx5_vdpa_virtqueue *mvq, 1372 unsigned int state) 1373 { 1374 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; 1375 return modify_virtqueue(ndev, mvq, state); 1376 } 1377 1378 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1379 { 1380 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1381 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1382 void *cmd_hdr; 1383 int err; 1384 1385 if (!counters_supported(&ndev->mvdev)) 1386 return 0; 1387 1388 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1389 1390 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1391 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1392 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1393 1394 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1395 if (err) 1396 return err; 1397 1398 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1399 1400 return 0; 1401 } 1402 1403 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1404 { 1405 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1406 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1407 1408 if (!counters_supported(&ndev->mvdev)) 1409 return; 1410 1411 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1412 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1413 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1414 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1415 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1416 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1417 } 1418 1419 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) 1420 { 1421 struct vdpa_callback *cb = priv; 1422 1423 if (cb->callback) 1424 return cb->callback(cb->private); 1425 1426 return IRQ_HANDLED; 1427 } 1428 1429 static void alloc_vector(struct mlx5_vdpa_net *ndev, 1430 struct mlx5_vdpa_virtqueue *mvq) 1431 { 1432 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1433 struct 
mlx5_vdpa_irq_pool_entry *ent; 1434 int err; 1435 int i; 1436 1437 for (i = 0; i < irqp->num_ent; i++) { 1438 ent = &irqp->entries[i]; 1439 if (!ent->used) { 1440 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 1441 dev_name(&ndev->mvdev.vdev.dev), mvq->index); 1442 ent->dev_id = &ndev->event_cbs[mvq->index]; 1443 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, 1444 ent->name, ent->dev_id); 1445 if (err) 1446 return; 1447 1448 ent->used = true; 1449 mvq->map = ent->map; 1450 return; 1451 } 1452 } 1453 } 1454 1455 static void dealloc_vector(struct mlx5_vdpa_net *ndev, 1456 struct mlx5_vdpa_virtqueue *mvq) 1457 { 1458 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1459 int i; 1460 1461 for (i = 0; i < irqp->num_ent; i++) 1462 if (mvq->map.virq == irqp->entries[i].map.virq) { 1463 free_irq(mvq->map.virq, irqp->entries[i].dev_id); 1464 irqp->entries[i].used = false; 1465 return; 1466 } 1467 } 1468 1469 static int setup_vq(struct mlx5_vdpa_net *ndev, 1470 struct mlx5_vdpa_virtqueue *mvq, 1471 bool filled) 1472 { 1473 u16 idx = mvq->index; 1474 int err; 1475 1476 if (mvq->initialized) 1477 return 0; 1478 1479 err = cq_create(ndev, idx, mvq->num_ent); 1480 if (err) 1481 return err; 1482 1483 err = qp_create(ndev, mvq, &mvq->fwqp); 1484 if (err) 1485 goto err_fwqp; 1486 1487 err = qp_create(ndev, mvq, &mvq->vqqp); 1488 if (err) 1489 goto err_vqqp; 1490 1491 err = connect_qps(ndev, mvq); 1492 if (err) 1493 goto err_connect; 1494 1495 err = counter_set_alloc(ndev, mvq); 1496 if (err) 1497 goto err_connect; 1498 1499 alloc_vector(ndev, mvq); 1500 err = create_virtqueue(ndev, mvq, filled); 1501 if (err) 1502 goto err_vq; 1503 1504 mvq->initialized = true; 1505 1506 if (mvq->ready) { 1507 err = resume_vq(ndev, mvq); 1508 if (err) 1509 goto err_modify; 1510 } 1511 1512 return 0; 1513 1514 err_modify: 1515 destroy_virtqueue(ndev, mvq); 1516 err_vq: 1517 dealloc_vector(ndev, mvq); 1518 counter_set_dealloc(ndev, mvq); 1519 err_connect: 1520 qp_destroy(ndev, &mvq->vqqp); 1521 err_vqqp: 1522 qp_destroy(ndev, &mvq->fwqp); 1523 err_fwqp: 1524 cq_destroy(ndev, idx); 1525 return err; 1526 } 1527 1528 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1529 { 1530 struct mlx5_virtq_attr attr; 1531 int err; 1532 1533 if (!mvq->initialized) 1534 return 0; 1535 1536 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1537 return 0; 1538 1539 err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND); 1540 if (err) { 1541 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed, err: %d\n", err); 1542 return err; 1543 } 1544 1545 err = query_virtqueue(ndev, mvq, &attr); 1546 if (err) { 1547 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue, err: %d\n", err); 1548 return err; 1549 } 1550 1551 mvq->avail_idx = attr.available_index; 1552 mvq->used_idx = attr.used_index; 1553 1554 return 0; 1555 } 1556 1557 static int suspend_vqs(struct mlx5_vdpa_net *ndev) 1558 { 1559 int err = 0; 1560 int i; 1561 1562 for (i = 0; i < ndev->cur_num_vqs; i++) { 1563 int local_err = suspend_vq(ndev, &ndev->vqs[i]); 1564 1565 err = local_err ? 
local_err : err; 1566 } 1567 1568 return err; 1569 } 1570 1571 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1572 { 1573 int err; 1574 1575 if (!mvq->initialized) 1576 return 0; 1577 1578 if (mvq->index >= ndev->cur_num_vqs) 1579 return 0; 1580 1581 switch (mvq->fw_state) { 1582 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1583 /* Due to a FW quirk we need to modify the VQ fields first then change state. 1584 * This should be fixed soon. After that, a single command can be used. 1585 */ 1586 err = modify_virtqueue(ndev, mvq, 0); 1587 if (err) { 1588 mlx5_vdpa_warn(&ndev->mvdev, 1589 "modify vq properties failed for vq %u, err: %d\n", 1590 mvq->index, err); 1591 return err; 1592 } 1593 break; 1594 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1595 if (!is_resumable(ndev)) { 1596 mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index); 1597 return -EINVAL; 1598 } 1599 break; 1600 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1601 return 0; 1602 default: 1603 mlx5_vdpa_warn(&ndev->mvdev, "resume vq %u called from bad state %d\n", 1604 mvq->index, mvq->fw_state); 1605 return -EINVAL; 1606 } 1607 1608 err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1609 if (err) 1610 mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u, err: %d\n", 1611 mvq->index, err); 1612 1613 return err; 1614 } 1615 1616 static int resume_vqs(struct mlx5_vdpa_net *ndev) 1617 { 1618 int err = 0; 1619 1620 for (int i = 0; i < ndev->cur_num_vqs; i++) { 1621 int local_err = resume_vq(ndev, &ndev->vqs[i]); 1622 1623 err = local_err ? local_err : err; 1624 } 1625 1626 return err; 1627 } 1628 1629 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1630 { 1631 if (!mvq->initialized) 1632 return; 1633 1634 suspend_vq(ndev, mvq); 1635 mvq->modified_fields = 0; 1636 destroy_virtqueue(ndev, mvq); 1637 dealloc_vector(ndev, mvq); 1638 counter_set_dealloc(ndev, mvq); 1639 qp_destroy(ndev, &mvq->vqqp); 1640 qp_destroy(ndev, &mvq->fwqp); 1641 cq_destroy(ndev, mvq->index); 1642 mvq->initialized = false; 1643 } 1644 1645 static int create_rqt(struct mlx5_vdpa_net *ndev) 1646 { 1647 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); 1648 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2); 1649 __be32 *list; 1650 void *rqtc; 1651 int inlen; 1652 void *in; 1653 int i, j; 1654 int err; 1655 1656 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); 1657 in = kzalloc(inlen, GFP_KERNEL); 1658 if (!in) 1659 return -ENOMEM; 1660 1661 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1662 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1663 1664 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1665 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); 1666 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1667 for (i = 0, j = 0; i < act_sz; i++, j += 2) 1668 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1669 1670 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1671 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1672 kfree(in); 1673 if (err) 1674 return err; 1675 1676 return 0; 1677 } 1678 1679 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1680 1681 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1682 { 1683 int act_sz = roundup_pow_of_two(num / 2); 1684 __be32 *list; 1685 void *rqtc; 1686 int inlen; 1687 void *in; 1688 int i, j; 1689 int err; 1690 1691 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); 
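	/* The command payload carries act_sz RQ numbers after the fixed
	 * modify_rqt_in header, hence the variable-length allocation.
	 */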
1692 in = kzalloc(inlen, GFP_KERNEL); 1693 if (!in) 1694 return -ENOMEM; 1695 1696 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1697 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1698 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1699 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1700 1701 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1702 for (i = 0, j = 0; i < act_sz; i++, j = j + 2) 1703 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1704 1705 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1706 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1707 kfree(in); 1708 if (err) 1709 return err; 1710 1711 return 0; 1712 } 1713 1714 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1715 { 1716 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1717 } 1718 1719 static int create_tir(struct mlx5_vdpa_net *ndev) 1720 { 1721 #define HASH_IP_L4PORTS \ 1722 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1723 MLX5_HASH_FIELD_SEL_L4_DPORT) 1724 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1725 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1726 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1727 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1728 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1729 void *rss_key; 1730 void *outer; 1731 void *tirc; 1732 void *in; 1733 int err; 1734 1735 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1736 if (!in) 1737 return -ENOMEM; 1738 1739 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1740 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1741 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1742 1743 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1744 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1745 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1746 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1747 1748 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1749 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1750 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1751 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1752 1753 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1754 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1755 1756 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1757 kfree(in); 1758 if (err) 1759 return err; 1760 1761 mlx5_vdpa_add_tirn(ndev); 1762 return err; 1763 } 1764 1765 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1766 { 1767 mlx5_vdpa_remove_tirn(ndev); 1768 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1769 } 1770 1771 #define MAX_STEERING_ENT 0x8000 1772 #define MAX_STEERING_GROUPS 2 1773 1774 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1775 #define NUM_DESTS 2 1776 #else 1777 #define NUM_DESTS 1 1778 #endif 1779 1780 static int add_steering_counters(struct mlx5_vdpa_net *ndev, 1781 struct macvlan_node *node, 1782 struct mlx5_flow_act *flow_act, 1783 struct mlx5_flow_destination *dests) 1784 { 1785 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1786 int err; 1787 1788 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1789 if (IS_ERR(node->ucast_counter.counter)) 1790 return PTR_ERR(node->ucast_counter.counter); 1791 1792 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1793 if (IS_ERR(node->mcast_counter.counter)) { 
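		/* Creating the multicast counter failed; unwind the unicast
		 * counter through err_mcast_counter below.
		 */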
1794 err = PTR_ERR(node->mcast_counter.counter); 1795 goto err_mcast_counter; 1796 } 1797 1798 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1799 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 1800 return 0; 1801 1802 err_mcast_counter: 1803 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1804 return err; 1805 #else 1806 return 0; 1807 #endif 1808 } 1809 1810 static void remove_steering_counters(struct mlx5_vdpa_net *ndev, 1811 struct macvlan_node *node) 1812 { 1813 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1814 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1815 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1816 #endif 1817 } 1818 1819 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1820 struct macvlan_node *node) 1821 { 1822 struct mlx5_flow_destination dests[NUM_DESTS] = {}; 1823 struct mlx5_flow_act flow_act = {}; 1824 struct mlx5_flow_spec *spec; 1825 void *headers_c; 1826 void *headers_v; 1827 u8 *dmac_c; 1828 u8 *dmac_v; 1829 int err; 1830 u16 vid; 1831 1832 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1833 if (!spec) 1834 return -ENOMEM; 1835 1836 vid = key2vid(node->macvlan); 1837 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1838 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1839 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1840 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1841 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1842 eth_broadcast_addr(dmac_c); 1843 ether_addr_copy(dmac_v, mac); 1844 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1845 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1846 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1847 } 1848 if (node->tagged) { 1849 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1850 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1851 } 1852 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1853 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1854 dests[0].tir_num = ndev->res.tirn; 1855 err = add_steering_counters(ndev, node, &flow_act, dests); 1856 if (err) 1857 goto out_free; 1858 1859 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1860 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1861 #endif 1862 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1863 if (IS_ERR(node->ucast_rule)) { 1864 err = PTR_ERR(node->ucast_rule); 1865 goto err_ucast; 1866 } 1867 1868 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1869 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1870 #endif 1871 1872 memset(dmac_c, 0, ETH_ALEN); 1873 memset(dmac_v, 0, ETH_ALEN); 1874 dmac_c[0] = 1; 1875 dmac_v[0] = 1; 1876 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1877 if (IS_ERR(node->mcast_rule)) { 1878 err = PTR_ERR(node->mcast_rule); 1879 goto err_mcast; 1880 } 1881 kvfree(spec); 1882 mlx5_vdpa_add_rx_counters(ndev, node); 1883 return 0; 1884 1885 err_mcast: 1886 mlx5_del_flow_rules(node->ucast_rule); 1887 err_ucast: 1888 remove_steering_counters(ndev, node); 1889 out_free: 1890 kvfree(spec); 1891 return err; 1892 } 1893 1894 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1895 struct macvlan_node *node) 1896 { 1897 mlx5_vdpa_remove_rx_counters(ndev, node); 1898 mlx5_del_flow_rules(node->ucast_rule); 1899 
mlx5_del_flow_rules(node->mcast_rule); 1900 } 1901 1902 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1903 { 1904 u64 val; 1905 1906 if (!tagged) 1907 vlan = MLX5V_UNTAGGED; 1908 1909 val = (u64)vlan << 48 | 1910 (u64)mac[0] << 40 | 1911 (u64)mac[1] << 32 | 1912 (u64)mac[2] << 24 | 1913 (u64)mac[3] << 16 | 1914 (u64)mac[4] << 8 | 1915 (u64)mac[5]; 1916 1917 return val; 1918 } 1919 1920 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1921 { 1922 struct macvlan_node *pos; 1923 u32 idx; 1924 1925 idx = hash_64(value, 8); // tbd 8 1926 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1927 if (pos->macvlan == value) 1928 return pos; 1929 } 1930 return NULL; 1931 } 1932 1933 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 1934 { 1935 struct macvlan_node *ptr; 1936 u64 val; 1937 u32 idx; 1938 int err; 1939 1940 val = search_val(mac, vid, tagged); 1941 if (mac_vlan_lookup(ndev, val)) 1942 return -EEXIST; 1943 1944 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1945 if (!ptr) 1946 return -ENOMEM; 1947 1948 ptr->tagged = tagged; 1949 ptr->macvlan = val; 1950 ptr->ndev = ndev; 1951 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 1952 if (err) 1953 goto err_add; 1954 1955 idx = hash_64(val, 8); 1956 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1957 return 0; 1958 1959 err_add: 1960 kfree(ptr); 1961 return err; 1962 } 1963 1964 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1965 { 1966 struct macvlan_node *ptr; 1967 1968 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1969 if (!ptr) 1970 return; 1971 1972 hlist_del(&ptr->hlist); 1973 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 1974 remove_steering_counters(ndev, ptr); 1975 kfree(ptr); 1976 } 1977 1978 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1979 { 1980 struct macvlan_node *pos; 1981 struct hlist_node *n; 1982 int i; 1983 1984 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1985 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1986 hlist_del(&pos->hlist); 1987 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 1988 remove_steering_counters(ndev, pos); 1989 kfree(pos); 1990 } 1991 } 1992 } 1993 1994 static int setup_steering(struct mlx5_vdpa_net *ndev) 1995 { 1996 struct mlx5_flow_table_attr ft_attr = {}; 1997 struct mlx5_flow_namespace *ns; 1998 int err; 1999 2000 ft_attr.max_fte = MAX_STEERING_ENT; 2001 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 2002 2003 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 2004 if (!ns) { 2005 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 2006 return -EOPNOTSUPP; 2007 } 2008 2009 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 2010 if (IS_ERR(ndev->rxft)) { 2011 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); 2012 return PTR_ERR(ndev->rxft); 2013 } 2014 mlx5_vdpa_add_rx_flow_table(ndev); 2015 2016 err = mac_vlan_add(ndev, ndev->config.mac, 0, false); 2017 if (err) 2018 goto err_add; 2019 2020 return 0; 2021 2022 err_add: 2023 mlx5_vdpa_remove_rx_flow_table(ndev); 2024 mlx5_destroy_flow_table(ndev->rxft); 2025 return err; 2026 } 2027 2028 static void teardown_steering(struct mlx5_vdpa_net *ndev) 2029 { 2030 clear_mac_vlan_table(ndev); 2031 mlx5_vdpa_remove_rx_flow_table(ndev); 2032 mlx5_destroy_flow_table(ndev->rxft); 2033 } 2034 2035 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) 2036 { 2037 struct mlx5_vdpa_net *ndev = 
to_mlx5_vdpa_ndev(mvdev);
2038 struct mlx5_control_vq *cvq = &mvdev->cvq;
2039 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2040 struct mlx5_core_dev *pfmdev;
2041 size_t read;
2042 u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
2043
2044 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2045 switch (cmd) {
2046 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
2047 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
2048 if (read != ETH_ALEN)
2049 break;
2050
2051 if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
2052 status = VIRTIO_NET_OK;
2053 break;
2054 }
2055
2056 if (is_zero_ether_addr(mac))
2057 break;
2058
2059 if (!is_zero_ether_addr(ndev->config.mac)) {
2060 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2061 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
2062 ndev->config.mac);
2063 break;
2064 }
2065 }
2066
2067 if (mlx5_mpfs_add_mac(pfmdev, mac)) {
2068 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2069 mac);
2070 break;
2071 }
2072
2073 /* Back up the original MAC address so that we can restore it if adding
2074 * the forward rules fails.
2075 */
2076 memcpy(mac_back, ndev->config.mac, ETH_ALEN);
2077
2078 memcpy(ndev->config.mac, mac, ETH_ALEN);
2079
2080 /* Recreate the flow table entry so that packets are forwarded to the new MAC.
2081 */
2082 mac_vlan_del(ndev, mac_back, 0, false);
2083
2084 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2085 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2086
2087 /* This path should rarely be taken, but double check anyway. */
2088 if (is_zero_ether_addr(mac_back)) {
2089 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2090 break;
2091 }
2092
2093 /* Try to restore the original MAC address in the MPFS table and restore
2094 * the forward rule entry.
2095 */
2096 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2097 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2098 ndev->config.mac);
2099 }
2100
2101 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2102 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2103 mac_back);
2104 }
2105
2106 memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2107
2108 if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2109 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2110
2111 break;
2112 }
2113
2114 status = VIRTIO_NET_OK;
2115 break;
2116
2117 default:
2118 break;
2119 }
2120
2121 return status;
2122 }
2123
2124 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2125 {
2126 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2127 int cur_qps = ndev->cur_num_vqs / 2;
2128 int err;
2129 int i;
2130
2131 if (cur_qps > newqps) {
2132 err = modify_rqt(ndev, 2 * newqps);
2133 if (err)
2134 return err;
2135
2136 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) {
2137 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2138
2139 if (is_resumable(ndev))
2140 suspend_vq(ndev, mvq);
2141 else
2142 teardown_vq(ndev, mvq);
2143 }
2144
2145 ndev->cur_num_vqs = 2 * newqps;
2146 } else {
2147 ndev->cur_num_vqs = 2 * newqps;
2148 for (i = cur_qps * 2; i < 2 * newqps; i++) {
2149 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i];
2150
2151 err = mvq->initialized ?
resume_vq(ndev, mvq) : setup_vq(ndev, mvq, true);
2152 if (err)
2153 goto clean_added;
2154 }
2155 err = modify_rqt(ndev, 2 * newqps);
2156 if (err)
2157 goto clean_added;
2158 }
2159 return 0;
2160
2161 clean_added:
2162 for (--i; i >= 2 * cur_qps; --i)
2163 teardown_vq(ndev, &ndev->vqs[i]);
2164
2165 ndev->cur_num_vqs = 2 * cur_qps;
2166
2167 return err;
2168 }
2169
2170 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2171 {
2172 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2173 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2174 struct mlx5_control_vq *cvq = &mvdev->cvq;
2175 struct virtio_net_ctrl_mq mq;
2176 size_t read;
2177 u16 newqps;
2178
2179 switch (cmd) {
2180 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2181 /* This mq feature check aligns with the pre-existing userspace
2182 * implementation.
2183 *
2184 * Without it, an untrusted driver could fake a multiqueue config
2185 * request down to a non-mq device, which may cause the kernel to
2186 * panic due to uninitialized resources for the extra vqs. Even with
2187 * a well-behaved guest driver, changing the number of vqs on a
2188 * non-mq device is not something we expect to allow.
2189 */
2190 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
2191 break;
2192
2193 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
2194 if (read != sizeof(mq))
2195 break;
2196
2197 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
2198 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2199 newqps > ndev->rqt_size)
2200 break;
2201
2202 if (ndev->cur_num_vqs == 2 * newqps) {
2203 status = VIRTIO_NET_OK;
2204 break;
2205 }
2206
2207 if (!change_num_qps(mvdev, newqps))
2208 status = VIRTIO_NET_OK;
2209
2210 break;
2211 default:
2212 break;
2213 }
2214
2215 return status;
2216 }
2217
2218 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2219 {
2220 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2221 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2222 struct mlx5_control_vq *cvq = &mvdev->cvq;
2223 __virtio16 vlan;
2224 size_t read;
2225 u16 id;
2226
2227 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
2228 return status;
2229
2230 switch (cmd) {
2231 case VIRTIO_NET_CTRL_VLAN_ADD:
2232 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2233 if (read != sizeof(vlan))
2234 break;
2235
2236 id = mlx5vdpa16_to_cpu(mvdev, vlan);
2237 if (mac_vlan_add(ndev, ndev->config.mac, id, true))
2238 break;
2239
2240 status = VIRTIO_NET_OK;
2241 break;
2242 case VIRTIO_NET_CTRL_VLAN_DEL:
2243 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
2244 if (read != sizeof(vlan))
2245 break;
2246
2247 id = mlx5vdpa16_to_cpu(mvdev, vlan);
2248 mac_vlan_del(ndev, ndev->config.mac, id, true);
2249 status = VIRTIO_NET_OK;
2250 break;
2251 default:
2252 break;
2253 }
2254
2255 return status;
2256 }
2257
2258 static void mlx5_cvq_kick_handler(struct work_struct *work)
2259 {
2260 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2261 struct virtio_net_ctrl_hdr ctrl;
2262 struct mlx5_vdpa_wq_ent *wqent;
2263 struct mlx5_vdpa_dev *mvdev;
2264 struct mlx5_control_vq *cvq;
2265 struct mlx5_vdpa_net *ndev;
2266 size_t read, write;
2267 int err;
2268
2269 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2270 mvdev = wqent->mvdev;
2271 ndev = to_mlx5_vdpa_ndev(mvdev);
2272 cvq = &mvdev->cvq;
2273
2274 down_write(&ndev->reslock);
2275
2276 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2277 goto out;
2278
2279 if
(!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 2280 goto out; 2281 2282 if (!cvq->ready) 2283 goto out; 2284 2285 while (true) { 2286 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2287 GFP_ATOMIC); 2288 if (err <= 0) 2289 break; 2290 2291 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2292 if (read != sizeof(ctrl)) 2293 break; 2294 2295 cvq->received_desc++; 2296 switch (ctrl.class) { 2297 case VIRTIO_NET_CTRL_MAC: 2298 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2299 break; 2300 case VIRTIO_NET_CTRL_MQ: 2301 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2302 break; 2303 case VIRTIO_NET_CTRL_VLAN: 2304 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2305 break; 2306 default: 2307 break; 2308 } 2309 2310 /* Make sure data is written before advancing index */ 2311 smp_wmb(); 2312 2313 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2314 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2315 vringh_kiov_cleanup(&cvq->riov); 2316 vringh_kiov_cleanup(&cvq->wiov); 2317 2318 if (vringh_need_notify_iotlb(&cvq->vring)) 2319 vringh_notify(&cvq->vring); 2320 2321 cvq->completed_desc++; 2322 queue_work(mvdev->wq, &wqent->work); 2323 break; 2324 } 2325 2326 out: 2327 up_write(&ndev->reslock); 2328 } 2329 2330 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2331 { 2332 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2333 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2334 struct mlx5_vdpa_virtqueue *mvq; 2335 2336 if (!is_index_valid(mvdev, idx)) 2337 return; 2338 2339 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2340 if (!mvdev->wq || !mvdev->cvq.ready) 2341 return; 2342 2343 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2344 return; 2345 } 2346 2347 mvq = &ndev->vqs[idx]; 2348 if (unlikely(!mvq->ready)) 2349 return; 2350 2351 iowrite16(idx, ndev->mvdev.res.kick_addr); 2352 } 2353 2354 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2355 u64 driver_area, u64 device_area) 2356 { 2357 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2358 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2359 struct mlx5_vdpa_virtqueue *mvq; 2360 2361 if (!is_index_valid(mvdev, idx)) 2362 return -EINVAL; 2363 2364 if (is_ctrl_vq_idx(mvdev, idx)) { 2365 mvdev->cvq.desc_addr = desc_area; 2366 mvdev->cvq.device_addr = device_area; 2367 mvdev->cvq.driver_addr = driver_area; 2368 return 0; 2369 } 2370 2371 mvq = &ndev->vqs[idx]; 2372 mvq->desc_addr = desc_area; 2373 mvq->device_addr = device_area; 2374 mvq->driver_addr = driver_area; 2375 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; 2376 return 0; 2377 } 2378 2379 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2380 { 2381 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2382 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2383 struct mlx5_vdpa_virtqueue *mvq; 2384 2385 if (!is_index_valid(mvdev, idx)) 2386 return; 2387 2388 if (is_ctrl_vq_idx(mvdev, idx)) { 2389 struct mlx5_control_vq *cvq = &mvdev->cvq; 2390 2391 cvq->vring.vring.num = num; 2392 return; 2393 } 2394 2395 mvq = &ndev->vqs[idx]; 2396 ndev->needs_teardown = num != mvq->num_ent; 2397 mvq->num_ent = num; 2398 } 2399 2400 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2401 { 2402 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2403 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2404 2405 ndev->event_cbs[idx] = *cb; 2406 if (is_ctrl_vq_idx(mvdev, idx)) 
2407 mvdev->cvq.event_cb = *cb; 2408 } 2409 2410 static void mlx5_cvq_notify(struct vringh *vring) 2411 { 2412 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2413 2414 if (!cvq->event_cb.callback) 2415 return; 2416 2417 cvq->event_cb.callback(cvq->event_cb.private); 2418 } 2419 2420 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2421 { 2422 struct mlx5_control_vq *cvq = &mvdev->cvq; 2423 2424 cvq->ready = ready; 2425 if (!ready) 2426 return; 2427 2428 cvq->vring.notify = mlx5_cvq_notify; 2429 } 2430 2431 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2432 { 2433 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2434 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2435 struct mlx5_vdpa_virtqueue *mvq; 2436 2437 if (!mvdev->actual_features) 2438 return; 2439 2440 if (!is_index_valid(mvdev, idx)) 2441 return; 2442 2443 if (is_ctrl_vq_idx(mvdev, idx)) { 2444 set_cvq_ready(mvdev, ready); 2445 return; 2446 } 2447 2448 mvq = &ndev->vqs[idx]; 2449 if (!ready) { 2450 suspend_vq(ndev, mvq); 2451 } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) { 2452 if (resume_vq(ndev, mvq)) 2453 ready = false; 2454 } 2455 2456 mvq->ready = ready; 2457 } 2458 2459 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2460 { 2461 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2462 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2463 2464 if (!is_index_valid(mvdev, idx)) 2465 return false; 2466 2467 if (is_ctrl_vq_idx(mvdev, idx)) 2468 return mvdev->cvq.ready; 2469 2470 return ndev->vqs[idx].ready; 2471 } 2472 2473 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2474 const struct vdpa_vq_state *state) 2475 { 2476 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2477 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2478 struct mlx5_vdpa_virtqueue *mvq; 2479 2480 if (!is_index_valid(mvdev, idx)) 2481 return -EINVAL; 2482 2483 if (is_ctrl_vq_idx(mvdev, idx)) { 2484 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2485 return 0; 2486 } 2487 2488 mvq = &ndev->vqs[idx]; 2489 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2490 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2491 return -EINVAL; 2492 } 2493 2494 mvq->used_idx = state->split.avail_index; 2495 mvq->avail_idx = state->split.avail_index; 2496 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | 2497 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; 2498 return 0; 2499 } 2500 2501 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2502 { 2503 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2504 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2505 struct mlx5_vdpa_virtqueue *mvq; 2506 struct mlx5_virtq_attr attr; 2507 int err; 2508 2509 if (!is_index_valid(mvdev, idx)) 2510 return -EINVAL; 2511 2512 if (is_ctrl_vq_idx(mvdev, idx)) { 2513 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2514 return 0; 2515 } 2516 2517 mvq = &ndev->vqs[idx]; 2518 /* If the virtq object was destroyed, use the value saved at 2519 * the last minute of suspend_vq. This caters for userspace 2520 * that cares about emulating the index after vq is stopped. 2521 */ 2522 if (!mvq->initialized) { 2523 /* Firmware returns a wrong value for the available index. 2524 * Since both values should be identical, we take the value of 2525 * used_idx which is reported correctly. 
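* (Presumably, once the queue has been suspended the device has completed
* everything it fetched, so its available and used counters match and
* used_idx is a safe substitute.)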
2526 */ 2527 state->split.avail_index = mvq->used_idx; 2528 return 0; 2529 } 2530 2531 err = query_virtqueue(ndev, mvq, &attr); 2532 if (err) { 2533 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2534 return err; 2535 } 2536 state->split.avail_index = attr.used_index; 2537 return 0; 2538 } 2539 2540 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2541 { 2542 return PAGE_SIZE; 2543 } 2544 2545 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2546 { 2547 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2548 2549 if (is_ctrl_vq_idx(mvdev, idx)) 2550 return MLX5_VDPA_CVQ_GROUP; 2551 2552 return MLX5_VDPA_DATAVQ_GROUP; 2553 } 2554 2555 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx) 2556 { 2557 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2558 2559 if (is_ctrl_vq_idx(mvdev, idx)) 2560 return MLX5_VDPA_CVQ_GROUP; 2561 2562 return MLX5_VDPA_DATAVQ_DESC_GROUP; 2563 } 2564 2565 static u64 mlx_to_vritio_features(u16 dev_features) 2566 { 2567 u64 result = 0; 2568 2569 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2570 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2571 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2572 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2573 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2574 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2575 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2576 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2577 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2578 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2579 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2580 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2581 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2582 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2583 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2584 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2585 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2586 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2587 2588 return result; 2589 } 2590 2591 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2592 { 2593 u64 mlx_vdpa_features = 0; 2594 u16 dev_features; 2595 2596 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2597 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2598 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2599 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2600 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2601 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2602 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2603 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2604 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2605 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2606 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2607 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2608 2609 return mlx_vdpa_features; 2610 } 2611 2612 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2613 { 2614 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2615 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2616 2617 print_features(mvdev, ndev->mvdev.mlx_features, false); 2618 return ndev->mvdev.mlx_features; 2619 } 2620 2621 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2622 { 2623 /* Minimum features to expect */ 2624 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2625 return -EOPNOTSUPP; 2626 2627 /* Double check features combination sent down by the driver. 
2628 * Fail feature combinations in which a feature's dependency is absent.
2629 *
2630 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2631 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2632 * By rejecting invalid feature combinations sent down by untrusted drivers,
2633 * we make sure the assumptions made by is_index_valid() and
2634 * is_ctrl_vq_idx() are not violated.
2635 */
2636 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2637 BIT_ULL(VIRTIO_NET_F_MQ))
2638 return -EINVAL;
2639
2640 return 0;
2641 }
2642
2643 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2644 {
2645 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2646 int err;
2647 int i;
2648
2649 for (i = 0; i < mvdev->max_vqs; i++) {
2650 err = setup_vq(ndev, &ndev->vqs[i], filled);
2651 if (err)
2652 goto err_vq;
2653 }
2654
2655 return 0;
2656
2657 err_vq:
2658 for (--i; i >= 0; i--)
2659 teardown_vq(ndev, &ndev->vqs[i]);
2660
2661 return err;
2662 }
2663
2664 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2665 {
2666 int i;
2667
2668 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2669 teardown_vq(ndev, &ndev->vqs[i]);
2670 }
2671
2672 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2673 {
2674 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2675 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2676 /* MQ supported. CVQ index is right above the last data virtqueue's */
2677 mvdev->max_idx = mvdev->max_vqs;
2678 } else {
2679 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2680 * the CVQ gets index 2.
2681 */
2682 mvdev->max_idx = 2;
2683 }
2684 } else {
2685 /* Two data virtqueues only: one for rx and one for tx */
2686 mvdev->max_idx = 1;
2687 }
2688 }
2689
2690 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2691 {
2692 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2693 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2694 int err;
2695
2696 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2697 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2698 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2699 if (vport)
2700 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2701
2702 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2703 if (err)
2704 return 0;
2705
2706 return MLX5_GET(query_vport_state_out, out, state);
2707 }
2708
2709 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2710 {
2711 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2712 VPORT_STATE_UP)
2713 return true;
2714
2715 return false;
2716 }
2717
2718 static void update_carrier(struct work_struct *work)
2719 {
2720 struct mlx5_vdpa_wq_ent *wqent;
2721 struct mlx5_vdpa_dev *mvdev;
2722 struct mlx5_vdpa_net *ndev;
2723
2724 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2725 mvdev = wqent->mvdev;
2726 ndev = to_mlx5_vdpa_ndev(mvdev);
2727 if (get_link_state(mvdev))
2728 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2729 else
2730 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2731
2732 if (ndev->config_cb.callback)
2733 ndev->config_cb.callback(ndev->config_cb.private);
2734
2735 kfree(wqent);
2736 }
2737
2738 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2739 {
2740 struct mlx5_vdpa_wq_ent *wqent;
2741
2742 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2743 if (!wqent)
2744 return -ENOMEM;
2745
2746 wqent->mvdev = &ndev->mvdev;
2747 INIT_WORK(&wqent->work,
update_carrier); 2748 queue_work(ndev->mvdev.wq, &wqent->work); 2749 return 0; 2750 } 2751 2752 static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2753 { 2754 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2755 struct mlx5_eqe *eqe = param; 2756 int ret = NOTIFY_DONE; 2757 2758 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2759 switch (eqe->sub_type) { 2760 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2761 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2762 if (queue_link_work(ndev)) 2763 return NOTIFY_DONE; 2764 2765 ret = NOTIFY_OK; 2766 break; 2767 default: 2768 return NOTIFY_DONE; 2769 } 2770 return ret; 2771 } 2772 return ret; 2773 } 2774 2775 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2776 { 2777 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2778 return; 2779 2780 ndev->nb.notifier_call = event_handler; 2781 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2782 ndev->nb_registered = true; 2783 queue_link_work(ndev); 2784 } 2785 2786 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2787 { 2788 if (!ndev->nb_registered) 2789 return; 2790 2791 ndev->nb_registered = false; 2792 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2793 if (ndev->mvdev.wq) 2794 flush_workqueue(ndev->mvdev.wq); 2795 } 2796 2797 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa) 2798 { 2799 return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); 2800 } 2801 2802 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2803 { 2804 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2805 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2806 u64 old_features = mvdev->actual_features; 2807 u64 diff_features; 2808 int err; 2809 2810 print_features(mvdev, features, true); 2811 2812 err = verify_driver_features(mvdev, features); 2813 if (err) 2814 return err; 2815 2816 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2817 2818 /* Interested in changes of vq features only. */ 2819 if (get_features(old_features) != get_features(mvdev->actual_features)) { 2820 for (int i = 0; i < mvdev->max_vqs; ++i) { 2821 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; 2822 2823 mvq->modified_fields |= ( 2824 MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION | 2825 MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES 2826 ); 2827 } 2828 } 2829 2830 /* When below features diverge from initial device features, VQs need a full teardown. 
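* Unlike the feature bits handled above via the VQ modify mask, these bits
* presumably affect how the virtqueue objects are created, so a change
* requires recreating the VQs rather than modifying them in place.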
*/ 2831 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \ 2832 BIT_ULL(VIRTIO_NET_F_CSUM) | \ 2833 BIT_ULL(VIRTIO_F_VERSION_1)) 2834 2835 diff_features = mvdev->mlx_features ^ mvdev->actual_features; 2836 ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK); 2837 2838 update_cvq_info(mvdev); 2839 return err; 2840 } 2841 2842 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2843 { 2844 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2845 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2846 2847 ndev->config_cb = *cb; 2848 } 2849 2850 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2851 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2852 { 2853 return MLX5_VDPA_MAX_VQ_ENTRIES; 2854 } 2855 2856 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2857 { 2858 return VIRTIO_ID_NET; 2859 } 2860 2861 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2862 { 2863 return PCI_VENDOR_ID_MELLANOX; 2864 } 2865 2866 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2867 { 2868 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2869 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2870 2871 print_status(mvdev, ndev->mvdev.status, false); 2872 return ndev->mvdev.status; 2873 } 2874 2875 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2876 { 2877 struct mlx5_vq_restore_info *ri = &mvq->ri; 2878 struct mlx5_virtq_attr attr = {}; 2879 int err; 2880 2881 if (mvq->initialized) { 2882 err = query_virtqueue(ndev, mvq, &attr); 2883 if (err) 2884 return err; 2885 } 2886 2887 ri->avail_index = attr.available_index; 2888 ri->used_index = attr.used_index; 2889 ri->ready = mvq->ready; 2890 ri->num_ent = mvq->num_ent; 2891 ri->desc_addr = mvq->desc_addr; 2892 ri->device_addr = mvq->device_addr; 2893 ri->driver_addr = mvq->driver_addr; 2894 ri->map = mvq->map; 2895 ri->restore = true; 2896 return 0; 2897 } 2898 2899 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2900 { 2901 int i; 2902 2903 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2904 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2905 save_channel_info(ndev, &ndev->vqs[i]); 2906 } 2907 return 0; 2908 } 2909 2910 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2911 { 2912 int i; 2913 2914 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2915 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2916 } 2917 2918 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2919 { 2920 struct mlx5_vdpa_virtqueue *mvq; 2921 struct mlx5_vq_restore_info *ri; 2922 int i; 2923 2924 mlx5_clear_vqs(ndev); 2925 mvqs_set_defaults(ndev); 2926 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2927 mvq = &ndev->vqs[i]; 2928 ri = &mvq->ri; 2929 if (!ri->restore) 2930 continue; 2931 2932 mvq->avail_idx = ri->avail_index; 2933 mvq->used_idx = ri->used_index; 2934 mvq->ready = ri->ready; 2935 mvq->num_ent = ri->num_ent; 2936 mvq->desc_addr = ri->desc_addr; 2937 mvq->device_addr = ri->device_addr; 2938 mvq->driver_addr = ri->driver_addr; 2939 mvq->map = ri->map; 2940 } 2941 } 2942 2943 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2944 struct mlx5_vdpa_mr *new_mr, 2945 unsigned int asid) 2946 { 2947 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2948 bool teardown = !is_resumable(ndev); 2949 int err; 2950 2951 suspend_vqs(ndev); 2952 if (teardown) { 2953 err = save_channels_info(ndev); 2954 if (err) 2955 return err; 2956 2957 teardown_vq_resources(ndev); 2958 } 2959 2960 mlx5_vdpa_update_mr(mvdev, 
new_mr, asid); 2961 2962 for (int i = 0; i < mvdev->max_vqs; i++) 2963 ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | 2964 MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 2965 2966 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2967 return 0; 2968 2969 if (teardown) { 2970 restore_channels_info(ndev); 2971 err = setup_vq_resources(ndev, true); 2972 if (err) 2973 return err; 2974 } 2975 2976 resume_vqs(ndev); 2977 2978 return 0; 2979 } 2980 2981 /* reslock must be held for this function */ 2982 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled) 2983 { 2984 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 2985 int err; 2986 2987 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2988 2989 if (ndev->setup) { 2990 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2991 err = 0; 2992 goto out; 2993 } 2994 mlx5_vdpa_add_debugfs(ndev); 2995 2996 err = read_umem_params(ndev); 2997 if (err) 2998 goto err_setup; 2999 3000 err = setup_virtqueues(mvdev, filled); 3001 if (err) { 3002 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 3003 goto err_setup; 3004 } 3005 3006 err = create_rqt(ndev); 3007 if (err) { 3008 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 3009 goto err_rqt; 3010 } 3011 3012 err = create_tir(ndev); 3013 if (err) { 3014 mlx5_vdpa_warn(mvdev, "create_tir\n"); 3015 goto err_tir; 3016 } 3017 3018 err = setup_steering(ndev); 3019 if (err) { 3020 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 3021 goto err_fwd; 3022 } 3023 ndev->setup = true; 3024 3025 return 0; 3026 3027 err_fwd: 3028 destroy_tir(ndev); 3029 err_tir: 3030 destroy_rqt(ndev); 3031 err_rqt: 3032 teardown_virtqueues(ndev); 3033 err_setup: 3034 mlx5_vdpa_remove_debugfs(ndev); 3035 out: 3036 return err; 3037 } 3038 3039 /* reslock must be held for this function */ 3040 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev) 3041 { 3042 3043 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 3044 3045 if (!ndev->setup) 3046 return; 3047 3048 mlx5_vdpa_remove_debugfs(ndev); 3049 teardown_steering(ndev); 3050 destroy_tir(ndev); 3051 destroy_rqt(ndev); 3052 teardown_virtqueues(ndev); 3053 ndev->setup = false; 3054 ndev->needs_teardown = false; 3055 } 3056 3057 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 3058 { 3059 struct mlx5_control_vq *cvq = &mvdev->cvq; 3060 int err = 0; 3061 3062 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { 3063 u16 idx = cvq->vring.last_avail_idx; 3064 3065 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 3066 cvq->vring.vring.num, false, 3067 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 3068 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 3069 (struct vring_used *)(uintptr_t)cvq->device_addr); 3070 3071 if (!err) 3072 cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; 3073 } 3074 return err; 3075 } 3076 3077 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 3078 { 3079 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3080 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3081 int err; 3082 3083 print_status(mvdev, status, true); 3084 3085 down_write(&ndev->reslock); 3086 3087 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 3088 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 3089 err = setup_cvq_vring(mvdev); 3090 if (err) { 3091 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 3092 goto err_setup; 3093 } 3094 register_link_notifier(ndev); 3095 3096 if (ndev->needs_teardown) 3097 teardown_vq_resources(ndev); 3098 3099 if (ndev->setup) { 3100 err = 
resume_vqs(ndev); 3101 if (err) { 3102 mlx5_vdpa_warn(mvdev, "failed to resume VQs\n"); 3103 goto err_driver; 3104 } 3105 } else { 3106 err = setup_vq_resources(ndev, true); 3107 if (err) { 3108 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 3109 goto err_driver; 3110 } 3111 } 3112 } else { 3113 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 3114 goto err_clear; 3115 } 3116 } 3117 3118 ndev->mvdev.status = status; 3119 up_write(&ndev->reslock); 3120 return; 3121 3122 err_driver: 3123 unregister_link_notifier(ndev); 3124 err_setup: 3125 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 3126 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 3127 err_clear: 3128 up_write(&ndev->reslock); 3129 } 3130 3131 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 3132 { 3133 int i; 3134 3135 /* default mapping all groups are mapped to asid 0 */ 3136 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 3137 mvdev->group2asid[i] = 0; 3138 } 3139 3140 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev) 3141 { 3142 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3143 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0]; 3144 3145 if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) 3146 return true; 3147 3148 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT) 3149 return true; 3150 3151 return mvq->modified_fields & ( 3152 MLX5_VIRTQ_MODIFY_MASK_STATE | 3153 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS | 3154 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | 3155 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX 3156 ); 3157 } 3158 3159 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) 3160 { 3161 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3162 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3163 bool vq_reset; 3164 3165 print_status(mvdev, 0, true); 3166 mlx5_vdpa_info(mvdev, "performing device reset\n"); 3167 3168 down_write(&ndev->reslock); 3169 unregister_link_notifier(ndev); 3170 vq_reset = needs_vqs_reset(mvdev); 3171 if (vq_reset) { 3172 teardown_vq_resources(ndev); 3173 mvqs_set_defaults(ndev); 3174 } 3175 3176 if (flags & VDPA_RESET_F_CLEAN_MAP) 3177 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 3178 ndev->mvdev.status = 0; 3179 ndev->mvdev.suspended = false; 3180 ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; 3181 ndev->mvdev.cvq.ready = false; 3182 ndev->mvdev.cvq.received_desc = 0; 3183 ndev->mvdev.cvq.completed_desc = 0; 3184 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 3185 ndev->mvdev.actual_features = 0; 3186 init_group_to_asid_map(mvdev); 3187 ++mvdev->generation; 3188 3189 if ((flags & VDPA_RESET_F_CLEAN_MAP) && 3190 MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3191 if (mlx5_vdpa_create_dma_mr(mvdev)) 3192 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 3193 } 3194 if (vq_reset) 3195 setup_vq_resources(ndev, false); 3196 up_write(&ndev->reslock); 3197 3198 return 0; 3199 } 3200 3201 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 3202 { 3203 return mlx5_vdpa_compat_reset(vdev, 0); 3204 } 3205 3206 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 3207 { 3208 return sizeof(struct virtio_net_config); 3209 } 3210 3211 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 3212 unsigned int len) 3213 { 3214 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3215 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3216 3217 if (offset + len <= sizeof(struct virtio_net_config)) 3218 memcpy(buf, (u8 *)&ndev->config + offset, len); 3219 } 3220 3221 static void 
mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 3222 unsigned int len) 3223 { 3224 /* not supported */ 3225 } 3226 3227 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 3228 { 3229 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3230 3231 return mvdev->generation; 3232 } 3233 3234 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 3235 unsigned int asid) 3236 { 3237 struct mlx5_vdpa_mr *new_mr; 3238 int err; 3239 3240 if (asid >= MLX5_VDPA_NUM_AS) 3241 return -EINVAL; 3242 3243 if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) { 3244 new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); 3245 if (IS_ERR(new_mr)) { 3246 err = PTR_ERR(new_mr); 3247 mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err); 3248 return err; 3249 } 3250 } else { 3251 /* Empty iotlbs don't have an mr but will clear the previous mr. */ 3252 new_mr = NULL; 3253 } 3254 3255 if (!mvdev->mr[asid]) { 3256 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 3257 } else { 3258 err = mlx5_vdpa_change_map(mvdev, new_mr, asid); 3259 if (err) { 3260 mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err); 3261 goto out_err; 3262 } 3263 } 3264 3265 return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); 3266 3267 out_err: 3268 mlx5_vdpa_put_mr(mvdev, new_mr); 3269 return err; 3270 } 3271 3272 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 3273 struct vhost_iotlb *iotlb) 3274 { 3275 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3276 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3277 int err = -EINVAL; 3278 3279 down_write(&ndev->reslock); 3280 err = set_map_data(mvdev, iotlb, asid); 3281 up_write(&ndev->reslock); 3282 return err; 3283 } 3284 3285 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) 3286 { 3287 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3288 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3289 int err; 3290 3291 down_write(&ndev->reslock); 3292 err = mlx5_vdpa_reset_mr(mvdev, asid); 3293 up_write(&ndev->reslock); 3294 return err; 3295 } 3296 3297 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 3298 { 3299 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3300 3301 if (is_ctrl_vq_idx(mvdev, idx)) 3302 return &vdev->dev; 3303 3304 return mvdev->vdev.dma_dev; 3305 } 3306 3307 static void free_irqs(struct mlx5_vdpa_net *ndev) 3308 { 3309 struct mlx5_vdpa_irq_pool_entry *ent; 3310 int i; 3311 3312 if (!msix_mode_supported(&ndev->mvdev)) 3313 return; 3314 3315 if (!ndev->irqp.entries) 3316 return; 3317 3318 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 3319 ent = ndev->irqp.entries + i; 3320 if (ent->map.virq) 3321 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 3322 } 3323 kfree(ndev->irqp.entries); 3324 } 3325 3326 static void mlx5_vdpa_free(struct vdpa_device *vdev) 3327 { 3328 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3329 struct mlx5_core_dev *pfmdev; 3330 struct mlx5_vdpa_net *ndev; 3331 3332 ndev = to_mlx5_vdpa_ndev(mvdev); 3333 3334 free_fixed_resources(ndev); 3335 mlx5_vdpa_destroy_mr_resources(mvdev); 3336 if (!is_zero_ether_addr(ndev->config.mac)) { 3337 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 3338 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 3339 } 3340 mlx5_vdpa_free_resources(&ndev->mvdev); 3341 free_irqs(ndev); 3342 kfree(ndev->event_cbs); 3343 kfree(ndev->vqs); 3344 } 3345 3346 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 3347 { 3348 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 
3349 struct vdpa_notification_area ret = {}; 3350 struct mlx5_vdpa_net *ndev; 3351 phys_addr_t addr; 3352 3353 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 3354 return ret; 3355 3356 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 3357 * notification to avoid the risk of mapping pages that contain BAR of more 3358 * than one SF 3359 */ 3360 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 3361 return ret; 3362 3363 ndev = to_mlx5_vdpa_ndev(mvdev); 3364 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 3365 ret.addr = addr; 3366 ret.size = PAGE_SIZE; 3367 return ret; 3368 } 3369 3370 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 3371 { 3372 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3373 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3374 struct mlx5_vdpa_virtqueue *mvq; 3375 3376 if (!is_index_valid(mvdev, idx)) 3377 return -EINVAL; 3378 3379 if (is_ctrl_vq_idx(mvdev, idx)) 3380 return -EOPNOTSUPP; 3381 3382 mvq = &ndev->vqs[idx]; 3383 if (!mvq->map.virq) 3384 return -EOPNOTSUPP; 3385 3386 return mvq->map.virq; 3387 } 3388 3389 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 3390 { 3391 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3392 3393 return mvdev->actual_features; 3394 } 3395 3396 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3397 u64 *received_desc, u64 *completed_desc) 3398 { 3399 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3400 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3401 void *cmd_hdr; 3402 void *ctx; 3403 int err; 3404 3405 if (!counters_supported(&ndev->mvdev)) 3406 return -EOPNOTSUPP; 3407 3408 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3409 return -EAGAIN; 3410 3411 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3412 3413 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3414 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3415 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3416 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3417 3418 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3419 if (err) 3420 return err; 3421 3422 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3423 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3424 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3425 return 0; 3426 } 3427 3428 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3429 struct sk_buff *msg, 3430 struct netlink_ext_ack *extack) 3431 { 3432 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3433 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3434 struct mlx5_vdpa_virtqueue *mvq; 3435 struct mlx5_control_vq *cvq; 3436 u64 received_desc; 3437 u64 completed_desc; 3438 int err = 0; 3439 3440 down_read(&ndev->reslock); 3441 if (!is_index_valid(mvdev, idx)) { 3442 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3443 err = -EINVAL; 3444 goto out_err; 3445 } 3446 3447 if (idx == ctrl_vq_idx(mvdev)) { 3448 cvq = &mvdev->cvq; 3449 received_desc = cvq->received_desc; 3450 completed_desc = cvq->completed_desc; 3451 goto out; 3452 } 3453 3454 mvq = &ndev->vqs[idx]; 3455 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3456 if (err) { 3457 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3458 goto out_err; 3459 } 3460 3461 out: 3462 err = 
-EMSGSIZE; 3463 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3464 goto out_err; 3465 3466 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3467 VDPA_ATTR_PAD)) 3468 goto out_err; 3469 3470 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3471 goto out_err; 3472 3473 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3474 VDPA_ATTR_PAD)) 3475 goto out_err; 3476 3477 err = 0; 3478 out_err: 3479 up_read(&ndev->reslock); 3480 return err; 3481 } 3482 3483 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3484 { 3485 struct mlx5_control_vq *cvq; 3486 3487 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3488 return; 3489 3490 cvq = &mvdev->cvq; 3491 cvq->ready = false; 3492 } 3493 3494 static int mlx5_vdpa_suspend(struct vdpa_device *vdev) 3495 { 3496 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3497 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3498 int err; 3499 3500 mlx5_vdpa_info(mvdev, "suspending device\n"); 3501 3502 down_write(&ndev->reslock); 3503 unregister_link_notifier(ndev); 3504 err = suspend_vqs(ndev); 3505 mlx5_vdpa_cvq_suspend(mvdev); 3506 mvdev->suspended = true; 3507 up_write(&ndev->reslock); 3508 3509 return err; 3510 } 3511 3512 static int mlx5_vdpa_resume(struct vdpa_device *vdev) 3513 { 3514 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3515 struct mlx5_vdpa_net *ndev; 3516 int err; 3517 3518 ndev = to_mlx5_vdpa_ndev(mvdev); 3519 3520 mlx5_vdpa_info(mvdev, "resuming device\n"); 3521 3522 down_write(&ndev->reslock); 3523 mvdev->suspended = false; 3524 err = resume_vqs(ndev); 3525 register_link_notifier(ndev); 3526 up_write(&ndev->reslock); 3527 3528 return err; 3529 } 3530 3531 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3532 unsigned int asid) 3533 { 3534 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3535 int err = 0; 3536 3537 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3538 return -EINVAL; 3539 3540 mvdev->group2asid[group] = asid; 3541 3542 mutex_lock(&mvdev->mr_mtx); 3543 if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid]) 3544 err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid); 3545 mutex_unlock(&mvdev->mr_mtx); 3546 3547 return err; 3548 } 3549 3550 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3551 .set_vq_address = mlx5_vdpa_set_vq_address, 3552 .set_vq_num = mlx5_vdpa_set_vq_num, 3553 .kick_vq = mlx5_vdpa_kick_vq, 3554 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3555 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3556 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3557 .set_vq_state = mlx5_vdpa_set_vq_state, 3558 .get_vq_state = mlx5_vdpa_get_vq_state, 3559 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3560 .get_vq_notification = mlx5_get_vq_notification, 3561 .get_vq_irq = mlx5_get_vq_irq, 3562 .get_vq_align = mlx5_vdpa_get_vq_align, 3563 .get_vq_group = mlx5_vdpa_get_vq_group, 3564 .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. 
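* (The management code is expected to clear this op at device-add time when
* the firmware does not expose the corresponding capability.)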
*/ 3565 .get_device_features = mlx5_vdpa_get_device_features, 3566 .get_backend_features = mlx5_vdpa_get_backend_features, 3567 .set_driver_features = mlx5_vdpa_set_driver_features, 3568 .get_driver_features = mlx5_vdpa_get_driver_features, 3569 .set_config_cb = mlx5_vdpa_set_config_cb, 3570 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3571 .get_device_id = mlx5_vdpa_get_device_id, 3572 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3573 .get_status = mlx5_vdpa_get_status, 3574 .set_status = mlx5_vdpa_set_status, 3575 .reset = mlx5_vdpa_reset, 3576 .compat_reset = mlx5_vdpa_compat_reset, 3577 .get_config_size = mlx5_vdpa_get_config_size, 3578 .get_config = mlx5_vdpa_get_config, 3579 .set_config = mlx5_vdpa_set_config, 3580 .get_generation = mlx5_vdpa_get_generation, 3581 .set_map = mlx5_vdpa_set_map, 3582 .reset_map = mlx5_vdpa_reset_map, 3583 .set_group_asid = mlx5_set_group_asid, 3584 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3585 .free = mlx5_vdpa_free, 3586 .suspend = mlx5_vdpa_suspend, 3587 .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ 3588 }; 3589 3590 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3591 { 3592 u16 hw_mtu; 3593 int err; 3594 3595 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3596 if (err) 3597 return err; 3598 3599 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3600 return 0; 3601 } 3602 3603 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev) 3604 { 3605 struct mlx5_vdpa_net_resources *res = &ndev->res; 3606 int err; 3607 3608 if (res->valid) { 3609 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3610 return -EEXIST; 3611 } 3612 3613 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3614 if (err) 3615 return err; 3616 3617 err = create_tis(ndev); 3618 if (err) 3619 goto err_tis; 3620 3621 res->valid = true; 3622 3623 return 0; 3624 3625 err_tis: 3626 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3627 return err; 3628 } 3629 3630 static void free_fixed_resources(struct mlx5_vdpa_net *ndev) 3631 { 3632 struct mlx5_vdpa_net_resources *res = &ndev->res; 3633 3634 if (!res->valid) 3635 return; 3636 3637 destroy_tis(ndev); 3638 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3639 res->valid = false; 3640 } 3641 3642 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev) 3643 { 3644 struct mlx5_vdpa_virtqueue *mvq; 3645 int i; 3646 3647 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3648 mvq = &ndev->vqs[i]; 3649 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3650 mvq->index = i; 3651 mvq->ndev = ndev; 3652 mvq->fwqp.fw = true; 3653 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3654 mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE; 3655 } 3656 } 3657 3658 struct mlx5_vdpa_mgmtdev { 3659 struct vdpa_mgmt_dev mgtdev; 3660 struct mlx5_adev *madev; 3661 struct mlx5_vdpa_net *ndev; 3662 struct vdpa_config_ops vdpa_ops; 3663 }; 3664 3665 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3666 { 3667 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3668 void *in; 3669 int err; 3670 3671 in = kvzalloc(inlen, GFP_KERNEL); 3672 if (!in) 3673 return -ENOMEM; 3674 3675 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3676 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3677 mtu + MLX5V_ETH_HARD_MTU); 3678 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3679 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3680 3681 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3682 3683 kvfree(in); 3684 return err; 3685 } 3686 3687 static void allocate_irqs(struct 
mlx5_vdpa_net *ndev) 3688 { 3689 struct mlx5_vdpa_irq_pool_entry *ent; 3690 int i; 3691 3692 if (!msix_mode_supported(&ndev->mvdev)) 3693 return; 3694 3695 if (!ndev->mvdev.mdev->pdev) 3696 return; 3697 3698 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); 3699 if (!ndev->irqp.entries) 3700 return; 3701 3702 3703 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3704 ent = ndev->irqp.entries + i; 3705 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3706 dev_name(&ndev->mvdev.vdev.dev), i); 3707 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3708 if (!ent->map.virq) 3709 return; 3710 3711 ndev->irqp.num_ent++; 3712 } 3713 } 3714 3715 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3716 const struct vdpa_dev_set_config *add_config) 3717 { 3718 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3719 struct virtio_net_config *config; 3720 struct mlx5_core_dev *pfmdev; 3721 struct mlx5_vdpa_dev *mvdev; 3722 struct mlx5_vdpa_net *ndev; 3723 struct mlx5_core_dev *mdev; 3724 u64 device_features; 3725 u32 max_vqs; 3726 u16 mtu; 3727 int err; 3728 3729 if (mgtdev->ndev) 3730 return -ENOSPC; 3731 3732 mdev = mgtdev->madev->mdev; 3733 device_features = mgtdev->mgtdev.supported_features; 3734 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3735 if (add_config->device_features & ~device_features) { 3736 dev_warn(mdev->device, 3737 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3738 add_config->device_features, device_features); 3739 return -EINVAL; 3740 } 3741 device_features &= add_config->device_features; 3742 } else { 3743 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3744 } 3745 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3746 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3747 dev_warn(mdev->device, 3748 "Must provision minimum features 0x%llx for this device", 3749 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3750 return -EOPNOTSUPP; 3751 } 3752 3753 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 3754 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3755 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3756 return -EOPNOTSUPP; 3757 } 3758 3759 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3760 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3761 if (max_vqs < 2) { 3762 dev_warn(mdev->device, 3763 "%d virtqueues are supported. 
static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
                             const struct vdpa_dev_set_config *add_config)
{
        struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
        struct virtio_net_config *config;
        struct mlx5_core_dev *pfmdev;
        struct mlx5_vdpa_dev *mvdev;
        struct mlx5_vdpa_net *ndev;
        struct mlx5_core_dev *mdev;
        u64 device_features;
        u32 max_vqs;
        u16 mtu;
        int err;

        if (mgtdev->ndev)
                return -ENOSPC;

        mdev = mgtdev->madev->mdev;
        device_features = mgtdev->mgtdev.supported_features;
        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
                if (add_config->device_features & ~device_features) {
                        dev_warn(mdev->device,
                                 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
                                 add_config->device_features, device_features);
                        return -EINVAL;
                }
                device_features &= add_config->device_features;
        } else {
                device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
        }
        if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
              device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
                dev_warn(mdev->device,
                         "Must provision minimum features 0x%llx for this device",
                         BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
                return -EOPNOTSUPP;
        }

        if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
              MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
                dev_warn(mdev->device, "missing support for split virtqueues\n");
                return -EOPNOTSUPP;
        }

        max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
                        1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
        if (max_vqs < 2) {
                dev_warn(mdev->device,
                         "%d virtqueues are supported. At least 2 are required\n",
                         max_vqs);
                return -EAGAIN;
        }

        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
                if (add_config->net.max_vq_pairs > max_vqs / 2)
                        return -EINVAL;
                max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
        } else {
                max_vqs = 2;
        }

        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
                                 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
        if (IS_ERR(ndev))
                return PTR_ERR(ndev);

        ndev->mvdev.max_vqs = max_vqs;
        mvdev = &ndev->mvdev;
        mvdev->mdev = mdev;

        ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
        ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
        if (!ndev->vqs || !ndev->event_cbs) {
                err = -ENOMEM;
                goto err_alloc;
        }
        ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;

        mvqs_set_defaults(ndev);
        allocate_irqs(ndev);
        init_rwsem(&ndev->reslock);
        config = &ndev->config;

        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
                err = config_func_mtu(mdev, add_config->net.mtu);
                if (err)
                        goto err_alloc;
        }

        if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
                err = query_mtu(mdev, &mtu);
                if (err)
                        goto err_alloc;

                ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
        }

        if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
                if (get_link_state(mvdev))
                        ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
                else
                        ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
        }

        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
                memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
                /* No need to set the mac address in config if _F_MAC is not going to be provisioned */
        } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
                   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
                err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
                if (err)
                        goto err_alloc;
        }

        if (!is_zero_ether_addr(config->mac)) {
                pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
                err = mlx5_mpfs_add_mac(pfmdev, config->mac);
                if (err)
                        goto err_alloc;
        } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
                /*
                 * We used to clear the _F_MAC feature bit if seeing
                 * a zero mac address when device features are not
                 * specifically provisioned. Keep the behaviour
                 * so old scripts do not break.
                 */
                device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
        } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
                /* Don't provision zero mac address for _F_MAC */
                mlx5_vdpa_warn(&ndev->mvdev,
                               "No mac address provisioned?\n");
                err = -EINVAL;
                goto err_alloc;
        }

        if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
                config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
                ndev->rqt_size = max_vqs / 2;
        } else {
                ndev->rqt_size = 1;
        }

        ndev->mvdev.mlx_features = device_features;
        mvdev->vdev.dma_dev = &mdev->pdev->dev;
        err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
        if (err)
                goto err_mpfs;

        INIT_LIST_HEAD(&mvdev->mr_list_head);

        if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
                err = mlx5_vdpa_create_dma_mr(mvdev);
                if (err)
                        goto err_res;
        }

        err = alloc_fixed_resources(ndev);
        if (err)
                goto err_mr;

        ndev->cvq_ent.mvdev = mvdev;
        INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
        mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
        if (!mvdev->wq) {
                err = -ENOMEM;
                goto err_res2;
        }

        mvdev->vdev.mdev = &mgtdev->mgtdev;
        err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
        if (err)
                goto err_reg;

        mgtdev->ndev = ndev;

        /* For virtio-vdpa, the device was set up during device register. */
        if (ndev->setup)
                return 0;

        down_write(&ndev->reslock);
        err = setup_vq_resources(ndev, false);
        up_write(&ndev->reslock);
        if (err)
                goto err_setup_vq_res;

        return 0;

err_setup_vq_res:
        _vdpa_unregister_device(&mvdev->vdev);
err_reg:
        destroy_workqueue(mvdev->wq);
err_res2:
        free_fixed_resources(ndev);
err_mr:
        mlx5_vdpa_destroy_mr_resources(mvdev);
err_res:
        mlx5_vdpa_free_resources(&ndev->mvdev);
err_mpfs:
        if (!is_zero_ether_addr(config->mac))
                mlx5_mpfs_del_mac(pfmdev, config->mac);
err_alloc:
        put_device(&mvdev->vdev.dev);
        return err;
}
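/* Management device .dev_del callback: undo mlx5_vdpa_dev_add(). The link
 * notifier is removed and the vdpa device unregistered first; virtqueue
 * resources are then torn down under reslock and the control-VQ workqueue
 * is destroyed.
 */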
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
        struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
        struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
        struct workqueue_struct *wq;

        unregister_link_notifier(ndev);
        _vdpa_unregister_device(dev);

        down_write(&ndev->reslock);
        teardown_vq_resources(ndev);
        up_write(&ndev->reslock);

        wq = mvdev->wq;
        mvdev->wq = NULL;
        destroy_workqueue(wq);
        mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
        .dev_add = mlx5_vdpa_dev_add,
        .dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
        { 0 },
};
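/* Auxiliary bus probe for the mlx5 ".vnet" sub-device: set up and register
 * the vdpa management device. The mlx5_vdpa_ops table is copied per
 * management device so that optional ops (get_vq_desc_group, resume) can be
 * cleared when the firmware does not advertise the corresponding capability.
 */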
static int mlx5v_probe(struct auxiliary_device *adev,
                       const struct auxiliary_device_id *id)
{
        struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
        struct mlx5_core_dev *mdev = madev->mdev;
        struct mlx5_vdpa_mgmtdev *mgtdev;
        int err;

        mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
        if (!mgtdev)
                return -ENOMEM;

        mgtdev->mgtdev.ops = &mdev_ops;
        mgtdev->mgtdev.device = mdev->device;
        mgtdev->mgtdev.id_table = id_table;
        mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
                                          BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
                                          BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
                                          BIT_ULL(VDPA_ATTR_DEV_FEATURES);
        mgtdev->mgtdev.max_supported_vqs =
                MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
        mgtdev->mgtdev.supported_features = get_supported_features(mdev);
        mgtdev->madev = madev;
        mgtdev->vdpa_ops = mlx5_vdpa_ops;

        if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
                mgtdev->vdpa_ops.get_vq_desc_group = NULL;

        if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
                mgtdev->vdpa_ops.resume = NULL;

        err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
        if (err)
                goto reg_err;

        auxiliary_set_drvdata(adev, mgtdev);

        return 0;

reg_err:
        kfree(mgtdev);
        return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
        struct mlx5_vdpa_mgmtdev *mgtdev;

        mgtdev = auxiliary_get_drvdata(adev);
        vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
        kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
        { .name = MLX5_ADEV_NAME ".vnet", },
        {},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
        .name = "vnet",
        .probe = mlx5v_probe,
        .remove = mlx5v_remove,
        .id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);