// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
 * 5.1.6.5.5 "Device operation in multiqueue mode":
 *
 * Multiqueue is disabled by default.
 * The driver enables multiqueue by sending a command using class
 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
 * operation, as follows: ...
 */
#define MLX5V_DEFAULT_VQ_COUNT 2

#define MLX5V_DEFAULT_VQ_SIZE 256

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	u64 modified_fields;

	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *desc_mr;

	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

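/* The control VQ, when present, is the last virtqueue: index 2 when
 * VIRTIO_NET_F_MQ is not negotiated (a single RX/TX pair), otherwise
 * max_vqs, right after all data virtqueues.
 */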
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}

static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

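	/* Prepare either the firmware end or the driver (vq) end of the RC
	 * pair; the firmware QP needs only minimal attributes, while the
	 * driver QP gets a full receive queue description.
	 */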
	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver for the firmware's
		 * use so we can skip part of the params as they will be chosen by firmware
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
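	/* Only the driver-side QP owns a doorbell record and an RQ buffer;
	 * the firmware QP has no driver-allocated resources to release.
	 */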
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it passes it on to the
			 * virtio driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
	 */
	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);

	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	vcq->mcq.comp = mlx5_vdpa_cq_comp;
	vcq->cqe = num_ent;
	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
	kfree(in);
	return 0;

err_vec:
	kfree(in);
err_vzalloc:
	cq_frag_buf_free(ndev, &vcq->buf);
err_db:
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
	return err;
}

static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	struct mlx5_vdpa_cq *vcq = &mvq->cq;

	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
		return;
	}
	cq_frag_buf_free(ndev, &vcq->buf);
	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}

static int read_umem_params(struct mlx5_vdpa_net *ndev)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int out_size;
	void *caps;
	void *out;
	int err;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev,
			       "Failed reading vdpa umem capabilities with err %d\n", err);
		goto out;
	}

	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);

	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);

	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);

out:
	kfree(out);
	return 0;
}

static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	u32 p_a;
	u32 p_b;

	switch (num) {
	case 1:
		p_a = ndev->umem_1_buffer_param_a;
		p_b = ndev->umem_1_buffer_param_b;
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = ndev->umem_2_buffer_param_a;
		p_b = ndev->umem_2_buffer_param_b;
		*umemp = &mvq->umem2;
		break;
	case 3:
		p_a = ndev->umem_3_buffer_param_a;
		p_b = ndev->umem_3_buffer_param_b;
		*umemp = &mvq->umem3;
		break;
	}

	(*umemp)->size = p_a * mvq->num_ent + p_b;
}

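/* umem buffers are plain frag buffers; their pages are handed to the device
 * as MTT entries when the umem object is created.
 */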
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}

static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	int inlen;
	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
	void *um;
	void *in;
	int err;
	__be64 *pas;
	struct mlx5_vdpa_umem *umem;

	set_umem_size(ndev, mvq, num, &umem);
	err = umem_frag_buf_alloc(ndev, umem, umem->size);
	if (err)
		return err;

	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
	um = MLX5_ADDR_OF(create_umem_in, in, umem);
	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);

	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
		goto err_cmd;
	}

	kfree(in);
	umem->id = MLX5_GET(create_umem_out, out, umem_id);

	return 0;

err_cmd:
	kfree(in);
err_in:
	umem_frag_buf_free(ndev, umem);
	return err;
}

static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}

static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;
	int err;

	for (num = 1; num <= 3; num++) {
		err = create_umem(ndev, mvq, num);
		if (err)
			goto err_umem;
	}
	return 0;

err_umem:
	for (num--; num > 0; num--)
		umem_destroy(ndev, mvq, num);

	return err;
}

static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num;

	for (num = 3; num > 0; num--)
		umem_destroy(ndev, mvq, num);
}

static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
	u32 type_mask;

	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);

	/* prefer split queue */
	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;

	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));

	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}

static bool vq_is_tx(u16 idx)
{
	return idx % 2;
}

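/* Positions of the feature bits inside the virtio_net_q object's
 * queue_feature_bit_mask fields. They differ from the VIRTIO feature bit
 * numbers, so get_features() translates between the two layouts.
 */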
enum {
	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
	MLX5_VIRTIO_NET_F_CSUM = 10,
	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};

static u16 get_features(u64 features)
{
	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
}

static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}

static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
{
	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
}

static int create_virtqueue(struct mlx5_vdpa_net *ndev,
			    struct mlx5_vdpa_virtqueue *mvq,
			    bool filled)
{
	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *vq_desc_mr;
	u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
	void *obj_context;
	u16 mlx_features;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	err = umems_create(ndev, mvq);
	if (err)
		return err;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_alloc;
	}

	mlx_features = get_features(features);
	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
		 mlx_features >> 3);
	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
		 mlx_features & 7);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));

	if (vq_is_tx(mvq->index))
		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);

	if (mvq->map.virq) {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
	} else {
		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
	}

	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
		 !!(features & BIT_ULL(VIRTIO_F_VERSION_1)));

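	/* A "filled" create programs ring addresses, indexes and mkeys right
	 * away; otherwise only the existing mkeys are flagged in
	 * modified_fields so that they get programmed when the queue is later
	 * modified to the ready state.
	 */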
	if (filled) {
		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);

		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);

		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
		if (vq_mr)
			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);

		vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
		if (vq_desc_mr &&
		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
	} else {
		/* If there is no mr update, make sure that the existing ones
		 * are programmed when the queue is modified to ready.
		 */
		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];
		if (vq_mr)
			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;

		vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
		if (vq_desc_mr)
			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
	}

	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
	if (counters_supported(&ndev->mvdev))
		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto err_cmd;

	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
	kfree(in);
	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	if (filled) {
		mlx5_vdpa_get_mr(mvdev, vq_mr);
		mvq->vq_mr = vq_mr;

		if (vq_desc_mr &&
		    MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
			mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
			mvq->desc_mr = vq_desc_mr;
		}
	}

	return 0;

err_cmd:
	kfree(in);
err_alloc:
	umems_destroy(ndev, mvq);
	return err;
}

static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};

	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
		return;
	}
	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
	umems_destroy(ndev, mvq);

	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
	mvq->vq_mr = NULL;

	mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
	mvq->desc_mr = NULL;
}

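/* get_qpn()/get_rqpn() return the QP to modify and its remote peer in the RC
 * pair, as seen from either the firmware QP (@fw is true) or the driver QP.
 */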
static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
}

static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
}

static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
			int *outlen, u32 qpn, u32 rqpn)
{
	void *qpc;
	void *pp;

	switch (cmd) {
	case MLX5_CMD_OP_2RST_QP:
		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
		break;
	case MLX5_CMD_OP_RST2INIT_QP:
		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		MLX5_SET(qpc, qpc, rwe, 1);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, vhca_port_num, 1);
		break;
	case MLX5_CMD_OP_INIT2RTR_QP:
		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
		MLX5_SET(qpc, qpc, log_msg_max, 30);
		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, fl, 1);
		break;
	case MLX5_CMD_OP_RTR2RTS_QP:
		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
		if (!*in || !*out)
			goto outerr;

		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
		MLX5_SET(ads, pp, ack_timeout, 14);
		MLX5_SET(qpc, qpc, retry_count, 7);
		MLX5_SET(qpc, qpc, rnr_retry, 7);
		break;
	default:
		goto outerr_nullify;
	}

	return;

outerr:
	kfree(*in);
	kfree(*out);
outerr_nullify:
	*in = NULL;
	*out = NULL;
}

static void free_inout(void *in, void *out)
{
	kfree(in);
	kfree(out);
}

/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the QP being modified is the
 * one used by firmware.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
	int outlen;
	int inlen;
	void *out;
	void *in;
	int err;

	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
	if (!in || !out)
		return -ENOMEM;

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
	free_inout(in, out);
	return err;
}

static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
	if (err)
		return err;

	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
}

struct mlx5_virtq_attr {
	u8 state;
	u16 available_index;
	u16 used_index;
};

static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
			   struct mlx5_virtq_attr *attr)
{
	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
	void *out;
	void *obj_context;
	void *cmd_hdr;
	int err;

	out = kzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
	if (err)
		goto err_cmd;

	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
	memset(attr, 0, sizeof(*attr));
	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
	kfree(out);
	return 0;

err_cmd:
	kfree(out);
	return err;
}

static bool is_resumable(struct mlx5_vdpa_net *ndev)
{
	return ndev->mvdev.vdev.config->resume;
}

static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
{
	switch (oldstate) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
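	/* Leaving SUSPEND is only valid towards RDY, and only when the device
	 * supports resume.
	 */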
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
		return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
	default:
		return false;
	}
}

static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
{
	/* Only state is always modifiable */
	if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE)
		return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
		       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;

	return true;
}

static int modify_virtqueue(struct mlx5_vdpa_net *ndev,
			    struct mlx5_vdpa_virtqueue *mvq,
			    int state)
{
	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	struct mlx5_vdpa_mr *desc_mr = NULL;
	struct mlx5_vdpa_mr *vq_mr = NULL;
	bool state_change = false;
	void *obj_context;
	void *cmd_hdr;
	void *vq_ctx;
	void *in;
	int err;

	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
		return 0;

	if (!modifiable_virtqueue_fields(mvq))
		return -EINVAL;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) {
		if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
			err = -EINVAL;
			goto done;
		}

		MLX5_SET(virtio_net_q_object, obj_context, state, state);
		state_change = true;
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
		MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
		MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
		MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
		MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
		MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
		MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
			 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
		u16 mlx_features = get_features(ndev->mvdev.actual_features);

		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
			 mlx_features >> 3);
		MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
			 mlx_features & 7);
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
		vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]];

		if (vq_mr)
			MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
		else
			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
		desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];

		if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
			MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
		else
			mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
	}

	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
	if (err)
		goto done;

	if (state_change)
		mvq->fw_state = state;

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
		mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
		mlx5_vdpa_get_mr(mvdev, vq_mr);
		mvq->vq_mr = vq_mr;
	}

	if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
		mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
		mlx5_vdpa_get_mr(mvdev, desc_mr);
		mvq->desc_mr = desc_mr;
	}

	mvq->modified_fields = 0;

done:
	kfree(in);
	return err;
}

static int modify_virtqueue_state(struct mlx5_vdpa_net *ndev,
				  struct mlx5_vdpa_virtqueue *mvq,
				  unsigned int state)
{
	mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
	return modify_virtqueue(ndev, mvq, state);
}

static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
	void *cmd_hdr;
	int err;

	if (!counters_supported(&ndev->mvdev))
		return 0;

	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);

	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);

	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);

	return 0;
}

static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};

	if (!counters_supported(&ndev->mvdev))
		return;

	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
}

static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
	struct vdpa_callback *cb = priv;

	if (cb->callback)
		return cb->callback(cb->private);

	return IRQ_HANDLED;
}

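/* Grab a free entry from the device's MSI-X vector pool for this VQ. If no
 * entry is free or request_irq() fails, mvq->map is left unset and the VQ
 * falls back to QP-based (completion queue) notification.
 */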
static void alloc_vector(struct mlx5_vdpa_net *ndev,
			 struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	struct mlx5_vdpa_irq_pool_entry *ent;
	int err;
	int i;

	for (i = 0; i < irqp->num_ent; i++) {
		ent = &irqp->entries[i];
		if (!ent->used) {
			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
			ent->dev_id = &ndev->event_cbs[mvq->index];
			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
					  ent->name, ent->dev_id);
			if (err)
				return;

			ent->used = true;
			mvq->map = ent->map;
			return;
		}
	}
}

static void dealloc_vector(struct mlx5_vdpa_net *ndev,
			   struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
	int i;

	for (i = 0; i < irqp->num_ent; i++)
		if (mvq->map.virq == irqp->entries[i].map.virq) {
			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
			irqp->entries[i].used = false;
			return;
		}
}

static int setup_vq(struct mlx5_vdpa_net *ndev,
		    struct mlx5_vdpa_virtqueue *mvq,
		    bool filled)
{
	u16 idx = mvq->index;
	int err;

	if (mvq->initialized)
		return 0;

	err = cq_create(ndev, idx, mvq->num_ent);
	if (err)
		return err;

	err = qp_create(ndev, mvq, &mvq->fwqp);
	if (err)
		goto err_fwqp;

	err = qp_create(ndev, mvq, &mvq->vqqp);
	if (err)
		goto err_vqqp;

	err = connect_qps(ndev, mvq);
	if (err)
		goto err_connect;

	err = counter_set_alloc(ndev, mvq);
	if (err)
		goto err_connect;

	alloc_vector(ndev, mvq);
	err = create_virtqueue(ndev, mvq, filled);
	if (err)
		goto err_vq;

	mvq->initialized = true;

	if (mvq->ready) {
		err = resume_vq(ndev, mvq);
		if (err)
			goto err_modify;
	}

	return 0;

err_modify:
	destroy_virtqueue(ndev, mvq);
err_vq:
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
err_connect:
	qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
	qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
	cq_destroy(ndev, idx);
	return err;
}

static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	struct mlx5_virtq_attr attr;
	int err;

	if (!mvq->initialized)
		return 0;

	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return 0;

	err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed, err: %d\n", err);
		return err;
	}

	err = query_virtqueue(ndev, mvq, &attr);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue, err: %d\n", err);
		return err;
	}

	mvq->avail_idx = attr.available_index;
	mvq->used_idx = attr.used_index;

	return 0;
}

static int suspend_vqs(struct mlx5_vdpa_net *ndev)
{
	int err = 0;
	int i;

	for (i = 0; i < ndev->cur_num_vqs; i++) {
		int local_err = suspend_vq(ndev, &ndev->vqs[i]);

		err = local_err ? local_err : err;
	}

	return err;
}

static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int err;

	if (!mvq->initialized)
		return 0;

	switch (mvq->fw_state) {
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
		/* Due to a FW quirk we need to modify the VQ fields first then change state.
		 * This should be fixed soon.
		 * After that, a single command can be used.
		 */
		err = modify_virtqueue(ndev, mvq, 0);
		if (err) {
			mlx5_vdpa_warn(&ndev->mvdev,
				       "modify vq properties failed for vq %u, err: %d\n",
				       mvq->index, err);
			return err;
		}
		break;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
		if (!is_resumable(ndev)) {
			mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index);
			return -EINVAL;
		}
		break;
	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
		return 0;
	default:
		mlx5_vdpa_warn(&ndev->mvdev, "resume vq %u called from bad state %d\n",
			       mvq->index, mvq->fw_state);
		return -EINVAL;
	}

	err = modify_virtqueue_state(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
	if (err)
		mlx5_vdpa_warn(&ndev->mvdev, "modify to resume failed for vq %u, err: %d\n",
			       mvq->index, err);

	return err;
}

static int resume_vqs(struct mlx5_vdpa_net *ndev)
{
	int err = 0;

	for (int i = 0; i < ndev->cur_num_vqs; i++) {
		int local_err = resume_vq(ndev, &ndev->vqs[i]);

		err = local_err ? local_err : err;
	}

	return err;
}

static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	if (!mvq->initialized)
		return;

	suspend_vq(ndev, mvq);
	mvq->modified_fields = 0;
	destroy_virtqueue(ndev, mvq);
	dealloc_vector(ndev, mvq);
	counter_set_dealloc(ndev, mvq);
	qp_destroy(ndev, &mvq->vqqp);
	qp_destroy(ndev, &mvq->fwqp);
	cq_destroy(ndev, mvq->index);
	mvq->initialized = false;
}

static int create_rqt(struct mlx5_vdpa_net *ndev)
{
	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)

static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	int act_sz = roundup_pow_of_two(num / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i, j;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
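	/* Only the RX (even-index) virtqueues take part in the RQ table. */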
	for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	if (err)
		return err;

	return 0;
}

static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}

static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
	 MLX5_HASH_FIELD_SEL_L4_DPORT)
	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
	void *rss_key;
	void *outer;
	void *tirc;
	void *in;
	int err;

	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);

	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));

	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);

	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);

	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
	kfree(in);
	if (err)
		return err;

	mlx5_vdpa_add_tirn(ndev);
	return err;
}

static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_remove_tirn(ndev);
	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}

#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2

#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
#define NUM_DESTS 2
#else
#define NUM_DESTS 1
#endif

static int add_steering_counters(struct mlx5_vdpa_net *ndev,
				 struct macvlan_node *node,
				 struct mlx5_flow_act *flow_act,
				 struct mlx5_flow_destination *dests)
{
#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
	int err;

	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->ucast_counter.counter))
		return PTR_ERR(node->ucast_counter.counter);

	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
	if (IS_ERR(node->mcast_counter.counter)) {
		err = PTR_ERR(node->mcast_counter.counter);
		goto err_mcast_counter;
	}

	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	return 0;

err_mcast_counter:
	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
	return err;
#else
	return 0;
#endif
}

static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1808 struct macvlan_node *node) 1809 { 1810 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1811 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1812 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1813 #endif 1814 } 1815 1816 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1817 struct macvlan_node *node) 1818 { 1819 struct mlx5_flow_destination dests[NUM_DESTS] = {}; 1820 struct mlx5_flow_act flow_act = {}; 1821 struct mlx5_flow_spec *spec; 1822 void *headers_c; 1823 void *headers_v; 1824 u8 *dmac_c; 1825 u8 *dmac_v; 1826 int err; 1827 u16 vid; 1828 1829 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1830 if (!spec) 1831 return -ENOMEM; 1832 1833 vid = key2vid(node->macvlan); 1834 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1835 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1836 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1837 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1838 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1839 eth_broadcast_addr(dmac_c); 1840 ether_addr_copy(dmac_v, mac); 1841 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1842 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1843 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1844 } 1845 if (node->tagged) { 1846 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1847 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1848 } 1849 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1850 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1851 dests[0].tir_num = ndev->res.tirn; 1852 err = add_steering_counters(ndev, node, &flow_act, dests); 1853 if (err) 1854 goto out_free; 1855 1856 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1857 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1858 #endif 1859 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1860 if (IS_ERR(node->ucast_rule)) { 1861 err = PTR_ERR(node->ucast_rule); 1862 goto err_ucast; 1863 } 1864 1865 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1866 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1867 #endif 1868 1869 memset(dmac_c, 0, ETH_ALEN); 1870 memset(dmac_v, 0, ETH_ALEN); 1871 dmac_c[0] = 1; 1872 dmac_v[0] = 1; 1873 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1874 if (IS_ERR(node->mcast_rule)) { 1875 err = PTR_ERR(node->mcast_rule); 1876 goto err_mcast; 1877 } 1878 kvfree(spec); 1879 mlx5_vdpa_add_rx_counters(ndev, node); 1880 return 0; 1881 1882 err_mcast: 1883 mlx5_del_flow_rules(node->ucast_rule); 1884 err_ucast: 1885 remove_steering_counters(ndev, node); 1886 out_free: 1887 kvfree(spec); 1888 return err; 1889 } 1890 1891 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1892 struct macvlan_node *node) 1893 { 1894 mlx5_vdpa_remove_rx_counters(ndev, node); 1895 mlx5_del_flow_rules(node->ucast_rule); 1896 mlx5_del_flow_rules(node->mcast_rule); 1897 } 1898 1899 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1900 { 1901 u64 val; 1902 1903 if (!tagged) 1904 vlan = MLX5V_UNTAGGED; 1905 1906 val = (u64)vlan << 48 | 1907 (u64)mac[0] << 40 | 1908 (u64)mac[1] << 32 | 1909 (u64)mac[2] << 24 | 1910 (u64)mac[3] << 16 | 1911 (u64)mac[4] << 8 | 1912 (u64)mac[5]; 1913 1914 return val; 1915 } 1916 1917 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1918 
{ 1919 struct macvlan_node *pos; 1920 u32 idx; 1921 1922 idx = hash_64(value, 8); // tbd 8 1923 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1924 if (pos->macvlan == value) 1925 return pos; 1926 } 1927 return NULL; 1928 } 1929 1930 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 1931 { 1932 struct macvlan_node *ptr; 1933 u64 val; 1934 u32 idx; 1935 int err; 1936 1937 val = search_val(mac, vid, tagged); 1938 if (mac_vlan_lookup(ndev, val)) 1939 return -EEXIST; 1940 1941 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1942 if (!ptr) 1943 return -ENOMEM; 1944 1945 ptr->tagged = tagged; 1946 ptr->macvlan = val; 1947 ptr->ndev = ndev; 1948 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 1949 if (err) 1950 goto err_add; 1951 1952 idx = hash_64(val, 8); 1953 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1954 return 0; 1955 1956 err_add: 1957 kfree(ptr); 1958 return err; 1959 } 1960 1961 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1962 { 1963 struct macvlan_node *ptr; 1964 1965 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1966 if (!ptr) 1967 return; 1968 1969 hlist_del(&ptr->hlist); 1970 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 1971 remove_steering_counters(ndev, ptr); 1972 kfree(ptr); 1973 } 1974 1975 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1976 { 1977 struct macvlan_node *pos; 1978 struct hlist_node *n; 1979 int i; 1980 1981 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1982 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1983 hlist_del(&pos->hlist); 1984 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 1985 remove_steering_counters(ndev, pos); 1986 kfree(pos); 1987 } 1988 } 1989 } 1990 1991 static int setup_steering(struct mlx5_vdpa_net *ndev) 1992 { 1993 struct mlx5_flow_table_attr ft_attr = {}; 1994 struct mlx5_flow_namespace *ns; 1995 int err; 1996 1997 ft_attr.max_fte = MAX_STEERING_ENT; 1998 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1999 2000 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 2001 if (!ns) { 2002 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 2003 return -EOPNOTSUPP; 2004 } 2005 2006 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 2007 if (IS_ERR(ndev->rxft)) { 2008 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); 2009 return PTR_ERR(ndev->rxft); 2010 } 2011 mlx5_vdpa_add_rx_flow_table(ndev); 2012 2013 err = mac_vlan_add(ndev, ndev->config.mac, 0, false); 2014 if (err) 2015 goto err_add; 2016 2017 return 0; 2018 2019 err_add: 2020 mlx5_vdpa_remove_rx_flow_table(ndev); 2021 mlx5_destroy_flow_table(ndev->rxft); 2022 return err; 2023 } 2024 2025 static void teardown_steering(struct mlx5_vdpa_net *ndev) 2026 { 2027 clear_mac_vlan_table(ndev); 2028 mlx5_vdpa_remove_rx_flow_table(ndev); 2029 mlx5_destroy_flow_table(ndev->rxft); 2030 } 2031 2032 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) 2033 { 2034 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2035 struct mlx5_control_vq *cvq = &mvdev->cvq; 2036 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2037 struct mlx5_core_dev *pfmdev; 2038 size_t read; 2039 u8 mac[ETH_ALEN], mac_back[ETH_ALEN]; 2040 2041 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 2042 switch (cmd) { 2043 case VIRTIO_NET_CTRL_MAC_ADDR_SET: 2044 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN); 2045 if (read != ETH_ALEN) 2046 break; 2047 2048 if 
(!memcmp(ndev->config.mac, mac, ETH_ALEN)) { 2049 status = VIRTIO_NET_OK; 2050 break; 2051 } 2052 2053 if (is_zero_ether_addr(mac)) 2054 break; 2055 2056 if (!is_zero_ether_addr(ndev->config.mac)) { 2057 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 2058 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n", 2059 ndev->config.mac); 2060 break; 2061 } 2062 } 2063 2064 if (mlx5_mpfs_add_mac(pfmdev, mac)) { 2065 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n", 2066 mac); 2067 break; 2068 } 2069 2070 /* Back up the original MAC address so that we can restore it if adding 2071 * the forward rules fails. 2072 */ 2073 memcpy(mac_back, ndev->config.mac, ETH_ALEN); 2074 2075 memcpy(ndev->config.mac, mac, ETH_ALEN); 2076 2077 /* Recreate the flow table entry so that traffic to the new MAC is forwarded again. 2078 */ 2079 mac_vlan_del(ndev, mac_back, 0, false); 2080 2081 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { 2082 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, trying to restore\n"); 2083 2084 /* This path is unlikely, but double check anyway. */ 2085 if (is_zero_ether_addr(mac_back)) { 2086 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n"); 2087 break; 2088 } 2089 2090 /* Try to restore the original MAC address in the MPFS table, and try to 2091 * restore the forward rule entry. 2092 */ 2093 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 2094 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 2095 ndev->config.mac); 2096 } 2097 2098 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 2099 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 2100 mac_back); 2101 } 2102 2103 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 2104 2105 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 2106 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 2107 2108 break; 2109 } 2110 2111 status = VIRTIO_NET_OK; 2112 break; 2113 2114 default: 2115 break; 2116 } 2117 2118 return status; 2119 } 2120 2121 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 2122 { 2123 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2124 int cur_qps = ndev->cur_num_vqs / 2; 2125 int err; 2126 int i; 2127 2128 if (cur_qps > newqps) { 2129 err = modify_rqt(ndev, 2 * newqps); 2130 if (err) 2131 return err; 2132 2133 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) { 2134 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; 2135 2136 if (is_resumable(ndev)) 2137 suspend_vq(ndev, mvq); 2138 else 2139 teardown_vq(ndev, mvq); 2140 } 2141 2142 ndev->cur_num_vqs = 2 * newqps; 2143 } else { 2144 ndev->cur_num_vqs = 2 * newqps; 2145 for (i = cur_qps * 2; i < 2 * newqps; i++) { 2146 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; 2147 2148 err = mvq->initialized ? 
resume_vq(ndev, mvq) : setup_vq(ndev, mvq, true); 2149 if (err) 2150 goto clean_added; 2151 } 2152 err = modify_rqt(ndev, 2 * newqps); 2153 if (err) 2154 goto clean_added; 2155 } 2156 return 0; 2157 2158 clean_added: 2159 for (--i; i >= 2 * cur_qps; --i) 2160 teardown_vq(ndev, &ndev->vqs[i]); 2161 2162 ndev->cur_num_vqs = 2 * cur_qps; 2163 2164 return err; 2165 } 2166 2167 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 2168 { 2169 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2170 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2171 struct mlx5_control_vq *cvq = &mvdev->cvq; 2172 struct virtio_net_ctrl_mq mq; 2173 size_t read; 2174 u16 newqps; 2175 2176 switch (cmd) { 2177 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 2178 /* This mq feature check aligns with pre-existing userspace 2179 * implementation. 2180 * 2181 * Without it, an untrusted driver could fake a multiqueue config 2182 * request down to a non-mq device that may cause kernel to 2183 * panic due to uninitialized resources for extra vqs. Even with 2184 * a well behaving guest driver, it is not expected to allow 2185 * changing the number of vqs on a non-mq device. 2186 */ 2187 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 2188 break; 2189 2190 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 2191 if (read != sizeof(mq)) 2192 break; 2193 2194 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 2195 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 2196 newqps > ndev->rqt_size) 2197 break; 2198 2199 if (ndev->cur_num_vqs == 2 * newqps) { 2200 status = VIRTIO_NET_OK; 2201 break; 2202 } 2203 2204 if (!change_num_qps(mvdev, newqps)) 2205 status = VIRTIO_NET_OK; 2206 2207 break; 2208 default: 2209 break; 2210 } 2211 2212 return status; 2213 } 2214 2215 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 2216 { 2217 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2218 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2219 struct mlx5_control_vq *cvq = &mvdev->cvq; 2220 __virtio16 vlan; 2221 size_t read; 2222 u16 id; 2223 2224 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 2225 return status; 2226 2227 switch (cmd) { 2228 case VIRTIO_NET_CTRL_VLAN_ADD: 2229 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 2230 if (read != sizeof(vlan)) 2231 break; 2232 2233 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2234 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 2235 break; 2236 2237 status = VIRTIO_NET_OK; 2238 break; 2239 case VIRTIO_NET_CTRL_VLAN_DEL: 2240 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 2241 if (read != sizeof(vlan)) 2242 break; 2243 2244 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2245 mac_vlan_del(ndev, ndev->config.mac, id, true); 2246 status = VIRTIO_NET_OK; 2247 break; 2248 default: 2249 break; 2250 } 2251 2252 return status; 2253 } 2254 2255 static void mlx5_cvq_kick_handler(struct work_struct *work) 2256 { 2257 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2258 struct virtio_net_ctrl_hdr ctrl; 2259 struct mlx5_vdpa_wq_ent *wqent; 2260 struct mlx5_vdpa_dev *mvdev; 2261 struct mlx5_control_vq *cvq; 2262 struct mlx5_vdpa_net *ndev; 2263 size_t read, write; 2264 int err; 2265 2266 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2267 mvdev = wqent->mvdev; 2268 ndev = to_mlx5_vdpa_ndev(mvdev); 2269 cvq = &mvdev->cvq; 2270 2271 down_write(&ndev->reslock); 2272 2273 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2274 goto out; 2275 2276 if 
(!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 2277 goto out; 2278 2279 if (!cvq->ready) 2280 goto out; 2281 2282 while (true) { 2283 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2284 GFP_ATOMIC); 2285 if (err <= 0) 2286 break; 2287 2288 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2289 if (read != sizeof(ctrl)) 2290 break; 2291 2292 cvq->received_desc++; 2293 switch (ctrl.class) { 2294 case VIRTIO_NET_CTRL_MAC: 2295 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2296 break; 2297 case VIRTIO_NET_CTRL_MQ: 2298 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2299 break; 2300 case VIRTIO_NET_CTRL_VLAN: 2301 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2302 break; 2303 default: 2304 break; 2305 } 2306 2307 /* Make sure data is written before advancing index */ 2308 smp_wmb(); 2309 2310 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2311 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2312 vringh_kiov_cleanup(&cvq->riov); 2313 vringh_kiov_cleanup(&cvq->wiov); 2314 2315 if (vringh_need_notify_iotlb(&cvq->vring)) 2316 vringh_notify(&cvq->vring); 2317 2318 cvq->completed_desc++; 2319 queue_work(mvdev->wq, &wqent->work); 2320 break; 2321 } 2322 2323 out: 2324 up_write(&ndev->reslock); 2325 } 2326 2327 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2328 { 2329 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2330 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2331 struct mlx5_vdpa_virtqueue *mvq; 2332 2333 if (!is_index_valid(mvdev, idx)) 2334 return; 2335 2336 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2337 if (!mvdev->wq || !mvdev->cvq.ready) 2338 return; 2339 2340 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2341 return; 2342 } 2343 2344 mvq = &ndev->vqs[idx]; 2345 if (unlikely(!mvq->ready)) 2346 return; 2347 2348 iowrite16(idx, ndev->mvdev.res.kick_addr); 2349 } 2350 2351 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2352 u64 driver_area, u64 device_area) 2353 { 2354 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2355 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2356 struct mlx5_vdpa_virtqueue *mvq; 2357 2358 if (!is_index_valid(mvdev, idx)) 2359 return -EINVAL; 2360 2361 if (is_ctrl_vq_idx(mvdev, idx)) { 2362 mvdev->cvq.desc_addr = desc_area; 2363 mvdev->cvq.device_addr = device_area; 2364 mvdev->cvq.driver_addr = driver_area; 2365 return 0; 2366 } 2367 2368 mvq = &ndev->vqs[idx]; 2369 mvq->desc_addr = desc_area; 2370 mvq->device_addr = device_area; 2371 mvq->driver_addr = driver_area; 2372 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; 2373 return 0; 2374 } 2375 2376 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2377 { 2378 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2379 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2380 struct mlx5_vdpa_virtqueue *mvq; 2381 2382 if (!is_index_valid(mvdev, idx)) 2383 return; 2384 2385 if (is_ctrl_vq_idx(mvdev, idx)) { 2386 struct mlx5_control_vq *cvq = &mvdev->cvq; 2387 2388 cvq->vring.vring.num = num; 2389 return; 2390 } 2391 2392 mvq = &ndev->vqs[idx]; 2393 mvq->num_ent = num; 2394 } 2395 2396 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2397 { 2398 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2399 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2400 2401 ndev->event_cbs[idx] = *cb; 2402 if (is_ctrl_vq_idx(mvdev, idx)) 2403 mvdev->cvq.event_cb = *cb; 2404 } 2405 2406 
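/* Note: the control VQ is not offloaded to hardware; it is emulated in
 * software. A guest kick on the CVQ index queues ndev->cvq_ent.work (see
 * mlx5_vdpa_kick_vq()), and mlx5_cvq_kick_handler() then pulls descriptors
 * via the vringh iotlb helpers, dispatches the VIRTIO_NET_CTRL_* command and
 * pushes the ack status back. The helper below is the vringh notify hook used
 * to signal used buffers back to the guest through the stored event callback.
 */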
static void mlx5_cvq_notify(struct vringh *vring) 2407 { 2408 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2409 2410 if (!cvq->event_cb.callback) 2411 return; 2412 2413 cvq->event_cb.callback(cvq->event_cb.private); 2414 } 2415 2416 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2417 { 2418 struct mlx5_control_vq *cvq = &mvdev->cvq; 2419 2420 cvq->ready = ready; 2421 if (!ready) 2422 return; 2423 2424 cvq->vring.notify = mlx5_cvq_notify; 2425 } 2426 2427 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2428 { 2429 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2430 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2431 struct mlx5_vdpa_virtqueue *mvq; 2432 2433 if (!mvdev->actual_features) 2434 return; 2435 2436 if (!is_index_valid(mvdev, idx)) 2437 return; 2438 2439 if (is_ctrl_vq_idx(mvdev, idx)) { 2440 set_cvq_ready(mvdev, ready); 2441 return; 2442 } 2443 2444 mvq = &ndev->vqs[idx]; 2445 if (!ready) { 2446 suspend_vq(ndev, mvq); 2447 } else { 2448 if (resume_vq(ndev, mvq)) 2449 ready = false; 2450 } 2451 2452 mvq->ready = ready; 2453 } 2454 2455 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2456 { 2457 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2458 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2459 2460 if (!is_index_valid(mvdev, idx)) 2461 return false; 2462 2463 if (is_ctrl_vq_idx(mvdev, idx)) 2464 return mvdev->cvq.ready; 2465 2466 return ndev->vqs[idx].ready; 2467 } 2468 2469 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2470 const struct vdpa_vq_state *state) 2471 { 2472 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2473 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2474 struct mlx5_vdpa_virtqueue *mvq; 2475 2476 if (!is_index_valid(mvdev, idx)) 2477 return -EINVAL; 2478 2479 if (is_ctrl_vq_idx(mvdev, idx)) { 2480 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2481 return 0; 2482 } 2483 2484 mvq = &ndev->vqs[idx]; 2485 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2486 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2487 return -EINVAL; 2488 } 2489 2490 mvq->used_idx = state->split.avail_index; 2491 mvq->avail_idx = state->split.avail_index; 2492 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | 2493 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; 2494 return 0; 2495 } 2496 2497 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2498 { 2499 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2500 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2501 struct mlx5_vdpa_virtqueue *mvq; 2502 struct mlx5_virtq_attr attr; 2503 int err; 2504 2505 if (!is_index_valid(mvdev, idx)) 2506 return -EINVAL; 2507 2508 if (is_ctrl_vq_idx(mvdev, idx)) { 2509 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2510 return 0; 2511 } 2512 2513 mvq = &ndev->vqs[idx]; 2514 /* If the virtq object was destroyed, use the value saved at 2515 * the last minute of suspend_vq. This caters for userspace 2516 * that cares about emulating the index after vq is stopped. 2517 */ 2518 if (!mvq->initialized) { 2519 /* Firmware returns a wrong value for the available index. 2520 * Since both values should be identical, we take the value of 2521 * used_idx which is reported correctly. 
2522 */ 2523 state->split.avail_index = mvq->used_idx; 2524 return 0; 2525 } 2526 2527 err = query_virtqueue(ndev, mvq, &attr); 2528 if (err) { 2529 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2530 return err; 2531 } 2532 state->split.avail_index = attr.used_index; 2533 return 0; 2534 } 2535 2536 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2537 { 2538 return PAGE_SIZE; 2539 } 2540 2541 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2542 { 2543 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2544 2545 if (is_ctrl_vq_idx(mvdev, idx)) 2546 return MLX5_VDPA_CVQ_GROUP; 2547 2548 return MLX5_VDPA_DATAVQ_GROUP; 2549 } 2550 2551 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx) 2552 { 2553 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2554 2555 if (is_ctrl_vq_idx(mvdev, idx)) 2556 return MLX5_VDPA_CVQ_GROUP; 2557 2558 return MLX5_VDPA_DATAVQ_DESC_GROUP; 2559 } 2560 2561 static u64 mlx_to_vritio_features(u16 dev_features) 2562 { 2563 u64 result = 0; 2564 2565 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2566 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2567 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2568 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2569 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2570 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2571 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2572 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2573 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2574 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2575 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2576 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2577 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2578 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2579 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2580 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2581 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2582 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2583 2584 return result; 2585 } 2586 2587 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2588 { 2589 u64 mlx_vdpa_features = 0; 2590 u16 dev_features; 2591 2592 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2593 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2594 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2595 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2596 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2597 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2598 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2599 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2600 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2601 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2602 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2603 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2604 2605 return mlx_vdpa_features; 2606 } 2607 2608 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2609 { 2610 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2611 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2612 2613 print_features(mvdev, ndev->mvdev.mlx_features, false); 2614 return ndev->mvdev.mlx_features; 2615 } 2616 2617 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2618 { 2619 /* Minimum features to expect */ 2620 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2621 return -EOPNOTSUPP; 2622 2623 /* Double check features combination sent down by the driver. 
2624 * Fail invalid feature combinations where a required feature is absent. 2625 * 2626 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit 2627 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". 2628 * By failing the invalid features sent down by untrusted drivers, 2629 * we're assured the assumptions made by is_index_valid() and 2630 * is_ctrl_vq_idx() will not be compromised. 2631 */ 2632 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) == 2633 BIT_ULL(VIRTIO_NET_F_MQ)) 2634 return -EINVAL; 2635 2636 return 0; 2637 } 2638 2639 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled) 2640 { 2641 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2642 int err; 2643 int i; 2644 2645 for (i = 0; i < mvdev->max_vqs; i++) { 2646 err = setup_vq(ndev, &ndev->vqs[i], filled); 2647 if (err) 2648 goto err_vq; 2649 } 2650 2651 return 0; 2652 2653 err_vq: 2654 for (--i; i >= 0; i--) 2655 teardown_vq(ndev, &ndev->vqs[i]); 2656 2657 return err; 2658 } 2659 2660 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) 2661 { 2662 int i; 2663 2664 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) 2665 teardown_vq(ndev, &ndev->vqs[i]); 2666 } 2667 2668 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev) 2669 { 2670 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) { 2671 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) { 2672 /* MQ supported. CVQ index is right above the last data virtqueue's */ 2673 mvdev->max_idx = mvdev->max_vqs; 2674 } else { 2675 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1. 2676 * CVQ gets index 2. 2677 */ 2678 mvdev->max_idx = 2; 2679 } 2680 } else { 2681 /* Two data virtqueues only: one for rx and one for tx */ 2682 mvdev->max_idx = 1; 2683 } 2684 } 2685 2686 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2687 { 2688 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2689 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2690 int err; 2691 2692 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2693 MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2694 MLX5_SET(query_vport_state_in, in, vport_number, vport); 2695 if (vport) 2696 MLX5_SET(query_vport_state_in, in, other_vport, 1); 2697 2698 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2699 if (err) 2700 return 0; 2701 2702 return MLX5_GET(query_vport_state_out, out, state); 2703 } 2704 2705 static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2706 { 2707 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2708 VPORT_STATE_UP) 2709 return true; 2710 2711 return false; 2712 } 2713 2714 static void update_carrier(struct work_struct *work) 2715 { 2716 struct mlx5_vdpa_wq_ent *wqent; 2717 struct mlx5_vdpa_dev *mvdev; 2718 struct mlx5_vdpa_net *ndev; 2719 2720 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2721 mvdev = wqent->mvdev; 2722 ndev = to_mlx5_vdpa_ndev(mvdev); 2723 if (get_link_state(mvdev)) 2724 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2725 else 2726 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2727 2728 if (ndev->config_cb.callback) 2729 ndev->config_cb.callback(ndev->config_cb.private); 2730 2731 kfree(wqent); 2732 } 2733 2734 static int queue_link_work(struct mlx5_vdpa_net *ndev) 2735 { 2736 struct mlx5_vdpa_wq_ent *wqent; 2737 2738 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2739 if (!wqent) 2740 return -ENOMEM; 2741 2742 wqent->mvdev = &ndev->mvdev; 2743 INIT_WORK(&wqent->work, 
update_carrier); 2744 queue_work(ndev->mvdev.wq, &wqent->work); 2745 return 0; 2746 } 2747 2748 static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2749 { 2750 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2751 struct mlx5_eqe *eqe = param; 2752 int ret = NOTIFY_DONE; 2753 2754 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2755 switch (eqe->sub_type) { 2756 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2757 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2758 if (queue_link_work(ndev)) 2759 return NOTIFY_DONE; 2760 2761 ret = NOTIFY_OK; 2762 break; 2763 default: 2764 return NOTIFY_DONE; 2765 } 2766 return ret; 2767 } 2768 return ret; 2769 } 2770 2771 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2772 { 2773 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2774 return; 2775 2776 ndev->nb.notifier_call = event_handler; 2777 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2778 ndev->nb_registered = true; 2779 queue_link_work(ndev); 2780 } 2781 2782 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2783 { 2784 if (!ndev->nb_registered) 2785 return; 2786 2787 ndev->nb_registered = false; 2788 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2789 if (ndev->mvdev.wq) 2790 flush_workqueue(ndev->mvdev.wq); 2791 } 2792 2793 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa) 2794 { 2795 return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); 2796 } 2797 2798 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2799 { 2800 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2801 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2802 u64 old_features = mvdev->actual_features; 2803 int err; 2804 2805 print_features(mvdev, features, true); 2806 2807 err = verify_driver_features(mvdev, features); 2808 if (err) 2809 return err; 2810 2811 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2812 2813 /* Interested in changes of vq features only. 
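 * When that subset changes, the hardware virtqueue objects must be updated
 * too, so flag their VIRTIO_VERSION/FEATURES fields below; the change is
 * applied by the next modify command issued on each queue.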
*/ 2814 if (get_features(old_features) != get_features(mvdev->actual_features)) { 2815 for (int i = 0; i < mvdev->max_vqs; ++i) { 2816 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; 2817 2818 mvq->modified_fields |= ( 2819 MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION | 2820 MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES 2821 ); 2822 } 2823 } 2824 2825 update_cvq_info(mvdev); 2826 return err; 2827 } 2828 2829 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2830 { 2831 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2832 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2833 2834 ndev->config_cb = *cb; 2835 } 2836 2837 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2838 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2839 { 2840 return MLX5_VDPA_MAX_VQ_ENTRIES; 2841 } 2842 2843 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2844 { 2845 return VIRTIO_ID_NET; 2846 } 2847 2848 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2849 { 2850 return PCI_VENDOR_ID_MELLANOX; 2851 } 2852 2853 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2854 { 2855 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2856 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2857 2858 print_status(mvdev, ndev->mvdev.status, false); 2859 return ndev->mvdev.status; 2860 } 2861 2862 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2863 { 2864 struct mlx5_vq_restore_info *ri = &mvq->ri; 2865 struct mlx5_virtq_attr attr = {}; 2866 int err; 2867 2868 if (mvq->initialized) { 2869 err = query_virtqueue(ndev, mvq, &attr); 2870 if (err) 2871 return err; 2872 } 2873 2874 ri->avail_index = attr.available_index; 2875 ri->used_index = attr.used_index; 2876 ri->ready = mvq->ready; 2877 ri->num_ent = mvq->num_ent; 2878 ri->desc_addr = mvq->desc_addr; 2879 ri->device_addr = mvq->device_addr; 2880 ri->driver_addr = mvq->driver_addr; 2881 ri->map = mvq->map; 2882 ri->restore = true; 2883 return 0; 2884 } 2885 2886 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2887 { 2888 int i; 2889 2890 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2891 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2892 save_channel_info(ndev, &ndev->vqs[i]); 2893 } 2894 return 0; 2895 } 2896 2897 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2898 { 2899 int i; 2900 2901 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2902 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2903 } 2904 2905 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2906 { 2907 struct mlx5_vdpa_virtqueue *mvq; 2908 struct mlx5_vq_restore_info *ri; 2909 int i; 2910 2911 mlx5_clear_vqs(ndev); 2912 mvqs_set_defaults(ndev); 2913 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2914 mvq = &ndev->vqs[i]; 2915 ri = &mvq->ri; 2916 if (!ri->restore) 2917 continue; 2918 2919 mvq->avail_idx = ri->avail_index; 2920 mvq->used_idx = ri->used_index; 2921 mvq->ready = ri->ready; 2922 mvq->num_ent = ri->num_ent; 2923 mvq->desc_addr = ri->desc_addr; 2924 mvq->device_addr = ri->device_addr; 2925 mvq->driver_addr = ri->driver_addr; 2926 mvq->map = ri->map; 2927 } 2928 } 2929 2930 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2931 struct mlx5_vdpa_mr *new_mr, 2932 unsigned int asid) 2933 { 2934 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2935 bool teardown = !is_resumable(ndev); 2936 int err; 2937 2938 suspend_vqs(ndev); 2939 if (teardown) { 2940 err = save_channels_info(ndev); 2941 if (err) 2942 return err; 2943 2944 
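/* A non-resumable setup cannot keep its virtqueue objects across the memory
 * key change: their state was saved above and the objects are destroyed here,
 * to be recreated from the saved info once the new mapping is in place.
 */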
teardown_vq_resources(ndev); 2945 } 2946 2947 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 2948 2949 for (int i = 0; i < mvdev->max_vqs; i++) 2950 ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | 2951 MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 2952 2953 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2954 return 0; 2955 2956 if (teardown) { 2957 restore_channels_info(ndev); 2958 err = setup_vq_resources(ndev, true); 2959 if (err) 2960 return err; 2961 } 2962 2963 resume_vqs(ndev); 2964 2965 return 0; 2966 } 2967 2968 /* reslock must be held for this function */ 2969 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled) 2970 { 2971 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 2972 int err; 2973 2974 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2975 2976 if (ndev->setup) { 2977 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2978 err = 0; 2979 goto out; 2980 } 2981 mlx5_vdpa_add_debugfs(ndev); 2982 2983 err = read_umem_params(ndev); 2984 if (err) 2985 goto err_setup; 2986 2987 err = setup_virtqueues(mvdev, filled); 2988 if (err) { 2989 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2990 goto err_setup; 2991 } 2992 2993 err = create_rqt(ndev); 2994 if (err) { 2995 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2996 goto err_rqt; 2997 } 2998 2999 err = create_tir(ndev); 3000 if (err) { 3001 mlx5_vdpa_warn(mvdev, "create_tir\n"); 3002 goto err_tir; 3003 } 3004 3005 err = setup_steering(ndev); 3006 if (err) { 3007 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 3008 goto err_fwd; 3009 } 3010 ndev->setup = true; 3011 3012 return 0; 3013 3014 err_fwd: 3015 destroy_tir(ndev); 3016 err_tir: 3017 destroy_rqt(ndev); 3018 err_rqt: 3019 teardown_virtqueues(ndev); 3020 err_setup: 3021 mlx5_vdpa_remove_debugfs(ndev); 3022 out: 3023 return err; 3024 } 3025 3026 /* reslock must be held for this function */ 3027 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev) 3028 { 3029 3030 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 3031 3032 if (!ndev->setup) 3033 return; 3034 3035 mlx5_vdpa_remove_debugfs(ndev); 3036 teardown_steering(ndev); 3037 destroy_tir(ndev); 3038 destroy_rqt(ndev); 3039 teardown_virtqueues(ndev); 3040 ndev->setup = false; 3041 } 3042 3043 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 3044 { 3045 struct mlx5_control_vq *cvq = &mvdev->cvq; 3046 int err = 0; 3047 3048 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { 3049 u16 idx = cvq->vring.last_avail_idx; 3050 3051 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 3052 cvq->vring.vring.num, false, 3053 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 3054 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 3055 (struct vring_used *)(uintptr_t)cvq->device_addr); 3056 3057 if (!err) 3058 cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; 3059 } 3060 return err; 3061 } 3062 3063 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 3064 { 3065 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3066 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3067 int err; 3068 3069 print_status(mvdev, status, true); 3070 3071 down_write(&ndev->reslock); 3072 3073 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 3074 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 3075 err = setup_cvq_vring(mvdev); 3076 if (err) { 3077 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 3078 goto err_setup; 3079 } 3080 register_link_notifier(ndev); 3081 err = setup_vq_resources(ndev, true); 3082 if (err) { 3083 
mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 3084 goto err_driver; 3085 } 3086 } else { 3087 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 3088 goto err_clear; 3089 } 3090 } 3091 3092 ndev->mvdev.status = status; 3093 up_write(&ndev->reslock); 3094 return; 3095 3096 err_driver: 3097 unregister_link_notifier(ndev); 3098 err_setup: 3099 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 3100 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 3101 err_clear: 3102 up_write(&ndev->reslock); 3103 } 3104 3105 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 3106 { 3107 int i; 3108 3109 /* default mapping all groups are mapped to asid 0 */ 3110 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 3111 mvdev->group2asid[i] = 0; 3112 } 3113 3114 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) 3115 { 3116 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3117 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3118 3119 print_status(mvdev, 0, true); 3120 mlx5_vdpa_info(mvdev, "performing device reset\n"); 3121 3122 down_write(&ndev->reslock); 3123 unregister_link_notifier(ndev); 3124 teardown_vq_resources(ndev); 3125 mvqs_set_defaults(ndev); 3126 3127 if (flags & VDPA_RESET_F_CLEAN_MAP) 3128 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 3129 ndev->mvdev.status = 0; 3130 ndev->mvdev.suspended = false; 3131 ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; 3132 ndev->mvdev.cvq.ready = false; 3133 ndev->mvdev.cvq.received_desc = 0; 3134 ndev->mvdev.cvq.completed_desc = 0; 3135 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 3136 ndev->mvdev.actual_features = 0; 3137 init_group_to_asid_map(mvdev); 3138 ++mvdev->generation; 3139 3140 if ((flags & VDPA_RESET_F_CLEAN_MAP) && 3141 MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3142 if (mlx5_vdpa_create_dma_mr(mvdev)) 3143 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 3144 } 3145 up_write(&ndev->reslock); 3146 3147 return 0; 3148 } 3149 3150 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 3151 { 3152 return mlx5_vdpa_compat_reset(vdev, 0); 3153 } 3154 3155 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 3156 { 3157 return sizeof(struct virtio_net_config); 3158 } 3159 3160 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 3161 unsigned int len) 3162 { 3163 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3164 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3165 3166 if (offset + len <= sizeof(struct virtio_net_config)) 3167 memcpy(buf, (u8 *)&ndev->config + offset, len); 3168 } 3169 3170 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 3171 unsigned int len) 3172 { 3173 /* not supported */ 3174 } 3175 3176 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 3177 { 3178 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3179 3180 return mvdev->generation; 3181 } 3182 3183 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 3184 unsigned int asid) 3185 { 3186 struct mlx5_vdpa_mr *new_mr; 3187 int err; 3188 3189 if (asid >= MLX5_VDPA_NUM_AS) 3190 return -EINVAL; 3191 3192 if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) { 3193 new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); 3194 if (IS_ERR(new_mr)) { 3195 err = PTR_ERR(new_mr); 3196 mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err); 3197 return err; 3198 } 3199 } else { 3200 /* Empty iotlbs don't have an mr but will clear the previous mr. 
*/ 3201 new_mr = NULL; 3202 } 3203 3204 if (!mvdev->mr[asid]) { 3205 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 3206 } else { 3207 err = mlx5_vdpa_change_map(mvdev, new_mr, asid); 3208 if (err) { 3209 mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err); 3210 goto out_err; 3211 } 3212 } 3213 3214 return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); 3215 3216 out_err: 3217 mlx5_vdpa_put_mr(mvdev, new_mr); 3218 return err; 3219 } 3220 3221 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 3222 struct vhost_iotlb *iotlb) 3223 { 3224 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3225 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3226 int err = -EINVAL; 3227 3228 down_write(&ndev->reslock); 3229 err = set_map_data(mvdev, iotlb, asid); 3230 up_write(&ndev->reslock); 3231 return err; 3232 } 3233 3234 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) 3235 { 3236 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3237 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3238 int err; 3239 3240 down_write(&ndev->reslock); 3241 err = mlx5_vdpa_reset_mr(mvdev, asid); 3242 up_write(&ndev->reslock); 3243 return err; 3244 } 3245 3246 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 3247 { 3248 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3249 3250 if (is_ctrl_vq_idx(mvdev, idx)) 3251 return &vdev->dev; 3252 3253 return mvdev->vdev.dma_dev; 3254 } 3255 3256 static void free_irqs(struct mlx5_vdpa_net *ndev) 3257 { 3258 struct mlx5_vdpa_irq_pool_entry *ent; 3259 int i; 3260 3261 if (!msix_mode_supported(&ndev->mvdev)) 3262 return; 3263 3264 if (!ndev->irqp.entries) 3265 return; 3266 3267 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 3268 ent = ndev->irqp.entries + i; 3269 if (ent->map.virq) 3270 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 3271 } 3272 kfree(ndev->irqp.entries); 3273 } 3274 3275 static void mlx5_vdpa_free(struct vdpa_device *vdev) 3276 { 3277 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3278 struct mlx5_core_dev *pfmdev; 3279 struct mlx5_vdpa_net *ndev; 3280 3281 ndev = to_mlx5_vdpa_ndev(mvdev); 3282 3283 free_fixed_resources(ndev); 3284 mlx5_vdpa_destroy_mr_resources(mvdev); 3285 if (!is_zero_ether_addr(ndev->config.mac)) { 3286 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 3287 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 3288 } 3289 mlx5_vdpa_free_resources(&ndev->mvdev); 3290 free_irqs(ndev); 3291 kfree(ndev->event_cbs); 3292 kfree(ndev->vqs); 3293 } 3294 3295 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 3296 { 3297 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3298 struct vdpa_notification_area ret = {}; 3299 struct mlx5_vdpa_net *ndev; 3300 phys_addr_t addr; 3301 3302 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 3303 return ret; 3304 3305 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 3306 * notification to avoid the risk of mapping pages that contain BAR of more 3307 * than one SF 3308 */ 3309 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 3310 return ret; 3311 3312 ndev = to_mlx5_vdpa_ndev(mvdev); 3313 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 3314 ret.addr = addr; 3315 ret.size = PAGE_SIZE; 3316 return ret; 3317 } 3318 3319 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 3320 { 3321 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3322 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3323 struct mlx5_vdpa_virtqueue *mvq; 3324 3325 if 
(!is_index_valid(mvdev, idx)) 3326 return -EINVAL; 3327 3328 if (is_ctrl_vq_idx(mvdev, idx)) 3329 return -EOPNOTSUPP; 3330 3331 mvq = &ndev->vqs[idx]; 3332 if (!mvq->map.virq) 3333 return -EOPNOTSUPP; 3334 3335 return mvq->map.virq; 3336 } 3337 3338 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 3339 { 3340 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3341 3342 return mvdev->actual_features; 3343 } 3344 3345 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3346 u64 *received_desc, u64 *completed_desc) 3347 { 3348 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3349 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3350 void *cmd_hdr; 3351 void *ctx; 3352 int err; 3353 3354 if (!counters_supported(&ndev->mvdev)) 3355 return -EOPNOTSUPP; 3356 3357 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3358 return -EAGAIN; 3359 3360 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3361 3362 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3363 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3364 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3365 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3366 3367 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3368 if (err) 3369 return err; 3370 3371 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3372 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3373 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3374 return 0; 3375 } 3376 3377 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3378 struct sk_buff *msg, 3379 struct netlink_ext_ack *extack) 3380 { 3381 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3382 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3383 struct mlx5_vdpa_virtqueue *mvq; 3384 struct mlx5_control_vq *cvq; 3385 u64 received_desc; 3386 u64 completed_desc; 3387 int err = 0; 3388 3389 down_read(&ndev->reslock); 3390 if (!is_index_valid(mvdev, idx)) { 3391 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3392 err = -EINVAL; 3393 goto out_err; 3394 } 3395 3396 if (idx == ctrl_vq_idx(mvdev)) { 3397 cvq = &mvdev->cvq; 3398 received_desc = cvq->received_desc; 3399 completed_desc = cvq->completed_desc; 3400 goto out; 3401 } 3402 3403 mvq = &ndev->vqs[idx]; 3404 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3405 if (err) { 3406 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3407 goto out_err; 3408 } 3409 3410 out: 3411 err = -EMSGSIZE; 3412 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3413 goto out_err; 3414 3415 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3416 VDPA_ATTR_PAD)) 3417 goto out_err; 3418 3419 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3420 goto out_err; 3421 3422 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3423 VDPA_ATTR_PAD)) 3424 goto out_err; 3425 3426 err = 0; 3427 out_err: 3428 up_read(&ndev->reslock); 3429 return err; 3430 } 3431 3432 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3433 { 3434 struct mlx5_control_vq *cvq; 3435 3436 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3437 return; 3438 3439 cvq = &mvdev->cvq; 3440 cvq->ready = false; 3441 } 3442 3443 static int mlx5_vdpa_suspend(struct vdpa_device 
*vdev) 3444 { 3445 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3446 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3447 int err; 3448 3449 mlx5_vdpa_info(mvdev, "suspending device\n"); 3450 3451 down_write(&ndev->reslock); 3452 unregister_link_notifier(ndev); 3453 err = suspend_vqs(ndev); 3454 mlx5_vdpa_cvq_suspend(mvdev); 3455 mvdev->suspended = true; 3456 up_write(&ndev->reslock); 3457 3458 return err; 3459 } 3460 3461 static int mlx5_vdpa_resume(struct vdpa_device *vdev) 3462 { 3463 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3464 struct mlx5_vdpa_net *ndev; 3465 int err; 3466 3467 ndev = to_mlx5_vdpa_ndev(mvdev); 3468 3469 mlx5_vdpa_info(mvdev, "resuming device\n"); 3470 3471 down_write(&ndev->reslock); 3472 mvdev->suspended = false; 3473 err = resume_vqs(ndev); 3474 register_link_notifier(ndev); 3475 up_write(&ndev->reslock); 3476 3477 return err; 3478 } 3479 3480 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3481 unsigned int asid) 3482 { 3483 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3484 int err = 0; 3485 3486 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3487 return -EINVAL; 3488 3489 mvdev->group2asid[group] = asid; 3490 3491 mutex_lock(&mvdev->mr_mtx); 3492 if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid]) 3493 err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid); 3494 mutex_unlock(&mvdev->mr_mtx); 3495 3496 return err; 3497 } 3498 3499 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3500 .set_vq_address = mlx5_vdpa_set_vq_address, 3501 .set_vq_num = mlx5_vdpa_set_vq_num, 3502 .kick_vq = mlx5_vdpa_kick_vq, 3503 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3504 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3505 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3506 .set_vq_state = mlx5_vdpa_set_vq_state, 3507 .get_vq_state = mlx5_vdpa_get_vq_state, 3508 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3509 .get_vq_notification = mlx5_get_vq_notification, 3510 .get_vq_irq = mlx5_get_vq_irq, 3511 .get_vq_align = mlx5_vdpa_get_vq_align, 3512 .get_vq_group = mlx5_vdpa_get_vq_group, 3513 .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. */ 3514 .get_device_features = mlx5_vdpa_get_device_features, 3515 .get_backend_features = mlx5_vdpa_get_backend_features, 3516 .set_driver_features = mlx5_vdpa_set_driver_features, 3517 .get_driver_features = mlx5_vdpa_get_driver_features, 3518 .set_config_cb = mlx5_vdpa_set_config_cb, 3519 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3520 .get_device_id = mlx5_vdpa_get_device_id, 3521 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3522 .get_status = mlx5_vdpa_get_status, 3523 .set_status = mlx5_vdpa_set_status, 3524 .reset = mlx5_vdpa_reset, 3525 .compat_reset = mlx5_vdpa_compat_reset, 3526 .get_config_size = mlx5_vdpa_get_config_size, 3527 .get_config = mlx5_vdpa_get_config, 3528 .set_config = mlx5_vdpa_set_config, 3529 .get_generation = mlx5_vdpa_get_generation, 3530 .set_map = mlx5_vdpa_set_map, 3531 .reset_map = mlx5_vdpa_reset_map, 3532 .set_group_asid = mlx5_set_group_asid, 3533 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3534 .free = mlx5_vdpa_free, 3535 .suspend = mlx5_vdpa_suspend, 3536 .resume = mlx5_vdpa_resume, /* Op disabled if not supported. 
*/ 3537 }; 3538 3539 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3540 { 3541 u16 hw_mtu; 3542 int err; 3543 3544 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3545 if (err) 3546 return err; 3547 3548 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3549 return 0; 3550 } 3551 3552 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev) 3553 { 3554 struct mlx5_vdpa_net_resources *res = &ndev->res; 3555 int err; 3556 3557 if (res->valid) { 3558 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3559 return -EEXIST; 3560 } 3561 3562 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3563 if (err) 3564 return err; 3565 3566 err = create_tis(ndev); 3567 if (err) 3568 goto err_tis; 3569 3570 res->valid = true; 3571 3572 return 0; 3573 3574 err_tis: 3575 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3576 return err; 3577 } 3578 3579 static void free_fixed_resources(struct mlx5_vdpa_net *ndev) 3580 { 3581 struct mlx5_vdpa_net_resources *res = &ndev->res; 3582 3583 if (!res->valid) 3584 return; 3585 3586 destroy_tis(ndev); 3587 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3588 res->valid = false; 3589 } 3590 3591 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev) 3592 { 3593 struct mlx5_vdpa_virtqueue *mvq; 3594 int i; 3595 3596 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3597 mvq = &ndev->vqs[i]; 3598 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3599 mvq->index = i; 3600 mvq->ndev = ndev; 3601 mvq->fwqp.fw = true; 3602 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3603 mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE; 3604 } 3605 } 3606 3607 struct mlx5_vdpa_mgmtdev { 3608 struct vdpa_mgmt_dev mgtdev; 3609 struct mlx5_adev *madev; 3610 struct mlx5_vdpa_net *ndev; 3611 struct vdpa_config_ops vdpa_ops; 3612 }; 3613 3614 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3615 { 3616 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3617 void *in; 3618 int err; 3619 3620 in = kvzalloc(inlen, GFP_KERNEL); 3621 if (!in) 3622 return -ENOMEM; 3623 3624 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3625 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3626 mtu + MLX5V_ETH_HARD_MTU); 3627 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3628 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3629 3630 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3631 3632 kvfree(in); 3633 return err; 3634 } 3635 3636 static void allocate_irqs(struct mlx5_vdpa_net *ndev) 3637 { 3638 struct mlx5_vdpa_irq_pool_entry *ent; 3639 int i; 3640 3641 if (!msix_mode_supported(&ndev->mvdev)) 3642 return; 3643 3644 if (!ndev->mvdev.mdev->pdev) 3645 return; 3646 3647 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); 3648 if (!ndev->irqp.entries) 3649 return; 3650 3651 3652 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3653 ent = ndev->irqp.entries + i; 3654 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3655 dev_name(&ndev->mvdev.vdev.dev), i); 3656 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3657 if (!ent->map.virq) 3658 return; 3659 3660 ndev->irqp.num_ent++; 3661 } 3662 } 3663 3664 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3665 const struct vdpa_dev_set_config *add_config) 3666 { 3667 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3668 struct virtio_net_config *config; 3669 struct mlx5_core_dev *pfmdev; 3670 struct mlx5_vdpa_dev *mvdev; 3671 
struct mlx5_vdpa_net *ndev; 3672 struct mlx5_core_dev *mdev; 3673 u64 device_features; 3674 u32 max_vqs; 3675 u16 mtu; 3676 int err; 3677 3678 if (mgtdev->ndev) 3679 return -ENOSPC; 3680 3681 mdev = mgtdev->madev->mdev; 3682 device_features = mgtdev->mgtdev.supported_features; 3683 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3684 if (add_config->device_features & ~device_features) { 3685 dev_warn(mdev->device, 3686 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3687 add_config->device_features, device_features); 3688 return -EINVAL; 3689 } 3690 device_features &= add_config->device_features; 3691 } else { 3692 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3693 } 3694 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3695 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3696 dev_warn(mdev->device, 3697 "Must provision minimum features 0x%llx for this device", 3698 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3699 return -EOPNOTSUPP; 3700 } 3701 3702 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 3703 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3704 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3705 return -EOPNOTSUPP; 3706 } 3707 3708 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3709 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3710 if (max_vqs < 2) { 3711 dev_warn(mdev->device, 3712 "%d virtqueues are supported. At least 2 are required\n", 3713 max_vqs); 3714 return -EAGAIN; 3715 } 3716 3717 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) { 3718 if (add_config->net.max_vq_pairs > max_vqs / 2) 3719 return -EINVAL; 3720 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs); 3721 } else { 3722 max_vqs = 2; 3723 } 3724 3725 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops, 3726 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false); 3727 if (IS_ERR(ndev)) 3728 return PTR_ERR(ndev); 3729 3730 ndev->mvdev.max_vqs = max_vqs; 3731 mvdev = &ndev->mvdev; 3732 mvdev->mdev = mdev; 3733 3734 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL); 3735 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL); 3736 if (!ndev->vqs || !ndev->event_cbs) { 3737 err = -ENOMEM; 3738 goto err_alloc; 3739 } 3740 ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; 3741 3742 mvqs_set_defaults(ndev); 3743 allocate_irqs(ndev); 3744 init_rwsem(&ndev->reslock); 3745 config = &ndev->config; 3746 3747 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { 3748 err = config_func_mtu(mdev, add_config->net.mtu); 3749 if (err) 3750 goto err_alloc; 3751 } 3752 3753 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) { 3754 err = query_mtu(mdev, &mtu); 3755 if (err) 3756 goto err_alloc; 3757 3758 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 3759 } 3760 3761 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) { 3762 if (get_link_state(mvdev)) 3763 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 3764 else 3765 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 3766 } 3767 3768 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 3769 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); 3770 /* No bother setting mac address in config if not going to provision _F_MAC */ 3771 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 || 3772 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3773 err = 

static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct workqueue_struct *wq;

	unregister_link_notifier(ndev);
	_vdpa_unregister_device(dev);
	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
	mgtdev->ndev = NULL;
}

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
	/* Account for the control VQ on top of the data virtqueues */
	mgtdev->mgtdev.max_supported_vqs =
		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
	mgtdev->madev = madev;
	mgtdev->vdpa_ops = mlx5_vdpa_ops;

	/* Hide optional ops the device cannot support */
	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
		mgtdev->vdpa_ops.get_vq_desc_group = NULL;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
		mgtdev->vdpa_ops.resume = NULL;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	auxiliary_set_drvdata(adev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}
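
/* Illustration only: each mlx5_vdpa_mgmtdev instance backs at most one vdpa
 * device at a time; mlx5_vdpa_dev_add() returns -ENOSPC when mgtdev->ndev is
 * already set, and mlx5_vdpa_dev_del() clears it again. From userspace the
 * registered management device is listed by "vdpa mgmtdev show" (iproute2),
 * and a device created against it is removed with "vdpa dev del <name>".
 */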

static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = auxiliary_get_drvdata(adev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);
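
/* Illustration only: MLX5_ADEV_NAME expands to "mlx5_core", so this driver
 * binds to auxiliary devices named "mlx5_core.vnet.<N>" that the mlx5 core
 * driver creates for functions with virtio-net device emulation support.
 * Loading the module (typically "modprobe mlx5_vdpa") is enough for the
 * auxiliary bus to match such devices and invoke mlx5v_probe() for each.
 */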