// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */

#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox VDPA driver");
MODULE_LICENSE("Dual BSD/GPL");

#define VALID_FEATURES_MASK \
	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))

#define VALID_STATUS_MASK \
	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)

#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))

#define MLX5V_UNTAGGED 0x1000

/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
 * 5.1.6.5.5 "Device operation in multiqueue mode":
 *
 * Multiqueue is disabled by default.
 * The driver enables multiqueue by sending a command using class
 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
 * operation, as follows: ...
 */
#define MLX5V_DEFAULT_VQ_COUNT 2

#define MLX5V_DEFAULT_VQ_SIZE 256

struct mlx5_vdpa_cq_buf {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int cqe_size;
	int nent;
};

struct mlx5_vdpa_cq {
	struct mlx5_core_cq mcq;
	struct mlx5_vdpa_cq_buf buf;
	struct mlx5_db db;
	int cqe;
};

struct mlx5_vdpa_umem {
	struct mlx5_frag_buf_ctrl fbc;
	struct mlx5_frag_buf frag_buf;
	int size;
	u32 id;
};

struct mlx5_vdpa_qp {
	struct mlx5_core_qp mqp;
	struct mlx5_frag_buf frag_buf;
	struct mlx5_db db;
	u16 head;
	bool fw;
};

struct mlx5_vq_restore_info {
	u32 num_ent;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u16 avail_index;
	u16 used_index;
	struct msi_map map;
	bool ready;
	bool restore;
};

struct mlx5_vdpa_virtqueue {
	bool ready;
	u64 desc_addr;
	u64 device_addr;
	u64 driver_addr;
	u32 num_ent;

	/* Resources for implementing the notification channel from the device
	 * to the driver. fwqp is the firmware end of an RC connection; the
	 * other end is vqqp used by the driver. cq is where completions are
	 * reported.
	 */
	struct mlx5_vdpa_cq cq;
	struct mlx5_vdpa_qp fwqp;
	struct mlx5_vdpa_qp vqqp;

	/* umem resources are required for the virtqueue operation. Their use
	 * is internal and they must be provided by the driver.
	 */
	struct mlx5_vdpa_umem umem1;
	struct mlx5_vdpa_umem umem2;
	struct mlx5_vdpa_umem umem3;

	u32 counter_set_id;
	bool initialized;
	int index;
	u32 virtq_id;
	struct mlx5_vdpa_net *ndev;
	u16 avail_idx;
	u16 used_idx;
	int fw_state;

	u64 modified_fields;

	struct mlx5_vdpa_mr *vq_mr;
	struct mlx5_vdpa_mr *desc_mr;

	struct msi_map map;

	/* keep last in the struct */
	struct mlx5_vq_restore_info ri;
};

static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
			return idx < 2;
		else
			return idx < 3;
	}

	return idx <= mvdev->max_idx;
}

static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);

static bool mlx5_vdpa_debug;

#define MLX5_LOG_VIO_FLAG(_feature) \
	do { \
		if (features & BIT_ULL(_feature)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
	} while (0)

#define MLX5_LOG_VIO_STAT(_status) \
	do { \
		if (status & (_status)) \
			mlx5_vdpa_info(mvdev, "%s\n", #_status); \
	} while (0)

/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
	return virtio_legacy_is_little_endian() ||
		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}

static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
}

static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
}

static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
		return 2;

	return mvdev->max_vqs;
}

static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return idx == ctrl_vq_idx(mvdev);
}

static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
	if (status & ~VALID_STATUS_MASK)
		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
			       status & ~VALID_STATUS_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
	if (set && !status) {
		mlx5_vdpa_info(mvdev, "driver resets the device\n");
		return;
	}

	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}

static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
	if (features & ~VALID_FEATURES_MASK)
		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
			       features & ~VALID_FEATURES_MASK);

	if (!mlx5_vdpa_debug)
		return;

	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
	if (!features)
		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");

	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}

static int create_tis(struct mlx5_vdpa_net *ndev)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
	void *tisc;
	int err;

	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
	if (err)
		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);

	return err;
}

static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}

#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)

static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
	int err;

	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
				       ndev->mvdev.mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);

	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
	buf->nent = nent;

	return 0;
}

static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;

	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}

static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}

static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe(vcq, i);
		cqe64 = cqe;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

/* Return the CQE at index n if hardware has posted it (valid opcode and an
 * ownership bit matching the current wrap-around parity), or NULL otherwise.
 */
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
		return cqe64;

	return NULL;
}

static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
	vqp->head += n;
	vqp->db.db[0] = cpu_to_be32(vqp->head);
}

static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
	struct mlx5_vdpa_qp *vqp;
	__be64 *pas;
	void *qpc;

	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
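	/* Both QPs are created under the vdpa device's user context (uid). */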
	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	if (vqp->fw) {
		/* Firmware QP is allocated by the driver on the firmware's
		 * behalf, so we can skip some of the params as they will be
		 * chosen by the firmware.
		 */
		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
		MLX5_SET(qpc, qpc, no_sq, 1);
		return;
	}

	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, no_sq, 1);
	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}

static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
					ndev->mvdev.mdev->priv.numa_node);
}

static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}

static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
		     struct mlx5_vdpa_qp *vqp)
{
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	void *qpc;
	void *in;
	int err;

	if (!vqp->fw) {
		vqp = &mvq->vqqp;
		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
		if (err)
			return err;

		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
		if (err)
			goto err_db;
		inlen += vqp->frag_buf.npages * sizeof(__be64);
	}

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_kzalloc;
	}

	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
	if (!vqp->fw)
		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kfree(in);
	if (err)
		goto err_kzalloc;

	vqp->mqp.uid = ndev->mvdev.res.uid;
	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);

	if (!vqp->fw)
		rx_post(vqp, mvq->num_ent);

	return 0;

err_kzalloc:
	if (!vqp->fw)
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
	if (!vqp->fw)
		rq_buf_free(ndev, vqp);

	return err;
}

static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
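	/* Only the driver-side QP has a host-allocated doorbell record and RQ
	 * buffer to release; the firmware-side QP has none.
	 */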
	if (!vqp->fw) {
		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
		rq_buf_free(ndev, vqp);
	}
}

static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	struct mlx5_cqe64 *cqe64;

	cqe64 = next_cqe_sw(vcq);
	if (!cqe64)
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}

static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	struct vdpa_callback *event_cb;

	event_cb = &ndev->event_cbs[mvq->index];
	mlx5_cq_set_ci(&mvq->cq.mcq);

	/* make sure the CQ consumer update is visible to the hardware before
	 * updating the RX doorbell record.
	 */
	dma_wmb();
	rx_post(&mvq->vqqp, num);
	if (event_cb->callback)
		event_cb->callback(event_cb->private);
}

static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
	struct mlx5_vdpa_net *ndev = mvq->ndev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	int num = 0;

	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
		num++;
		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it can pass it on to the
			 * virtio driver in the guest.
			 */
			mlx5_vdpa_handle_completions(mvq, num);
			num = 0;
		}
	}

	if (num)
		mlx5_vdpa_handle_completions(mvq, num);

	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}

static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	void __iomem *uar_page = ndev->mvdev.res.uar->map;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vdpa_cq *vcq = &mvq->cq;
	__be64 *pas;
	int inlen;
	void *cqc;
	void *in;
	int err;
	int eqn;

	err = mlx5_db_alloc(mdev, &vcq->db);
	if (err)
		return err;

	vcq->mcq.set_ci_db = vcq->db.db;
	vcq->mcq.arm_db = vcq->db.db + 1;
	vcq->mcq.cqe_sz = 64;

	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
	if (err)
		goto err_db;

	cq_frag_buf_init(vcq, &vcq->buf);

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_vzalloc;
	}

	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	/* Use vector 0 by default. Consider adding code to choose least used
	 * vector.
600 */ 601 err = mlx5_comp_eqn_get(mdev, 0, &eqn); 602 if (err) 603 goto err_vec; 604 605 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 606 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 607 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 608 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 609 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 610 611 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 612 if (err) 613 goto err_vec; 614 615 vcq->mcq.comp = mlx5_vdpa_cq_comp; 616 vcq->cqe = num_ent; 617 vcq->mcq.set_ci_db = vcq->db.db; 618 vcq->mcq.arm_db = vcq->db.db + 1; 619 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 620 kfree(in); 621 return 0; 622 623 err_vec: 624 kfree(in); 625 err_vzalloc: 626 cq_frag_buf_free(ndev, &vcq->buf); 627 err_db: 628 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 629 return err; 630 } 631 632 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 633 { 634 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 635 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 636 struct mlx5_vdpa_cq *vcq = &mvq->cq; 637 638 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 639 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 640 return; 641 } 642 cq_frag_buf_free(ndev, &vcq->buf); 643 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 644 } 645 646 static int read_umem_params(struct mlx5_vdpa_net *ndev) 647 { 648 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; 649 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); 650 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 651 int out_size; 652 void *caps; 653 void *out; 654 int err; 655 656 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); 657 out = kzalloc(out_size, GFP_KERNEL); 658 if (!out) 659 return -ENOMEM; 660 661 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 662 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 663 err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); 664 if (err) { 665 mlx5_vdpa_warn(&ndev->mvdev, 666 "Failed reading vdpa umem capabilities with err %d\n", err); 667 goto out; 668 } 669 670 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 671 672 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); 673 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); 674 675 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); 676 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); 677 678 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); 679 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); 680 681 out: 682 kfree(out); 683 return 0; 684 } 685 686 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 687 struct mlx5_vdpa_umem **umemp) 688 { 689 u32 p_a; 690 u32 p_b; 691 692 switch (num) { 693 case 1: 694 p_a = ndev->umem_1_buffer_param_a; 695 p_b = ndev->umem_1_buffer_param_b; 696 *umemp = &mvq->umem1; 697 break; 698 case 2: 699 p_a = ndev->umem_2_buffer_param_a; 700 p_b = ndev->umem_2_buffer_param_b; 701 *umemp = &mvq->umem2; 702 break; 703 case 3: 704 p_a = ndev->umem_3_buffer_param_a; 705 p_b = ndev->umem_3_buffer_param_b; 706 *umemp = &mvq->umem3; 707 break; 708 } 709 710 (*umemp)->size = p_a * mvq->num_ent + p_b; 711 } 712 713 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 714 { 715 
mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 716 } 717 718 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 719 { 720 int inlen; 721 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 722 void *um; 723 void *in; 724 int err; 725 __be64 *pas; 726 struct mlx5_vdpa_umem *umem; 727 728 set_umem_size(ndev, mvq, num, &umem); 729 err = umem_frag_buf_alloc(ndev, umem, umem->size); 730 if (err) 731 return err; 732 733 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 734 735 in = kzalloc(inlen, GFP_KERNEL); 736 if (!in) { 737 err = -ENOMEM; 738 goto err_in; 739 } 740 741 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 742 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 743 um = MLX5_ADDR_OF(create_umem_in, in, umem); 744 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 745 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 746 747 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 748 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 749 750 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 751 if (err) { 752 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); 753 goto err_cmd; 754 } 755 756 kfree(in); 757 umem->id = MLX5_GET(create_umem_out, out, umem_id); 758 759 return 0; 760 761 err_cmd: 762 kfree(in); 763 err_in: 764 umem_frag_buf_free(ndev, umem); 765 return err; 766 } 767 768 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 769 { 770 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 771 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 772 struct mlx5_vdpa_umem *umem; 773 774 switch (num) { 775 case 1: 776 umem = &mvq->umem1; 777 break; 778 case 2: 779 umem = &mvq->umem2; 780 break; 781 case 3: 782 umem = &mvq->umem3; 783 break; 784 } 785 786 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); 787 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 788 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 789 return; 790 791 umem_frag_buf_free(ndev, umem); 792 } 793 794 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 795 { 796 int num; 797 int err; 798 799 for (num = 1; num <= 3; num++) { 800 err = create_umem(ndev, mvq, num); 801 if (err) 802 goto err_umem; 803 } 804 return 0; 805 806 err_umem: 807 for (num--; num > 0; num--) 808 umem_destroy(ndev, mvq, num); 809 810 return err; 811 } 812 813 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 814 { 815 int num; 816 817 for (num = 3; num > 0; num--) 818 umem_destroy(ndev, mvq, num); 819 } 820 821 static int get_queue_type(struct mlx5_vdpa_net *ndev) 822 { 823 u32 type_mask; 824 825 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 826 827 /* prefer split queue */ 828 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 829 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 830 831 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 832 833 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 834 } 835 836 static bool vq_is_tx(u16 idx) 837 { 838 return idx % 2; 839 } 840 841 enum { 842 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 843 MLX5_VIRTIO_NET_F_HOST_ECN = 4, 844 MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 845 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 846 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 847 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 848 MLX5_VIRTIO_NET_F_CSUM = 10, 849 
MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 850 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 851 }; 852 853 static u16 get_features(u64 features) 854 { 855 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 856 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 857 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 858 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 859 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 860 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 861 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 862 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 863 } 864 865 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 866 { 867 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 868 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 869 } 870 871 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) 872 { 873 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & 874 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && 875 pci_msix_can_alloc_dyn(mvdev->mdev->pdev); 876 } 877 878 static int create_virtqueue(struct mlx5_vdpa_net *ndev, 879 struct mlx5_vdpa_virtqueue *mvq, 880 bool filled) 881 { 882 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 883 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 884 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 885 struct mlx5_vdpa_mr *vq_mr; 886 struct mlx5_vdpa_mr *vq_desc_mr; 887 u64 features = filled ? mvdev->actual_features : mvdev->mlx_features; 888 void *obj_context; 889 u16 mlx_features; 890 void *cmd_hdr; 891 void *vq_ctx; 892 void *in; 893 int err; 894 895 err = umems_create(ndev, mvq); 896 if (err) 897 return err; 898 899 in = kzalloc(inlen, GFP_KERNEL); 900 if (!in) { 901 err = -ENOMEM; 902 goto err_alloc; 903 } 904 905 mlx_features = get_features(features); 906 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 907 908 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 909 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 910 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 911 912 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 913 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 914 mlx_features >> 3); 915 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 916 mlx_features & 7); 917 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 918 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 919 920 if (vq_is_tx(mvq->index)) 921 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 922 923 if (mvq->map.virq) { 924 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); 925 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); 926 } else { 927 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 928 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 929 } 930 931 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 932 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 933 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 934 !!(features & BIT_ULL(VIRTIO_F_VERSION_1))); 935 936 if (filled) { 937 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 938 
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 939 940 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 941 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 942 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 943 944 vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 945 if (vq_mr) 946 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); 947 948 vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 949 if (vq_desc_mr && 950 MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) 951 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); 952 } else { 953 /* If there is no mr update, make sure that the existing ones are set 954 * modify to ready. 955 */ 956 vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 957 if (vq_mr) 958 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; 959 960 vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 961 if (vq_desc_mr) 962 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 963 } 964 965 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 966 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 967 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 968 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 969 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 970 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 971 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 972 if (counters_supported(&ndev->mvdev)) 973 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 974 975 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 976 if (err) 977 goto err_cmd; 978 979 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; 980 kfree(in); 981 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 982 983 if (filled) { 984 mlx5_vdpa_get_mr(mvdev, vq_mr); 985 mvq->vq_mr = vq_mr; 986 987 if (vq_desc_mr && 988 MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) { 989 mlx5_vdpa_get_mr(mvdev, vq_desc_mr); 990 mvq->desc_mr = vq_desc_mr; 991 } 992 } 993 994 return 0; 995 996 err_cmd: 997 kfree(in); 998 err_alloc: 999 umems_destroy(ndev, mvq); 1000 return err; 1001 } 1002 1003 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1004 { 1005 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 1006 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 1007 1008 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 1009 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1010 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 1011 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 1012 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 1013 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1014 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 1015 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 1016 return; 1017 } 1018 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 1019 umems_destroy(ndev, mvq); 1020 1021 mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr); 1022 mvq->vq_mr = NULL; 1023 1024 mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr); 1025 mvq->desc_mr = NULL; 1026 } 1027 1028 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 1029 { 1030 return fw ? 
mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 1031 } 1032 1033 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 1034 { 1035 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 1036 } 1037 1038 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 1039 int *outlen, u32 qpn, u32 rqpn) 1040 { 1041 void *qpc; 1042 void *pp; 1043 1044 switch (cmd) { 1045 case MLX5_CMD_OP_2RST_QP: 1046 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 1047 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 1048 *in = kzalloc(*inlen, GFP_KERNEL); 1049 *out = kzalloc(*outlen, GFP_KERNEL); 1050 if (!*in || !*out) 1051 goto outerr; 1052 1053 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 1054 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 1055 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 1056 break; 1057 case MLX5_CMD_OP_RST2INIT_QP: 1058 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 1059 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 1060 *in = kzalloc(*inlen, GFP_KERNEL); 1061 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 1062 if (!*in || !*out) 1063 goto outerr; 1064 1065 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 1066 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 1067 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 1068 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1069 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1070 MLX5_SET(qpc, qpc, rwe, 1); 1071 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1072 MLX5_SET(ads, pp, vhca_port_num, 1); 1073 break; 1074 case MLX5_CMD_OP_INIT2RTR_QP: 1075 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 1076 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 1077 *in = kzalloc(*inlen, GFP_KERNEL); 1078 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 1079 if (!*in || !*out) 1080 goto outerr; 1081 1082 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 1083 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 1084 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 1085 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1086 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 1087 MLX5_SET(qpc, qpc, log_msg_max, 30); 1088 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1089 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1090 MLX5_SET(ads, pp, fl, 1); 1091 break; 1092 case MLX5_CMD_OP_RTR2RTS_QP: 1093 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 1094 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 1095 *in = kzalloc(*inlen, GFP_KERNEL); 1096 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 1097 if (!*in || !*out) 1098 goto outerr; 1099 1100 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 1101 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 1102 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 1103 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1104 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1105 MLX5_SET(ads, pp, ack_timeout, 14); 1106 MLX5_SET(qpc, qpc, retry_count, 7); 1107 MLX5_SET(qpc, qpc, rnr_retry, 7); 1108 break; 1109 default: 1110 goto outerr_nullify; 1111 } 1112 1113 return; 1114 1115 outerr: 1116 kfree(*in); 1117 kfree(*out); 1118 outerr_nullify: 1119 *in = NULL; 1120 *out = NULL; 1121 } 1122 1123 static void free_inout(void *in, void *out) 1124 { 1125 kfree(in); 1126 kfree(out); 1127 } 1128 1129 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1130 * firmware. The fw argument indicates whether the subjected QP is the one used 1131 * by firmware. 
1132 */ 1133 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1134 { 1135 int outlen; 1136 int inlen; 1137 void *out; 1138 void *in; 1139 int err; 1140 1141 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1142 if (!in || !out) 1143 return -ENOMEM; 1144 1145 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1146 free_inout(in, out); 1147 return err; 1148 } 1149 1150 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1151 { 1152 int err; 1153 1154 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1155 if (err) 1156 return err; 1157 1158 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1159 if (err) 1160 return err; 1161 1162 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1163 if (err) 1164 return err; 1165 1166 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1167 if (err) 1168 return err; 1169 1170 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1171 if (err) 1172 return err; 1173 1174 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1175 if (err) 1176 return err; 1177 1178 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1179 } 1180 1181 struct mlx5_virtq_attr { 1182 u8 state; 1183 u16 available_index; 1184 u16 used_index; 1185 }; 1186 1187 struct mlx5_virtqueue_query_mem { 1188 u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)]; 1189 u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)]; 1190 }; 1191 1192 struct mlx5_virtqueue_modify_mem { 1193 u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)]; 1194 u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)]; 1195 }; 1196 1197 static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev, 1198 struct mlx5_vdpa_virtqueue *mvq, 1199 struct mlx5_virtqueue_query_mem *cmd) 1200 { 1201 void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); 1202 1203 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1204 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1205 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1206 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1207 } 1208 1209 static void query_virtqueue_end(struct mlx5_vdpa_net *ndev, 1210 struct mlx5_virtqueue_query_mem *cmd, 1211 struct mlx5_virtq_attr *attr) 1212 { 1213 void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context); 1214 1215 memset(attr, 0, sizeof(*attr)); 1216 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1217 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1218 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1219 } 1220 1221 static int query_virtqueues(struct mlx5_vdpa_net *ndev, 1222 int start_vq, 1223 int num_vqs, 1224 struct mlx5_virtq_attr *attrs) 1225 { 1226 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 1227 struct mlx5_virtqueue_query_mem *cmd_mem; 1228 struct mlx5_vdpa_async_cmd *cmds; 1229 int err = 0; 1230 1231 WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n", 1232 start_vq, start_vq + num_vqs, mvdev->max_vqs); 1233 1234 cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL); 1235 cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); 1236 if (!cmds || !cmd_mem) { 1237 err = -ENOMEM; 1238 goto done; 1239 } 1240 1241 for (int i = 0; i < num_vqs; i++) { 1242 cmds[i].in = &cmd_mem[i].in; 1243 cmds[i].inlen = 
sizeof(cmd_mem[i].in); 1244 cmds[i].out = &cmd_mem[i].out; 1245 cmds[i].outlen = sizeof(cmd_mem[i].out); 1246 fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]); 1247 } 1248 1249 err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); 1250 if (err) { 1251 mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n", 1252 start_vq, start_vq + num_vqs, err); 1253 goto done; 1254 } 1255 1256 for (int i = 0; i < num_vqs; i++) { 1257 struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; 1258 int vq_idx = start_vq + i; 1259 1260 if (cmd->err) { 1261 mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n", vq_idx, err); 1262 if (!err) 1263 err = cmd->err; 1264 continue; 1265 } 1266 1267 query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]); 1268 } 1269 1270 done: 1271 kvfree(cmd_mem); 1272 kvfree(cmds); 1273 return err; 1274 } 1275 1276 static bool is_resumable(struct mlx5_vdpa_net *ndev) 1277 { 1278 return ndev->mvdev.vdev.config->resume; 1279 } 1280 1281 static bool is_valid_state_change(int oldstate, int newstate, bool resumable) 1282 { 1283 switch (oldstate) { 1284 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1285 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; 1286 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1287 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1288 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1289 return resumable ? newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY : false; 1290 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1291 default: 1292 return false; 1293 } 1294 } 1295 1296 static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq) 1297 { 1298 /* Only state is always modifiable */ 1299 if (mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) 1300 return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT || 1301 mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1302 1303 return true; 1304 } 1305 1306 static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev, 1307 struct mlx5_vdpa_virtqueue *mvq, 1308 int state, 1309 struct mlx5_virtqueue_modify_mem *cmd) 1310 { 1311 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 1312 struct mlx5_vdpa_mr *desc_mr = NULL; 1313 struct mlx5_vdpa_mr *vq_mr = NULL; 1314 void *obj_context; 1315 void *cmd_hdr; 1316 void *vq_ctx; 1317 1318 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr); 1319 1320 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1321 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1322 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1323 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1324 1325 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context); 1326 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 1327 1328 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) 1329 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1330 1331 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) { 1332 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 1333 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 1334 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 1335 } 1336 1337 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX) 1338 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 1339 1340 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX) 1341 MLX5_SET(virtio_net_q_object, obj_context, 
hw_used_index, mvq->used_idx); 1342 1343 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION) 1344 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1345 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 1346 1347 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) { 1348 u16 mlx_features = get_features(ndev->mvdev.actual_features); 1349 1350 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 1351 mlx_features >> 3); 1352 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 1353 mlx_features & 7); 1354 } 1355 1356 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { 1357 vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 1358 1359 if (vq_mr) 1360 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); 1361 else 1362 mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY; 1363 } 1364 1365 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { 1366 desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 1367 1368 if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) 1369 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey); 1370 else 1371 mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 1372 } 1373 1374 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields); 1375 } 1376 1377 static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev, 1378 struct mlx5_vdpa_virtqueue *mvq, 1379 int state) 1380 { 1381 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 1382 1383 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) { 1384 unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]; 1385 struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid]; 1386 1387 mlx5_vdpa_put_mr(mvdev, mvq->vq_mr); 1388 mlx5_vdpa_get_mr(mvdev, vq_mr); 1389 mvq->vq_mr = vq_mr; 1390 } 1391 1392 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) { 1393 unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]; 1394 struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid]; 1395 1396 mlx5_vdpa_put_mr(mvdev, mvq->desc_mr); 1397 mlx5_vdpa_get_mr(mvdev, desc_mr); 1398 mvq->desc_mr = desc_mr; 1399 } 1400 1401 if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE) 1402 mvq->fw_state = state; 1403 1404 mvq->modified_fields = 0; 1405 } 1406 1407 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1408 { 1409 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1410 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1411 void *cmd_hdr; 1412 int err; 1413 1414 if (!counters_supported(&ndev->mvdev)) 1415 return 0; 1416 1417 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1418 1419 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1420 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1421 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1422 1423 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1424 if (err) 1425 return err; 1426 1427 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1428 1429 return 0; 1430 } 1431 1432 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1433 { 1434 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1435 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1436 1437 if 
(!counters_supported(&ndev->mvdev)) 1438 return; 1439 1440 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1441 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1442 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1443 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1444 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1445 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1446 } 1447 1448 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) 1449 { 1450 struct vdpa_callback *cb = priv; 1451 1452 if (cb->callback) 1453 return cb->callback(cb->private); 1454 1455 return IRQ_HANDLED; 1456 } 1457 1458 static void alloc_vector(struct mlx5_vdpa_net *ndev, 1459 struct mlx5_vdpa_virtqueue *mvq) 1460 { 1461 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1462 struct mlx5_vdpa_irq_pool_entry *ent; 1463 int err; 1464 int i; 1465 1466 for (i = 0; i < irqp->num_ent; i++) { 1467 ent = &irqp->entries[i]; 1468 if (!ent->used) { 1469 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 1470 dev_name(&ndev->mvdev.vdev.dev), mvq->index); 1471 ent->dev_id = &ndev->event_cbs[mvq->index]; 1472 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, 1473 ent->name, ent->dev_id); 1474 if (err) 1475 return; 1476 1477 ent->used = true; 1478 mvq->map = ent->map; 1479 return; 1480 } 1481 } 1482 } 1483 1484 static void dealloc_vector(struct mlx5_vdpa_net *ndev, 1485 struct mlx5_vdpa_virtqueue *mvq) 1486 { 1487 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1488 int i; 1489 1490 for (i = 0; i < irqp->num_ent; i++) 1491 if (mvq->map.virq == irqp->entries[i].map.virq) { 1492 free_irq(mvq->map.virq, irqp->entries[i].dev_id); 1493 irqp->entries[i].used = false; 1494 return; 1495 } 1496 } 1497 1498 static int setup_vq(struct mlx5_vdpa_net *ndev, 1499 struct mlx5_vdpa_virtqueue *mvq, 1500 bool filled) 1501 { 1502 u16 idx = mvq->index; 1503 int err; 1504 1505 if (mvq->initialized) 1506 return 0; 1507 1508 err = cq_create(ndev, idx, mvq->num_ent); 1509 if (err) 1510 return err; 1511 1512 err = qp_create(ndev, mvq, &mvq->fwqp); 1513 if (err) 1514 goto err_fwqp; 1515 1516 err = qp_create(ndev, mvq, &mvq->vqqp); 1517 if (err) 1518 goto err_vqqp; 1519 1520 err = connect_qps(ndev, mvq); 1521 if (err) 1522 goto err_connect; 1523 1524 err = counter_set_alloc(ndev, mvq); 1525 if (err) 1526 goto err_connect; 1527 1528 alloc_vector(ndev, mvq); 1529 err = create_virtqueue(ndev, mvq, filled); 1530 if (err) 1531 goto err_vq; 1532 1533 mvq->initialized = true; 1534 1535 if (mvq->ready) { 1536 err = resume_vq(ndev, mvq); 1537 if (err) 1538 goto err_modify; 1539 } 1540 1541 return 0; 1542 1543 err_modify: 1544 destroy_virtqueue(ndev, mvq); 1545 err_vq: 1546 dealloc_vector(ndev, mvq); 1547 counter_set_dealloc(ndev, mvq); 1548 err_connect: 1549 qp_destroy(ndev, &mvq->vqqp); 1550 err_vqqp: 1551 qp_destroy(ndev, &mvq->fwqp); 1552 err_fwqp: 1553 cq_destroy(ndev, idx); 1554 return err; 1555 } 1556 1557 static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state) 1558 { 1559 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 1560 struct mlx5_virtqueue_modify_mem *cmd_mem; 1561 struct mlx5_vdpa_async_cmd *cmds; 1562 int err = 0; 1563 1564 WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n", 1565 start_vq, start_vq + num_vqs, mvdev->max_vqs); 1566 1567 cmds = kvcalloc(num_vqs, 
sizeof(*cmds), GFP_KERNEL); 1568 cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL); 1569 if (!cmds || !cmd_mem) { 1570 err = -ENOMEM; 1571 goto done; 1572 } 1573 1574 for (int i = 0; i < num_vqs; i++) { 1575 struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; 1576 struct mlx5_vdpa_virtqueue *mvq; 1577 int vq_idx = start_vq + i; 1578 1579 mvq = &ndev->vqs[vq_idx]; 1580 1581 if (!modifiable_virtqueue_fields(mvq)) { 1582 err = -EINVAL; 1583 goto done; 1584 } 1585 1586 if (mvq->fw_state != state) { 1587 if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) { 1588 err = -EINVAL; 1589 goto done; 1590 } 1591 1592 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE; 1593 } 1594 1595 cmd->in = &cmd_mem[i].in; 1596 cmd->inlen = sizeof(cmd_mem[i].in); 1597 cmd->out = &cmd_mem[i].out; 1598 cmd->outlen = sizeof(cmd_mem[i].out); 1599 fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]); 1600 } 1601 1602 err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs); 1603 if (err) { 1604 mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n", 1605 start_vq, start_vq + num_vqs); 1606 goto done; 1607 } 1608 1609 for (int i = 0; i < num_vqs; i++) { 1610 struct mlx5_vdpa_async_cmd *cmd = &cmds[i]; 1611 struct mlx5_vdpa_virtqueue *mvq; 1612 int vq_idx = start_vq + i; 1613 1614 mvq = &ndev->vqs[vq_idx]; 1615 1616 if (cmd->err) { 1617 mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n", 1618 vq_idx, mvq->fw_state, state, err); 1619 if (!err) 1620 err = cmd->err; 1621 continue; 1622 } 1623 1624 modify_virtqueue_end(ndev, mvq, state); 1625 } 1626 1627 done: 1628 kvfree(cmd_mem); 1629 kvfree(cmds); 1630 return err; 1631 } 1632 1633 static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) 1634 { 1635 struct mlx5_vdpa_virtqueue *mvq; 1636 struct mlx5_virtq_attr *attrs; 1637 int vq_idx, i; 1638 int err; 1639 1640 if (start_vq >= ndev->cur_num_vqs) 1641 return -EINVAL; 1642 1643 mvq = &ndev->vqs[start_vq]; 1644 if (!mvq->initialized) 1645 return 0; 1646 1647 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1648 return 0; 1649 1650 err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND); 1651 if (err) 1652 return err; 1653 1654 attrs = kcalloc(num_vqs, sizeof(struct mlx5_virtq_attr), GFP_KERNEL); 1655 if (!attrs) 1656 return -ENOMEM; 1657 1658 err = query_virtqueues(ndev, start_vq, num_vqs, attrs); 1659 if (err) 1660 goto done; 1661 1662 for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) { 1663 mvq = &ndev->vqs[vq_idx]; 1664 mvq->avail_idx = attrs[i].available_index; 1665 mvq->used_idx = attrs[i].used_index; 1666 } 1667 1668 done: 1669 kfree(attrs); 1670 return err; 1671 } 1672 1673 static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1674 { 1675 return suspend_vqs(ndev, mvq->index, 1); 1676 } 1677 1678 static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs) 1679 { 1680 struct mlx5_vdpa_virtqueue *mvq; 1681 int err; 1682 1683 if (start_vq >= ndev->mvdev.max_vqs) 1684 return -EINVAL; 1685 1686 mvq = &ndev->vqs[start_vq]; 1687 if (!mvq->initialized) 1688 return 0; 1689 1690 if (mvq->index >= ndev->cur_num_vqs) 1691 return 0; 1692 1693 switch (mvq->fw_state) { 1694 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1695 /* Due to a FW quirk we need to modify the VQ fields first then change state. 1696 * This should be fixed soon. After that, a single command can be used. 
1697 */ 1698 err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state); 1699 if (err) 1700 return err; 1701 break; 1702 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1703 if (!is_resumable(ndev)) { 1704 mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n", mvq->index); 1705 return -EINVAL; 1706 } 1707 break; 1708 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1709 return 0; 1710 default: 1711 mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n", 1712 mvq->index, mvq->fw_state); 1713 return -EINVAL; 1714 } 1715 1716 return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1717 } 1718 1719 static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1720 { 1721 return resume_vqs(ndev, mvq->index, 1); 1722 } 1723 1724 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1725 { 1726 if (!mvq->initialized) 1727 return; 1728 1729 suspend_vq(ndev, mvq); 1730 mvq->modified_fields = 0; 1731 destroy_virtqueue(ndev, mvq); 1732 dealloc_vector(ndev, mvq); 1733 counter_set_dealloc(ndev, mvq); 1734 qp_destroy(ndev, &mvq->vqqp); 1735 qp_destroy(ndev, &mvq->fwqp); 1736 cq_destroy(ndev, mvq->index); 1737 mvq->initialized = false; 1738 } 1739 1740 static int create_rqt(struct mlx5_vdpa_net *ndev) 1741 { 1742 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); 1743 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2); 1744 __be32 *list; 1745 void *rqtc; 1746 int inlen; 1747 void *in; 1748 int i, j; 1749 int err; 1750 1751 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); 1752 in = kzalloc(inlen, GFP_KERNEL); 1753 if (!in) 1754 return -ENOMEM; 1755 1756 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1757 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1758 1759 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1760 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); 1761 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1762 for (i = 0, j = 0; i < act_sz; i++, j += 2) 1763 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1764 1765 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1766 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1767 kfree(in); 1768 if (err) 1769 return err; 1770 1771 return 0; 1772 } 1773 1774 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1775 1776 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1777 { 1778 int act_sz = roundup_pow_of_two(num / 2); 1779 __be32 *list; 1780 void *rqtc; 1781 int inlen; 1782 void *in; 1783 int i, j; 1784 int err; 1785 1786 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); 1787 in = kzalloc(inlen, GFP_KERNEL); 1788 if (!in) 1789 return -ENOMEM; 1790 1791 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1792 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1793 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1794 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1795 1796 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1797 for (i = 0, j = 0; i < act_sz; i++, j = j + 2) 1798 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1799 1800 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1801 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1802 kfree(in); 1803 if (err) 1804 return err; 1805 1806 return 0; 1807 } 1808 1809 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1810 { 1811 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1812 } 1813 1814 static int 
create_tir(struct mlx5_vdpa_net *ndev) 1815 { 1816 #define HASH_IP_L4PORTS \ 1817 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1818 MLX5_HASH_FIELD_SEL_L4_DPORT) 1819 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1820 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1821 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1822 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1823 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1824 void *rss_key; 1825 void *outer; 1826 void *tirc; 1827 void *in; 1828 int err; 1829 1830 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1831 if (!in) 1832 return -ENOMEM; 1833 1834 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1835 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1836 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1837 1838 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1839 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1840 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1841 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1842 1843 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1844 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1845 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1846 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1847 1848 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1849 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1850 1851 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1852 kfree(in); 1853 if (err) 1854 return err; 1855 1856 mlx5_vdpa_add_tirn(ndev); 1857 return err; 1858 } 1859 1860 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1861 { 1862 mlx5_vdpa_remove_tirn(ndev); 1863 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1864 } 1865 1866 #define MAX_STEERING_ENT 0x8000 1867 #define MAX_STEERING_GROUPS 2 1868 1869 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1870 #define NUM_DESTS 2 1871 #else 1872 #define NUM_DESTS 1 1873 #endif 1874 1875 static int add_steering_counters(struct mlx5_vdpa_net *ndev, 1876 struct macvlan_node *node, 1877 struct mlx5_flow_act *flow_act, 1878 struct mlx5_flow_destination *dests) 1879 { 1880 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1881 int err; 1882 1883 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1884 if (IS_ERR(node->ucast_counter.counter)) 1885 return PTR_ERR(node->ucast_counter.counter); 1886 1887 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1888 if (IS_ERR(node->mcast_counter.counter)) { 1889 err = PTR_ERR(node->mcast_counter.counter); 1890 goto err_mcast_counter; 1891 } 1892 1893 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1894 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 1895 return 0; 1896 1897 err_mcast_counter: 1898 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1899 return err; 1900 #else 1901 return 0; 1902 #endif 1903 } 1904 1905 static void remove_steering_counters(struct mlx5_vdpa_net *ndev, 1906 struct macvlan_node *node) 1907 { 1908 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1909 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1910 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1911 #endif 1912 } 1913 1914 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1915 struct macvlan_node *node) 1916 { 1917 struct 
mlx5_flow_destination dests[NUM_DESTS] = {}; 1918 struct mlx5_flow_act flow_act = {}; 1919 struct mlx5_flow_spec *spec; 1920 void *headers_c; 1921 void *headers_v; 1922 u8 *dmac_c; 1923 u8 *dmac_v; 1924 int err; 1925 u16 vid; 1926 1927 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1928 if (!spec) 1929 return -ENOMEM; 1930 1931 vid = key2vid(node->macvlan); 1932 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1933 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1934 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1935 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1936 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1937 eth_broadcast_addr(dmac_c); 1938 ether_addr_copy(dmac_v, mac); 1939 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1940 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1941 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1942 } 1943 if (node->tagged) { 1944 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1945 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1946 } 1947 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1948 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1949 dests[0].tir_num = ndev->res.tirn; 1950 err = add_steering_counters(ndev, node, &flow_act, dests); 1951 if (err) 1952 goto out_free; 1953 1954 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1955 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1956 #endif 1957 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1958 if (IS_ERR(node->ucast_rule)) { 1959 err = PTR_ERR(node->ucast_rule); 1960 goto err_ucast; 1961 } 1962 1963 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1964 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1965 #endif 1966 1967 memset(dmac_c, 0, ETH_ALEN); 1968 memset(dmac_v, 0, ETH_ALEN); 1969 dmac_c[0] = 1; 1970 dmac_v[0] = 1; 1971 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1972 if (IS_ERR(node->mcast_rule)) { 1973 err = PTR_ERR(node->mcast_rule); 1974 goto err_mcast; 1975 } 1976 kvfree(spec); 1977 mlx5_vdpa_add_rx_counters(ndev, node); 1978 return 0; 1979 1980 err_mcast: 1981 mlx5_del_flow_rules(node->ucast_rule); 1982 err_ucast: 1983 remove_steering_counters(ndev, node); 1984 out_free: 1985 kvfree(spec); 1986 return err; 1987 } 1988 1989 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1990 struct macvlan_node *node) 1991 { 1992 mlx5_vdpa_remove_rx_counters(ndev, node); 1993 mlx5_del_flow_rules(node->ucast_rule); 1994 mlx5_del_flow_rules(node->mcast_rule); 1995 } 1996 1997 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1998 { 1999 u64 val; 2000 2001 if (!tagged) 2002 vlan = MLX5V_UNTAGGED; 2003 2004 val = (u64)vlan << 48 | 2005 (u64)mac[0] << 40 | 2006 (u64)mac[1] << 32 | 2007 (u64)mac[2] << 24 | 2008 (u64)mac[3] << 16 | 2009 (u64)mac[4] << 8 | 2010 (u64)mac[5]; 2011 2012 return val; 2013 } 2014 2015 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 2016 { 2017 struct macvlan_node *pos; 2018 u32 idx; 2019 2020 idx = hash_64(value, 8); // tbd 8 2021 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 2022 if (pos->macvlan == value) 2023 return pos; 2024 } 2025 return NULL; 2026 } 2027 2028 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 2029 { 2030 struct macvlan_node *ptr; 2031 u64 val; 
2032 u32 idx; 2033 int err; 2034 2035 val = search_val(mac, vid, tagged); 2036 if (mac_vlan_lookup(ndev, val)) 2037 return -EEXIST; 2038 2039 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 2040 if (!ptr) 2041 return -ENOMEM; 2042 2043 ptr->tagged = tagged; 2044 ptr->macvlan = val; 2045 ptr->ndev = ndev; 2046 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 2047 if (err) 2048 goto err_add; 2049 2050 idx = hash_64(val, 8); 2051 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 2052 return 0; 2053 2054 err_add: 2055 kfree(ptr); 2056 return err; 2057 } 2058 2059 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 2060 { 2061 struct macvlan_node *ptr; 2062 2063 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 2064 if (!ptr) 2065 return; 2066 2067 hlist_del(&ptr->hlist); 2068 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 2069 remove_steering_counters(ndev, ptr); 2070 kfree(ptr); 2071 } 2072 2073 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 2074 { 2075 struct macvlan_node *pos; 2076 struct hlist_node *n; 2077 int i; 2078 2079 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 2080 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 2081 hlist_del(&pos->hlist); 2082 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 2083 remove_steering_counters(ndev, pos); 2084 kfree(pos); 2085 } 2086 } 2087 } 2088 2089 static int setup_steering(struct mlx5_vdpa_net *ndev) 2090 { 2091 struct mlx5_flow_table_attr ft_attr = {}; 2092 struct mlx5_flow_namespace *ns; 2093 int err; 2094 2095 ft_attr.max_fte = MAX_STEERING_ENT; 2096 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 2097 2098 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 2099 if (!ns) { 2100 mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n"); 2101 return -EOPNOTSUPP; 2102 } 2103 2104 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 2105 if (IS_ERR(ndev->rxft)) { 2106 mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n"); 2107 return PTR_ERR(ndev->rxft); 2108 } 2109 mlx5_vdpa_add_rx_flow_table(ndev); 2110 2111 err = mac_vlan_add(ndev, ndev->config.mac, 0, false); 2112 if (err) 2113 goto err_add; 2114 2115 return 0; 2116 2117 err_add: 2118 mlx5_vdpa_remove_rx_flow_table(ndev); 2119 mlx5_destroy_flow_table(ndev->rxft); 2120 return err; 2121 } 2122 2123 static void teardown_steering(struct mlx5_vdpa_net *ndev) 2124 { 2125 clear_mac_vlan_table(ndev); 2126 mlx5_vdpa_remove_rx_flow_table(ndev); 2127 mlx5_destroy_flow_table(ndev->rxft); 2128 } 2129 2130 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) 2131 { 2132 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2133 struct mlx5_control_vq *cvq = &mvdev->cvq; 2134 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2135 struct mlx5_core_dev *pfmdev; 2136 size_t read; 2137 u8 mac[ETH_ALEN], mac_back[ETH_ALEN]; 2138 2139 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 2140 switch (cmd) { 2141 case VIRTIO_NET_CTRL_MAC_ADDR_SET: 2142 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN); 2143 if (read != ETH_ALEN) 2144 break; 2145 2146 if (!memcmp(ndev->config.mac, mac, 6)) { 2147 status = VIRTIO_NET_OK; 2148 break; 2149 } 2150 2151 if (is_zero_ether_addr(mac)) 2152 break; 2153 2154 if (!is_zero_ether_addr(ndev->config.mac)) { 2155 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 2156 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n", 2157 ndev->config.mac); 2158 break; 2159 } 2160 } 2161 2162 if 
(mlx5_mpfs_add_mac(pfmdev, mac)) {
2163 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
2164 mac);
2165 break;
2166 }
2167
2168 /* Back up the original MAC address so that it can be restored if adding
2169 * the forward rules fails.
2170 */
2171 memcpy(mac_back, ndev->config.mac, ETH_ALEN);
2172
2173 memcpy(ndev->config.mac, mac, ETH_ALEN);
2174
2175 /* The flow table entry must be re-created so that packets are forwarded to the new MAC.
2176 */
2177 mac_vlan_del(ndev, mac_back, 0, false);
2178
2179 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
2180 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
2181
2182 /* This path is unlikely to be taken, but double check anyway. */
2183 if (is_zero_ether_addr(mac_back)) {
2184 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
2185 break;
2186 }
2187
2188 /* Try to restore the original MAC address to the MPFS table, and try to
2189 * restore the forward rule entry.
2190 */
2191 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
2192 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
2193 ndev->config.mac);
2194 }
2195
2196 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
2197 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
2198 mac_back);
2199 }
2200
2201 memcpy(ndev->config.mac, mac_back, ETH_ALEN);
2202
2203 if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
2204 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
2205
2206 break;
2207 }
2208
2209 status = VIRTIO_NET_OK;
2210 break;
2211
2212 default:
2213 break;
2214 }
2215
2216 return status;
2217 }
2218
2219 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
2220 {
2221 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2222 int cur_vqs = ndev->cur_num_vqs;
2223 int new_vqs = newqps * 2;
2224 int err;
2225 int i;
2226
2227 if (cur_vqs > new_vqs) {
2228 err = modify_rqt(ndev, new_vqs);
2229 if (err)
2230 return err;
2231
2232 if (is_resumable(ndev)) {
2233 suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
2234 } else {
2235 for (i = new_vqs; i < cur_vqs; i++)
2236 teardown_vq(ndev, &ndev->vqs[i]);
2237 }
2238
2239 ndev->cur_num_vqs = new_vqs;
2240 } else {
2241 ndev->cur_num_vqs = new_vqs;
2242
2243 for (i = cur_vqs; i < new_vqs; i++) {
2244 err = setup_vq(ndev, &ndev->vqs[i], false);
2245 if (err)
2246 goto clean_added;
2247 }
2248
2249 err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
2250 if (err)
2251 goto clean_added;
2252
2253 err = modify_rqt(ndev, new_vqs);
2254 if (err)
2255 goto clean_added;
2256 }
2257 return 0;
2258
2259 clean_added:
2260 for (--i; i >= cur_vqs; --i)
2261 teardown_vq(ndev, &ndev->vqs[i]);
2262
2263 ndev->cur_num_vqs = cur_vqs;
2264
2265 return err;
2266 }
2267
2268 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
2269 {
2270 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2271 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2272 struct mlx5_control_vq *cvq = &mvdev->cvq;
2273 struct virtio_net_ctrl_mq mq;
2274 size_t read;
2275 u16 newqps;
2276
2277 switch (cmd) {
2278 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
2279 /* This mq feature check aligns with the pre-existing userspace
2280 * implementation.
2281 *
2282 * Without it, an untrusted driver could fake a multiqueue config
2283 * request down to a non-mq device that may cause the kernel to
2284 * panic due to uninitialized resources for extra vqs.
Even with 2285 * a well behaving guest driver, it is not expected to allow 2286 * changing the number of vqs on a non-mq device. 2287 */ 2288 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 2289 break; 2290 2291 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 2292 if (read != sizeof(mq)) 2293 break; 2294 2295 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 2296 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 2297 newqps > ndev->rqt_size) 2298 break; 2299 2300 if (ndev->cur_num_vqs == 2 * newqps) { 2301 status = VIRTIO_NET_OK; 2302 break; 2303 } 2304 2305 if (!change_num_qps(mvdev, newqps)) 2306 status = VIRTIO_NET_OK; 2307 2308 break; 2309 default: 2310 break; 2311 } 2312 2313 return status; 2314 } 2315 2316 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 2317 { 2318 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2319 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2320 struct mlx5_control_vq *cvq = &mvdev->cvq; 2321 __virtio16 vlan; 2322 size_t read; 2323 u16 id; 2324 2325 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 2326 return status; 2327 2328 switch (cmd) { 2329 case VIRTIO_NET_CTRL_VLAN_ADD: 2330 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 2331 if (read != sizeof(vlan)) 2332 break; 2333 2334 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2335 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 2336 break; 2337 2338 status = VIRTIO_NET_OK; 2339 break; 2340 case VIRTIO_NET_CTRL_VLAN_DEL: 2341 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 2342 if (read != sizeof(vlan)) 2343 break; 2344 2345 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2346 mac_vlan_del(ndev, ndev->config.mac, id, true); 2347 status = VIRTIO_NET_OK; 2348 break; 2349 default: 2350 break; 2351 } 2352 2353 return status; 2354 } 2355 2356 static void mlx5_cvq_kick_handler(struct work_struct *work) 2357 { 2358 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2359 struct virtio_net_ctrl_hdr ctrl; 2360 struct mlx5_vdpa_wq_ent *wqent; 2361 struct mlx5_vdpa_dev *mvdev; 2362 struct mlx5_control_vq *cvq; 2363 struct mlx5_vdpa_net *ndev; 2364 size_t read, write; 2365 int err; 2366 2367 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2368 mvdev = wqent->mvdev; 2369 ndev = to_mlx5_vdpa_ndev(mvdev); 2370 cvq = &mvdev->cvq; 2371 2372 down_write(&ndev->reslock); 2373 2374 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2375 goto out; 2376 2377 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 2378 goto out; 2379 2380 if (!cvq->ready) 2381 goto out; 2382 2383 while (true) { 2384 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2385 GFP_ATOMIC); 2386 if (err <= 0) 2387 break; 2388 2389 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2390 if (read != sizeof(ctrl)) 2391 break; 2392 2393 cvq->received_desc++; 2394 switch (ctrl.class) { 2395 case VIRTIO_NET_CTRL_MAC: 2396 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2397 break; 2398 case VIRTIO_NET_CTRL_MQ: 2399 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2400 break; 2401 case VIRTIO_NET_CTRL_VLAN: 2402 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2403 break; 2404 default: 2405 break; 2406 } 2407 2408 /* Make sure data is written before advancing index */ 2409 smp_wmb(); 2410 2411 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2412 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2413 vringh_kiov_cleanup(&cvq->riov); 2414 
vringh_kiov_cleanup(&cvq->wiov); 2415 2416 if (vringh_need_notify_iotlb(&cvq->vring)) 2417 vringh_notify(&cvq->vring); 2418 2419 cvq->completed_desc++; 2420 queue_work(mvdev->wq, &wqent->work); 2421 break; 2422 } 2423 2424 out: 2425 up_write(&ndev->reslock); 2426 } 2427 2428 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2429 { 2430 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2431 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2432 struct mlx5_vdpa_virtqueue *mvq; 2433 2434 if (!is_index_valid(mvdev, idx)) 2435 return; 2436 2437 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2438 if (!mvdev->wq || !mvdev->cvq.ready) 2439 return; 2440 2441 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2442 return; 2443 } 2444 2445 mvq = &ndev->vqs[idx]; 2446 if (unlikely(!mvq->ready)) 2447 return; 2448 2449 iowrite16(idx, ndev->mvdev.res.kick_addr); 2450 } 2451 2452 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2453 u64 driver_area, u64 device_area) 2454 { 2455 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2456 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2457 struct mlx5_vdpa_virtqueue *mvq; 2458 2459 if (!is_index_valid(mvdev, idx)) 2460 return -EINVAL; 2461 2462 if (is_ctrl_vq_idx(mvdev, idx)) { 2463 mvdev->cvq.desc_addr = desc_area; 2464 mvdev->cvq.device_addr = device_area; 2465 mvdev->cvq.driver_addr = driver_area; 2466 return 0; 2467 } 2468 2469 mvq = &ndev->vqs[idx]; 2470 mvq->desc_addr = desc_area; 2471 mvq->device_addr = device_area; 2472 mvq->driver_addr = driver_area; 2473 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS; 2474 return 0; 2475 } 2476 2477 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2478 { 2479 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2480 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2481 struct mlx5_vdpa_virtqueue *mvq; 2482 2483 if (!is_index_valid(mvdev, idx)) 2484 return; 2485 2486 if (is_ctrl_vq_idx(mvdev, idx)) { 2487 struct mlx5_control_vq *cvq = &mvdev->cvq; 2488 2489 cvq->vring.vring.num = num; 2490 return; 2491 } 2492 2493 mvq = &ndev->vqs[idx]; 2494 ndev->needs_teardown = num != mvq->num_ent; 2495 mvq->num_ent = num; 2496 } 2497 2498 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2499 { 2500 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2501 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2502 2503 ndev->event_cbs[idx] = *cb; 2504 if (is_ctrl_vq_idx(mvdev, idx)) 2505 mvdev->cvq.event_cb = *cb; 2506 } 2507 2508 static void mlx5_cvq_notify(struct vringh *vring) 2509 { 2510 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2511 2512 if (!cvq->event_cb.callback) 2513 return; 2514 2515 cvq->event_cb.callback(cvq->event_cb.private); 2516 } 2517 2518 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2519 { 2520 struct mlx5_control_vq *cvq = &mvdev->cvq; 2521 2522 cvq->ready = ready; 2523 if (!ready) 2524 return; 2525 2526 cvq->vring.notify = mlx5_cvq_notify; 2527 } 2528 2529 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2530 { 2531 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2532 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2533 struct mlx5_vdpa_virtqueue *mvq; 2534 2535 if (!mvdev->actual_features) 2536 return; 2537 2538 if (!is_index_valid(mvdev, idx)) 2539 return; 2540 2541 if (is_ctrl_vq_idx(mvdev, idx)) { 2542 set_cvq_ready(mvdev, ready); 2543 return; 2544 } 2545 2546 mvq = &ndev->vqs[idx]; 
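/* Disabling a ready VQ suspends it in firmware; re-enabling resumes it, but
 * only once the driver has set DRIVER_OK. If the resume fails, the VQ is
 * left marked as not ready.
 */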
2547 if (!ready) { 2548 suspend_vq(ndev, mvq); 2549 } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) { 2550 if (resume_vq(ndev, mvq)) 2551 ready = false; 2552 } 2553 2554 mvq->ready = ready; 2555 } 2556 2557 static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2558 { 2559 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2560 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2561 2562 if (!is_index_valid(mvdev, idx)) 2563 return false; 2564 2565 if (is_ctrl_vq_idx(mvdev, idx)) 2566 return mvdev->cvq.ready; 2567 2568 return ndev->vqs[idx].ready; 2569 } 2570 2571 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2572 const struct vdpa_vq_state *state) 2573 { 2574 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2575 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2576 struct mlx5_vdpa_virtqueue *mvq; 2577 2578 if (!is_index_valid(mvdev, idx)) 2579 return -EINVAL; 2580 2581 if (is_ctrl_vq_idx(mvdev, idx)) { 2582 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2583 return 0; 2584 } 2585 2586 mvq = &ndev->vqs[idx]; 2587 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2588 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2589 return -EINVAL; 2590 } 2591 2592 mvq->used_idx = state->split.avail_index; 2593 mvq->avail_idx = state->split.avail_index; 2594 mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | 2595 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX; 2596 return 0; 2597 } 2598 2599 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2600 { 2601 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2602 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2603 struct mlx5_vdpa_virtqueue *mvq; 2604 struct mlx5_virtq_attr attr; 2605 int err; 2606 2607 if (!is_index_valid(mvdev, idx)) 2608 return -EINVAL; 2609 2610 if (is_ctrl_vq_idx(mvdev, idx)) { 2611 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2612 return 0; 2613 } 2614 2615 mvq = &ndev->vqs[idx]; 2616 /* If the virtq object was destroyed, use the value saved at 2617 * the last minute of suspend_vq. This caters for userspace 2618 * that cares about emulating the index after vq is stopped. 2619 */ 2620 if (!mvq->initialized) { 2621 /* Firmware returns a wrong value for the available index. 2622 * Since both values should be identical, we take the value of 2623 * used_idx which is reported correctly. 
2624 */ 2625 state->split.avail_index = mvq->used_idx; 2626 return 0; 2627 } 2628 2629 err = query_virtqueues(ndev, mvq->index, 1, &attr); 2630 if (err) { 2631 mlx5_vdpa_err(mvdev, "failed to query virtqueue\n"); 2632 return err; 2633 } 2634 state->split.avail_index = attr.used_index; 2635 return 0; 2636 } 2637 2638 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2639 { 2640 return PAGE_SIZE; 2641 } 2642 2643 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2644 { 2645 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2646 2647 if (is_ctrl_vq_idx(mvdev, idx)) 2648 return MLX5_VDPA_CVQ_GROUP; 2649 2650 return MLX5_VDPA_DATAVQ_GROUP; 2651 } 2652 2653 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx) 2654 { 2655 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2656 2657 if (is_ctrl_vq_idx(mvdev, idx)) 2658 return MLX5_VDPA_CVQ_GROUP; 2659 2660 return MLX5_VDPA_DATAVQ_DESC_GROUP; 2661 } 2662 2663 static u64 mlx_to_vritio_features(u16 dev_features) 2664 { 2665 u64 result = 0; 2666 2667 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2668 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2669 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2670 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2671 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2672 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2673 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2674 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2675 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2676 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2677 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2678 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2679 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2680 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2681 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2682 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2683 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2684 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2685 2686 return result; 2687 } 2688 2689 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2690 { 2691 u64 mlx_vdpa_features = 0; 2692 u16 dev_features; 2693 2694 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2695 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2696 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2697 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2698 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2699 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2700 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2701 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2702 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2703 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2704 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2705 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2706 2707 return mlx_vdpa_features; 2708 } 2709 2710 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2711 { 2712 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2713 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2714 2715 print_features(mvdev, ndev->mvdev.mlx_features, false); 2716 return ndev->mvdev.mlx_features; 2717 } 2718 2719 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2720 { 2721 /* Minimum features to expect */ 2722 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2723 return -EOPNOTSUPP; 2724 2725 /* Double check features combination sent down by the driver. 
2726 * Fail feature sets that lack a feature they depend on.
2727 *
2728 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2729 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2730 * By failing invalid feature sets sent down by untrusted drivers,
2731 * we ensure that the assumptions made by is_index_valid() and
2732 * is_ctrl_vq_idx() are not compromised.
2733 */
2734 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2735 BIT_ULL(VIRTIO_NET_F_MQ))
2736 return -EINVAL;
2737
2738 return 0;
2739 }
2740
2741 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev, bool filled)
2742 {
2743 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2744 int err;
2745 int i;
2746
2747 for (i = 0; i < mvdev->max_vqs; i++) {
2748 err = setup_vq(ndev, &ndev->vqs[i], filled);
2749 if (err)
2750 goto err_vq;
2751 }
2752
2753 return 0;
2754
2755 err_vq:
2756 for (--i; i >= 0; i--)
2757 teardown_vq(ndev, &ndev->vqs[i]);
2758
2759 return err;
2760 }
2761
2762 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2763 {
2764 int i;
2765
2766 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--)
2767 teardown_vq(ndev, &ndev->vqs[i]);
2768 }
2769
2770 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2771 {
2772 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2773 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2774 /* MQ supported. The CVQ index is right above the last data virtqueue's index. */
2775 mvdev->max_idx = mvdev->max_vqs;
2776 } else {
2777 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2778 * the CVQ gets index 2.
2779 */
2780 mvdev->max_idx = 2;
2781 }
2782 } else {
2783 /* Two data virtqueues only: one for rx and one for tx */
2784 mvdev->max_idx = 1;
2785 }
2786 }
2787
2788 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2789 {
2790 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2791 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2792 int err;
2793
2794 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2795 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2796 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2797 if (vport)
2798 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2799
2800 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2801 if (err)
2802 return 0;
2803
2804 return MLX5_GET(query_vport_state_out, out, state);
2805 }
2806
2807 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2808 {
2809 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2810 VPORT_STATE_UP)
2811 return true;
2812
2813 return false;
2814 }
2815
2816 static void update_carrier(struct work_struct *work)
2817 {
2818 struct mlx5_vdpa_wq_ent *wqent;
2819 struct mlx5_vdpa_dev *mvdev;
2820 struct mlx5_vdpa_net *ndev;
2821
2822 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2823 mvdev = wqent->mvdev;
2824 ndev = to_mlx5_vdpa_ndev(mvdev);
2825 if (get_link_state(mvdev))
2826 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2827 else
2828 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2829
2830 if (ndev->config_cb.callback)
2831 ndev->config_cb.callback(ndev->config_cb.private);
2832
2833 kfree(wqent);
2834 }
2835
2836 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2837 {
2838 struct mlx5_vdpa_wq_ent *wqent;
2839
2840 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2841 if (!wqent)
2842 return -ENOMEM;
2843
2844 wqent->mvdev = &ndev->mvdev;
2845 INIT_WORK(&wqent->work,
update_carrier); 2846 queue_work(ndev->mvdev.wq, &wqent->work); 2847 return 0; 2848 } 2849 2850 static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2851 { 2852 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2853 struct mlx5_eqe *eqe = param; 2854 int ret = NOTIFY_DONE; 2855 2856 if (ndev->mvdev.suspended) 2857 return NOTIFY_DONE; 2858 2859 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2860 switch (eqe->sub_type) { 2861 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2862 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2863 if (queue_link_work(ndev)) 2864 return NOTIFY_DONE; 2865 2866 ret = NOTIFY_OK; 2867 break; 2868 default: 2869 return NOTIFY_DONE; 2870 } 2871 return ret; 2872 } 2873 return ret; 2874 } 2875 2876 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2877 { 2878 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2879 return; 2880 2881 ndev->nb.notifier_call = event_handler; 2882 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2883 ndev->nb_registered = true; 2884 queue_link_work(ndev); 2885 } 2886 2887 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2888 { 2889 if (!ndev->nb_registered) 2890 return; 2891 2892 ndev->nb_registered = false; 2893 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2894 if (ndev->mvdev.wq) 2895 flush_workqueue(ndev->mvdev.wq); 2896 } 2897 2898 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa) 2899 { 2900 return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); 2901 } 2902 2903 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2904 { 2905 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2906 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2907 u64 old_features = mvdev->actual_features; 2908 u64 diff_features; 2909 int err; 2910 2911 print_features(mvdev, features, true); 2912 2913 err = verify_driver_features(mvdev, features); 2914 if (err) 2915 return err; 2916 2917 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2918 2919 /* Interested in changes of vq features only. */ 2920 if (get_features(old_features) != get_features(mvdev->actual_features)) { 2921 for (int i = 0; i < mvdev->max_vqs; ++i) { 2922 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[i]; 2923 2924 mvq->modified_fields |= ( 2925 MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION | 2926 MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES 2927 ); 2928 } 2929 } 2930 2931 /* When below features diverge from initial device features, VQs need a full teardown. 
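 * ndev->needs_teardown is consumed on the DRIVER_OK transition in
 * mlx5_vdpa_set_status(), which tears down and re-creates the VQ resources.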
*/ 2932 #define NEEDS_TEARDOWN_MASK (BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | \ 2933 BIT_ULL(VIRTIO_NET_F_CSUM) | \ 2934 BIT_ULL(VIRTIO_F_VERSION_1)) 2935 2936 diff_features = mvdev->mlx_features ^ mvdev->actual_features; 2937 ndev->needs_teardown = !!(diff_features & NEEDS_TEARDOWN_MASK); 2938 2939 update_cvq_info(mvdev); 2940 return err; 2941 } 2942 2943 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2944 { 2945 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2946 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2947 2948 ndev->config_cb = *cb; 2949 } 2950 2951 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2952 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2953 { 2954 return MLX5_VDPA_MAX_VQ_ENTRIES; 2955 } 2956 2957 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2958 { 2959 return VIRTIO_ID_NET; 2960 } 2961 2962 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2963 { 2964 return PCI_VENDOR_ID_MELLANOX; 2965 } 2966 2967 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2968 { 2969 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2970 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2971 2972 print_status(mvdev, ndev->mvdev.status, false); 2973 return ndev->mvdev.status; 2974 } 2975 2976 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2977 { 2978 struct mlx5_vq_restore_info *ri = &mvq->ri; 2979 struct mlx5_virtq_attr attr = {}; 2980 int err; 2981 2982 if (mvq->initialized) { 2983 err = query_virtqueues(ndev, mvq->index, 1, &attr); 2984 if (err) 2985 return err; 2986 } 2987 2988 ri->avail_index = attr.available_index; 2989 ri->used_index = attr.used_index; 2990 ri->ready = mvq->ready; 2991 ri->num_ent = mvq->num_ent; 2992 ri->desc_addr = mvq->desc_addr; 2993 ri->device_addr = mvq->device_addr; 2994 ri->driver_addr = mvq->driver_addr; 2995 ri->map = mvq->map; 2996 ri->restore = true; 2997 return 0; 2998 } 2999 3000 static int save_channels_info(struct mlx5_vdpa_net *ndev) 3001 { 3002 int i; 3003 3004 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3005 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 3006 save_channel_info(ndev, &ndev->vqs[i]); 3007 } 3008 return 0; 3009 } 3010 3011 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 3012 { 3013 int i; 3014 3015 for (i = 0; i < ndev->mvdev.max_vqs; i++) 3016 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3017 } 3018 3019 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 3020 { 3021 struct mlx5_vdpa_virtqueue *mvq; 3022 struct mlx5_vq_restore_info *ri; 3023 int i; 3024 3025 mlx5_clear_vqs(ndev); 3026 mvqs_set_defaults(ndev); 3027 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3028 mvq = &ndev->vqs[i]; 3029 ri = &mvq->ri; 3030 if (!ri->restore) 3031 continue; 3032 3033 mvq->avail_idx = ri->avail_index; 3034 mvq->used_idx = ri->used_index; 3035 mvq->ready = ri->ready; 3036 mvq->num_ent = ri->num_ent; 3037 mvq->desc_addr = ri->desc_addr; 3038 mvq->device_addr = ri->device_addr; 3039 mvq->driver_addr = ri->driver_addr; 3040 mvq->map = ri->map; 3041 } 3042 } 3043 3044 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 3045 struct mlx5_vdpa_mr *new_mr, 3046 unsigned int asid) 3047 { 3048 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3049 bool teardown = !is_resumable(ndev); 3050 int err; 3051 3052 suspend_vqs(ndev, 0, ndev->cur_num_vqs); 3053 if (teardown) { 3054 err = save_channels_info(ndev); 3055 if (err) 3056 return err; 3057 3058 teardown_vq_resources(ndev); 3059 } 3060 
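/* Switch to the new memory region and flag the mkey fields of every VQ as
 * modified so that the subsequent resume (or re-creation, when the device is
 * not resumable) picks up the new mapping.
 */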
3061 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 3062 3063 for (int i = 0; i < mvdev->max_vqs; i++) 3064 ndev->vqs[i].modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY | 3065 MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY; 3066 3067 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 3068 return 0; 3069 3070 if (teardown) { 3071 restore_channels_info(ndev); 3072 err = setup_vq_resources(ndev, true); 3073 if (err) 3074 return err; 3075 } 3076 3077 resume_vqs(ndev, 0, ndev->cur_num_vqs); 3078 3079 return 0; 3080 } 3081 3082 /* reslock must be held for this function */ 3083 static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled) 3084 { 3085 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 3086 int err; 3087 3088 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 3089 3090 if (ndev->setup) { 3091 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 3092 err = 0; 3093 goto out; 3094 } 3095 mlx5_vdpa_add_debugfs(ndev); 3096 3097 err = read_umem_params(ndev); 3098 if (err) 3099 goto err_setup; 3100 3101 err = setup_virtqueues(mvdev, filled); 3102 if (err) { 3103 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 3104 goto err_setup; 3105 } 3106 3107 err = create_rqt(ndev); 3108 if (err) { 3109 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 3110 goto err_rqt; 3111 } 3112 3113 err = create_tir(ndev); 3114 if (err) { 3115 mlx5_vdpa_warn(mvdev, "create_tir\n"); 3116 goto err_tir; 3117 } 3118 3119 err = setup_steering(ndev); 3120 if (err) { 3121 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 3122 goto err_fwd; 3123 } 3124 ndev->setup = true; 3125 3126 return 0; 3127 3128 err_fwd: 3129 destroy_tir(ndev); 3130 err_tir: 3131 destroy_rqt(ndev); 3132 err_rqt: 3133 teardown_virtqueues(ndev); 3134 err_setup: 3135 mlx5_vdpa_remove_debugfs(ndev); 3136 out: 3137 return err; 3138 } 3139 3140 /* reslock must be held for this function */ 3141 static void teardown_vq_resources(struct mlx5_vdpa_net *ndev) 3142 { 3143 3144 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 3145 3146 if (!ndev->setup) 3147 return; 3148 3149 mlx5_vdpa_remove_debugfs(ndev); 3150 teardown_steering(ndev); 3151 destroy_tir(ndev); 3152 destroy_rqt(ndev); 3153 teardown_virtqueues(ndev); 3154 ndev->setup = false; 3155 ndev->needs_teardown = false; 3156 } 3157 3158 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 3159 { 3160 struct mlx5_control_vq *cvq = &mvdev->cvq; 3161 int err = 0; 3162 3163 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { 3164 u16 idx = cvq->vring.last_avail_idx; 3165 3166 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 3167 cvq->vring.vring.num, false, 3168 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 3169 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 3170 (struct vring_used *)(uintptr_t)cvq->device_addr); 3171 3172 if (!err) 3173 cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; 3174 } 3175 return err; 3176 } 3177 3178 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 3179 { 3180 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3181 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3182 int err; 3183 3184 print_status(mvdev, status, true); 3185 3186 down_write(&ndev->reslock); 3187 3188 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 3189 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 3190 err = setup_cvq_vring(mvdev); 3191 if (err) { 3192 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 3193 goto err_setup; 3194 } 3195 register_link_notifier(ndev); 3196 3197 if (ndev->needs_teardown) 3198 
teardown_vq_resources(ndev); 3199 3200 if (ndev->setup) { 3201 err = resume_vqs(ndev, 0, ndev->cur_num_vqs); 3202 if (err) { 3203 mlx5_vdpa_warn(mvdev, "failed to resume VQs\n"); 3204 goto err_driver; 3205 } 3206 } else { 3207 err = setup_vq_resources(ndev, true); 3208 if (err) { 3209 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 3210 goto err_driver; 3211 } 3212 } 3213 } else { 3214 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 3215 goto err_clear; 3216 } 3217 } 3218 3219 ndev->mvdev.status = status; 3220 up_write(&ndev->reslock); 3221 return; 3222 3223 err_driver: 3224 unregister_link_notifier(ndev); 3225 err_setup: 3226 mlx5_vdpa_clean_mrs(&ndev->mvdev); 3227 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 3228 err_clear: 3229 up_write(&ndev->reslock); 3230 } 3231 3232 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 3233 { 3234 int i; 3235 3236 /* default mapping all groups are mapped to asid 0 */ 3237 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 3238 mvdev->mres.group2asid[i] = 0; 3239 } 3240 3241 static bool needs_vqs_reset(const struct mlx5_vdpa_dev *mvdev) 3242 { 3243 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3244 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[0]; 3245 3246 if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) 3247 return true; 3248 3249 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT) 3250 return true; 3251 3252 return mvq->modified_fields & ( 3253 MLX5_VIRTQ_MODIFY_MASK_STATE | 3254 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS | 3255 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX | 3256 MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX 3257 ); 3258 } 3259 3260 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) 3261 { 3262 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3263 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3264 bool vq_reset; 3265 3266 print_status(mvdev, 0, true); 3267 mlx5_vdpa_info(mvdev, "performing device reset\n"); 3268 3269 down_write(&ndev->reslock); 3270 unregister_link_notifier(ndev); 3271 vq_reset = needs_vqs_reset(mvdev); 3272 if (vq_reset) { 3273 teardown_vq_resources(ndev); 3274 mvqs_set_defaults(ndev); 3275 } 3276 3277 if (flags & VDPA_RESET_F_CLEAN_MAP) 3278 mlx5_vdpa_clean_mrs(&ndev->mvdev); 3279 ndev->mvdev.status = 0; 3280 ndev->mvdev.suspended = false; 3281 ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT; 3282 ndev->mvdev.cvq.ready = false; 3283 ndev->mvdev.cvq.received_desc = 0; 3284 ndev->mvdev.cvq.completed_desc = 0; 3285 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 3286 ndev->mvdev.actual_features = 0; 3287 init_group_to_asid_map(mvdev); 3288 ++mvdev->generation; 3289 3290 if ((flags & VDPA_RESET_F_CLEAN_MAP) && 3291 MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3292 if (mlx5_vdpa_create_dma_mr(mvdev)) 3293 mlx5_vdpa_err(mvdev, "create MR failed\n"); 3294 } 3295 if (vq_reset) 3296 setup_vq_resources(ndev, false); 3297 up_write(&ndev->reslock); 3298 3299 return 0; 3300 } 3301 3302 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 3303 { 3304 return mlx5_vdpa_compat_reset(vdev, 0); 3305 } 3306 3307 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 3308 { 3309 return sizeof(struct virtio_net_config); 3310 } 3311 3312 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 3313 unsigned int len) 3314 { 3315 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3316 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3317 3318 if (offset + len <= sizeof(struct virtio_net_config)) 3319 memcpy(buf, (u8 
*)&ndev->config + offset, len); 3320 } 3321 3322 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 3323 unsigned int len) 3324 { 3325 /* not supported */ 3326 } 3327 3328 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 3329 { 3330 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3331 3332 return mvdev->generation; 3333 } 3334 3335 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 3336 unsigned int asid) 3337 { 3338 struct mlx5_vdpa_mr *new_mr; 3339 int err; 3340 3341 if (asid >= MLX5_VDPA_NUM_AS) 3342 return -EINVAL; 3343 3344 if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) { 3345 new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); 3346 if (IS_ERR(new_mr)) { 3347 err = PTR_ERR(new_mr); 3348 mlx5_vdpa_err(mvdev, "create map failed(%d)\n", err); 3349 return err; 3350 } 3351 } else { 3352 /* Empty iotlbs don't have an mr but will clear the previous mr. */ 3353 new_mr = NULL; 3354 } 3355 3356 if (!mvdev->mres.mr[asid]) { 3357 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 3358 } else { 3359 err = mlx5_vdpa_change_map(mvdev, new_mr, asid); 3360 if (err) { 3361 mlx5_vdpa_err(mvdev, "change map failed(%d)\n", err); 3362 goto out_err; 3363 } 3364 } 3365 3366 return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); 3367 3368 out_err: 3369 mlx5_vdpa_put_mr(mvdev, new_mr); 3370 return err; 3371 } 3372 3373 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 3374 struct vhost_iotlb *iotlb) 3375 { 3376 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3377 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3378 int err = -EINVAL; 3379 3380 down_write(&ndev->reslock); 3381 err = set_map_data(mvdev, iotlb, asid); 3382 up_write(&ndev->reslock); 3383 return err; 3384 } 3385 3386 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) 3387 { 3388 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3389 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3390 int err; 3391 3392 down_write(&ndev->reslock); 3393 err = mlx5_vdpa_reset_mr(mvdev, asid); 3394 up_write(&ndev->reslock); 3395 return err; 3396 } 3397 3398 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 3399 { 3400 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3401 3402 if (is_ctrl_vq_idx(mvdev, idx)) 3403 return &vdev->dev; 3404 3405 return mvdev->vdev.dma_dev; 3406 } 3407 3408 static void free_irqs(struct mlx5_vdpa_net *ndev) 3409 { 3410 struct mlx5_vdpa_irq_pool_entry *ent; 3411 int i; 3412 3413 if (!msix_mode_supported(&ndev->mvdev)) 3414 return; 3415 3416 if (!ndev->irqp.entries) 3417 return; 3418 3419 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 3420 ent = ndev->irqp.entries + i; 3421 if (ent->map.virq) 3422 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 3423 } 3424 kfree(ndev->irqp.entries); 3425 } 3426 3427 static void mlx5_vdpa_free(struct vdpa_device *vdev) 3428 { 3429 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3430 struct mlx5_core_dev *pfmdev; 3431 struct mlx5_vdpa_net *ndev; 3432 3433 ndev = to_mlx5_vdpa_ndev(mvdev); 3434 3435 free_fixed_resources(ndev); 3436 mlx5_vdpa_clean_mrs(mvdev); 3437 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 3438 mlx5_cmd_cleanup_async_ctx(&mvdev->async_ctx); 3439 3440 if (!is_zero_ether_addr(ndev->config.mac)) { 3441 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 3442 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 3443 } 3444 mlx5_vdpa_free_resources(&ndev->mvdev); 3445 free_irqs(ndev); 3446 kfree(ndev->event_cbs); 3447 kfree(ndev->vqs); 3448 } 3449 
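/* Report the physical address and size of the doorbell (kick) page of a data
 * VQ so the vdpa consumer can map it and notify the queue directly. An empty
 * area is returned for the control VQ, or when direct mapping is not safe
 * (see the SF BAR size check below).
 */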
3450 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 3451 { 3452 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3453 struct vdpa_notification_area ret = {}; 3454 struct mlx5_vdpa_net *ndev; 3455 phys_addr_t addr; 3456 3457 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 3458 return ret; 3459 3460 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 3461 * notification to avoid the risk of mapping pages that contain BAR of more 3462 * than one SF 3463 */ 3464 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 3465 return ret; 3466 3467 ndev = to_mlx5_vdpa_ndev(mvdev); 3468 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 3469 ret.addr = addr; 3470 ret.size = PAGE_SIZE; 3471 return ret; 3472 } 3473 3474 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 3475 { 3476 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3477 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3478 struct mlx5_vdpa_virtqueue *mvq; 3479 3480 if (!is_index_valid(mvdev, idx)) 3481 return -EINVAL; 3482 3483 if (is_ctrl_vq_idx(mvdev, idx)) 3484 return -EOPNOTSUPP; 3485 3486 mvq = &ndev->vqs[idx]; 3487 if (!mvq->map.virq) 3488 return -EOPNOTSUPP; 3489 3490 return mvq->map.virq; 3491 } 3492 3493 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 3494 { 3495 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3496 3497 return mvdev->actual_features; 3498 } 3499 3500 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3501 u64 *received_desc, u64 *completed_desc) 3502 { 3503 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3504 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3505 void *cmd_hdr; 3506 void *ctx; 3507 int err; 3508 3509 if (!counters_supported(&ndev->mvdev)) 3510 return -EOPNOTSUPP; 3511 3512 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3513 return -EAGAIN; 3514 3515 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3516 3517 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3518 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3519 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3520 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3521 3522 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3523 if (err) 3524 return err; 3525 3526 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3527 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3528 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3529 return 0; 3530 } 3531 3532 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3533 struct sk_buff *msg, 3534 struct netlink_ext_ack *extack) 3535 { 3536 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3537 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3538 struct mlx5_vdpa_virtqueue *mvq; 3539 struct mlx5_control_vq *cvq; 3540 u64 received_desc; 3541 u64 completed_desc; 3542 int err = 0; 3543 3544 down_read(&ndev->reslock); 3545 if (!is_index_valid(mvdev, idx)) { 3546 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3547 err = -EINVAL; 3548 goto out_err; 3549 } 3550 3551 if (idx == ctrl_vq_idx(mvdev)) { 3552 cvq = &mvdev->cvq; 3553 received_desc = cvq->received_desc; 3554 completed_desc = cvq->completed_desc; 3555 goto out; 3556 } 3557 3558 mvq = &ndev->vqs[idx]; 3559 err = counter_set_query(ndev, mvq, 
&received_desc, &completed_desc); 3560 if (err) { 3561 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3562 goto out_err; 3563 } 3564 3565 out: 3566 err = -EMSGSIZE; 3567 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3568 goto out_err; 3569 3570 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3571 VDPA_ATTR_PAD)) 3572 goto out_err; 3573 3574 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3575 goto out_err; 3576 3577 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3578 VDPA_ATTR_PAD)) 3579 goto out_err; 3580 3581 err = 0; 3582 out_err: 3583 up_read(&ndev->reslock); 3584 return err; 3585 } 3586 3587 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3588 { 3589 struct mlx5_control_vq *cvq; 3590 3591 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3592 return; 3593 3594 cvq = &mvdev->cvq; 3595 cvq->ready = false; 3596 } 3597 3598 static int mlx5_vdpa_suspend(struct vdpa_device *vdev) 3599 { 3600 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3601 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3602 int err; 3603 3604 mlx5_vdpa_info(mvdev, "suspending device\n"); 3605 3606 down_write(&ndev->reslock); 3607 err = suspend_vqs(ndev, 0, ndev->cur_num_vqs); 3608 mlx5_vdpa_cvq_suspend(mvdev); 3609 mvdev->suspended = true; 3610 up_write(&ndev->reslock); 3611 3612 return err; 3613 } 3614 3615 static int mlx5_vdpa_resume(struct vdpa_device *vdev) 3616 { 3617 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3618 struct mlx5_vdpa_net *ndev; 3619 int err; 3620 3621 ndev = to_mlx5_vdpa_ndev(mvdev); 3622 3623 mlx5_vdpa_info(mvdev, "resuming device\n"); 3624 3625 down_write(&ndev->reslock); 3626 mvdev->suspended = false; 3627 err = resume_vqs(ndev, 0, ndev->cur_num_vqs); 3628 queue_link_work(ndev); 3629 up_write(&ndev->reslock); 3630 3631 return err; 3632 } 3633 3634 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3635 unsigned int asid) 3636 { 3637 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3638 int err = 0; 3639 3640 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3641 return -EINVAL; 3642 3643 mvdev->mres.group2asid[group] = asid; 3644 3645 mutex_lock(&mvdev->mres.lock); 3646 if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mres.mr[asid]) 3647 err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mres.mr[asid]->iotlb, asid); 3648 mutex_unlock(&mvdev->mres.lock); 3649 3650 return err; 3651 } 3652 3653 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3654 .set_vq_address = mlx5_vdpa_set_vq_address, 3655 .set_vq_num = mlx5_vdpa_set_vq_num, 3656 .kick_vq = mlx5_vdpa_kick_vq, 3657 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3658 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3659 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3660 .set_vq_state = mlx5_vdpa_set_vq_state, 3661 .get_vq_state = mlx5_vdpa_get_vq_state, 3662 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3663 .get_vq_notification = mlx5_get_vq_notification, 3664 .get_vq_irq = mlx5_get_vq_irq, 3665 .get_vq_align = mlx5_vdpa_get_vq_align, 3666 .get_vq_group = mlx5_vdpa_get_vq_group, 3667 .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. 
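 * These ops are registered through the per-management-device copy in
 * struct mlx5_vdpa_mgmtdev.vdpa_ops, which allows unsupported ops to be
 * cleared before the vdpa device is created.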
*/ 3668 .get_device_features = mlx5_vdpa_get_device_features, 3669 .get_backend_features = mlx5_vdpa_get_backend_features, 3670 .set_driver_features = mlx5_vdpa_set_driver_features, 3671 .get_driver_features = mlx5_vdpa_get_driver_features, 3672 .set_config_cb = mlx5_vdpa_set_config_cb, 3673 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3674 .get_device_id = mlx5_vdpa_get_device_id, 3675 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3676 .get_status = mlx5_vdpa_get_status, 3677 .set_status = mlx5_vdpa_set_status, 3678 .reset = mlx5_vdpa_reset, 3679 .compat_reset = mlx5_vdpa_compat_reset, 3680 .get_config_size = mlx5_vdpa_get_config_size, 3681 .get_config = mlx5_vdpa_get_config, 3682 .set_config = mlx5_vdpa_set_config, 3683 .get_generation = mlx5_vdpa_get_generation, 3684 .set_map = mlx5_vdpa_set_map, 3685 .reset_map = mlx5_vdpa_reset_map, 3686 .set_group_asid = mlx5_set_group_asid, 3687 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3688 .free = mlx5_vdpa_free, 3689 .suspend = mlx5_vdpa_suspend, 3690 .resume = mlx5_vdpa_resume, /* Op disabled if not supported. */ 3691 }; 3692 3693 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3694 { 3695 u16 hw_mtu; 3696 int err; 3697 3698 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3699 if (err) 3700 return err; 3701 3702 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3703 return 0; 3704 } 3705 3706 static int alloc_fixed_resources(struct mlx5_vdpa_net *ndev) 3707 { 3708 struct mlx5_vdpa_net_resources *res = &ndev->res; 3709 int err; 3710 3711 if (res->valid) { 3712 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3713 return -EEXIST; 3714 } 3715 3716 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3717 if (err) 3718 return err; 3719 3720 err = create_tis(ndev); 3721 if (err) 3722 goto err_tis; 3723 3724 res->valid = true; 3725 3726 return 0; 3727 3728 err_tis: 3729 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3730 return err; 3731 } 3732 3733 static void free_fixed_resources(struct mlx5_vdpa_net *ndev) 3734 { 3735 struct mlx5_vdpa_net_resources *res = &ndev->res; 3736 3737 if (!res->valid) 3738 return; 3739 3740 destroy_tis(ndev); 3741 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3742 res->valid = false; 3743 } 3744 3745 static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev) 3746 { 3747 struct mlx5_vdpa_virtqueue *mvq; 3748 int i; 3749 3750 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3751 mvq = &ndev->vqs[i]; 3752 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3753 mvq->index = i; 3754 mvq->ndev = ndev; 3755 mvq->fwqp.fw = true; 3756 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3757 mvq->num_ent = MLX5V_DEFAULT_VQ_SIZE; 3758 } 3759 } 3760 3761 struct mlx5_vdpa_mgmtdev { 3762 struct vdpa_mgmt_dev mgtdev; 3763 struct mlx5_adev *madev; 3764 struct mlx5_vdpa_net *ndev; 3765 struct vdpa_config_ops vdpa_ops; 3766 }; 3767 3768 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3769 { 3770 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3771 void *in; 3772 int err; 3773 3774 in = kvzalloc(inlen, GFP_KERNEL); 3775 if (!in) 3776 return -ENOMEM; 3777 3778 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3779 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3780 mtu + MLX5V_ETH_HARD_MTU); 3781 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3782 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3783 3784 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3785 3786 kvfree(in); 3787 return err; 3788 } 3789 3790 static void allocate_irqs(struct 
{
	struct mlx5_vdpa_irq_pool_entry *ent;
	int i;

	if (!msix_mode_supported(&ndev->mvdev))
		return;

	if (!ndev->mvdev.mdev->pdev)
		return;

	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
	if (!ndev->irqp.entries)
		return;

	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
		ent = ndev->irqp.entries + i;
		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
			 dev_name(&ndev->mvdev.vdev.dev), i);
		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
		if (!ent->map.virq)
			return;

		ndev->irqp.num_ent++;
	}
}

static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
			     const struct vdpa_dev_set_config *add_config)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	u64 device_features;
	u32 max_vqs;
	u16 mtu;
	int err;

	if (mgtdev->ndev)
		return -ENOSPC;

	mdev = mgtdev->madev->mdev;
	device_features = mgtdev->mgtdev.supported_features;
	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
		if (add_config->device_features & ~device_features) {
			dev_warn(mdev->device,
				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
				 add_config->device_features, device_features);
			return -EINVAL;
		}
		device_features &= add_config->device_features;
	} else {
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
	}
	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
		dev_warn(mdev->device,
			 "Must provision minimum features 0x%llx for this device",
			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
		return -EOPNOTSUPP;
	}

	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
	      MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
		dev_warn(mdev->device, "missing support for split virtqueues\n");
		return -EOPNOTSUPP;
	}

	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
	if (max_vqs < 2) {
		dev_warn(mdev->device,
			 "%d virtqueues are supported. At least 2 are required\n",
			 max_vqs);
		return -EAGAIN;
	}

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
		if (add_config->net.max_vq_pairs > max_vqs / 2)
			return -EINVAL;
		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
	} else {
		max_vqs = 2;
	}

	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
	if (IS_ERR(ndev))
		return PTR_ERR(ndev);

	ndev->mvdev.max_vqs = max_vqs;
	mvdev = &ndev->mvdev;
	mvdev->mdev = mdev;

	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
	if (!ndev->vqs || !ndev->event_cbs) {
		err = -ENOMEM;
		goto err_alloc;
	}
	ndev->cur_num_vqs = MLX5V_DEFAULT_VQ_COUNT;

	mvqs_set_defaults(ndev);
	allocate_irqs(ndev);
	init_rwsem(&ndev->reslock);
	config = &ndev->config;

	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
		err = config_func_mtu(mdev, add_config->net.mtu);
		if (err)
			goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
		err = query_mtu(mdev, &mtu);
		if (err)
			goto err_alloc;

		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
		if (get_link_state(mvdev))
			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
		else
			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
	}

	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
	/* Don't bother setting a mac address in the config if _F_MAC is not
	 * going to be provisioned.
	 */
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
		if (err)
			goto err_alloc;
	}

	if (!is_zero_ether_addr(config->mac)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (err)
			goto err_alloc;
	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
		/*
		 * We used to clear the _F_MAC feature bit when a zero mac
		 * address was seen and device features were not explicitly
		 * provisioned. Keep that behaviour so old scripts do not
		 * break.
		 */
		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
		/* Don't provision zero mac address for _F_MAC */
		mlx5_vdpa_warn(&ndev->mvdev,
			       "No mac address provisioned?\n");
		err = -EINVAL;
		goto err_alloc;
	}

	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) {
		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
		ndev->rqt_size = max_vqs / 2;
	} else {
		ndev->rqt_size = 1;
	}

	mlx5_cmd_init_async_ctx(mdev, &mvdev->async_ctx);

	ndev->mvdev.mlx_features = device_features;
	mvdev->vdev.dma_dev = &mdev->pdev->dev;
	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
	if (err)
		goto err_alloc;

	err = mlx5_vdpa_init_mr_resources(mvdev);
	if (err)
		goto err_alloc;

	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
		err = mlx5_vdpa_create_dma_mr(mvdev);
		if (err)
			goto err_alloc;
	}

	err = alloc_fixed_resources(ndev);
	if (err)
		goto err_alloc;

	ndev->cvq_ent.mvdev = mvdev;
	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
	if (!mvdev->wq) {
		err = -ENOMEM;
		goto err_alloc;
	}

	mvdev->vdev.mdev = &mgtdev->mgtdev;
	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
	if (err)
		goto err_reg;

	mgtdev->ndev = ndev;

	/* For virtio-vdpa, the device was set up during device register. */
	if (ndev->setup)
		return 0;

	down_write(&ndev->reslock);
	err = setup_vq_resources(ndev, false);
	up_write(&ndev->reslock);
	if (err)
		goto err_setup_vq_res;

	return 0;

err_setup_vq_res:
	_vdpa_unregister_device(&mvdev->vdev);
err_reg:
	destroy_workqueue(mvdev->wq);
err_alloc:
	put_device(&mvdev->vdev.dev);
	return err;
}

static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct workqueue_struct *wq;

	unregister_link_notifier(ndev);
	_vdpa_unregister_device(dev);

	down_write(&ndev->reslock);
	teardown_vq_resources(ndev);
	up_write(&ndev->reslock);

	wq = mvdev->wq;
	mvdev->wq = NULL;
	destroy_workqueue(wq);
	mgtdev->ndev = NULL;
}

static int mlx5_vdpa_set_attr(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev,
			      const struct vdpa_dev_set_config *add_config)
{
	struct virtio_net_config *config;
	struct mlx5_core_dev *pfmdev;
	struct mlx5_vdpa_dev *mvdev;
	struct mlx5_vdpa_net *ndev;
	struct mlx5_core_dev *mdev;
	int err = -EOPNOTSUPP;

	mvdev = to_mvdev(dev);
	ndev = to_mlx5_vdpa_ndev(mvdev);
	mdev = mvdev->mdev;
	config = &ndev->config;

	down_write(&ndev->reslock);
	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
		if (!err)
			ether_addr_copy(config->mac, add_config->net.mac);
	}

	up_write(&ndev->reslock);
	return err;
}
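
/*
 * Devices are created and destroyed against this management device through
 * the vdpa netlink API, which lands in mdev_ops below. As a rough,
 * non-authoritative illustration (the PCI address, MAC and values here are
 * made up, and attribute names depend on the installed iproute2 "vdpa" tool
 * version), the flow driven from userspace looks something like:
 *
 *   vdpa mgmtdev show
 *   vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 mac 00:11:22:33:44:55 max_vqp 4 mtu 1500
 *   vdpa dev del vdpa0
 *
 * "mac", "max_vqp" and "mtu" correspond to the VDPA_ATTR_DEV_NET_CFG_* bits
 * advertised in config_attr_mask at probe time and consumed by
 * mlx5_vdpa_dev_add() above.
 */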

static const struct vdpa_mgmtdev_ops mdev_ops = {
	.dev_add = mlx5_vdpa_dev_add,
	.dev_del = mlx5_vdpa_dev_del,
	.dev_set_attr = mlx5_vdpa_set_attr,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static int mlx5v_probe(struct auxiliary_device *adev,
		       const struct auxiliary_device_id *id)
{
	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = madev->mdev;
	struct mlx5_vdpa_mgmtdev *mgtdev;
	int err;

	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
	if (!mgtdev)
		return -ENOMEM;

	mgtdev->mgtdev.ops = &mdev_ops;
	mgtdev->mgtdev.device = mdev->device;
	mgtdev->mgtdev.id_table = id_table;
	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
	mgtdev->mgtdev.max_supported_vqs =
		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
	mgtdev->madev = madev;
	mgtdev->vdpa_ops = mlx5_vdpa_ops;

	/* Disable optional config ops the device cannot support; see the
	 * "Op disabled if not supported" entries in mlx5_vdpa_ops.
	 */
	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported))
		mgtdev->vdpa_ops.get_vq_desc_group = NULL;

	if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, freeze_to_rdy_supported))
		mgtdev->vdpa_ops.resume = NULL;

	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
	if (err)
		goto reg_err;

	auxiliary_set_drvdata(adev, mgtdev);

	return 0;

reg_err:
	kfree(mgtdev);
	return err;
}

static void mlx5v_remove(struct auxiliary_device *adev)
{
	struct mlx5_vdpa_mgmtdev *mgtdev;

	mgtdev = auxiliary_get_drvdata(adev);
	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
	kfree(mgtdev);
}

static const struct auxiliary_device_id mlx5v_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".vnet", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);

static struct auxiliary_driver mlx5v_driver = {
	.name = "vnet",
	.probe = mlx5v_probe,
	.remove = mlx5v_remove,
	.id_table = mlx5v_id_table,
};

module_auxiliary_driver(mlx5v_driver);