1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */ 3 4 #include <linux/module.h> 5 #include <linux/vdpa.h> 6 #include <linux/vringh.h> 7 #include <uapi/linux/virtio_net.h> 8 #include <uapi/linux/virtio_ids.h> 9 #include <uapi/linux/vdpa.h> 10 #include <uapi/linux/vhost_types.h> 11 #include <linux/virtio_config.h> 12 #include <linux/auxiliary_bus.h> 13 #include <linux/mlx5/cq.h> 14 #include <linux/mlx5/qp.h> 15 #include <linux/mlx5/device.h> 16 #include <linux/mlx5/driver.h> 17 #include <linux/mlx5/vport.h> 18 #include <linux/mlx5/fs.h> 19 #include <linux/mlx5/mlx5_ifc_vdpa.h> 20 #include <linux/mlx5/mpfs.h> 21 #include "mlx5_vdpa.h" 22 #include "mlx5_vnet.h" 23 24 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 25 MODULE_DESCRIPTION("Mellanox VDPA driver"); 26 MODULE_LICENSE("Dual BSD/GPL"); 27 28 #define VALID_FEATURES_MASK \ 29 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \ 30 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \ 31 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \ 32 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \ 33 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \ 34 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \ 35 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \ 36 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ 37 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \ 38 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \ 39 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \ 40 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \ 41 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV)) 42 43 #define VALID_STATUS_MASK \ 44 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ 45 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) 46 47 #define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature))) 48 49 #define MLX5V_UNTAGGED 0x1000 50 51 struct mlx5_vdpa_cq_buf { 52 struct mlx5_frag_buf_ctrl fbc; 53 struct mlx5_frag_buf frag_buf; 54 int cqe_size; 55 int nent; 56 }; 57 58 struct mlx5_vdpa_cq { 59 struct mlx5_core_cq mcq; 60 struct mlx5_vdpa_cq_buf buf; 61 struct mlx5_db db; 62 int cqe; 63 }; 64 65 struct mlx5_vdpa_umem { 66 struct mlx5_frag_buf_ctrl fbc; 67 struct mlx5_frag_buf frag_buf; 68 int size; 69 u32 id; 70 }; 71 72 struct mlx5_vdpa_qp { 73 struct mlx5_core_qp mqp; 74 struct mlx5_frag_buf frag_buf; 75 struct mlx5_db db; 76 u16 head; 77 bool fw; 78 }; 79 80 struct mlx5_vq_restore_info { 81 u32 num_ent; 82 u64 desc_addr; 83 u64 device_addr; 84 u64 driver_addr; 85 u16 avail_index; 86 u16 used_index; 87 struct msi_map map; 88 bool ready; 89 bool restore; 90 }; 91 92 struct mlx5_vdpa_virtqueue { 93 bool ready; 94 u64 desc_addr; 95 u64 device_addr; 96 u64 driver_addr; 97 u32 num_ent; 98 99 /* Resources for implementing the notification channel from the device 100 * to the driver. fwqp is the firmware end of an RC connection; the 101 * other end is vqqp used by the driver. cq is where completions are 102 * reported. 
103 */ 104 struct mlx5_vdpa_cq cq; 105 struct mlx5_vdpa_qp fwqp; 106 struct mlx5_vdpa_qp vqqp; 107 108 /* umem resources are required for the virtqueue operation. They're use 109 * is internal and they must be provided by the driver. 110 */ 111 struct mlx5_vdpa_umem umem1; 112 struct mlx5_vdpa_umem umem2; 113 struct mlx5_vdpa_umem umem3; 114 115 u32 counter_set_id; 116 bool initialized; 117 int index; 118 u32 virtq_id; 119 struct mlx5_vdpa_net *ndev; 120 u16 avail_idx; 121 u16 used_idx; 122 int fw_state; 123 struct msi_map map; 124 125 /* keep last in the struct */ 126 struct mlx5_vq_restore_info ri; 127 }; 128 129 static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx) 130 { 131 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) { 132 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 133 return idx < 2; 134 else 135 return idx < 3; 136 } 137 138 return idx <= mvdev->max_idx; 139 } 140 141 static void free_resources(struct mlx5_vdpa_net *ndev); 142 static void init_mvqs(struct mlx5_vdpa_net *ndev); 143 static int setup_driver(struct mlx5_vdpa_dev *mvdev); 144 static void teardown_driver(struct mlx5_vdpa_net *ndev); 145 146 static bool mlx5_vdpa_debug; 147 148 #define MLX5_CVQ_MAX_ENT 16 149 150 #define MLX5_LOG_VIO_FLAG(_feature) \ 151 do { \ 152 if (features & BIT_ULL(_feature)) \ 153 mlx5_vdpa_info(mvdev, "%s\n", #_feature); \ 154 } while (0) 155 156 #define MLX5_LOG_VIO_STAT(_status) \ 157 do { \ 158 if (status & (_status)) \ 159 mlx5_vdpa_info(mvdev, "%s\n", #_status); \ 160 } while (0) 161 162 /* TODO: cross-endian support */ 163 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) 164 { 165 return virtio_legacy_is_little_endian() || 166 (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1)); 167 } 168 169 static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val) 170 { 171 return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val); 172 } 173 174 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val) 175 { 176 return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val); 177 } 178 179 static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev) 180 { 181 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) 182 return 2; 183 184 return mvdev->max_vqs; 185 } 186 187 static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx) 188 { 189 return idx == ctrl_vq_idx(mvdev); 190 } 191 192 static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) 193 { 194 if (status & ~VALID_STATUS_MASK) 195 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", 196 status & ~VALID_STATUS_MASK); 197 198 if (!mlx5_vdpa_debug) 199 return; 200 201 mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); 202 if (set && !status) { 203 mlx5_vdpa_info(mvdev, "driver resets the device\n"); 204 return; 205 } 206 207 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); 208 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); 209 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); 210 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); 211 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); 212 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); 213 } 214 215 static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) 216 { 217 if (features & ~VALID_FEATURES_MASK) 218 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", 219 features & ~VALID_FEATURES_MASK); 220 221 if (!mlx5_vdpa_debug) 222 return; 223 224 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? 
"sets" : "reads"); 225 if (!features) 226 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); 227 228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); 229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); 230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); 232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); 233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); 234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); 235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); 236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); 237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); 238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); 239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); 240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); 241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); 242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); 243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); 244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); 245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); 246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); 247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); 248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); 249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); 250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); 251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); 252 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); 253 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); 254 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); 255 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); 256 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); 257 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); 258 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); 259 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); 260 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); 261 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); 262 } 263 264 static int create_tis(struct mlx5_vdpa_net *ndev) 265 { 266 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 267 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; 268 void *tisc; 269 int err; 270 271 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 272 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); 273 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); 274 if (err) 275 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); 276 277 return err; 278 } 279 280 static void destroy_tis(struct mlx5_vdpa_net *ndev) 281 { 282 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); 283 } 284 285 #define MLX5_VDPA_CQE_SIZE 64 286 #define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) 287 288 static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) 289 { 290 struct mlx5_frag_buf *frag_buf = &buf->frag_buf; 291 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; 292 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; 293 int err; 294 295 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, 296 ndev->mvdev.mdev->priv.numa_node); 297 if (err) 298 return err; 299 300 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); 301 302 buf->cqe_size = MLX5_VDPA_CQE_SIZE; 303 buf->nent = nent; 304 305 return 0; 306 } 307 308 static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) 309 { 310 struct mlx5_frag_buf *frag_buf = &umem->frag_buf; 311 312 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, 313 ndev->mvdev.mdev->priv.numa_node); 314 } 315 316 static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) 317 { 318 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); 319 } 320 321 static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) 322 { 323 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); 324 } 325 326 static 
void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) 327 { 328 struct mlx5_cqe64 *cqe64; 329 void *cqe; 330 int i; 331 332 for (i = 0; i < buf->nent; i++) { 333 cqe = get_cqe(vcq, i); 334 cqe64 = cqe; 335 cqe64->op_own = MLX5_CQE_INVALID << 4; 336 } 337 } 338 339 static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) 340 { 341 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); 342 343 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && 344 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) 345 return cqe64; 346 347 return NULL; 348 } 349 350 static void rx_post(struct mlx5_vdpa_qp *vqp, int n) 351 { 352 vqp->head += n; 353 vqp->db.db[0] = cpu_to_be32(vqp->head); 354 } 355 356 static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, 357 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) 358 { 359 struct mlx5_vdpa_qp *vqp; 360 __be64 *pas; 361 void *qpc; 362 363 vqp = fw ? &mvq->fwqp : &mvq->vqqp; 364 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); 365 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 366 if (vqp->fw) { 367 /* Firmware QP is allocated by the driver for the firmware's 368 * use so we can skip part of the params as they will be chosen by firmware 369 */ 370 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 371 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); 372 MLX5_SET(qpc, qpc, no_sq, 1); 373 return; 374 } 375 376 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 377 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 378 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 379 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 380 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); 381 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 382 MLX5_SET(qpc, qpc, no_sq, 1); 383 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); 384 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); 385 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); 386 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); 387 mlx5_fill_page_frag_array(&vqp->frag_buf, pas); 388 } 389 390 static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) 391 { 392 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, 393 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, 394 ndev->mvdev.mdev->priv.numa_node); 395 } 396 397 static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 398 { 399 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); 400 } 401 402 static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 403 struct mlx5_vdpa_qp *vqp) 404 { 405 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 406 int inlen = MLX5_ST_SZ_BYTES(create_qp_in); 407 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; 408 void *qpc; 409 void *in; 410 int err; 411 412 if (!vqp->fw) { 413 vqp = &mvq->vqqp; 414 err = rq_buf_alloc(ndev, vqp, mvq->num_ent); 415 if (err) 416 return err; 417 418 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); 419 if (err) 420 goto err_db; 421 inlen += vqp->frag_buf.npages * sizeof(__be64); 422 } 423 424 in = kzalloc(inlen, GFP_KERNEL); 425 if (!in) { 426 err = -ENOMEM; 427 goto err_kzalloc; 428 } 429 430 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); 431 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); 432 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); 433 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); 434 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); 435 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 436 if (!vqp->fw) 437 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); 438 
MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); 439 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); 440 kfree(in); 441 if (err) 442 goto err_kzalloc; 443 444 vqp->mqp.uid = ndev->mvdev.res.uid; 445 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); 446 447 if (!vqp->fw) 448 rx_post(vqp, mvq->num_ent); 449 450 return 0; 451 452 err_kzalloc: 453 if (!vqp->fw) 454 mlx5_db_free(ndev->mvdev.mdev, &vqp->db); 455 err_db: 456 if (!vqp->fw) 457 rq_buf_free(ndev, vqp); 458 459 return err; 460 } 461 462 static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) 463 { 464 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; 465 466 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); 467 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); 468 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); 469 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) 470 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); 471 if (!vqp->fw) { 472 mlx5_db_free(ndev->mvdev.mdev, &vqp->db); 473 rq_buf_free(ndev, vqp); 474 } 475 } 476 477 static void *next_cqe_sw(struct mlx5_vdpa_cq *cq) 478 { 479 return get_sw_cqe(cq, cq->mcq.cons_index); 480 } 481 482 static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) 483 { 484 struct mlx5_cqe64 *cqe64; 485 486 cqe64 = next_cqe_sw(vcq); 487 if (!cqe64) 488 return -EAGAIN; 489 490 vcq->mcq.cons_index++; 491 return 0; 492 } 493 494 static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) 495 { 496 struct mlx5_vdpa_net *ndev = mvq->ndev; 497 struct vdpa_callback *event_cb; 498 499 event_cb = &ndev->event_cbs[mvq->index]; 500 mlx5_cq_set_ci(&mvq->cq.mcq); 501 502 /* make sure CQ cosumer update is visible to the hardware before updating 503 * RX doorbell record. 504 */ 505 dma_wmb(); 506 rx_post(&mvq->vqqp, num); 507 if (event_cb->callback) 508 event_cb->callback(event_cb->private); 509 } 510 511 static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) 512 { 513 struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq); 514 struct mlx5_vdpa_net *ndev = mvq->ndev; 515 void __iomem *uar_page = ndev->mvdev.res.uar->map; 516 int num = 0; 517 518 while (!mlx5_vdpa_poll_one(&mvq->cq)) { 519 num++; 520 if (num > mvq->num_ent / 2) { 521 /* If completions keep coming while we poll, we want to 522 * let the hardware know that we consumed them by 523 * updating the doorbell record. We also let vdpa core 524 * know about this so it passes it on the virtio driver 525 * on the guest. 
526 */ 527 mlx5_vdpa_handle_completions(mvq, num); 528 num = 0; 529 } 530 } 531 532 if (num) 533 mlx5_vdpa_handle_completions(mvq, num); 534 535 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 536 } 537 538 static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) 539 { 540 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 541 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 542 void __iomem *uar_page = ndev->mvdev.res.uar->map; 543 u32 out[MLX5_ST_SZ_DW(create_cq_out)]; 544 struct mlx5_vdpa_cq *vcq = &mvq->cq; 545 __be64 *pas; 546 int inlen; 547 void *cqc; 548 void *in; 549 int err; 550 int eqn; 551 552 err = mlx5_db_alloc(mdev, &vcq->db); 553 if (err) 554 return err; 555 556 vcq->mcq.set_ci_db = vcq->db.db; 557 vcq->mcq.arm_db = vcq->db.db + 1; 558 vcq->mcq.cqe_sz = 64; 559 560 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); 561 if (err) 562 goto err_db; 563 564 cq_frag_buf_init(vcq, &vcq->buf); 565 566 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 567 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; 568 in = kzalloc(inlen, GFP_KERNEL); 569 if (!in) { 570 err = -ENOMEM; 571 goto err_vzalloc; 572 } 573 574 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); 575 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 576 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); 577 578 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 579 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 580 581 /* Use vector 0 by default. Consider adding code to choose least used 582 * vector. 583 */ 584 err = mlx5_comp_eqn_get(mdev, 0, &eqn); 585 if (err) 586 goto err_vec; 587 588 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); 589 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); 590 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); 591 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); 592 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); 593 594 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); 595 if (err) 596 goto err_vec; 597 598 vcq->mcq.comp = mlx5_vdpa_cq_comp; 599 vcq->cqe = num_ent; 600 vcq->mcq.set_ci_db = vcq->db.db; 601 vcq->mcq.arm_db = vcq->db.db + 1; 602 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 603 kfree(in); 604 return 0; 605 606 err_vec: 607 kfree(in); 608 err_vzalloc: 609 cq_frag_buf_free(ndev, &vcq->buf); 610 err_db: 611 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 612 return err; 613 } 614 615 static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) 616 { 617 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; 618 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 619 struct mlx5_vdpa_cq *vcq = &mvq->cq; 620 621 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { 622 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); 623 return; 624 } 625 cq_frag_buf_free(ndev, &vcq->buf); 626 mlx5_db_free(ndev->mvdev.mdev, &vcq->db); 627 } 628 629 static int read_umem_params(struct mlx5_vdpa_net *ndev) 630 { 631 u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; 632 u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01); 633 struct mlx5_core_dev *mdev = ndev->mvdev.mdev; 634 int out_size; 635 void *caps; 636 void *out; 637 int err; 638 639 out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); 640 out = kzalloc(out_size, GFP_KERNEL); 641 if (!out) 642 return -ENOMEM; 643 644 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 645 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 646 err = mlx5_cmd_exec_inout(mdev, 
query_hca_cap, in, out); 647 if (err) { 648 mlx5_vdpa_warn(&ndev->mvdev, 649 "Failed reading vdpa umem capabilities with err %d\n", err); 650 goto out; 651 } 652 653 caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 654 655 ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a); 656 ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b); 657 658 ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a); 659 ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b); 660 661 ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a); 662 ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b); 663 664 out: 665 kfree(out); 666 return 0; 667 } 668 669 static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, 670 struct mlx5_vdpa_umem **umemp) 671 { 672 u32 p_a; 673 u32 p_b; 674 675 switch (num) { 676 case 1: 677 p_a = ndev->umem_1_buffer_param_a; 678 p_b = ndev->umem_1_buffer_param_b; 679 *umemp = &mvq->umem1; 680 break; 681 case 2: 682 p_a = ndev->umem_2_buffer_param_a; 683 p_b = ndev->umem_2_buffer_param_b; 684 *umemp = &mvq->umem2; 685 break; 686 case 3: 687 p_a = ndev->umem_3_buffer_param_a; 688 p_b = ndev->umem_3_buffer_param_b; 689 *umemp = &mvq->umem3; 690 break; 691 } 692 693 (*umemp)->size = p_a * mvq->num_ent + p_b; 694 } 695 696 static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) 697 { 698 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); 699 } 700 701 static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 702 { 703 int inlen; 704 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; 705 void *um; 706 void *in; 707 int err; 708 __be64 *pas; 709 struct mlx5_vdpa_umem *umem; 710 711 set_umem_size(ndev, mvq, num, &umem); 712 err = umem_frag_buf_alloc(ndev, umem, umem->size); 713 if (err) 714 return err; 715 716 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; 717 718 in = kzalloc(inlen, GFP_KERNEL); 719 if (!in) { 720 err = -ENOMEM; 721 goto err_in; 722 } 723 724 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); 725 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); 726 um = MLX5_ADDR_OF(create_umem_in, in, umem); 727 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); 728 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); 729 730 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); 731 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); 732 733 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 734 if (err) { 735 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); 736 goto err_cmd; 737 } 738 739 kfree(in); 740 umem->id = MLX5_GET(create_umem_out, out, umem_id); 741 742 return 0; 743 744 err_cmd: 745 kfree(in); 746 err_in: 747 umem_frag_buf_free(ndev, umem); 748 return err; 749 } 750 751 static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) 752 { 753 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; 754 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; 755 struct mlx5_vdpa_umem *umem; 756 757 switch (num) { 758 case 1: 759 umem = &mvq->umem1; 760 break; 761 case 2: 762 umem = &mvq->umem2; 763 break; 764 case 3: 765 umem = &mvq->umem3; 766 break; 767 } 768 769 MLX5_SET(destroy_umem_in, in, opcode, 
MLX5_CMD_OP_DESTROY_UMEM); 770 MLX5_SET(destroy_umem_in, in, umem_id, umem->id); 771 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 772 return; 773 774 umem_frag_buf_free(ndev, umem); 775 } 776 777 static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 778 { 779 int num; 780 int err; 781 782 for (num = 1; num <= 3; num++) { 783 err = create_umem(ndev, mvq, num); 784 if (err) 785 goto err_umem; 786 } 787 return 0; 788 789 err_umem: 790 for (num--; num > 0; num--) 791 umem_destroy(ndev, mvq, num); 792 793 return err; 794 } 795 796 static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 797 { 798 int num; 799 800 for (num = 3; num > 0; num--) 801 umem_destroy(ndev, mvq, num); 802 } 803 804 static int get_queue_type(struct mlx5_vdpa_net *ndev) 805 { 806 u32 type_mask; 807 808 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); 809 810 /* prefer split queue */ 811 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) 812 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; 813 814 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); 815 816 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; 817 } 818 819 static bool vq_is_tx(u16 idx) 820 { 821 return idx % 2; 822 } 823 824 enum { 825 MLX5_VIRTIO_NET_F_MRG_RXBUF = 2, 826 MLX5_VIRTIO_NET_F_HOST_ECN = 4, 827 MLX5_VIRTIO_NET_F_GUEST_ECN = 6, 828 MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7, 829 MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8, 830 MLX5_VIRTIO_NET_F_GUEST_CSUM = 9, 831 MLX5_VIRTIO_NET_F_CSUM = 10, 832 MLX5_VIRTIO_NET_F_HOST_TSO6 = 11, 833 MLX5_VIRTIO_NET_F_HOST_TSO4 = 12, 834 }; 835 836 static u16 get_features(u64 features) 837 { 838 return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) | 839 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) | 840 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) | 841 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) | 842 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) | 843 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) | 844 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) | 845 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4); 846 } 847 848 static bool counters_supported(const struct mlx5_vdpa_dev *mvdev) 849 { 850 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) & 851 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 852 } 853 854 static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev) 855 { 856 return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) & 857 (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) && 858 pci_msix_can_alloc_dyn(mvdev->mdev->pdev); 859 } 860 861 static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 862 { 863 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); 864 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; 865 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; 866 struct mlx5_vdpa_mr *vq_mr; 867 struct mlx5_vdpa_mr *vq_desc_mr; 868 void *obj_context; 869 u16 mlx_features; 870 void *cmd_hdr; 871 void *vq_ctx; 872 void *in; 873 int err; 874 875 err = umems_create(ndev, mvq); 876 if (err) 877 return err; 878 879 in = kzalloc(inlen, GFP_KERNEL); 880 if (!in) { 881 err = -ENOMEM; 882 goto err_alloc; 883 } 884 885 mlx_features = get_features(ndev->mvdev.actual_features); 886 
cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); 887 888 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 889 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 890 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 891 892 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); 893 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); 894 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); 895 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, 896 mlx_features >> 3); 897 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0, 898 mlx_features & 7); 899 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); 900 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); 901 902 if (vq_is_tx(mvq->index)) 903 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); 904 905 if (mvq->map.virq) { 906 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE); 907 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index); 908 } else { 909 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); 910 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); 911 } 912 913 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); 914 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); 915 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 916 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); 917 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); 918 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); 919 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); 920 vq_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]]; 921 if (vq_mr) 922 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey); 923 924 vq_desc_mr = mvdev->mr[mvdev->group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]]; 925 if (vq_desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) 926 MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey); 927 928 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); 929 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); 930 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); 931 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); 932 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); 933 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); 934 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); 935 if (counters_supported(&ndev->mvdev)) 936 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id); 937 938 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 939 if (err) 940 goto err_cmd; 941 942 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT; 943 kfree(in); 944 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 945 946 return 0; 947 948 err_cmd: 949 kfree(in); 950 err_alloc: 951 umems_destroy(ndev, mvq); 952 return err; 953 } 954 955 static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 956 { 957 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; 958 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; 959 960 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, 961 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 962 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); 963 MLX5_SET(destroy_virtio_net_q_in, in, 
general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); 964 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, 965 MLX5_OBJ_TYPE_VIRTIO_NET_Q); 966 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { 967 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); 968 return; 969 } 970 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 971 umems_destroy(ndev, mvq); 972 } 973 974 static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 975 { 976 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; 977 } 978 979 static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) 980 { 981 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; 982 } 983 984 static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, 985 int *outlen, u32 qpn, u32 rqpn) 986 { 987 void *qpc; 988 void *pp; 989 990 switch (cmd) { 991 case MLX5_CMD_OP_2RST_QP: 992 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); 993 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); 994 *in = kzalloc(*inlen, GFP_KERNEL); 995 *out = kzalloc(*outlen, GFP_KERNEL); 996 if (!*in || !*out) 997 goto outerr; 998 999 MLX5_SET(qp_2rst_in, *in, opcode, cmd); 1000 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); 1001 MLX5_SET(qp_2rst_in, *in, qpn, qpn); 1002 break; 1003 case MLX5_CMD_OP_RST2INIT_QP: 1004 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); 1005 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); 1006 *in = kzalloc(*inlen, GFP_KERNEL); 1007 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); 1008 if (!*in || !*out) 1009 goto outerr; 1010 1011 MLX5_SET(rst2init_qp_in, *in, opcode, cmd); 1012 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); 1013 MLX5_SET(rst2init_qp_in, *in, qpn, qpn); 1014 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1015 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1016 MLX5_SET(qpc, qpc, rwe, 1); 1017 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1018 MLX5_SET(ads, pp, vhca_port_num, 1); 1019 break; 1020 case MLX5_CMD_OP_INIT2RTR_QP: 1021 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); 1022 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); 1023 *in = kzalloc(*inlen, GFP_KERNEL); 1024 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); 1025 if (!*in || !*out) 1026 goto outerr; 1027 1028 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); 1029 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); 1030 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); 1031 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1032 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); 1033 MLX5_SET(qpc, qpc, log_msg_max, 30); 1034 MLX5_SET(qpc, qpc, remote_qpn, rqpn); 1035 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1036 MLX5_SET(ads, pp, fl, 1); 1037 break; 1038 case MLX5_CMD_OP_RTR2RTS_QP: 1039 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); 1040 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); 1041 *in = kzalloc(*inlen, GFP_KERNEL); 1042 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); 1043 if (!*in || !*out) 1044 goto outerr; 1045 1046 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); 1047 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); 1048 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); 1049 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); 1050 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); 1051 MLX5_SET(ads, pp, ack_timeout, 14); 1052 MLX5_SET(qpc, qpc, retry_count, 7); 1053 MLX5_SET(qpc, qpc, rnr_retry, 7); 1054 break; 1055 default: 1056 goto outerr_nullify; 1057 } 1058 1059 return; 1060 1061 outerr: 1062 kfree(*in); 1063 kfree(*out); 1064 outerr_nullify: 1065 *in = NULL; 1066 *out = 
NULL; 1067 } 1068 1069 static void free_inout(void *in, void *out) 1070 { 1071 kfree(in); 1072 kfree(out); 1073 } 1074 1075 /* Two QPs are used by each virtqueue. One is used by the driver and one by 1076 * firmware. The fw argument indicates whether the subjected QP is the one used 1077 * by firmware. 1078 */ 1079 static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) 1080 { 1081 int outlen; 1082 int inlen; 1083 void *out; 1084 void *in; 1085 int err; 1086 1087 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); 1088 if (!in || !out) 1089 return -ENOMEM; 1090 1091 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); 1092 free_inout(in, out); 1093 return err; 1094 } 1095 1096 static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1097 { 1098 int err; 1099 1100 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); 1101 if (err) 1102 return err; 1103 1104 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); 1105 if (err) 1106 return err; 1107 1108 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); 1109 if (err) 1110 return err; 1111 1112 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); 1113 if (err) 1114 return err; 1115 1116 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); 1117 if (err) 1118 return err; 1119 1120 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); 1121 if (err) 1122 return err; 1123 1124 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); 1125 } 1126 1127 struct mlx5_virtq_attr { 1128 u8 state; 1129 u16 available_index; 1130 u16 used_index; 1131 }; 1132 1133 static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 1134 struct mlx5_virtq_attr *attr) 1135 { 1136 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); 1137 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; 1138 void *out; 1139 void *obj_context; 1140 void *cmd_hdr; 1141 int err; 1142 1143 out = kzalloc(outlen, GFP_KERNEL); 1144 if (!out) 1145 return -ENOMEM; 1146 1147 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1148 1149 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 1150 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1151 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1152 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1153 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); 1154 if (err) 1155 goto err_cmd; 1156 1157 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); 1158 memset(attr, 0, sizeof(*attr)); 1159 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); 1160 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); 1161 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); 1162 kfree(out); 1163 return 0; 1164 1165 err_cmd: 1166 kfree(out); 1167 return err; 1168 } 1169 1170 static bool is_valid_state_change(int oldstate, int newstate) 1171 { 1172 switch (oldstate) { 1173 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT: 1174 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY; 1175 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY: 1176 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND; 1177 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND: 1178 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR: 1179 default: 1180 return false; 1181 } 1182 } 1183 1184 static int modify_virtqueue(struct 
mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) 1185 { 1186 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); 1187 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; 1188 void *obj_context; 1189 void *cmd_hdr; 1190 void *in; 1191 int err; 1192 1193 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE) 1194 return 0; 1195 1196 if (!is_valid_state_change(mvq->fw_state, state)) 1197 return -EINVAL; 1198 1199 in = kzalloc(inlen, GFP_KERNEL); 1200 if (!in) 1201 return -ENOMEM; 1202 1203 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); 1204 1205 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); 1206 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); 1207 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); 1208 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1209 1210 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); 1211 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, 1212 MLX5_VIRTQ_MODIFY_MASK_STATE); 1213 MLX5_SET(virtio_net_q_object, obj_context, state, state); 1214 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); 1215 kfree(in); 1216 if (!err) 1217 mvq->fw_state = state; 1218 1219 return err; 1220 } 1221 1222 static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1223 { 1224 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {}; 1225 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {}; 1226 void *cmd_hdr; 1227 int err; 1228 1229 if (!counters_supported(&ndev->mvdev)) 1230 return 0; 1231 1232 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr); 1233 1234 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); 1235 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1236 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 1237 1238 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 1239 if (err) 1240 return err; 1241 1242 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); 1243 1244 return 0; 1245 } 1246 1247 static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1248 { 1249 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {}; 1250 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {}; 1251 1252 if (!counters_supported(&ndev->mvdev)) 1253 return; 1254 1255 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); 1256 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id); 1257 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid); 1258 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 1259 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) 1260 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id); 1261 } 1262 1263 static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv) 1264 { 1265 struct vdpa_callback *cb = priv; 1266 1267 if (cb->callback) 1268 return cb->callback(cb->private); 1269 1270 return IRQ_HANDLED; 1271 } 1272 1273 static void alloc_vector(struct mlx5_vdpa_net *ndev, 1274 struct mlx5_vdpa_virtqueue *mvq) 1275 { 1276 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1277 struct mlx5_vdpa_irq_pool_entry *ent; 1278 int err; 1279 int i; 1280 1281 for (i = 0; i < irqp->num_ent; i++) { 1282 ent = 
&irqp->entries[i]; 1283 if (!ent->used) { 1284 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 1285 dev_name(&ndev->mvdev.vdev.dev), mvq->index); 1286 ent->dev_id = &ndev->event_cbs[mvq->index]; 1287 err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0, 1288 ent->name, ent->dev_id); 1289 if (err) 1290 return; 1291 1292 ent->used = true; 1293 mvq->map = ent->map; 1294 return; 1295 } 1296 } 1297 } 1298 1299 static void dealloc_vector(struct mlx5_vdpa_net *ndev, 1300 struct mlx5_vdpa_virtqueue *mvq) 1301 { 1302 struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp; 1303 int i; 1304 1305 for (i = 0; i < irqp->num_ent; i++) 1306 if (mvq->map.virq == irqp->entries[i].map.virq) { 1307 free_irq(mvq->map.virq, irqp->entries[i].dev_id); 1308 irqp->entries[i].used = false; 1309 return; 1310 } 1311 } 1312 1313 static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1314 { 1315 u16 idx = mvq->index; 1316 int err; 1317 1318 if (!mvq->num_ent) 1319 return 0; 1320 1321 if (mvq->initialized) 1322 return 0; 1323 1324 err = cq_create(ndev, idx, mvq->num_ent); 1325 if (err) 1326 return err; 1327 1328 err = qp_create(ndev, mvq, &mvq->fwqp); 1329 if (err) 1330 goto err_fwqp; 1331 1332 err = qp_create(ndev, mvq, &mvq->vqqp); 1333 if (err) 1334 goto err_vqqp; 1335 1336 err = connect_qps(ndev, mvq); 1337 if (err) 1338 goto err_connect; 1339 1340 err = counter_set_alloc(ndev, mvq); 1341 if (err) 1342 goto err_connect; 1343 1344 alloc_vector(ndev, mvq); 1345 err = create_virtqueue(ndev, mvq); 1346 if (err) 1347 goto err_vq; 1348 1349 if (mvq->ready) { 1350 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 1351 if (err) { 1352 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", 1353 idx, err); 1354 goto err_modify; 1355 } 1356 } 1357 1358 mvq->initialized = true; 1359 return 0; 1360 1361 err_modify: 1362 destroy_virtqueue(ndev, mvq); 1363 err_vq: 1364 dealloc_vector(ndev, mvq); 1365 counter_set_dealloc(ndev, mvq); 1366 err_connect: 1367 qp_destroy(ndev, &mvq->vqqp); 1368 err_vqqp: 1369 qp_destroy(ndev, &mvq->fwqp); 1370 err_fwqp: 1371 cq_destroy(ndev, idx); 1372 return err; 1373 } 1374 1375 static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1376 { 1377 struct mlx5_virtq_attr attr; 1378 1379 if (!mvq->initialized) 1380 return; 1381 1382 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 1383 return; 1384 1385 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) 1386 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); 1387 1388 if (query_virtqueue(ndev, mvq, &attr)) { 1389 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); 1390 return; 1391 } 1392 mvq->avail_idx = attr.available_index; 1393 mvq->used_idx = attr.used_index; 1394 } 1395 1396 static void suspend_vqs(struct mlx5_vdpa_net *ndev) 1397 { 1398 int i; 1399 1400 for (i = 0; i < ndev->mvdev.max_vqs; i++) 1401 suspend_vq(ndev, &ndev->vqs[i]); 1402 } 1403 1404 static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 1405 { 1406 if (!mvq->initialized) 1407 return; 1408 1409 suspend_vq(ndev, mvq); 1410 destroy_virtqueue(ndev, mvq); 1411 dealloc_vector(ndev, mvq); 1412 counter_set_dealloc(ndev, mvq); 1413 qp_destroy(ndev, &mvq->vqqp); 1414 qp_destroy(ndev, &mvq->fwqp); 1415 cq_destroy(ndev, mvq->index); 1416 mvq->initialized = false; 1417 } 1418 1419 static int create_rqt(struct mlx5_vdpa_net *ndev) 1420 { 1421 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size); 1422 int act_sz = 
roundup_pow_of_two(ndev->cur_num_vqs / 2); 1423 __be32 *list; 1424 void *rqtc; 1425 int inlen; 1426 void *in; 1427 int i, j; 1428 int err; 1429 1430 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num); 1431 in = kzalloc(inlen, GFP_KERNEL); 1432 if (!in) 1433 return -ENOMEM; 1434 1435 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); 1436 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); 1437 1438 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1439 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size); 1440 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1441 for (i = 0, j = 0; i < act_sz; i++, j += 2) 1442 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id); 1443 1444 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1445 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); 1446 kfree(in); 1447 if (err) 1448 return err; 1449 1450 return 0; 1451 } 1452 1453 #define MLX5_MODIFY_RQT_NUM_RQS ((u64)1) 1454 1455 static int modify_rqt(struct mlx5_vdpa_net *ndev, int num) 1456 { 1457 int act_sz = roundup_pow_of_two(num / 2); 1458 __be32 *list; 1459 void *rqtc; 1460 int inlen; 1461 void *in; 1462 int i, j; 1463 int err; 1464 1465 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num); 1466 in = kzalloc(inlen, GFP_KERNEL); 1467 if (!in) 1468 return -ENOMEM; 1469 1470 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid); 1471 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS); 1472 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); 1473 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); 1474 1475 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); 1476 for (i = 0, j = 0; i < act_sz; i++, j = j + 2) 1477 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id); 1478 1479 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz); 1480 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn); 1481 kfree(in); 1482 if (err) 1483 return err; 1484 1485 return 0; 1486 } 1487 1488 static void destroy_rqt(struct mlx5_vdpa_net *ndev) 1489 { 1490 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); 1491 } 1492 1493 static int create_tir(struct mlx5_vdpa_net *ndev) 1494 { 1495 #define HASH_IP_L4PORTS \ 1496 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ 1497 MLX5_HASH_FIELD_SEL_L4_DPORT) 1498 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, 1499 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, 1500 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, 1501 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, 1502 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; 1503 void *rss_key; 1504 void *outer; 1505 void *tirc; 1506 void *in; 1507 int err; 1508 1509 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); 1510 if (!in) 1511 return -ENOMEM; 1512 1513 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); 1514 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); 1515 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); 1516 1517 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); 1518 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); 1519 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); 1520 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); 1521 1522 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); 1523 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); 1524 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); 1525 
MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); 1526 1527 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); 1528 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); 1529 1530 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); 1531 kfree(in); 1532 if (err) 1533 return err; 1534 1535 mlx5_vdpa_add_tirn(ndev); 1536 return err; 1537 } 1538 1539 static void destroy_tir(struct mlx5_vdpa_net *ndev) 1540 { 1541 mlx5_vdpa_remove_tirn(ndev); 1542 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); 1543 } 1544 1545 #define MAX_STEERING_ENT 0x8000 1546 #define MAX_STEERING_GROUPS 2 1547 1548 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1549 #define NUM_DESTS 2 1550 #else 1551 #define NUM_DESTS 1 1552 #endif 1553 1554 static int add_steering_counters(struct mlx5_vdpa_net *ndev, 1555 struct macvlan_node *node, 1556 struct mlx5_flow_act *flow_act, 1557 struct mlx5_flow_destination *dests) 1558 { 1559 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1560 int err; 1561 1562 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1563 if (IS_ERR(node->ucast_counter.counter)) 1564 return PTR_ERR(node->ucast_counter.counter); 1565 1566 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false); 1567 if (IS_ERR(node->mcast_counter.counter)) { 1568 err = PTR_ERR(node->mcast_counter.counter); 1569 goto err_mcast_counter; 1570 } 1571 1572 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1573 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; 1574 return 0; 1575 1576 err_mcast_counter: 1577 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1578 return err; 1579 #else 1580 return 0; 1581 #endif 1582 } 1583 1584 static void remove_steering_counters(struct mlx5_vdpa_net *ndev, 1585 struct macvlan_node *node) 1586 { 1587 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1588 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter); 1589 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter); 1590 #endif 1591 } 1592 1593 static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, 1594 struct macvlan_node *node) 1595 { 1596 struct mlx5_flow_destination dests[NUM_DESTS] = {}; 1597 struct mlx5_flow_act flow_act = {}; 1598 struct mlx5_flow_spec *spec; 1599 void *headers_c; 1600 void *headers_v; 1601 u8 *dmac_c; 1602 u8 *dmac_v; 1603 int err; 1604 u16 vid; 1605 1606 spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1607 if (!spec) 1608 return -ENOMEM; 1609 1610 vid = key2vid(node->macvlan); 1611 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1612 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1613 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1614 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1615 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1616 eth_broadcast_addr(dmac_c); 1617 ether_addr_copy(dmac_v, mac); 1618 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { 1619 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); 1620 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); 1621 } 1622 if (node->tagged) { 1623 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); 1624 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); 1625 } 1626 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1627 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1628 dests[0].tir_num = ndev->res.tirn; 1629 err = add_steering_counters(ndev, node, &flow_act, dests); 
1630 if (err) 1631 goto out_free; 1632 1633 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1634 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); 1635 #endif 1636 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1637 if (IS_ERR(node->ucast_rule)) { 1638 err = PTR_ERR(node->ucast_rule); 1639 goto err_ucast; 1640 } 1641 1642 #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) 1643 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); 1644 #endif 1645 1646 memset(dmac_c, 0, ETH_ALEN); 1647 memset(dmac_v, 0, ETH_ALEN); 1648 dmac_c[0] = 1; 1649 dmac_v[0] = 1; 1650 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); 1651 if (IS_ERR(node->mcast_rule)) { 1652 err = PTR_ERR(node->mcast_rule); 1653 goto err_mcast; 1654 } 1655 kvfree(spec); 1656 mlx5_vdpa_add_rx_counters(ndev, node); 1657 return 0; 1658 1659 err_mcast: 1660 mlx5_del_flow_rules(node->ucast_rule); 1661 err_ucast: 1662 remove_steering_counters(ndev, node); 1663 out_free: 1664 kvfree(spec); 1665 return err; 1666 } 1667 1668 static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev, 1669 struct macvlan_node *node) 1670 { 1671 mlx5_vdpa_remove_rx_counters(ndev, node); 1672 mlx5_del_flow_rules(node->ucast_rule); 1673 mlx5_del_flow_rules(node->mcast_rule); 1674 } 1675 1676 static u64 search_val(u8 *mac, u16 vlan, bool tagged) 1677 { 1678 u64 val; 1679 1680 if (!tagged) 1681 vlan = MLX5V_UNTAGGED; 1682 1683 val = (u64)vlan << 48 | 1684 (u64)mac[0] << 40 | 1685 (u64)mac[1] << 32 | 1686 (u64)mac[2] << 24 | 1687 (u64)mac[3] << 16 | 1688 (u64)mac[4] << 8 | 1689 (u64)mac[5]; 1690 1691 return val; 1692 } 1693 1694 static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value) 1695 { 1696 struct macvlan_node *pos; 1697 u32 idx; 1698 1699 idx = hash_64(value, 8); // tbd 8 1700 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) { 1701 if (pos->macvlan == value) 1702 return pos; 1703 } 1704 return NULL; 1705 } 1706 1707 static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged) 1708 { 1709 struct macvlan_node *ptr; 1710 u64 val; 1711 u32 idx; 1712 int err; 1713 1714 val = search_val(mac, vid, tagged); 1715 if (mac_vlan_lookup(ndev, val)) 1716 return -EEXIST; 1717 1718 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); 1719 if (!ptr) 1720 return -ENOMEM; 1721 1722 ptr->tagged = tagged; 1723 ptr->macvlan = val; 1724 ptr->ndev = ndev; 1725 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr); 1726 if (err) 1727 goto err_add; 1728 1729 idx = hash_64(val, 8); 1730 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]); 1731 return 0; 1732 1733 err_add: 1734 kfree(ptr); 1735 return err; 1736 } 1737 1738 static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged) 1739 { 1740 struct macvlan_node *ptr; 1741 1742 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged)); 1743 if (!ptr) 1744 return; 1745 1746 hlist_del(&ptr->hlist); 1747 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr); 1748 remove_steering_counters(ndev, ptr); 1749 kfree(ptr); 1750 } 1751 1752 static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev) 1753 { 1754 struct macvlan_node *pos; 1755 struct hlist_node *n; 1756 int i; 1757 1758 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) { 1759 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) { 1760 hlist_del(&pos->hlist); 1761 mlx5_vdpa_del_mac_vlan_rules(ndev, pos); 1762 remove_steering_counters(ndev, pos); 1763 kfree(pos); 1764 } 1765 } 1766 } 1767 1768 static int 
setup_steering(struct mlx5_vdpa_net *ndev) 1769 { 1770 struct mlx5_flow_table_attr ft_attr = {}; 1771 struct mlx5_flow_namespace *ns; 1772 int err; 1773 1774 ft_attr.max_fte = MAX_STEERING_ENT; 1775 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS; 1776 1777 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 1778 if (!ns) { 1779 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 1780 return -EOPNOTSUPP; 1781 } 1782 1783 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 1784 if (IS_ERR(ndev->rxft)) { 1785 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n"); 1786 return PTR_ERR(ndev->rxft); 1787 } 1788 mlx5_vdpa_add_rx_flow_table(ndev); 1789 1790 err = mac_vlan_add(ndev, ndev->config.mac, 0, false); 1791 if (err) 1792 goto err_add; 1793 1794 return 0; 1795 1796 err_add: 1797 mlx5_vdpa_remove_rx_flow_table(ndev); 1798 mlx5_destroy_flow_table(ndev->rxft); 1799 return err; 1800 } 1801 1802 static void teardown_steering(struct mlx5_vdpa_net *ndev) 1803 { 1804 clear_mac_vlan_table(ndev); 1805 mlx5_vdpa_remove_rx_flow_table(ndev); 1806 mlx5_destroy_flow_table(ndev->rxft); 1807 } 1808 1809 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1810 { 1811 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1812 struct mlx5_control_vq *cvq = &mvdev->cvq; 1813 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1814 struct mlx5_core_dev *pfmdev; 1815 size_t read; 1816 u8 mac[ETH_ALEN], mac_back[ETH_ALEN]; 1817 1818 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 1819 switch (cmd) { 1820 case VIRTIO_NET_CTRL_MAC_ADDR_SET: 1821 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN); 1822 if (read != ETH_ALEN) 1823 break; 1824 1825 if (!memcmp(ndev->config.mac, mac, 6)) { 1826 status = VIRTIO_NET_OK; 1827 break; 1828 } 1829 1830 if (is_zero_ether_addr(mac)) 1831 break; 1832 1833 if (!is_zero_ether_addr(ndev->config.mac)) { 1834 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1835 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n", 1836 ndev->config.mac); 1837 break; 1838 } 1839 } 1840 1841 if (mlx5_mpfs_add_mac(pfmdev, mac)) { 1842 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n", 1843 mac); 1844 break; 1845 } 1846 1847 /* backup the original mac address so that if failed to add the forward rules 1848 * we could restore it 1849 */ 1850 memcpy(mac_back, ndev->config.mac, ETH_ALEN); 1851 1852 memcpy(ndev->config.mac, mac, ETH_ALEN); 1853 1854 /* Need recreate the flow table entry, so that the packet could forward back 1855 */ 1856 mac_vlan_del(ndev, mac_back, 0, false); 1857 1858 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { 1859 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); 1860 1861 /* Although it hardly run here, we still need double check */ 1862 if (is_zero_ether_addr(mac_back)) { 1863 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n"); 1864 break; 1865 } 1866 1867 /* Try to restore original mac address to MFPS table, and try to restore 1868 * the forward rule entry. 
1869 */ 1870 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { 1871 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", 1872 ndev->config.mac); 1873 } 1874 1875 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { 1876 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", 1877 mac_back); 1878 } 1879 1880 memcpy(ndev->config.mac, mac_back, ETH_ALEN); 1881 1882 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) 1883 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); 1884 1885 break; 1886 } 1887 1888 status = VIRTIO_NET_OK; 1889 break; 1890 1891 default: 1892 break; 1893 } 1894 1895 return status; 1896 } 1897 1898 static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps) 1899 { 1900 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1901 int cur_qps = ndev->cur_num_vqs / 2; 1902 int err; 1903 int i; 1904 1905 if (cur_qps > newqps) { 1906 err = modify_rqt(ndev, 2 * newqps); 1907 if (err) 1908 return err; 1909 1910 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--) 1911 teardown_vq(ndev, &ndev->vqs[i]); 1912 1913 ndev->cur_num_vqs = 2 * newqps; 1914 } else { 1915 ndev->cur_num_vqs = 2 * newqps; 1916 for (i = cur_qps * 2; i < 2 * newqps; i++) { 1917 err = setup_vq(ndev, &ndev->vqs[i]); 1918 if (err) 1919 goto clean_added; 1920 } 1921 err = modify_rqt(ndev, 2 * newqps); 1922 if (err) 1923 goto clean_added; 1924 } 1925 return 0; 1926 1927 clean_added: 1928 for (--i; i >= 2 * cur_qps; --i) 1929 teardown_vq(ndev, &ndev->vqs[i]); 1930 1931 ndev->cur_num_vqs = 2 * cur_qps; 1932 1933 return err; 1934 } 1935 1936 static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1937 { 1938 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1939 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1940 struct mlx5_control_vq *cvq = &mvdev->cvq; 1941 struct virtio_net_ctrl_mq mq; 1942 size_t read; 1943 u16 newqps; 1944 1945 switch (cmd) { 1946 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET: 1947 /* This mq feature check aligns with pre-existing userspace 1948 * implementation. 1949 * 1950 * Without it, an untrusted driver could fake a multiqueue config 1951 * request down to a non-mq device that may cause kernel to 1952 * panic due to uninitialized resources for extra vqs. Even with 1953 * a well behaving guest driver, it is not expected to allow 1954 * changing the number of vqs on a non-mq device. 
1955 */ 1956 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) 1957 break; 1958 1959 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq)); 1960 if (read != sizeof(mq)) 1961 break; 1962 1963 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs); 1964 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 1965 newqps > ndev->rqt_size) 1966 break; 1967 1968 if (ndev->cur_num_vqs == 2 * newqps) { 1969 status = VIRTIO_NET_OK; 1970 break; 1971 } 1972 1973 if (!change_num_qps(mvdev, newqps)) 1974 status = VIRTIO_NET_OK; 1975 1976 break; 1977 default: 1978 break; 1979 } 1980 1981 return status; 1982 } 1983 1984 static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) 1985 { 1986 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1987 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1988 struct mlx5_control_vq *cvq = &mvdev->cvq; 1989 __virtio16 vlan; 1990 size_t read; 1991 u16 id; 1992 1993 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) 1994 return status; 1995 1996 switch (cmd) { 1997 case VIRTIO_NET_CTRL_VLAN_ADD: 1998 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 1999 if (read != sizeof(vlan)) 2000 break; 2001 2002 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2003 if (mac_vlan_add(ndev, ndev->config.mac, id, true)) 2004 break; 2005 2006 status = VIRTIO_NET_OK; 2007 break; 2008 case VIRTIO_NET_CTRL_VLAN_DEL: 2009 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); 2010 if (read != sizeof(vlan)) 2011 break; 2012 2013 id = mlx5vdpa16_to_cpu(mvdev, vlan); 2014 mac_vlan_del(ndev, ndev->config.mac, id, true); 2015 status = VIRTIO_NET_OK; 2016 break; 2017 default: 2018 break; 2019 } 2020 2021 return status; 2022 } 2023 2024 static void mlx5_cvq_kick_handler(struct work_struct *work) 2025 { 2026 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 2027 struct virtio_net_ctrl_hdr ctrl; 2028 struct mlx5_vdpa_wq_ent *wqent; 2029 struct mlx5_vdpa_dev *mvdev; 2030 struct mlx5_control_vq *cvq; 2031 struct mlx5_vdpa_net *ndev; 2032 size_t read, write; 2033 int err; 2034 2035 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2036 mvdev = wqent->mvdev; 2037 ndev = to_mlx5_vdpa_ndev(mvdev); 2038 cvq = &mvdev->cvq; 2039 2040 down_write(&ndev->reslock); 2041 2042 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2043 goto out; 2044 2045 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 2046 goto out; 2047 2048 if (!cvq->ready) 2049 goto out; 2050 2051 while (true) { 2052 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, 2053 GFP_ATOMIC); 2054 if (err <= 0) 2055 break; 2056 2057 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl)); 2058 if (read != sizeof(ctrl)) 2059 break; 2060 2061 cvq->received_desc++; 2062 switch (ctrl.class) { 2063 case VIRTIO_NET_CTRL_MAC: 2064 status = handle_ctrl_mac(mvdev, ctrl.cmd); 2065 break; 2066 case VIRTIO_NET_CTRL_MQ: 2067 status = handle_ctrl_mq(mvdev, ctrl.cmd); 2068 break; 2069 case VIRTIO_NET_CTRL_VLAN: 2070 status = handle_ctrl_vlan(mvdev, ctrl.cmd); 2071 break; 2072 default: 2073 break; 2074 } 2075 2076 /* Make sure data is written before advancing index */ 2077 smp_wmb(); 2078 2079 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status)); 2080 vringh_complete_iotlb(&cvq->vring, cvq->head, write); 2081 vringh_kiov_cleanup(&cvq->riov); 2082 vringh_kiov_cleanup(&cvq->wiov); 2083 2084 if (vringh_need_notify_iotlb(&cvq->vring)) 2085 vringh_notify(&cvq->vring); 2086 2087 
cvq->completed_desc++; 2088 queue_work(mvdev->wq, &wqent->work); 2089 break; 2090 } 2091 2092 out: 2093 up_write(&ndev->reslock); 2094 } 2095 2096 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) 2097 { 2098 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2099 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2100 struct mlx5_vdpa_virtqueue *mvq; 2101 2102 if (!is_index_valid(mvdev, idx)) 2103 return; 2104 2105 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { 2106 if (!mvdev->wq || !mvdev->cvq.ready) 2107 return; 2108 2109 queue_work(mvdev->wq, &ndev->cvq_ent.work); 2110 return; 2111 } 2112 2113 mvq = &ndev->vqs[idx]; 2114 if (unlikely(!mvq->ready)) 2115 return; 2116 2117 iowrite16(idx, ndev->mvdev.res.kick_addr); 2118 } 2119 2120 static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, 2121 u64 driver_area, u64 device_area) 2122 { 2123 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2124 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2125 struct mlx5_vdpa_virtqueue *mvq; 2126 2127 if (!is_index_valid(mvdev, idx)) 2128 return -EINVAL; 2129 2130 if (is_ctrl_vq_idx(mvdev, idx)) { 2131 mvdev->cvq.desc_addr = desc_area; 2132 mvdev->cvq.device_addr = device_area; 2133 mvdev->cvq.driver_addr = driver_area; 2134 return 0; 2135 } 2136 2137 mvq = &ndev->vqs[idx]; 2138 mvq->desc_addr = desc_area; 2139 mvq->device_addr = device_area; 2140 mvq->driver_addr = driver_area; 2141 return 0; 2142 } 2143 2144 static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) 2145 { 2146 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2147 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2148 struct mlx5_vdpa_virtqueue *mvq; 2149 2150 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 2151 return; 2152 2153 mvq = &ndev->vqs[idx]; 2154 mvq->num_ent = num; 2155 } 2156 2157 static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) 2158 { 2159 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2160 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2161 2162 ndev->event_cbs[idx] = *cb; 2163 if (is_ctrl_vq_idx(mvdev, idx)) 2164 mvdev->cvq.event_cb = *cb; 2165 } 2166 2167 static void mlx5_cvq_notify(struct vringh *vring) 2168 { 2169 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring); 2170 2171 if (!cvq->event_cb.callback) 2172 return; 2173 2174 cvq->event_cb.callback(cvq->event_cb.private); 2175 } 2176 2177 static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready) 2178 { 2179 struct mlx5_control_vq *cvq = &mvdev->cvq; 2180 2181 cvq->ready = ready; 2182 if (!ready) 2183 return; 2184 2185 cvq->vring.notify = mlx5_cvq_notify; 2186 } 2187 2188 static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) 2189 { 2190 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2191 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2192 struct mlx5_vdpa_virtqueue *mvq; 2193 int err; 2194 2195 if (!mvdev->actual_features) 2196 return; 2197 2198 if (!is_index_valid(mvdev, idx)) 2199 return; 2200 2201 if (is_ctrl_vq_idx(mvdev, idx)) { 2202 set_cvq_ready(mvdev, ready); 2203 return; 2204 } 2205 2206 mvq = &ndev->vqs[idx]; 2207 if (!ready) { 2208 suspend_vq(ndev, mvq); 2209 } else { 2210 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); 2211 if (err) { 2212 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err); 2213 ready = false; 2214 } 2215 } 2216 2217 2218 mvq->ready = ready; 2219 } 2220 2221 static bool 
mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) 2222 { 2223 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2224 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2225 2226 if (!is_index_valid(mvdev, idx)) 2227 return false; 2228 2229 if (is_ctrl_vq_idx(mvdev, idx)) 2230 return mvdev->cvq.ready; 2231 2232 return ndev->vqs[idx].ready; 2233 } 2234 2235 static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, 2236 const struct vdpa_vq_state *state) 2237 { 2238 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2239 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2240 struct mlx5_vdpa_virtqueue *mvq; 2241 2242 if (!is_index_valid(mvdev, idx)) 2243 return -EINVAL; 2244 2245 if (is_ctrl_vq_idx(mvdev, idx)) { 2246 mvdev->cvq.vring.last_avail_idx = state->split.avail_index; 2247 return 0; 2248 } 2249 2250 mvq = &ndev->vqs[idx]; 2251 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { 2252 mlx5_vdpa_warn(mvdev, "can't modify available index\n"); 2253 return -EINVAL; 2254 } 2255 2256 mvq->used_idx = state->split.avail_index; 2257 mvq->avail_idx = state->split.avail_index; 2258 return 0; 2259 } 2260 2261 static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) 2262 { 2263 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2264 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2265 struct mlx5_vdpa_virtqueue *mvq; 2266 struct mlx5_virtq_attr attr; 2267 int err; 2268 2269 if (!is_index_valid(mvdev, idx)) 2270 return -EINVAL; 2271 2272 if (is_ctrl_vq_idx(mvdev, idx)) { 2273 state->split.avail_index = mvdev->cvq.vring.last_avail_idx; 2274 return 0; 2275 } 2276 2277 mvq = &ndev->vqs[idx]; 2278 /* If the virtq object was destroyed, use the value saved at 2279 * the last minute of suspend_vq. This caters for userspace 2280 * that cares about emulating the index after vq is stopped. 2281 */ 2282 if (!mvq->initialized) { 2283 /* Firmware returns a wrong value for the available index. 2284 * Since both values should be identical, we take the value of 2285 * used_idx which is reported correctly. 
2286 */ 2287 state->split.avail_index = mvq->used_idx; 2288 return 0; 2289 } 2290 2291 err = query_virtqueue(ndev, mvq, &attr); 2292 if (err) { 2293 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); 2294 return err; 2295 } 2296 state->split.avail_index = attr.used_index; 2297 return 0; 2298 } 2299 2300 static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) 2301 { 2302 return PAGE_SIZE; 2303 } 2304 2305 static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx) 2306 { 2307 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2308 2309 if (is_ctrl_vq_idx(mvdev, idx)) 2310 return MLX5_VDPA_CVQ_GROUP; 2311 2312 return MLX5_VDPA_DATAVQ_GROUP; 2313 } 2314 2315 static u32 mlx5_vdpa_get_vq_desc_group(struct vdpa_device *vdev, u16 idx) 2316 { 2317 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2318 2319 if (is_ctrl_vq_idx(mvdev, idx)) 2320 return MLX5_VDPA_CVQ_GROUP; 2321 2322 return MLX5_VDPA_DATAVQ_DESC_GROUP; 2323 } 2324 2325 static u64 mlx_to_vritio_features(u16 dev_features) 2326 { 2327 u64 result = 0; 2328 2329 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF)) 2330 result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 2331 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN)) 2332 result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN); 2333 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN)) 2334 result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN); 2335 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6)) 2336 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6); 2337 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4)) 2338 result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4); 2339 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM)) 2340 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); 2341 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM)) 2342 result |= BIT_ULL(VIRTIO_NET_F_CSUM); 2343 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6)) 2344 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); 2345 if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4)) 2346 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); 2347 2348 return result; 2349 } 2350 2351 static u64 get_supported_features(struct mlx5_core_dev *mdev) 2352 { 2353 u64 mlx_vdpa_features = 0; 2354 u16 dev_features; 2355 2356 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask); 2357 mlx_vdpa_features |= mlx_to_vritio_features(dev_features); 2358 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0)) 2359 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1); 2360 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 2361 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 2362 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 2363 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ); 2364 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS); 2365 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU); 2366 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN); 2367 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC); 2368 2369 return mlx_vdpa_features; 2370 } 2371 2372 static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev) 2373 { 2374 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2375 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2376 2377 print_features(mvdev, ndev->mvdev.mlx_features, false); 2378 return ndev->mvdev.mlx_features; 2379 } 2380 2381 static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) 2382 { 2383 /* Minimum features to expect */ 2384 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) 2385 return -EOPNOTSUPP; 2386 2387 /* Double check features combination sent down by the driver. 
2388 * Fail invalid feature combinations where a required dependency is absent.
2389 *
2390 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2391 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2392 * By failing the invalid features sent down by untrusted drivers,
2393 * we ensure that the assumptions made by is_index_valid() and
2394 * is_ctrl_vq_idx() are not violated.
2395 */
2396 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2397 BIT_ULL(VIRTIO_NET_F_MQ))
2398 return -EINVAL;
2399
2400 return 0;
2401 }
2402
2403 static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2404 {
2405 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2406 int err;
2407 int i;
2408
2409 for (i = 0; i < mvdev->max_vqs; i++) {
2410 err = setup_vq(ndev, &ndev->vqs[i]);
2411 if (err)
2412 goto err_vq;
2413 }
2414
2415 return 0;
2416
2417 err_vq:
2418 for (--i; i >= 0; i--)
2419 teardown_vq(ndev, &ndev->vqs[i]);
2420
2421 return err;
2422 }
2423
2424 static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2425 {
2426 struct mlx5_vdpa_virtqueue *mvq;
2427 int i;
2428
2429 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2430 mvq = &ndev->vqs[i];
2431 if (!mvq->initialized)
2432 continue;
2433
2434 teardown_vq(ndev, mvq);
2435 }
2436 }
2437
2438 static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2439 {
2440 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2441 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2442 /* MQ supported. CVQ index is right above the last data virtqueue's */
2443 mvdev->max_idx = mvdev->max_vqs;
2444 } else {
2445 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2446 * the CVQ gets index 2.
2447 */
2448 mvdev->max_idx = 2;
2449 }
2450 } else {
2451 /* Two data virtqueues only: one for rx and one for tx */
2452 mvdev->max_idx = 1;
2453 }
2454 }
2455
2456 static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2457 {
2458 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2459 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2460 int err;
2461
2462 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2463 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2464 MLX5_SET(query_vport_state_in, in, vport_number, vport);
2465 if (vport)
2466 MLX5_SET(query_vport_state_in, in, other_vport, 1);
2467
2468 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2469 if (err)
2470 return 0;
2471
2472 return MLX5_GET(query_vport_state_out, out, state);
2473 }
2474
2475 static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2476 {
2477 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2478 VPORT_STATE_UP)
2479 return true;
2480
2481 return false;
2482 }
2483
2484 static void update_carrier(struct work_struct *work)
2485 {
2486 struct mlx5_vdpa_wq_ent *wqent;
2487 struct mlx5_vdpa_dev *mvdev;
2488 struct mlx5_vdpa_net *ndev;
2489
2490 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2491 mvdev = wqent->mvdev;
2492 ndev = to_mlx5_vdpa_ndev(mvdev);
2493 if (get_link_state(mvdev))
2494 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2495 else
2496 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2497
2498 if (ndev->config_cb.callback)
2499 ndev->config_cb.callback(ndev->config_cb.private);
2500
2501 kfree(wqent);
2502 }
2503
2504 static int queue_link_work(struct mlx5_vdpa_net *ndev)
2505 {
2506 struct mlx5_vdpa_wq_ent *wqent;
2507
2508 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2509 if (!wqent)
2510 return -ENOMEM; 2511 2512 wqent->mvdev = &ndev->mvdev; 2513 INIT_WORK(&wqent->work, update_carrier); 2514 queue_work(ndev->mvdev.wq, &wqent->work); 2515 return 0; 2516 } 2517 2518 static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2519 { 2520 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2521 struct mlx5_eqe *eqe = param; 2522 int ret = NOTIFY_DONE; 2523 2524 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2525 switch (eqe->sub_type) { 2526 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2527 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2528 if (queue_link_work(ndev)) 2529 return NOTIFY_DONE; 2530 2531 ret = NOTIFY_OK; 2532 break; 2533 default: 2534 return NOTIFY_DONE; 2535 } 2536 return ret; 2537 } 2538 return ret; 2539 } 2540 2541 static void register_link_notifier(struct mlx5_vdpa_net *ndev) 2542 { 2543 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS))) 2544 return; 2545 2546 ndev->nb.notifier_call = event_handler; 2547 mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb); 2548 ndev->nb_registered = true; 2549 queue_link_work(ndev); 2550 } 2551 2552 static void unregister_link_notifier(struct mlx5_vdpa_net *ndev) 2553 { 2554 if (!ndev->nb_registered) 2555 return; 2556 2557 ndev->nb_registered = false; 2558 mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb); 2559 if (ndev->mvdev.wq) 2560 flush_workqueue(ndev->mvdev.wq); 2561 } 2562 2563 static u64 mlx5_vdpa_get_backend_features(const struct vdpa_device *vdpa) 2564 { 2565 return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); 2566 } 2567 2568 static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features) 2569 { 2570 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2571 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2572 int err; 2573 2574 print_features(mvdev, features, true); 2575 2576 err = verify_driver_features(mvdev, features); 2577 if (err) 2578 return err; 2579 2580 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 2581 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ)) 2582 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs); 2583 else 2584 ndev->rqt_size = 1; 2585 2586 /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section 2587 * 5.1.6.5.5 "Device operation in multiqueue mode": 2588 * 2589 * Multiqueue is disabled by default. 2590 * The driver enables multiqueue by sending a command using class 2591 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue 2592 * operation, as follows: ... 
2593 */ 2594 ndev->cur_num_vqs = 2; 2595 2596 update_cvq_info(mvdev); 2597 return err; 2598 } 2599 2600 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 2601 { 2602 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2603 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2604 2605 ndev->config_cb = *cb; 2606 } 2607 2608 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 2609 static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) 2610 { 2611 return MLX5_VDPA_MAX_VQ_ENTRIES; 2612 } 2613 2614 static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) 2615 { 2616 return VIRTIO_ID_NET; 2617 } 2618 2619 static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) 2620 { 2621 return PCI_VENDOR_ID_MELLANOX; 2622 } 2623 2624 static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) 2625 { 2626 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2627 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2628 2629 print_status(mvdev, ndev->mvdev.status, false); 2630 return ndev->mvdev.status; 2631 } 2632 2633 static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) 2634 { 2635 struct mlx5_vq_restore_info *ri = &mvq->ri; 2636 struct mlx5_virtq_attr attr = {}; 2637 int err; 2638 2639 if (mvq->initialized) { 2640 err = query_virtqueue(ndev, mvq, &attr); 2641 if (err) 2642 return err; 2643 } 2644 2645 ri->avail_index = attr.available_index; 2646 ri->used_index = attr.used_index; 2647 ri->ready = mvq->ready; 2648 ri->num_ent = mvq->num_ent; 2649 ri->desc_addr = mvq->desc_addr; 2650 ri->device_addr = mvq->device_addr; 2651 ri->driver_addr = mvq->driver_addr; 2652 ri->map = mvq->map; 2653 ri->restore = true; 2654 return 0; 2655 } 2656 2657 static int save_channels_info(struct mlx5_vdpa_net *ndev) 2658 { 2659 int i; 2660 2661 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2662 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); 2663 save_channel_info(ndev, &ndev->vqs[i]); 2664 } 2665 return 0; 2666 } 2667 2668 static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) 2669 { 2670 int i; 2671 2672 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2673 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 2674 } 2675 2676 static void restore_channels_info(struct mlx5_vdpa_net *ndev) 2677 { 2678 struct mlx5_vdpa_virtqueue *mvq; 2679 struct mlx5_vq_restore_info *ri; 2680 int i; 2681 2682 mlx5_clear_vqs(ndev); 2683 init_mvqs(ndev); 2684 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 2685 mvq = &ndev->vqs[i]; 2686 ri = &mvq->ri; 2687 if (!ri->restore) 2688 continue; 2689 2690 mvq->avail_idx = ri->avail_index; 2691 mvq->used_idx = ri->used_index; 2692 mvq->ready = ri->ready; 2693 mvq->num_ent = ri->num_ent; 2694 mvq->desc_addr = ri->desc_addr; 2695 mvq->device_addr = ri->device_addr; 2696 mvq->driver_addr = ri->driver_addr; 2697 mvq->map = ri->map; 2698 } 2699 } 2700 2701 static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, 2702 struct mlx5_vdpa_mr *new_mr, 2703 unsigned int asid) 2704 { 2705 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2706 int err; 2707 2708 suspend_vqs(ndev); 2709 err = save_channels_info(ndev); 2710 if (err) 2711 return err; 2712 2713 teardown_driver(ndev); 2714 2715 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 2716 2717 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended) 2718 return 0; 2719 2720 restore_channels_info(ndev); 2721 err = setup_driver(mvdev); 2722 if (err) 2723 return err; 2724 2725 return 0; 2726 } 2727 2728 /* reslock must be held for this function */ 2729 static int setup_driver(struct 
mlx5_vdpa_dev *mvdev) 2730 { 2731 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2732 int err; 2733 2734 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2735 2736 if (ndev->setup) { 2737 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2738 err = 0; 2739 goto out; 2740 } 2741 mlx5_vdpa_add_debugfs(ndev); 2742 2743 err = read_umem_params(ndev); 2744 if (err) 2745 goto err_setup; 2746 2747 err = setup_virtqueues(mvdev); 2748 if (err) { 2749 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n"); 2750 goto err_setup; 2751 } 2752 2753 err = create_rqt(ndev); 2754 if (err) { 2755 mlx5_vdpa_warn(mvdev, "create_rqt\n"); 2756 goto err_rqt; 2757 } 2758 2759 err = create_tir(ndev); 2760 if (err) { 2761 mlx5_vdpa_warn(mvdev, "create_tir\n"); 2762 goto err_tir; 2763 } 2764 2765 err = setup_steering(ndev); 2766 if (err) { 2767 mlx5_vdpa_warn(mvdev, "setup_steering\n"); 2768 goto err_fwd; 2769 } 2770 ndev->setup = true; 2771 2772 return 0; 2773 2774 err_fwd: 2775 destroy_tir(ndev); 2776 err_tir: 2777 destroy_rqt(ndev); 2778 err_rqt: 2779 teardown_virtqueues(ndev); 2780 err_setup: 2781 mlx5_vdpa_remove_debugfs(ndev); 2782 out: 2783 return err; 2784 } 2785 2786 /* reslock must be held for this function */ 2787 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2788 { 2789 2790 WARN_ON(!rwsem_is_locked(&ndev->reslock)); 2791 2792 if (!ndev->setup) 2793 return; 2794 2795 mlx5_vdpa_remove_debugfs(ndev); 2796 teardown_steering(ndev); 2797 destroy_tir(ndev); 2798 destroy_rqt(ndev); 2799 teardown_virtqueues(ndev); 2800 ndev->setup = false; 2801 } 2802 2803 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) 2804 { 2805 int i; 2806 2807 for (i = 0; i < ndev->mvdev.max_vqs; i++) 2808 ndev->vqs[i].ready = false; 2809 2810 ndev->mvdev.cvq.ready = false; 2811 } 2812 2813 static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) 2814 { 2815 struct mlx5_control_vq *cvq = &mvdev->cvq; 2816 int err = 0; 2817 2818 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { 2819 u16 idx = cvq->vring.last_avail_idx; 2820 2821 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, 2822 MLX5_CVQ_MAX_ENT, false, 2823 (struct vring_desc *)(uintptr_t)cvq->desc_addr, 2824 (struct vring_avail *)(uintptr_t)cvq->driver_addr, 2825 (struct vring_used *)(uintptr_t)cvq->device_addr); 2826 2827 if (!err) 2828 cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx; 2829 } 2830 return err; 2831 } 2832 2833 static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) 2834 { 2835 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2836 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2837 int err; 2838 2839 print_status(mvdev, status, true); 2840 2841 down_write(&ndev->reslock); 2842 2843 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2844 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2845 err = setup_cvq_vring(mvdev); 2846 if (err) { 2847 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); 2848 goto err_setup; 2849 } 2850 register_link_notifier(ndev); 2851 err = setup_driver(mvdev); 2852 if (err) { 2853 mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); 2854 goto err_driver; 2855 } 2856 } else { 2857 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2858 goto err_clear; 2859 } 2860 } 2861 2862 ndev->mvdev.status = status; 2863 up_write(&ndev->reslock); 2864 return; 2865 2866 err_driver: 2867 unregister_link_notifier(ndev); 2868 err_setup: 2869 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 2870 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2871 err_clear: 
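/* Common error exit: release reslock without any further change to the
 * device status.
 */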
2872 up_write(&ndev->reslock); 2873 } 2874 2875 static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) 2876 { 2877 int i; 2878 2879 /* default mapping all groups are mapped to asid 0 */ 2880 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++) 2881 mvdev->group2asid[i] = 0; 2882 } 2883 2884 static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags) 2885 { 2886 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2887 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2888 2889 print_status(mvdev, 0, true); 2890 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2891 2892 down_write(&ndev->reslock); 2893 unregister_link_notifier(ndev); 2894 teardown_driver(ndev); 2895 clear_vqs_ready(ndev); 2896 if (flags & VDPA_RESET_F_CLEAN_MAP) 2897 mlx5_vdpa_destroy_mr_resources(&ndev->mvdev); 2898 ndev->mvdev.status = 0; 2899 ndev->mvdev.suspended = false; 2900 ndev->cur_num_vqs = 0; 2901 ndev->mvdev.cvq.received_desc = 0; 2902 ndev->mvdev.cvq.completed_desc = 0; 2903 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1)); 2904 ndev->mvdev.actual_features = 0; 2905 init_group_to_asid_map(mvdev); 2906 ++mvdev->generation; 2907 2908 if ((flags & VDPA_RESET_F_CLEAN_MAP) && 2909 MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 2910 if (mlx5_vdpa_create_dma_mr(mvdev)) 2911 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2912 } 2913 up_write(&ndev->reslock); 2914 2915 return 0; 2916 } 2917 2918 static int mlx5_vdpa_reset(struct vdpa_device *vdev) 2919 { 2920 return mlx5_vdpa_compat_reset(vdev, 0); 2921 } 2922 2923 static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev) 2924 { 2925 return sizeof(struct virtio_net_config); 2926 } 2927 2928 static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, 2929 unsigned int len) 2930 { 2931 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2932 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2933 2934 if (offset + len <= sizeof(struct virtio_net_config)) 2935 memcpy(buf, (u8 *)&ndev->config + offset, len); 2936 } 2937 2938 static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, 2939 unsigned int len) 2940 { 2941 /* not supported */ 2942 } 2943 2944 static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) 2945 { 2946 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2947 2948 return mvdev->generation; 2949 } 2950 2951 static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, 2952 unsigned int asid) 2953 { 2954 struct mlx5_vdpa_mr *new_mr; 2955 int err; 2956 2957 if (asid >= MLX5_VDPA_NUM_AS) 2958 return -EINVAL; 2959 2960 if (vhost_iotlb_itree_first(iotlb, 0, U64_MAX)) { 2961 new_mr = mlx5_vdpa_create_mr(mvdev, iotlb); 2962 if (IS_ERR(new_mr)) { 2963 err = PTR_ERR(new_mr); 2964 mlx5_vdpa_warn(mvdev, "create map failed(%d)\n", err); 2965 return err; 2966 } 2967 } else { 2968 /* Empty iotlbs don't have an mr but will clear the previous mr. 
*/ 2969 new_mr = NULL; 2970 } 2971 2972 if (!mvdev->mr[asid]) { 2973 mlx5_vdpa_update_mr(mvdev, new_mr, asid); 2974 } else { 2975 err = mlx5_vdpa_change_map(mvdev, new_mr, asid); 2976 if (err) { 2977 mlx5_vdpa_warn(mvdev, "change map failed(%d)\n", err); 2978 goto out_err; 2979 } 2980 } 2981 2982 return mlx5_vdpa_update_cvq_iotlb(mvdev, iotlb, asid); 2983 2984 out_err: 2985 mlx5_vdpa_destroy_mr(mvdev, new_mr); 2986 return err; 2987 } 2988 2989 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, 2990 struct vhost_iotlb *iotlb) 2991 { 2992 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2993 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2994 int err = -EINVAL; 2995 2996 down_write(&ndev->reslock); 2997 err = set_map_data(mvdev, iotlb, asid); 2998 up_write(&ndev->reslock); 2999 return err; 3000 } 3001 3002 static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid) 3003 { 3004 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3005 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3006 int err; 3007 3008 down_write(&ndev->reslock); 3009 err = mlx5_vdpa_reset_mr(mvdev, asid); 3010 up_write(&ndev->reslock); 3011 return err; 3012 } 3013 3014 static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx) 3015 { 3016 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3017 3018 if (is_ctrl_vq_idx(mvdev, idx)) 3019 return &vdev->dev; 3020 3021 return mvdev->vdev.dma_dev; 3022 } 3023 3024 static void free_irqs(struct mlx5_vdpa_net *ndev) 3025 { 3026 struct mlx5_vdpa_irq_pool_entry *ent; 3027 int i; 3028 3029 if (!msix_mode_supported(&ndev->mvdev)) 3030 return; 3031 3032 if (!ndev->irqp.entries) 3033 return; 3034 3035 for (i = ndev->irqp.num_ent - 1; i >= 0; i--) { 3036 ent = ndev->irqp.entries + i; 3037 if (ent->map.virq) 3038 pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map); 3039 } 3040 kfree(ndev->irqp.entries); 3041 } 3042 3043 static void mlx5_vdpa_free(struct vdpa_device *vdev) 3044 { 3045 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3046 struct mlx5_core_dev *pfmdev; 3047 struct mlx5_vdpa_net *ndev; 3048 3049 ndev = to_mlx5_vdpa_ndev(mvdev); 3050 3051 free_resources(ndev); 3052 mlx5_vdpa_destroy_mr_resources(mvdev); 3053 if (!is_zero_ether_addr(ndev->config.mac)) { 3054 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); 3055 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); 3056 } 3057 mlx5_vdpa_free_resources(&ndev->mvdev); 3058 free_irqs(ndev); 3059 kfree(ndev->event_cbs); 3060 kfree(ndev->vqs); 3061 } 3062 3063 static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) 3064 { 3065 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3066 struct vdpa_notification_area ret = {}; 3067 struct mlx5_vdpa_net *ndev; 3068 phys_addr_t addr; 3069 3070 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx)) 3071 return ret; 3072 3073 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct 3074 * notification to avoid the risk of mapping pages that contain BAR of more 3075 * than one SF 3076 */ 3077 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT) 3078 return ret; 3079 3080 ndev = to_mlx5_vdpa_ndev(mvdev); 3081 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr; 3082 ret.addr = addr; 3083 ret.size = PAGE_SIZE; 3084 return ret; 3085 } 3086 3087 static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx) 3088 { 3089 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3090 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3091 struct mlx5_vdpa_virtqueue *mvq; 3092 3093 if 
(!is_index_valid(mvdev, idx)) 3094 return -EINVAL; 3095 3096 if (is_ctrl_vq_idx(mvdev, idx)) 3097 return -EOPNOTSUPP; 3098 3099 mvq = &ndev->vqs[idx]; 3100 if (!mvq->map.virq) 3101 return -EOPNOTSUPP; 3102 3103 return mvq->map.virq; 3104 } 3105 3106 static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev) 3107 { 3108 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3109 3110 return mvdev->actual_features; 3111 } 3112 3113 static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, 3114 u64 *received_desc, u64 *completed_desc) 3115 { 3116 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {}; 3117 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {}; 3118 void *cmd_hdr; 3119 void *ctx; 3120 int err; 3121 3122 if (!counters_supported(&ndev->mvdev)) 3123 return -EOPNOTSUPP; 3124 3125 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) 3126 return -EAGAIN; 3127 3128 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr); 3129 3130 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); 3131 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS); 3132 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); 3133 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id); 3134 3135 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)); 3136 if (err) 3137 return err; 3138 3139 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters); 3140 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc); 3141 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc); 3142 return 0; 3143 } 3144 3145 static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx, 3146 struct sk_buff *msg, 3147 struct netlink_ext_ack *extack) 3148 { 3149 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3150 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3151 struct mlx5_vdpa_virtqueue *mvq; 3152 struct mlx5_control_vq *cvq; 3153 u64 received_desc; 3154 u64 completed_desc; 3155 int err = 0; 3156 3157 down_read(&ndev->reslock); 3158 if (!is_index_valid(mvdev, idx)) { 3159 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid"); 3160 err = -EINVAL; 3161 goto out_err; 3162 } 3163 3164 if (idx == ctrl_vq_idx(mvdev)) { 3165 cvq = &mvdev->cvq; 3166 received_desc = cvq->received_desc; 3167 completed_desc = cvq->completed_desc; 3168 goto out; 3169 } 3170 3171 mvq = &ndev->vqs[idx]; 3172 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc); 3173 if (err) { 3174 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware"); 3175 goto out_err; 3176 } 3177 3178 out: 3179 err = -EMSGSIZE; 3180 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc")) 3181 goto out_err; 3182 3183 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc, 3184 VDPA_ATTR_PAD)) 3185 goto out_err; 3186 3187 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc")) 3188 goto out_err; 3189 3190 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc, 3191 VDPA_ATTR_PAD)) 3192 goto out_err; 3193 3194 err = 0; 3195 out_err: 3196 up_read(&ndev->reslock); 3197 return err; 3198 } 3199 3200 static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev) 3201 { 3202 struct mlx5_control_vq *cvq; 3203 3204 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 3205 return; 3206 3207 cvq = &mvdev->cvq; 3208 cvq->ready = false; 3209 } 3210 3211 static int mlx5_vdpa_suspend(struct vdpa_device 
*vdev) 3212 { 3213 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3214 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3215 struct mlx5_vdpa_virtqueue *mvq; 3216 int i; 3217 3218 mlx5_vdpa_info(mvdev, "suspending device\n"); 3219 3220 down_write(&ndev->reslock); 3221 unregister_link_notifier(ndev); 3222 for (i = 0; i < ndev->cur_num_vqs; i++) { 3223 mvq = &ndev->vqs[i]; 3224 suspend_vq(ndev, mvq); 3225 } 3226 mlx5_vdpa_cvq_suspend(mvdev); 3227 mvdev->suspended = true; 3228 up_write(&ndev->reslock); 3229 return 0; 3230 } 3231 3232 static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group, 3233 unsigned int asid) 3234 { 3235 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 3236 int err = 0; 3237 3238 if (group >= MLX5_VDPA_NUMVQ_GROUPS) 3239 return -EINVAL; 3240 3241 mvdev->group2asid[group] = asid; 3242 3243 mutex_lock(&mvdev->mr_mtx); 3244 if (group == MLX5_VDPA_CVQ_GROUP && mvdev->mr[asid]) 3245 err = mlx5_vdpa_update_cvq_iotlb(mvdev, mvdev->mr[asid]->iotlb, asid); 3246 mutex_unlock(&mvdev->mr_mtx); 3247 3248 return err; 3249 } 3250 3251 static const struct vdpa_config_ops mlx5_vdpa_ops = { 3252 .set_vq_address = mlx5_vdpa_set_vq_address, 3253 .set_vq_num = mlx5_vdpa_set_vq_num, 3254 .kick_vq = mlx5_vdpa_kick_vq, 3255 .set_vq_cb = mlx5_vdpa_set_vq_cb, 3256 .set_vq_ready = mlx5_vdpa_set_vq_ready, 3257 .get_vq_ready = mlx5_vdpa_get_vq_ready, 3258 .set_vq_state = mlx5_vdpa_set_vq_state, 3259 .get_vq_state = mlx5_vdpa_get_vq_state, 3260 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats, 3261 .get_vq_notification = mlx5_get_vq_notification, 3262 .get_vq_irq = mlx5_get_vq_irq, 3263 .get_vq_align = mlx5_vdpa_get_vq_align, 3264 .get_vq_group = mlx5_vdpa_get_vq_group, 3265 .get_vq_desc_group = mlx5_vdpa_get_vq_desc_group, /* Op disabled if not supported. 
*/ 3266 .get_device_features = mlx5_vdpa_get_device_features, 3267 .get_backend_features = mlx5_vdpa_get_backend_features, 3268 .set_driver_features = mlx5_vdpa_set_driver_features, 3269 .get_driver_features = mlx5_vdpa_get_driver_features, 3270 .set_config_cb = mlx5_vdpa_set_config_cb, 3271 .get_vq_num_max = mlx5_vdpa_get_vq_num_max, 3272 .get_device_id = mlx5_vdpa_get_device_id, 3273 .get_vendor_id = mlx5_vdpa_get_vendor_id, 3274 .get_status = mlx5_vdpa_get_status, 3275 .set_status = mlx5_vdpa_set_status, 3276 .reset = mlx5_vdpa_reset, 3277 .compat_reset = mlx5_vdpa_compat_reset, 3278 .get_config_size = mlx5_vdpa_get_config_size, 3279 .get_config = mlx5_vdpa_get_config, 3280 .set_config = mlx5_vdpa_set_config, 3281 .get_generation = mlx5_vdpa_get_generation, 3282 .set_map = mlx5_vdpa_set_map, 3283 .reset_map = mlx5_vdpa_reset_map, 3284 .set_group_asid = mlx5_set_group_asid, 3285 .get_vq_dma_dev = mlx5_get_vq_dma_dev, 3286 .free = mlx5_vdpa_free, 3287 .suspend = mlx5_vdpa_suspend, 3288 }; 3289 3290 static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) 3291 { 3292 u16 hw_mtu; 3293 int err; 3294 3295 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); 3296 if (err) 3297 return err; 3298 3299 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; 3300 return 0; 3301 } 3302 3303 static int alloc_resources(struct mlx5_vdpa_net *ndev) 3304 { 3305 struct mlx5_vdpa_net_resources *res = &ndev->res; 3306 int err; 3307 3308 if (res->valid) { 3309 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); 3310 return -EEXIST; 3311 } 3312 3313 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); 3314 if (err) 3315 return err; 3316 3317 err = create_tis(ndev); 3318 if (err) 3319 goto err_tis; 3320 3321 res->valid = true; 3322 3323 return 0; 3324 3325 err_tis: 3326 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3327 return err; 3328 } 3329 3330 static void free_resources(struct mlx5_vdpa_net *ndev) 3331 { 3332 struct mlx5_vdpa_net_resources *res = &ndev->res; 3333 3334 if (!res->valid) 3335 return; 3336 3337 destroy_tis(ndev); 3338 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); 3339 res->valid = false; 3340 } 3341 3342 static void init_mvqs(struct mlx5_vdpa_net *ndev) 3343 { 3344 struct mlx5_vdpa_virtqueue *mvq; 3345 int i; 3346 3347 for (i = 0; i < ndev->mvdev.max_vqs; ++i) { 3348 mvq = &ndev->vqs[i]; 3349 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3350 mvq->index = i; 3351 mvq->ndev = ndev; 3352 mvq->fwqp.fw = true; 3353 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE; 3354 } 3355 for (; i < ndev->mvdev.max_vqs; i++) { 3356 mvq = &ndev->vqs[i]; 3357 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); 3358 mvq->index = i; 3359 mvq->ndev = ndev; 3360 } 3361 } 3362 3363 struct mlx5_vdpa_mgmtdev { 3364 struct vdpa_mgmt_dev mgtdev; 3365 struct mlx5_adev *madev; 3366 struct mlx5_vdpa_net *ndev; 3367 struct vdpa_config_ops vdpa_ops; 3368 }; 3369 3370 static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) 3371 { 3372 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); 3373 void *in; 3374 int err; 3375 3376 in = kvzalloc(inlen, GFP_KERNEL); 3377 if (!in) 3378 return -ENOMEM; 3379 3380 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); 3381 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, 3382 mtu + MLX5V_ETH_HARD_MTU); 3383 MLX5_SET(modify_nic_vport_context_in, in, opcode, 3384 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); 3385 3386 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); 3387 3388 kvfree(in); 3389 return err; 
3390 } 3391 3392 static void allocate_irqs(struct mlx5_vdpa_net *ndev) 3393 { 3394 struct mlx5_vdpa_irq_pool_entry *ent; 3395 int i; 3396 3397 if (!msix_mode_supported(&ndev->mvdev)) 3398 return; 3399 3400 if (!ndev->mvdev.mdev->pdev) 3401 return; 3402 3403 ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL); 3404 if (!ndev->irqp.entries) 3405 return; 3406 3407 3408 for (i = 0; i < ndev->mvdev.max_vqs; i++) { 3409 ent = ndev->irqp.entries + i; 3410 snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d", 3411 dev_name(&ndev->mvdev.vdev.dev), i); 3412 ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL); 3413 if (!ent->map.virq) 3414 return; 3415 3416 ndev->irqp.num_ent++; 3417 } 3418 } 3419 3420 static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 3421 const struct vdpa_dev_set_config *add_config) 3422 { 3423 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3424 struct virtio_net_config *config; 3425 struct mlx5_core_dev *pfmdev; 3426 struct mlx5_vdpa_dev *mvdev; 3427 struct mlx5_vdpa_net *ndev; 3428 struct mlx5_core_dev *mdev; 3429 u64 device_features; 3430 u32 max_vqs; 3431 u16 mtu; 3432 int err; 3433 3434 if (mgtdev->ndev) 3435 return -ENOSPC; 3436 3437 mdev = mgtdev->madev->mdev; 3438 device_features = mgtdev->mgtdev.supported_features; 3439 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { 3440 if (add_config->device_features & ~device_features) { 3441 dev_warn(mdev->device, 3442 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n", 3443 add_config->device_features, device_features); 3444 return -EINVAL; 3445 } 3446 device_features &= add_config->device_features; 3447 } else { 3448 device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF); 3449 } 3450 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) && 3451 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) { 3452 dev_warn(mdev->device, 3453 "Must provision minimum features 0x%llx for this device", 3454 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)); 3455 return -EOPNOTSUPP; 3456 } 3457 3458 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & 3459 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { 3460 dev_warn(mdev->device, "missing support for split virtqueues\n"); 3461 return -EOPNOTSUPP; 3462 } 3463 3464 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues), 3465 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size)); 3466 if (max_vqs < 2) { 3467 dev_warn(mdev->device, 3468 "%d virtqueues are supported. 
At least 2 are required\n",
3469 max_vqs);
3470 return -EAGAIN;
3471 }
3472
3473 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3474 if (add_config->net.max_vq_pairs > max_vqs / 2)
3475 return -EINVAL;
3476 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3477 } else {
3478 max_vqs = 2;
3479 }
3480
3481 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mgtdev->vdpa_ops,
3482 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3483 if (IS_ERR(ndev))
3484 return PTR_ERR(ndev);
3485
3486 ndev->mvdev.max_vqs = max_vqs;
3487 mvdev = &ndev->mvdev;
3488 mvdev->mdev = mdev;
3489
3490 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3491 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3492 if (!ndev->vqs || !ndev->event_cbs) {
3493 err = -ENOMEM;
3494 goto err_alloc;
3495 }
3496
3497 init_mvqs(ndev);
3498 allocate_irqs(ndev);
3499 init_rwsem(&ndev->reslock);
3500 config = &ndev->config;
3501
3502 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3503 err = config_func_mtu(mdev, add_config->net.mtu);
3504 if (err)
3505 goto err_alloc;
3506 }
3507
3508 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3509 err = query_mtu(mdev, &mtu);
3510 if (err)
3511 goto err_alloc;
3512
3513 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3514 }
3515
3516 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3517 if (get_link_state(mvdev))
3518 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3519 else
3520 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3521 }
3522
3523 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3524 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3525 /* Don't bother setting a MAC address in the config if _F_MAC is not going to be provisioned */
3526 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3527 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3528 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3529 if (err)
3530 goto err_alloc;
3531 }
3532
3533 if (!is_zero_ether_addr(config->mac)) {
3534 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3535 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3536 if (err)
3537 goto err_alloc;
3538 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3539 /*
3540 * We used to clear the _F_MAC feature bit if seeing a
3541 * zero mac address when device features are not
3542 * specifically provisioned. Keep the behaviour
3543 * so old scripts do not break.
3544 */ 3545 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC); 3546 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) { 3547 /* Don't provision zero mac address for _F_MAC */ 3548 mlx5_vdpa_warn(&ndev->mvdev, 3549 "No mac address provisioned?\n"); 3550 err = -EINVAL; 3551 goto err_alloc; 3552 } 3553 3554 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ)) 3555 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2); 3556 3557 ndev->mvdev.mlx_features = device_features; 3558 mvdev->vdev.dma_dev = &mdev->pdev->dev; 3559 err = mlx5_vdpa_alloc_resources(&ndev->mvdev); 3560 if (err) 3561 goto err_mpfs; 3562 3563 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { 3564 err = mlx5_vdpa_create_dma_mr(mvdev); 3565 if (err) 3566 goto err_res; 3567 } 3568 3569 err = alloc_resources(ndev); 3570 if (err) 3571 goto err_mr; 3572 3573 ndev->cvq_ent.mvdev = mvdev; 3574 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler); 3575 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 3576 if (!mvdev->wq) { 3577 err = -ENOMEM; 3578 goto err_res2; 3579 } 3580 3581 mvdev->vdev.mdev = &mgtdev->mgtdev; 3582 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1); 3583 if (err) 3584 goto err_reg; 3585 3586 mgtdev->ndev = ndev; 3587 return 0; 3588 3589 err_reg: 3590 destroy_workqueue(mvdev->wq); 3591 err_res2: 3592 free_resources(ndev); 3593 err_mr: 3594 mlx5_vdpa_destroy_mr_resources(mvdev); 3595 err_res: 3596 mlx5_vdpa_free_resources(&ndev->mvdev); 3597 err_mpfs: 3598 if (!is_zero_ether_addr(config->mac)) 3599 mlx5_mpfs_del_mac(pfmdev, config->mac); 3600 err_alloc: 3601 put_device(&mvdev->vdev.dev); 3602 return err; 3603 } 3604 3605 static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev) 3606 { 3607 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 3608 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); 3609 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 3610 struct workqueue_struct *wq; 3611 3612 unregister_link_notifier(ndev); 3613 _vdpa_unregister_device(dev); 3614 wq = mvdev->wq; 3615 mvdev->wq = NULL; 3616 destroy_workqueue(wq); 3617 mgtdev->ndev = NULL; 3618 } 3619 3620 static const struct vdpa_mgmtdev_ops mdev_ops = { 3621 .dev_add = mlx5_vdpa_dev_add, 3622 .dev_del = mlx5_vdpa_dev_del, 3623 }; 3624 3625 static struct virtio_device_id id_table[] = { 3626 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, 3627 { 0 }, 3628 }; 3629 3630 static int mlx5v_probe(struct auxiliary_device *adev, 3631 const struct auxiliary_device_id *id) 3632 3633 { 3634 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev); 3635 struct mlx5_core_dev *mdev = madev->mdev; 3636 struct mlx5_vdpa_mgmtdev *mgtdev; 3637 int err; 3638 3639 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL); 3640 if (!mgtdev) 3641 return -ENOMEM; 3642 3643 mgtdev->mgtdev.ops = &mdev_ops; 3644 mgtdev->mgtdev.device = mdev->device; 3645 mgtdev->mgtdev.id_table = id_table; 3646 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | 3647 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | 3648 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) | 3649 BIT_ULL(VDPA_ATTR_DEV_FEATURES); 3650 mgtdev->mgtdev.max_supported_vqs = 3651 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 3652 mgtdev->mgtdev.supported_features = get_supported_features(mdev); 3653 mgtdev->madev = madev; 3654 mgtdev->vdpa_ops = mlx5_vdpa_ops; 3655 3656 if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, desc_group_mkey_supported)) 3657 mgtdev->vdpa_ops.get_vq_desc_group = NULL; 3658 3659 err = 
vdpa_mgmtdev_register(&mgtdev->mgtdev); 3660 if (err) 3661 goto reg_err; 3662 3663 auxiliary_set_drvdata(adev, mgtdev); 3664 3665 return 0; 3666 3667 reg_err: 3668 kfree(mgtdev); 3669 return err; 3670 } 3671 3672 static void mlx5v_remove(struct auxiliary_device *adev) 3673 { 3674 struct mlx5_vdpa_mgmtdev *mgtdev; 3675 3676 mgtdev = auxiliary_get_drvdata(adev); 3677 vdpa_mgmtdev_unregister(&mgtdev->mgtdev); 3678 kfree(mgtdev); 3679 } 3680 3681 static const struct auxiliary_device_id mlx5v_id_table[] = { 3682 { .name = MLX5_ADEV_NAME ".vnet", }, 3683 {}, 3684 }; 3685 3686 MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table); 3687 3688 static struct auxiliary_driver mlx5v_driver = { 3689 .name = "vnet", 3690 .probe = mlx5v_probe, 3691 .remove = mlx5v_remove, 3692 .id_table = mlx5v_id_table, 3693 }; 3694 3695 module_auxiliary_driver(mlx5v_driver); 3696