// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include "cmd.h"

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

static int mlx5vf_is_migratable(struct mlx5_core_dev *mdev, u16 func_id)
{
	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *query_cap = NULL, *cap;
	int ret;

	query_cap = kzalloc(query_sz, GFP_KERNEL);
	if (!query_cap)
		return -ENOMEM;

	ret = mlx5_vport_get_other_func_cap(mdev, func_id, query_cap,
					    MLX5_CAP_GENERAL_2);
	if (ret)
		goto out;

	cap = MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability);
	if (!MLX5_GET(cmd_hca_cap_2, cap, migratable))
		ret = -EOPNOTSUPP;
out:
	kfree(query_cap);
	return ret;
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id);
static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);

int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	/*
	 * In case PRE_COPY is used, saving_migf is exposed while the device is
	 * running. Make sure to run only once there is no active save command.
	 * Running both in parallel might end up with a failure in the save
	 * command once it tries to turn on 'tracking' on a suspended device.
	 */
	if (migf) {
		err = wait_for_completion_interruptible(&migf->save_comp);
		if (err)
			return err;
	}

	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);

	err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
	if (migf)
		complete(&migf->save_comp);

	return err;
}

int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
	u32 out[MLX5_ST_SZ_DW(resume_vhca_out)] = {};
	u32 in[MLX5_ST_SZ_DW(resume_vhca_in)] = {};

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	MLX5_SET(resume_vhca_in, in, opcode, MLX5_CMD_OP_RESUME_VHCA);
	MLX5_SET(resume_vhca_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(resume_vhca_in, in, op_mod, op_mod);

	return mlx5_cmd_exec_inout(mvdev->mdev, resume_vhca, in, out);
}

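/*
 * Query the size of the device state (and, in chunk mode, the remaining
 * total size) that a subsequent SAVE_VHCA_STATE is expected to produce.
 * With MLX5VF_QUERY_INC the query is incremental and depends on dirty
 * tracking being active, hence the serialization against an in-flight
 * save command below.
 */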
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
					  size_t *state_size, u64 *total_size,
					  u8 *mig_state, u8 query_flags)
{
	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
	bool inc = query_flags & MLX5VF_QUERY_INC;
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	/*
	 * In case PRE_COPY is used, saving_migf is exposed while the device is
	 * running. Make sure to run only once there is no active save command.
	 * Running both in parallel might end up with a failure in the
	 * incremental query command on an un-tracked vhca.
	 */
	if (inc) {
		ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
		if (ret)
			return ret;
		/* Upon cleanup, ignore the previous pre_copy error state */
		if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
		    !(query_flags & MLX5VF_QUERY_CLEANUP)) {
			/*
			 * In case we had a PRE_COPY error, only query the full
			 * image for the final image.
			 */
			if (!(query_flags & MLX5VF_QUERY_FINAL)) {
				*state_size = 0;
				complete(&mvdev->saving_migf->save_comp);
				return 0;
			}
			query_flags &= ~MLX5VF_QUERY_INC;
		}
		/* Block the incremental query, which is state-dependent */
		if (mvdev->saving_migf->state == MLX5_MIGF_STATE_ERROR) {
			complete(&mvdev->saving_migf->save_comp);
			return -ENODEV;
		}
	}

	MLX5_SET(query_vhca_migration_state_in, in, opcode,
		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
	MLX5_SET(query_vhca_migration_state_in, in, incremental,
		 query_flags & MLX5VF_QUERY_INC);
	MLX5_SET(query_vhca_migration_state_in, in, chunk, mvdev->chunk_mode);

	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
				  out);
	if (inc)
		complete(&mvdev->saving_migf->save_comp);

	if (ret)
		return ret;

	*state_size = MLX5_GET(query_vhca_migration_state_out, out,
			       required_umem_size);
	if (total_size)
		*total_size = mvdev->chunk_mode ?
			MLX5_GET64(query_vhca_migration_state_out, out,
				   remaining_total_size) : *state_size;

	if (mig_state && mvdev->mig_state_cap)
		*mig_state = MLX5_GET(query_vhca_migration_state_out, out,
				      migration_state);

	return 0;
}

static void set_tracker_change_event(struct mlx5vf_pci_core_device *mvdev)
{
	mvdev->tracker.object_changed = true;
	complete(&mvdev->tracker_comp);
}

static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
{
	/* Mark the tracker under an error and wake it up if it's running */
	mvdev->tracker.is_err = true;
	complete(&mvdev->tracker_comp);
}

static int mlx5fv_vf_event(struct notifier_block *nb,
			   unsigned long event, void *data)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(nb, struct mlx5vf_pci_core_device, nb);

	switch (event) {
	case MLX5_PF_NOTIFY_ENABLE_VF:
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = false;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	case MLX5_PF_NOTIFY_DISABLE_VF:
		mlx5vf_cmd_close_migratable(mvdev);
		mutex_lock(&mvdev->state_mutex);
		mvdev->mdev_detach = true;
		mlx5vf_state_mutex_unlock(mvdev);
		break;
	default:
		break;
	}

	return 0;
}

void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	/* Must be done outside the lock to let it progress */
	set_tracker_error(mvdev);
	mutex_lock(&mvdev->state_mutex);
	mlx5vf_disable_fds(mvdev, NULL);
	_mlx5vf_free_page_tracker_resources(mvdev);
	mlx5vf_state_mutex_unlock(mvdev);
}

void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
{
	if (!mvdev->migrate_cap)
		return;

	mlx5_sriov_blocking_notifier_unregister(mvdev->mdev, mvdev->vf_id,
						&mvdev->nb);
	destroy_workqueue(mvdev->cb_wq);
}

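/*
 * Probe the VF's migration capabilities and, when all prerequisites are
 * met, expose the VFIO migration (and optionally dirty-tracking) ops.
 * Note the 'vf_id + 1' below: other-function queries address the VF by
 * its function ID, which is offset by one from the zero-based vf_id.
 */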
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
			       const struct vfio_migration_ops *mig_ops,
			       const struct vfio_log_ops *log_ops)
{
	struct pci_dev *pdev = mvdev->core_device.pdev;
	int ret;

	if (!pdev->is_virtfn)
		return;

	mvdev->mdev = mlx5_vf_get_core_dev(pdev);
	if (!mvdev->mdev)
		return;

	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
		goto end;

	if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
	      MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
		goto end;

	mvdev->vf_id = pci_iov_vf_id(pdev);
	if (mvdev->vf_id < 0)
		goto end;

	ret = mlx5vf_is_migratable(mvdev->mdev, mvdev->vf_id + 1);
	if (ret)
		goto end;

	if (mlx5vf_cmd_get_vhca_id(mvdev->mdev, mvdev->vf_id + 1,
				   &mvdev->vhca_id))
		goto end;

	mvdev->cb_wq = alloc_ordered_workqueue("mlx5vf_wq", 0);
	if (!mvdev->cb_wq)
		goto end;

	mutex_init(&mvdev->state_mutex);
	spin_lock_init(&mvdev->reset_lock);
	mvdev->nb.notifier_call = mlx5fv_vf_event;
	ret = mlx5_sriov_blocking_notifier_register(mvdev->mdev, mvdev->vf_id,
						    &mvdev->nb);
	if (ret) {
		destroy_workqueue(mvdev->cb_wq);
		goto end;
	}

	mvdev->migrate_cap = 1;
	mvdev->core_device.vdev.migration_flags =
		VFIO_MIGRATION_STOP_COPY |
		VFIO_MIGRATION_P2P |
		VFIO_MIGRATION_PRE_COPY;

	mvdev->core_device.vdev.mig_ops = mig_ops;
	init_completion(&mvdev->tracker_comp);
	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
		mvdev->core_device.vdev.log_ops = log_ops;

	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
		mvdev->chunk_mode = 1;

	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_state))
		mvdev->mig_state_cap = 1;

end:
	mlx5_vf_put_core_dev(mvdev->mdev);
}

static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
				  u16 *vhca_id)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	int out_size;
	void *out;
	int ret;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, other_function, 1);
	MLX5_SET(query_hca_cap_in, in, function_id, function_id);
	MLX5_SET(query_hca_cap_in, in, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
		 HCA_CAP_OPMOD_GET_CUR);

	ret = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (ret)
		goto err_exec;

	*vhca_id = MLX5_GET(query_hca_cap_out, out,
			    capability.cmd_hca_cap.vhca_id);

err_exec:
	kfree(out);
	return ret;
}

static u32 *alloc_mkey_in(u32 npages, u32 pdn)
{
	int inlen;
	void *mkc;
	u32 *in;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(__be64) * round_up(npages, 2);

	in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT);
	if (!in)
		return NULL;

	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 DIV_ROUND_UP(npages, 2));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
	MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);

	return in;
}

static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
		       u32 *mkey)
{
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		    sizeof(__be64) * round_up(npages, 2);

	return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
}

static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
				 u32 *mkey_in, struct dma_iova_state *state,
				 enum dma_data_direction dir)
{
	dma_addr_t addr;
	__be64 *mtt;
	int i;

	if (dma_use_iova(state)) {
		dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir,
				 0);
	} else {
		mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in,
					     klm_pas_mtt);
		for (i = npages - 1; i >= 0; i--) {
			addr = be64_to_cpu(mtt[i]);
			dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir);
		}
	}
}

static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
			      struct page **page_list, u32 *mkey_in,
			      struct dma_iova_state *state,
			      enum dma_data_direction dir)
{
	dma_addr_t addr;
	size_t mapped = 0;
	__be64 *mtt;
	int i, err;

	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);

	if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) {
		addr = state->addr;
		for (i = 0; i < npages; i++) {
			err = dma_iova_link(mdev->device, state,
					    page_to_phys(page_list[i]), mapped,
					    PAGE_SIZE, dir, 0);
			if (err)
				goto error;
			*mtt++ = cpu_to_be64(addr);
			addr += PAGE_SIZE;
			mapped += PAGE_SIZE;
		}
		err = dma_iova_sync(mdev->device, state, 0, mapped);
		if (err)
			goto error;
	} else {
		for (i = 0; i < npages; i++) {
			addr = dma_map_page(mdev->device, page_list[i], 0,
					    PAGE_SIZE, dir);
			err = dma_mapping_error(mdev->device, addr);
			if (err)
				goto error;
			*mtt++ = cpu_to_be64(addr);
		}
	}
	return 0;

error:
	unregister_dma_pages(mdev, i, mkey_in, state, dir);
	return err;
}

static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
	struct mlx5_core_dev *mdev = mvdev->mdev;
	int ret;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	if (buf->mkey_in || !buf->npages)
		return -EINVAL;

	buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
	if (!buf->mkey_in)
		return -ENOMEM;

	ret = register_dma_pages(mdev, buf->npages, buf->page_list,
				 buf->mkey_in, &buf->state, buf->dma_dir);
	if (ret)
		goto err_register_dma;

	ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
	if (ret)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state,
			     buf->dma_dir);
err_register_dma:
	kvfree(buf->mkey_in);
	buf->mkey_in = NULL;
	return ret;
}

static void free_page_list(u32 npages, struct page **page_list)
{
	int i;

	/* Undo alloc_pages_bulk() */
	for (i = npages - 1; i >= 0; i--)
		__free_page(page_list[i]);

	kvfree(page_list);
}

void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&mvdev->state_mutex);
	WARN_ON(mvdev->mdev_detach);

	if (buf->mkey_in) {
		mlx5_core_destroy_mkey(mdev, buf->mkey);
		unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
				     &buf->state, buf->dma_dir);
		kvfree(buf->mkey_in);
	}

	free_page_list(buf->npages, buf->page_list);
	kfree(buf);
}

static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
{
	unsigned int filled, done = 0;
	int i;

	*page_list =
		kvzalloc_objs(struct page *, npages, GFP_KERNEL_ACCOUNT);
	if (!*page_list)
		return -ENOMEM;

	for (;;) {
		filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
					  *page_list + done);
		if (!filled)
			goto err;

		done += filled;
		if (done == npages)
			break;
	}

	return 0;

err:
	for (i = 0; i < done; i++)
		__free_page((*page_list)[i]);

	kvfree(*page_list);
	*page_list = NULL;
	return -ENOMEM;
}

struct mlx5_vhca_data_buffer *
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
			 enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf;
	int ret;

	buf = kzalloc_obj(*buf, GFP_KERNEL_ACCOUNT);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	buf->dma_dir = dma_dir;
	buf->migf = migf;
	if (npages) {
		ret = mlx5vf_add_pages(&buf->page_list, npages);
		if (ret)
			goto end;

		buf->npages = npages;

		if (dma_dir != DMA_NONE) {
			ret = mlx5vf_dma_data_buffer(buf);
			if (ret)
				goto end;
		}
	}

	return buf;
end:
	mlx5vf_free_data_buffer(buf);
	return ERR_PTR(ret);
}

void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
{
	spin_lock_irq(&buf->migf->list_lock);
	buf->stop_copy_chunk_num = 0;
	buf->pre_copy_init_bytes_chunk = false;
	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
	spin_unlock_irq(&buf->migf->list_lock);
}

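/*
 * Buffers are recycled through migf->avail_list: try to reuse a parked
 * buffer with a matching DMA direction that is large enough, free any
 * smaller candidates that were pulled off the list, and fall back to a
 * fresh allocation when nothing fits.
 */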
struct mlx5_vhca_data_buffer *
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
		       enum dma_data_direction dma_dir)
{
	struct mlx5_vhca_data_buffer *buf, *temp_buf;
	struct list_head free_list;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return ERR_PTR(-ENOTCONN);

	INIT_LIST_HEAD(&free_list);

	spin_lock_irq(&migf->list_lock);
	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
		if (buf->dma_dir == dma_dir) {
			list_del_init(&buf->buf_elm);
			if (buf->npages >= npages) {
				spin_unlock_irq(&migf->list_lock);
				goto found;
			}
			/*
			 * Prevent holding redundant buffers. Put them on a
			 * free list and call mlx5vf_free_data_buffer(), which
			 * might sleep, at the end, outside of the spin lock
			 * (&migf->list_lock).
			 */
			list_add(&buf->buf_elm, &free_list);
		}
	}
	spin_unlock_irq(&migf->list_lock);
	buf = mlx5vf_alloc_data_buffer(migf, npages, dma_dir);

found:
	while ((temp_buf = list_first_entry_or_null(&free_list,
				struct mlx5_vhca_data_buffer, buf_elm))) {
		list_del(&temp_buf->buf_elm);
		mlx5vf_free_data_buffer(temp_buf);
	}

	return buf;
}

static void
mlx5vf_save_callback_complete(struct mlx5_vf_migration_file *migf,
			      struct mlx5vf_async_data *async_data)
{
	migf->inflight_save = 0;
	wake_up_interruptible(&migf->poll_wait);
	kvfree(async_data->out);
	complete(&migf->save_comp);
	fput(migf->filp);
}

void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
{
	struct mlx5vf_async_data *async_data = container_of(_work,
		struct mlx5vf_async_data, work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
		struct mlx5_vf_migration_file, async_data);

	mutex_lock(&migf->lock);
	if (async_data->status) {
		mlx5vf_put_data_buffer(async_data->buf);
		if (async_data->header_buf)
			mlx5vf_put_data_buffer(async_data->header_buf);
		if (!async_data->stop_copy_chunk &&
		    async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
			migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
		else
			migf->state = MLX5_MIGF_STATE_ERROR;
		wake_up_interruptible(&migf->poll_wait);
	}
	mutex_unlock(&migf->lock);
	mlx5vf_save_callback_complete(migf, async_data);
}

static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
			  size_t image_size, bool initial_pre_copy)
{
	struct mlx5_vf_migration_file *migf = header_buf->migf;
	struct mlx5_vf_migration_header header = {};
	unsigned long flags;
	struct page *page;
	u8 *to_buff;

	header.record_size = cpu_to_le64(image_size);
	header.flags = cpu_to_le32(MLX5_MIGF_HEADER_FLAGS_TAG_MANDATORY);
	header.tag = cpu_to_le32(MLX5_MIGF_HEADER_TAG_FW_DATA);
	page = mlx5vf_get_migration_page(header_buf, 0);
	if (!page)
		return -EINVAL;
	to_buff = kmap_local_page(page);
	memcpy(to_buff, &header, sizeof(header));
	kunmap_local(to_buff);
	header_buf->length = sizeof(header);
	header_buf->start_pos = header_buf->migf->max_pos;
	migf->max_pos += header_buf->length;
	spin_lock_irqsave(&migf->list_lock, flags);
	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
	spin_unlock_irqrestore(&migf->list_lock, flags);
	if (initial_pre_copy)
		migf->pre_copy_initial_bytes += sizeof(header);
	return 0;
}

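/*
 * Completion callback of the asynchronous SAVE_VHCA_STATE command. On
 * success it queues the header and data buffers on migf->buf_list for the
 * read() side, updates the pre-copy/stop-copy bookkeeping, and kicks off
 * the next chunk when the firmware reports more data. Errors are deferred
 * to mlx5vf_mig_file_cleanup_cb() since this runs in interrupt context.
 */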
static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5vf_async_data *async_data = container_of(context,
			struct mlx5vf_async_data, cb_work);
	struct mlx5_vf_migration_file *migf = container_of(async_data,
			struct mlx5_vf_migration_file, async_data);

	if (!status) {
		size_t next_required_umem_size = 0;
		bool stop_copy_last_chunk;
		size_t image_size;
		unsigned long flags;
		bool initial_pre_copy = migf->state != MLX5_MIGF_STATE_PRE_COPY &&
				!async_data->stop_copy_chunk;

		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
				      actual_image_size);
		if (async_data->buf->stop_copy_chunk_num)
			next_required_umem_size = MLX5_GET(save_vhca_state_out,
					async_data->out, next_required_umem_size);
		stop_copy_last_chunk = async_data->stop_copy_chunk &&
				!next_required_umem_size;
		if (async_data->header_buf) {
			status = add_buf_header(async_data->header_buf, image_size,
					initial_pre_copy ||
					async_data->buf->pre_copy_init_bytes_chunk);
			if (status)
				goto err;
		}
		async_data->buf->length = image_size;
		async_data->buf->start_pos = migf->max_pos;
		migf->max_pos += async_data->buf->length;
		spin_lock_irqsave(&migf->list_lock, flags);
		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
		if (async_data->buf->stop_copy_chunk_num) {
			migf->num_ready_chunks++;
			if (next_required_umem_size &&
			    migf->num_ready_chunks >= MAX_NUM_CHUNKS) {
				/* Delay the next SAVE till one chunk is consumed */
				migf->next_required_umem_size = next_required_umem_size;
				next_required_umem_size = 0;
			}
		}
		spin_unlock_irqrestore(&migf->list_lock, flags);
		if (initial_pre_copy || async_data->buf->pre_copy_init_bytes_chunk) {
			migf->pre_copy_initial_bytes += image_size;
			if (initial_pre_copy)
				migf->state = MLX5_MIGF_STATE_PRE_COPY;
			if (async_data->buf->pre_copy_init_bytes_chunk)
				async_data->buf->pre_copy_init_bytes_chunk = false;
		}
		if (stop_copy_last_chunk)
			migf->state = MLX5_MIGF_STATE_COMPLETE;
		wake_up_interruptible(&migf->poll_wait);
		if (next_required_umem_size)
			mlx5vf_mig_file_set_save_work(migf,
				/* Picking up the next chunk num */
				(async_data->buf->stop_copy_chunk_num % MAX_NUM_CHUNKS) + 1,
				next_required_umem_size);
		mlx5vf_save_callback_complete(migf, async_data);
		return;
	}

err:
	/* The error flow can't run from an interrupt context */
	if (status == -EREMOTEIO) {
		status = MLX5_GET(save_vhca_state_out, async_data->out, status);
		/* Failed in FW, print cmd out failure details */
		mlx5_cmd_out_err(migf->mvdev->mdev, MLX5_CMD_OP_SAVE_VHCA_STATE, 0,
				 async_data->out);
	}

	async_data->status = status;
	queue_work(migf->mvdev->cb_wq, &async_data->work);
}

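/*
 * Issue an asynchronous SAVE_VHCA_STATE command into 'buf'. 'inc' requests
 * an incremental (dirty-only) image, 'track' keeps dirty tracking enabled
 * for a further pre-copy round. A header buffer describing the image is
 * prepared up front; completion is handled in mlx5vf_save_callback().
 */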
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf,
			       struct mlx5_vhca_data_buffer *buf, bool inc,
			       bool track)
{
	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
	struct mlx5_vhca_data_buffer *header_buf = NULL;
	struct mlx5vf_async_data *async_data;
	bool pre_copy_cleanup = false;
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	err = wait_for_completion_interruptible(&migf->save_comp);
	if (err)
		return err;

	if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
	     migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
		pre_copy_cleanup = true;

	if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
		/*
		 * In case we had a PRE_COPY error, SAVE is triggered only for
		 * the final image, so read the device's full image.
		 */
		inc = false;

	MLX5_SET(save_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_SAVE_VHCA_STATE);
	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
	MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE);
	MLX5_SET(save_vhca_state_in, in, incremental, inc);
	MLX5_SET(save_vhca_state_in, in, set_track, track);

	async_data = &migf->async_data;
	async_data->buf = buf;
	async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
	async_data->out = kvzalloc(out_size, GFP_KERNEL);
	if (!async_data->out) {
		err = -ENOMEM;
		goto err_out;
	}

	if (async_data->stop_copy_chunk) {
		u8 header_idx = buf->stop_copy_chunk_num ?
			buf->stop_copy_chunk_num - 1 : 0;

		header_buf = migf->buf_header[header_idx];
		migf->buf_header[header_idx] = NULL;
	}

	if (!header_buf) {
		header_buf = mlx5vf_get_data_buffer(
			migf,
			DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
				     PAGE_SIZE),
			DMA_NONE);
		if (IS_ERR(header_buf)) {
			err = PTR_ERR(header_buf);
			goto err_free;
		}
	}

	if (async_data->stop_copy_chunk)
		migf->state = MLX5_MIGF_STATE_SAVE_STOP_COPY_CHUNK;

	async_data->header_buf = header_buf;
	get_file(migf->filp);
	migf->inflight_save = 1;
	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
			       async_data->out,
			       out_size, mlx5vf_save_callback,
			       &async_data->cb_work);
	if (err)
		goto err_exec;

	return 0;

err_exec:
	migf->inflight_save = 0;
	wake_up_interruptible(&migf->poll_wait);
	if (header_buf)
		mlx5vf_put_data_buffer(header_buf);
	fput(migf->filp);
err_free:
	kvfree(async_data->out);
err_out:
	complete(&migf->save_comp);
	return err;
}

int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
			       struct mlx5_vf_migration_file *migf,
			       struct mlx5_vhca_data_buffer *buf)
{
	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
	int err;

	lockdep_assert_held(&mvdev->state_mutex);
	if (mvdev->mdev_detach)
		return -ENOTCONN;

	if (!buf->mkey_in) {
		err = mlx5vf_dma_data_buffer(buf);
		if (err)
			return err;
	}

	MLX5_SET(load_vhca_state_in, in, opcode,
		 MLX5_CMD_OP_LOAD_VHCA_STATE);
	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
	MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
	MLX5_SET(load_vhca_state_in, in, mkey, buf->mkey);
	MLX5_SET(load_vhca_state_in, in, size, buf->length);
	return mlx5_cmd_exec_inout(mvdev->mdev, load_vhca_state, in, out);
}

int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf)
{
	int err;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return -ENOTCONN;

	err = mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn);
	return err;
}

void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
{
	lockdep_assert_held(&migf->mvdev->state_mutex);
	if (migf->mvdev->mdev_detach)
		return;

	mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn);
}

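/*
 * Release every buffer attached to the migration file: the per-chunk data
 * and header buffers, anything still parked on the avail/buf lists, and
 * finally the protection domain they were registered against.
 */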
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
{
	struct mlx5_vhca_data_buffer *entry;
	int i;

	lockdep_assert_held(&migf->mvdev->state_mutex);
	WARN_ON(migf->mvdev->mdev_detach);

	for (i = 0; i < MAX_NUM_CHUNKS; i++) {
		if (migf->buf[i]) {
			mlx5vf_free_data_buffer(migf->buf[i]);
			migf->buf[i] = NULL;
		}

		if (migf->buf_header[i]) {
			mlx5vf_free_data_buffer(migf->buf_header[i]);
			migf->buf_header[i] = NULL;
		}
	}

	list_splice(&migf->avail_list, &migf->buf_list);

	while ((entry = list_first_entry_or_null(&migf->buf_list,
				struct mlx5_vhca_data_buffer, buf_elm))) {
		list_del(&entry->buf_elm);
		mlx5vf_free_data_buffer(entry);
	}

	mlx5vf_cmd_dealloc_pd(migf);
}

static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
				 struct mlx5vf_pci_core_device *mvdev,
				 struct rb_root_cached *ranges, u32 nnodes)
{
	int max_num_range =
		MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	int record_size = MLX5_ST_SZ_BYTES(page_track_range);
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	struct interval_tree_node *node = NULL;
	u64 total_ranges_len = 0;
	u32 num_ranges = nnodes;
	u8 log_addr_space_size;
	void *range_list_ptr;
	void *obj_context;
	void *cmd_hdr;
	int inlen;
	void *in;
	int err;
	int i;

	if (num_ranges > max_num_range) {
		vfio_combine_iova_ranges(ranges, nnodes, max_num_range);
		num_ranges = max_num_range;
	}

	inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
			record_size * num_ranges;
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
			       general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
		 MLX5_OBJ_TYPE_PAGE_TRACK);
	obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
	MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
	MLX5_SET(page_track, obj_context, track_type, 1);
	MLX5_SET(page_track, obj_context, log_page_size,
		 ilog2(tracker->host_qp->tracked_page_size));
	MLX5_SET(page_track, obj_context, log_msg_size,
		 ilog2(tracker->host_qp->max_msg_size));
	MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
	MLX5_SET(page_track, obj_context, num_ranges, num_ranges);

	range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
	for (i = 0; i < num_ranges; i++) {
		void *addr_range_i_base = range_list_ptr + record_size * i;
		unsigned long length = node->last - node->start + 1;

		MLX5_SET64(page_track_range, addr_range_i_base, start_address,
			   node->start);
		MLX5_SET64(page_track_range, addr_range_i_base, length, length);
		total_ranges_len += length;
		node = interval_tree_iter_next(node, 0, ULONG_MAX);
	}

	WARN_ON(node);
	log_addr_space_size = ilog2(roundup_pow_of_two(total_ranges_len));
	if (log_addr_space_size <
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
	    log_addr_space_size >
	    (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
		err = -EOPNOTSUPP;
		goto out;
	}

	MLX5_SET(page_track, obj_context, log_addr_space_size,
		 log_addr_space_size);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (err)
		goto out;

	tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
out:
	kfree(in);
	return err;
}

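/*
 * The tracker is a firmware "general object"; the helpers below drive its
 * lifecycle through the generic object command headers: destroy it, move a
 * range of it into a new state (e.g. reporting), and query its current
 * state.
 */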
static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
				      u32 tracker_id)
{
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};

	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
				     u32 tracker_id, unsigned long iova,
				     unsigned long length, u32 tracker_state)
{
	u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);

	obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
	MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
	MLX5_SET64(page_track, obj_context, range_start_address, iova);
	MLX5_SET64(page_track, obj_context, length, length);
	MLX5_SET(page_track, obj_context, state, tracker_state);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
}

static int mlx5vf_cmd_query_tracker(struct mlx5_core_dev *mdev,
				    struct mlx5_vhca_page_tracker *tracker)
{
	u32 out[MLX5_ST_SZ_DW(query_page_track_obj_out)] = {};
	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
	void *obj_context;
	void *cmd_hdr;
	int err;

	cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker->id);

	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	obj_context = MLX5_ADDR_OF(query_page_track_obj_out, out, obj_context);
	tracker->status = MLX5_GET(page_track, obj_context, state);
	return 0;
}

static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
			     struct mlx5_vhca_cq_buf *buf, int nent,
			     int cqe_size)
{
	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
	u8 log_wq_sz = ilog2(cqe_size);
	int err;

	err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
				       mdev->priv.numa_node);
	if (err)
		return err;

	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
	buf->cqe_size = cqe_size;
	buf->nent = nent;
	return 0;
}

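/*
 * Stamp every CQE as invalid so that hardware/software ownership can be
 * derived from the op_own field before the CQ has wrapped for the first
 * time (see get_sw_cqe()).
 */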
static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int i;

	for (i = 0; i < buf->nent; i++) {
		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
	mlx5_db_free(mdev, &cq->db);
}

static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	if (type != MLX5_EVENT_TYPE_CQ_ERROR)
		return;

	set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device,
				       tracker.cq.mcq));
}

static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
				 void *data)
{
	struct mlx5_vhca_page_tracker *tracker =
		mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
	struct mlx5vf_pci_core_device *mvdev = container_of(
		tracker, struct mlx5vf_pci_core_device, tracker);
	struct mlx5_eqe_obj_change *object;
	struct mlx5_eqe *eqe = data;
	u8 event_type = (u8)type;
	u8 queue_type;
	u32 obj_id;
	int qp_num;

	switch (event_type) {
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		queue_type = eqe->data.qp_srq.type;
		if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP)
			break;
		qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
		if (qp_num != tracker->host_qp->qpn &&
		    qp_num != tracker->fw_qp->qpn)
			break;
		set_tracker_error(mvdev);
		break;
	case MLX5_EVENT_TYPE_OBJECT_CHANGE:
		object = &eqe->data.obj_change;
		obj_id = be32_to_cpu(object->obj_id);
		if (obj_id == tracker->id)
			set_tracker_change_event(mvdev);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
			       struct mlx5_eqe *eqe)
{
	struct mlx5vf_pci_core_device *mvdev =
		container_of(mcq, struct mlx5vf_pci_core_device,
			     tracker.cq.mcq);

	complete(&mvdev->tracker_comp);
}

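/*
 * Create the completion queue on which the tracker's host QP reports
 * dirty pages. The CQE size follows the cache line size (128-byte CQEs
 * on 128-byte cache line machines), and the CQ is armed immediately so
 * the first report wakes the reader via mlx5vf_cq_complete().
 */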
static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
			    struct mlx5_vhca_page_tracker *tracker,
			    size_t ncqe)
{
	int cqe_size = cache_line_size() == 128 ? 128 : 64;
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_vhca_cq *cq;
	int inlen, err, eqn;
	void *cqc, *in;
	__be64 *pas;
	int vector;

	cq = &tracker->cq;
	ncqe = roundup_pow_of_two(ncqe);
	err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
	if (err)
		return err;

	cq->ncqe = ncqe;
	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;
	err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
	if (err)
		goto err_db_free;

	init_cq_frag_buf(&cq->buf);
	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
		cq->buf.frag_buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buff;
	}

	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
	if (err)
		goto err_vec;

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
	MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
	cq->mcq.comp = mlx5vf_cq_complete;
	cq->mcq.event = mlx5vf_cq_event;
	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	if (err)
		goto err_vec;

	mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
		    cq->mcq.cons_index);
	kvfree(in);
	return 0;

err_vec:
	kvfree(in);
err_buff:
	mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
err_db_free:
	mlx5_db_free(mdev, &cq->db);
	return err;
}

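/*
 * Create one of the tracker's two RC QPs. With max_recv_wr != 0 this is
 * the host QP, which owns an RQ for receiving dirty-page reports; with 0
 * it is the firmware-side QP, which gets a zero-length RQ. Neither QP
 * uses an SQ ('no_sq' is set below).
 */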
static struct mlx5_vhca_qp *
mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
		    struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
{
	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
	struct mlx5_vhca_qp *qp;
	u8 log_rq_stride;
	u8 log_rq_sz;
	void *qpc;
	int inlen;
	void *in;
	int err;

	qp = kzalloc_obj(*qp, GFP_KERNEL_ACCOUNT);
	if (!qp)
		return ERR_PTR(-ENOMEM);

	err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
	if (err)
		goto err_free;

	if (max_recv_wr) {
		qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
		log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
		log_rq_sz = ilog2(qp->rq.wqe_cnt);
		err = mlx5_frag_buf_alloc_node(mdev,
				wq_get_byte_sz(log_rq_sz, log_rq_stride),
				&qp->buf, mdev->priv.numa_node);
		if (err)
			goto err_db_free;
		mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
	}

	qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		qp->buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, tracker->pdn);
	MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	MLX5_SET(qpc, qpc, no_sq, 1);
	if (max_recv_wr) {
		MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
		MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
		MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
		MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
		MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
		mlx5_fill_page_frag_array(&qp->buf,
					  (__be64 *)MLX5_ADDR_OF(create_qp_in,
								 in, pas));
	} else {
		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
	}

	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	kvfree(in);
	if (err)
		goto err_in;

	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
	return qp;

err_in:
	if (max_recv_wr)
		mlx5_frag_buf_free(mdev, &qp->buf);
err_db_free:
	mlx5_db_free(mdev, &qp->db);
err_free:
	kfree(qp);
	return ERR_PTR(err);
}

static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
{
	struct mlx5_wqe_data_seg *data;
	unsigned int ix;

	WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
	ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
	data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
	data->byte_count = cpu_to_be32(qp->max_msg_size);
	data->lkey = cpu_to_be32(qp->recv_buf.mkey);
	data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
	qp->rq.pc++;
	/* Make sure that descriptors are written before doorbell record. */
	dma_wmb();
	*qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
}

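/*
 * Walk a freshly created QP through the standard RC state machine:
 * RST->INIT->RTR for both QPs, and on to RTS only for the firmware QP
 * (the host QP never sends, so RTR is sufficient for it). The host QP's
 * RQ is pre-posted with receive WQEs right after INIT.
 */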
static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp, u32 remote_qpn,
			      bool host_qp)
{
	u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;
	int ret;

	/* Init */
	qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);
	MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
	MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
	if (ret)
		return ret;

	if (host_qp) {
		struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
		int i;

		for (i = 0; i < qp->rq.wqe_cnt; i++) {
			mlx5vf_post_recv(qp);
			recv_buf->next_rq_offset += qp->max_msg_size;
		}
	}

	/* RTR */
	qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
	MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
	MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
	MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
	MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
	MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
	ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
	if (ret || host_qp)
		return ret;

	/* RTS */
	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
	MLX5_SET(qpc, qpc, retry_count, 7);
	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
	MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
	MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
	MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);

	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
}

static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
			      struct mlx5_vhca_qp *qp)
{
	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};

	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
	mlx5_cmd_exec_in(mdev, destroy_qp, in);

	mlx5_frag_buf_free(mdev, &qp->buf);
	mlx5_db_free(mdev, &qp->db);
	kfree(qp);
}

static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp)
{
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;

	mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
	unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
			     &recv_buf->state, DMA_FROM_DEVICE);
	kvfree(recv_buf->mkey_in);
	free_page_list(recv_buf->npages, recv_buf->page_list);
}

static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
					  struct mlx5_vhca_qp *qp, u32 pdn,
					  u64 rq_size)
{
	unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
	struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
	int err;

	err = mlx5vf_add_pages(&recv_buf->page_list, npages);
	if (err)
		return err;

	recv_buf->npages = npages;

	recv_buf->mkey_in = alloc_mkey_in(npages, pdn);
	if (!recv_buf->mkey_in) {
		err = -ENOMEM;
		goto end;
	}

	err = register_dma_pages(mdev, npages, recv_buf->page_list,
				 recv_buf->mkey_in, &recv_buf->state,
				 DMA_FROM_DEVICE);
	if (err)
		goto err_register_dma;

	err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
	if (err)
		goto err_create_mkey;

	return 0;

err_create_mkey:
	unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state,
			     DMA_FROM_DEVICE);
err_register_dma:
	kvfree(recv_buf->mkey_in);
	recv_buf->mkey_in = NULL;
end:
	free_page_list(npages, recv_buf->page_list);
	return err;
}

static void
_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
{
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_core_dev *mdev = mvdev->mdev;

	lockdep_assert_held(&mvdev->state_mutex);

	if (!mvdev->log_active)
		return;

	WARN_ON(mvdev->mdev_detach);

	mlx5_eq_notifier_unregister(mdev, &tracker->nb);
	mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
	mlx5vf_destroy_qp(mdev, tracker->fw_qp);
	mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
	mlx5vf_destroy_qp(mdev, tracker->host_qp);
	mlx5vf_destroy_cq(mdev, &tracker->cq);
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
	mlx5_put_uars_page(mdev, tracker->uar);
	mvdev->log_active = false;
}

int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active)
		goto end;

	_mlx5vf_free_page_tracker_resources(mvdev);
	mvdev->log_active = false;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;
}

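/*
 * vfio_log_ops entry point that builds the whole dirty-page tracking
 * pipeline: UAR and PD, the report CQ, the host and firmware QPs (wired
 * to each other), the receive buffer, and finally the firmware tracker
 * object covering the given IOVA ranges. The tracked page size is clamped
 * to the device's supported range and reported back through *page_size.
 */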
int mlx5vf_start_page_tracker(struct vfio_device *vdev,
			      struct rb_root_cached *ranges, u32 nnodes,
			      u64 *page_size)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	u8 log_tracked_page = ilog2(*page_size);
	struct mlx5_vhca_qp *host_qp;
	struct mlx5_vhca_qp *fw_qp;
	struct mlx5_core_dev *mdev;
	u32 log_max_msg_size;
	u32 max_msg_size;
	u64 rq_size = SZ_2M;
	u32 max_recv_wr;
	int err;

	mutex_lock(&mvdev->state_mutex);
	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	if (mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	mdev = mvdev->mdev;
	log_max_msg_size = MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_msg_size);
	max_msg_size = (1ULL << log_max_msg_size);
	/* The RQ must hold at least 4 WQEs/messages for successful QP creation */
	if (rq_size < 4ULL * max_msg_size)
		rq_size = 4ULL * max_msg_size;

	memset(tracker, 0, sizeof(*tracker));
	tracker->uar = mlx5_get_uars_page(mdev);
	if (IS_ERR(tracker->uar)) {
		err = PTR_ERR(tracker->uar);
		goto end;
	}

	err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
	if (err)
		goto err_uar;

	max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
	err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
	if (err)
		goto err_dealloc_pd;

	host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
	if (IS_ERR(host_qp)) {
		err = PTR_ERR(host_qp);
		goto err_cq;
	}

	host_qp->max_msg_size = max_msg_size;
	if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_min_page_size);
	} else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size)) {
		log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
				pg_track_log_max_page_size);
	}

	host_qp->tracked_page_size = (1ULL << log_tracked_page);
	err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
					     rq_size);
	if (err)
		goto err_host_qp;

	fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
	if (IS_ERR(fw_qp)) {
		err = PTR_ERR(fw_qp);
		goto err_recv_resources;
	}

	err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
	if (err)
		goto err_activate;

	err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
	if (err)
		goto err_activate;

	tracker->host_qp = host_qp;
	tracker->fw_qp = fw_qp;
	err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
	if (err)
		goto err_activate;

	MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY);
	mlx5_eq_notifier_register(mdev, &tracker->nb);
	*page_size = host_qp->tracked_page_size;
	mvdev->log_active = true;
	mlx5vf_state_mutex_unlock(mvdev);
	return 0;

err_activate:
	mlx5vf_destroy_qp(mdev, fw_qp);
err_recv_resources:
	mlx5vf_free_qp_recv_resources(mdev, host_qp);
err_host_qp:
	mlx5vf_destroy_qp(mdev, host_qp);
err_cq:
	mlx5vf_destroy_cq(mdev, &tracker->cq);
err_dealloc_pd:
	mlx5_core_dealloc_pd(mdev, tracker->pdn);
err_uar:
	mlx5_put_uars_page(mdev, tracker->uar);
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}

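/*
 * Parse one dirty-page report message out of the receive buffer. Each
 * report is an array of page_track_report_entry records holding the dirty
 * IOVA split into low/high halves; every entry marks one page of
 * qp->tracked_page_size in the caller's iova_bitmap.
 */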
static void
set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
		  struct iova_bitmap *dirty)
{
	u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
	u32 nent = size / entry_size;
	u32 nent_in_page;
	u32 nent_to_set;
	struct page *page;
	u32 page_offset;
	u32 page_index;
	u32 buf_offset;
	void *kaddr;
	u64 addr;
	u64 *buf;
	int i;

	buf_offset = index * qp->max_msg_size;
	if (WARN_ON(buf_offset + size >= qp->recv_buf.npages * PAGE_SIZE ||
		    (nent > qp->max_msg_size / entry_size)))
		return;

	do {
		page_index = buf_offset / PAGE_SIZE;
		page_offset = buf_offset % PAGE_SIZE;
		nent_in_page = (PAGE_SIZE - page_offset) / entry_size;
		page = qp->recv_buf.page_list[page_index];
		kaddr = kmap_local_page(page);
		buf = kaddr + page_offset;
		nent_to_set = min(nent, nent_in_page);
		for (i = 0; i < nent_to_set; i++) {
			addr = MLX5_GET(page_track_report_entry, buf + i,
					dirty_address_low);
			addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
					      dirty_address_high) << 32;
			iova_bitmap_set(dirty, addr, qp->tracked_page_size);
		}
		kunmap_local(kaddr);
		buf_offset += (nent_to_set * entry_size);
		nent -= nent_to_set;
	} while (nent);
}

static void
mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
	      struct iova_bitmap *dirty, int *tracker_status)
{
	u32 size;
	int ix;

	qp->rq.cc++;
	*tracker_status = be32_to_cpu(cqe->immediate) >> 28;
	size = be32_to_cpu(cqe->byte_cnt);
	ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);

	/* zero length CQE, no data */
	WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
	if (size)
		set_report_output(size, ix, qp, dirty);

	qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
	mlx5vf_post_recv(qp);
}

static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
{
	return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
}

static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
		return cqe64;
	} else {
		return NULL;
	}
}

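/*
 * Poll a single CQE off the tracker CQ. Returns CQ_OK when a dirty-page
 * report was consumed, CQ_EMPTY when no software-owned CQE is available,
 * and CQ_POLL_ERR on any unexpected opcode.
 */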
static int
mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
		   struct iova_bitmap *dirty, int *tracker_status)
{
	struct mlx5_cqe64 *cqe;
	u8 opcode;

	cqe = get_sw_cqe(cq, cq->mcq.cons_index);
	if (!cqe)
		return CQ_EMPTY;

	++cq->mcq.cons_index;
	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();
	opcode = get_cqe_opcode(cqe);
	switch (opcode) {
	case MLX5_CQE_RESP_SEND_IMM:
		mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
		return CQ_OK;
	default:
		return CQ_POLL_ERR;
	}
}

int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
				  unsigned long length,
				  struct iova_bitmap *dirty)
{
	struct mlx5vf_pci_core_device *mvdev = container_of(
		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
	struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
	struct mlx5_vhca_cq *cq = &tracker->cq;
	struct mlx5_core_dev *mdev;
	int poll_err, err;

	mutex_lock(&mvdev->state_mutex);
	if (!mvdev->log_active) {
		err = -EINVAL;
		goto end;
	}

	if (mvdev->mdev_detach) {
		err = -ENOTCONN;
		goto end;
	}

	if (tracker->is_err) {
		err = -EIO;
		goto end;
	}

	mdev = mvdev->mdev;
	err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
					MLX5_PAGE_TRACK_STATE_REPORTING);
	if (err)
		goto end;

	tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
	while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING &&
	       !tracker->is_err) {
		poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
					      &tracker->status);
		if (poll_err == CQ_EMPTY) {
			mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
				    cq->mcq.cons_index);
			poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
						      dirty, &tracker->status);
			if (poll_err == CQ_EMPTY) {
				wait_for_completion(&mvdev->tracker_comp);
				if (tracker->object_changed) {
					tracker->object_changed = false;
					err = mlx5vf_cmd_query_tracker(mdev, tracker);
					if (err)
						goto end;
				}
				continue;
			}
		}
		if (poll_err == CQ_POLL_ERR) {
			err = -EIO;
			goto end;
		}
		mlx5_cq_set_ci(&cq->mcq);
	}

	if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
		tracker->is_err = true;

	if (tracker->is_err)
		err = -EIO;
end:
	mlx5vf_state_mutex_unlock(mvdev);
	return err;
}