// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2014-2018 Broadcom
 * Copyright (C) 2023 Raspberry Pi
 */

#include <drm/drm_syncobj.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

/* Takes the reservation lock on all the BOs being referenced, so that
 * we can attach fences and update the reservations after pushing the job
 * to the queue.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on render->unref_list).  They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	/* ww-mutex lock of every BO's reservation object, with deadlock
	 * back-off handled by the acquire context.
	 */
	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		/* Pre-reserve one fence slot so the later
		 * dma_resv_add_fence() in the attach path cannot fail.
		 */
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		/* Wait on any fences already attached by other users of
		 * these BOs (implicit sync).
		 */
		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list.  This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at `v3d_job_free()`.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	/* Resolves the user-supplied handle array into referenced GEM
	 * objects; the references are dropped in v3d_job_free().
	 */
	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}

/* Final kref release: drops every BO reference held by the job and the
 * job's fences/perfmon, then frees the job itself.
 */
static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

/* Render-job release: additionally drops the v3d-private BOs (tile
 * alloc/state, overflow) accumulated on unref_list, then falls through
 * to the common free.
 */
static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

/* Tears down the scheduler-side state and drops the submit-path
 * reference.  NULL-safe.
 */
void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

/* Drops one job reference; the type-specific free callback runs on the
 * last put.  NULL-safe.
 */
void v3d_job_put(struct v3d_job *job)
{
	if (!job)
		return;

	kref_put(&job->refcount, job->free);
}

/* Zero-allocates a job container of the given size. */
static int
v3d_job_allocate(void **container, size_t size)
{
	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for V3D job.\n");
		return -ENOMEM;
	}

	return 0;
}

/* Frees a job container allocated by v3d_job_allocate() and clears the
 * caller's pointer so later cleanup paths see NULL.
 */
static void
v3d_job_deallocate(void **container)
{
	kfree(*container);
	*container = NULL;
}

/* Common job setup: binds the job to its queue's scheduler entity and
 * records the in-fences it must wait on (either the multisync extension
 * list or the single legacy in_sync syncobj).  On success the job holds
 * one reference (kref_init); on failure the scheduler state is cleaned
 * up and the caller must free the container.
 */
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     struct v3d_job *job, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	job->v3d = v3d;
	job->free = free;
	job->file = file_priv;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 1, v3d_priv);
	if (ret)
		return ret;

	if (has_multisync) {
		/* Only wait on the in-syncs targeted at this queue. */
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
	return ret;
}

/* Arms the job, grabs its finished fence as done_fence, takes the
 * reference the scheduler will drop on completion, and queues it.
 * Caller must hold v3d->sched_lock.
 */
static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

/* Attaches the job's done fence to every BO reservation, unlocks the
 * reservations, and signals the out-syncobj(s) — either the single
 * legacy out_sync or the multisync extension's list (which is consumed
 * and freed here).
 */
static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job */
	/* If it only supports a single signal semaphore*/
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* If multiple semaphores extension is supported */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

/* Builds the CSD job plus its trailing cache-clean job, looks up the
 * BO list on the clean job (the last job in the chain), and takes the
 * reservation locks.  On error, partially-initialized jobs are left for
 * the caller's v3d_job_cleanup() to unwind.
 */
static int
v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
			   struct v3d_dev *v3d,
			   struct drm_v3d_submit_csd *args,
			   struct v3d_csd_job **job,
			   struct v3d_job **clean_job,
			   struct v3d_submit_ext *se,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int ret;

	ret = v3d_job_allocate((void *)job, sizeof(**job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
			   v3d_job_free, args->in_sync, se, V3D_CSD);
	if (ret) {
		v3d_job_deallocate((void *)job);
		return ret;
	}

	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, *clean_job,
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret) {
		v3d_job_deallocate((void *)clean_job);
		return ret;
	}

	(*job)->args = *args;

	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		return ret;

	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
}

/* Drops the syncobj references taken by v3d_get_multisync_post_deps()
 * and frees the out_syncs array.  Used on submit-error paths.
 */
static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

/* Copies the multisync out-sync handle array from userspace and
 * resolves each handle to a referenced drm_syncobj.  On failure all
 * references taken so far are dropped.
 */
static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Get data for multiple binary semaphores synchronization. Parse syncobj
 * to be signaled when job completes (out_sync).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      struct v3d_submit_ext *se)
{
	struct drm_v3d_multi_sync multisync;
	int ret;

	/* Reject a second multisync extension on the same submit. */
	if (se->in_sync_count || se->out_sync_count) {
		DRM_DEBUG("Two multisync extensions were added to the same job.");
		return -EINVAL;
	}

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	/* In-syncs are only recorded here; they are resolved per-queue in
	 * v3d_job_init().
	 */
	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* Get data for the indirect CSD job submission.
*/ 407 static int 408 v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv, 409 struct drm_v3d_extension __user *ext, 410 struct v3d_cpu_job *job) 411 { 412 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 413 struct v3d_dev *v3d = v3d_priv->v3d; 414 struct drm_v3d_indirect_csd indirect_csd; 415 struct v3d_indirect_csd_info *info = &job->indirect_csd; 416 417 if (!job) { 418 DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 419 return -EINVAL; 420 } 421 422 if (job->job_type) { 423 DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 424 return -EINVAL; 425 } 426 427 if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd))) 428 return -EFAULT; 429 430 if (!v3d_has_csd(v3d)) { 431 DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n"); 432 return -EINVAL; 433 } 434 435 job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD; 436 info->offset = indirect_csd.offset; 437 info->wg_size = indirect_csd.wg_size; 438 memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets, 439 sizeof(indirect_csd.wg_uniform_offsets)); 440 441 info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect); 442 443 return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit, 444 &info->job, &info->clean_job, 445 NULL, &info->acquire_ctx); 446 } 447 448 /* Get data for the query timestamp job submission. 
 */
static int
v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_timestamp_query timestamp;
	struct v3d_timestamp_query_info *query_info = &job->timestamp_query;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&timestamp, ext, sizeof(timestamp)))
		return -EFAULT;

	if (timestamp.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY;

	query_info->queries = kvmalloc_array(timestamp.count,
					     sizeof(struct v3d_timestamp_query),
					     GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(timestamp.offsets);
	syncs = u64_to_user_ptr(timestamp.syncs);

	/* Each query pairs a BO offset with a syncobj to signal once the
	 * timestamp has been written.
	 */
	for (i = 0; i < timestamp.count; i++) {
		u32 offset, sync;

		if (get_user(offset, offsets++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].offset = offset;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].syncobj = drm_syncobj_find(file_priv,
								  sync);
		if (!query_info->queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	query_info->count = timestamp.count;

	return 0;

error:
	/* Drops the i syncobj refs taken so far and frees the array. */
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

/* Parses the reset-timestamp-query extension: count queries starting at
 * reset.offset, spaced 8 bytes apart, each with a syncobj.
 */
static int
v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	struct drm_v3d_reset_timestamp_query reset;
	struct v3d_timestamp_query_info *query_info = &job->timestamp_query;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY;

	query_info->queries = kvmalloc_array(reset.count,
					     sizeof(struct v3d_timestamp_query),
					     GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);

	for (i = 0; i < reset.count; i++) {
		u32 sync;

		/* Offsets are implicit: 8 bytes per timestamp slot. */
		query_info->queries[i].offset = reset.offset + 8 * i;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].syncobj = drm_syncobj_find(file_priv,
								  sync);
		if (!query_info->queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	query_info->count = reset.count;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

/* Get data for the copy timestamp query results job submission.
 */
static int
v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
				      struct drm_v3d_extension __user *ext,
				      struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_copy_timestamp_query copy;
	struct v3d_timestamp_query_info *query_info = &job->timestamp_query;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;

	query_info->queries = kvmalloc_array(copy.count,
					     sizeof(struct v3d_timestamp_query),
					     GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(copy.offsets);
	syncs = u64_to_user_ptr(copy.syncs);

	for (i = 0; i < copy.count; i++) {
		u32 offset, sync;

		if (get_user(offset, offsets++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].offset = offset;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].syncobj = drm_syncobj_find(file_priv,
								  sync);
		if (!query_info->queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	query_info->count = copy.count;

	/* Layout parameters for writing the results back to the dst BO. */
	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

/* Shared parser for the performance-query extensions: for each of
 * count queries, reads a syncobj handle and a user pointer to nperfmons
 * kperfmon ids, allocating and filling query->kperfmon_ids.  On failure
 * the first i entries are released via
 * v3d_performance_query_info_free().
 */
static int
v3d_copy_query_info(struct v3d_performance_query_info *query_info,
		    unsigned int count,
		    unsigned int nperfmons,
		    u32 __user *syncs,
		    u64 __user *kperfmon_ids,
		    struct drm_file *file_priv)
{
	unsigned int i, j;
	int err;

	for (i = 0; i < count; i++) {
		struct v3d_performance_query *query = &query_info->queries[i];
		u32 __user *ids_pointer;
		u32 sync, id;
		u64 ids;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		if (get_user(ids, kperfmon_ids++)) {
			err = -EFAULT;
			goto error;
		}

		/* NOTE(review): element size is a pointer size although the
		 * stored ids are u32 — over-allocates but is safe; confirm
		 * against the kperfmon_ids declaration before changing.
		 */
		query->kperfmon_ids =
			kvmalloc_array(nperfmons,
				       sizeof(struct v3d_performance_query *),
				       GFP_KERNEL);
		if (!query->kperfmon_ids) {
			err = -ENOMEM;
			goto error;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (j = 0; j < nperfmons; j++) {
			if (get_user(id, ids_pointer++)) {
				kvfree(query->kperfmon_ids);
				err = -EFAULT;
				goto error;
			}

			query->kperfmon_ids[j] = id;
		}

		query->syncobj = drm_syncobj_find(file_priv, sync);
		if (!query->syncobj) {
			kvfree(query->kperfmon_ids);
			err = -ENOENT;
			goto error;
		}
	}

	return 0;

error:
	/* Frees only the i fully-initialized entries. */
	v3d_performance_query_info_free(query_info, i);
	return err;
}

/* Parses the reset-performance-query extension into
 * job->performance_query.
 */
static int
v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
				     struct drm_v3d_extension __user *ext,
				     struct v3d_cpu_job *job)
{
	struct v3d_performance_query_info *query_info = &job->performance_query;
	struct drm_v3d_reset_performance_query reset;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;

	query_info->queries =
		kvmalloc_array(reset.count,
			       sizeof(struct v3d_performance_query),
			       GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	err = v3d_copy_query_info(query_info,
				  reset.count,
				  reset.nperfmons,
				  u64_to_user_ptr(reset.syncs),
				  u64_to_user_ptr(reset.kperfmon_ids),
				  file_priv);
	if (err)
		return err;

	query_info->count = reset.count;
	query_info->nperfmons = reset.nperfmons;

	return 0;
}

/* Parses the copy-performance-query extension: the query list plus the
 * destination-buffer layout parameters.
 */
static int
v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
					  struct drm_v3d_extension __user *ext,
					  struct v3d_cpu_job *job)
{
	struct v3d_performance_query_info *query_info = &job->performance_query;
	struct drm_v3d_copy_performance_query copy;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;

	query_info->queries =
		kvmalloc_array(copy.count,
			       sizeof(struct v3d_performance_query),
			       GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	err = v3d_copy_query_info(query_info,
				  copy.count,
				  copy.nperfmons,
				  u64_to_user_ptr(copy.syncs),
				  u64_to_user_ptr(copy.kperfmon_ids),
				  file_priv);
	if (err)
		return err;

	query_info->count = copy.count;
	query_info->nperfmons = copy.nperfmons;
	query_info->ncounters = copy.ncounters;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}

/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
 * according to the extension id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   struct v3d_submit_ext *se,
		   struct v3d_cpu_job *job)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	/* Walk the user-space linked list of extensions (ext.next chains
	 * to the next node; a zero next pointer terminates the list).
	 */
	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
			break;
		/* The CPU_* parsers reject job == NULL themselves, so GPU
		 * submits passing job = NULL fail cleanly with -EINVAL.
		 */
		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		if (ret)
			return ret;

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU.  Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	/* The render job always exists; the bin job and the cache-clean
	 * job are optional and chained before/after it respectively.
	 */
	ret = v3d_job_allocate((void *)&render, sizeof(*render));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &render->base,
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret) {
		v3d_job_deallocate((void *)&render);
		goto fail;
	}

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	/* A zero-length BCL means no binning work was submitted. */
	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_allocate((void *)&bin, sizeof(*bin));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, &bin->base,
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret) {
			v3d_job_deallocate((void *)&bin);
			goto fail;
		}

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, clean_job,
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret) {
			v3d_job_deallocate((void *)&clean_job);
			goto fail;
		}

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	/* BOs and reservations are tracked on the last job in the chain,
	 * since that's the one whose completion fence matters.
	 */
	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		/* Per-fd perfmons are mutually exclusive with the global one. */
		if (v3d->global_perfmon) {
			ret = -EAGAIN;
			goto fail_perfmon;
		}

		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	/* sched_lock keeps the bin->render->clean dependency chain pushes
	 * atomic with respect to other submitters.
	 */
	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	/* Drop the submit-path references; the scheduler holds its own. */
	v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&job, sizeof(*job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &job->base,
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret) {
		v3d_job_deallocate((void *)&job);
		goto fail;
	}

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	/* The fixed-size handle array is terminated by the first zero
	 * handle; look up each BO up to that point.
	 */
	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	/* Builds the CSD job + trailing cache-clean job and locks the BO
	 * reservations (held on clean_job, the last job in the chain).
	 */
	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
					 &job, &clean_job, &se,
					 &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		/* Per-fd perfmons are mutually exclusive with the global one. */
		if (v3d->global_perfmon) {
			ret = -EAGAIN;
			goto fail_perfmon;
		}

		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	/* The clean job runs only after the CSD job completes. */
	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	/* Drop the submit-path references; the scheduler holds its own. */
	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/* Required BO count per CPU job type, validated against
 * args->bo_handle_count in v3d_submit_cpu_ioctl().
 */
static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};

/**
 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace specifies the CPU job type and data required to perform its
 * operations through the drm_v3d_extension struct.
1271 */ 1272 int 1273 v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, 1274 struct drm_file *file_priv) 1275 { 1276 struct v3d_dev *v3d = to_v3d_dev(dev); 1277 struct drm_v3d_submit_cpu *args = data; 1278 struct v3d_submit_ext se = {0}; 1279 struct v3d_submit_ext *out_se = NULL; 1280 struct v3d_cpu_job *cpu_job = NULL; 1281 struct v3d_csd_job *csd_job = NULL; 1282 struct v3d_job *clean_job = NULL; 1283 struct ww_acquire_ctx acquire_ctx; 1284 int ret; 1285 1286 if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { 1287 DRM_INFO("Invalid flags: %d\n", args->flags); 1288 return -EINVAL; 1289 } 1290 1291 ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job)); 1292 if (ret) 1293 return ret; 1294 1295 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 1296 ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job); 1297 if (ret) { 1298 DRM_DEBUG("Failed to get extensions.\n"); 1299 goto fail; 1300 } 1301 } 1302 1303 /* Every CPU job must have a CPU job user extension */ 1304 if (!cpu_job->job_type) { 1305 DRM_DEBUG("CPU job must have a CPU job user extension.\n"); 1306 ret = -EINVAL; 1307 goto fail; 1308 } 1309 1310 if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) { 1311 DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n"); 1312 ret = -EINVAL; 1313 goto fail; 1314 } 1315 1316 trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); 1317 1318 ret = v3d_job_init(v3d, file_priv, &cpu_job->base, 1319 v3d_job_free, 0, &se, V3D_CPU); 1320 if (ret) { 1321 v3d_job_deallocate((void *)&cpu_job); 1322 goto fail; 1323 } 1324 1325 clean_job = cpu_job->indirect_csd.clean_job; 1326 csd_job = cpu_job->indirect_csd.job; 1327 1328 if (args->bo_handle_count) { 1329 ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base, 1330 args->bo_handles, args->bo_handle_count); 1331 if (ret) 1332 goto fail; 1333 1334 ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx); 1335 if (ret) 1336 goto fail; 1337 } 1338 1339 
mutex_lock(&v3d->sched_lock); 1340 v3d_push_job(&cpu_job->base); 1341 1342 switch (cpu_job->job_type) { 1343 case V3D_CPU_JOB_TYPE_INDIRECT_CSD: 1344 ret = drm_sched_job_add_dependency(&csd_job->base.base, 1345 dma_fence_get(cpu_job->base.done_fence)); 1346 if (ret) 1347 goto fail_unreserve; 1348 1349 v3d_push_job(&csd_job->base); 1350 1351 ret = drm_sched_job_add_dependency(&clean_job->base, 1352 dma_fence_get(csd_job->base.done_fence)); 1353 if (ret) 1354 goto fail_unreserve; 1355 1356 v3d_push_job(clean_job); 1357 1358 break; 1359 default: 1360 break; 1361 } 1362 mutex_unlock(&v3d->sched_lock); 1363 1364 out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? NULL : &se; 1365 1366 v3d_attach_fences_and_unlock_reservation(file_priv, 1367 &cpu_job->base, 1368 &acquire_ctx, 0, 1369 out_se, cpu_job->base.done_fence); 1370 1371 switch (cpu_job->job_type) { 1372 case V3D_CPU_JOB_TYPE_INDIRECT_CSD: 1373 v3d_attach_fences_and_unlock_reservation(file_priv, 1374 clean_job, 1375 &cpu_job->indirect_csd.acquire_ctx, 1376 0, &se, clean_job->done_fence); 1377 break; 1378 default: 1379 break; 1380 } 1381 1382 v3d_job_put(&cpu_job->base); 1383 v3d_job_put(&csd_job->base); 1384 v3d_job_put(clean_job); 1385 1386 return 0; 1387 1388 fail_unreserve: 1389 mutex_unlock(&v3d->sched_lock); 1390 1391 drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count, 1392 &acquire_ctx); 1393 1394 drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, 1395 &cpu_job->indirect_csd.acquire_ctx); 1396 1397 fail: 1398 v3d_job_cleanup((void *)cpu_job); 1399 v3d_job_cleanup((void *)csd_job); 1400 v3d_job_cleanup(clean_job); 1401 v3d_put_multisync_post_deps(&se); 1402 kvfree(cpu_job->timestamp_query.queries); 1403 kvfree(cpu_job->performance_query.queries); 1404 1405 return ret; 1406 } 1407