// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2014-2018 Broadcom
 * Copyright (C) 2023 Raspberry Pi
 */

#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

/* Takes the reservation lock on all the BOs being referenced, so that
 * we can attach fences and update the reservations after pushing the job
 * to the queue.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on render->unref_list). They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list. This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_job_free().
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}
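
/*
 * Illustrative userspace sketch (an assumption for documentation, not
 * driver code): the submit ioctls encode the BO handle list consumed by
 * v3d_lookup_bos() as a u64-cast pointer to a u32 array, per the
 * drm_v3d_submit_* structs in include/uapi/drm/v3d_drm.h. The handle
 * variables are hypothetical; error handling is omitted.
 *
 *	u32 handles[2] = { vbo_handle, fbo_handle };
 *	struct drm_v3d_submit_cl args = {
 *		.bo_handles = (uintptr_t)handles,
 *		.bo_handle_count = 2,
 *	};
 */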

static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	if (!job)
		return;

	kref_put(&job->refcount, job->free);
}

static int
v3d_job_allocate(void **container, size_t size)
{
	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for V3D job.\n");
		return -ENOMEM;
	}

	return 0;
}

static void
v3d_job_deallocate(void **container)
{
	kfree(*container);
	*container = NULL;
}

static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     struct v3d_job *job, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	job->v3d = v3d;
	job->free = free;
	job->file_priv = v3d_priv;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 1, v3d_priv, file_priv->client_id);
	if (ret)
		return ret;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				/* TODO: Investigate why this was filtered out for the IOCTL. */
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		/* TODO: Investigate why this was filtered out for the IOCTL. */
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
	return ret;
}
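
/*
 * All of the submit ioctls below drive their jobs through the same
 * sequence, built from the helpers in this file:
 *
 *	v3d_job_allocate()		allocate the zeroed job container
 *	v3d_job_init()			set up the sched job, wait deps and
 *					the job reference count
 *	v3d_lookup_bos()		resolve GEM handles into job->bo[]
 *	v3d_lock_bo_reservations()	lock resvs, add implicit deps
 *	v3d_push_job()			arm and queue (scheduler takes a ref)
 *	v3d_attach_fences_and_unlock_reservation()
 *	v3d_job_put()			drop the submitter's reference
 *
 * On failure, v3d_job_cleanup() tears down the sched job and drops the
 * reference taken by kref_init() in v3d_job_init().
 */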

static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job, for the case where
	 * only a single signal semaphore is supported.
	 */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* Otherwise, signal every syncobj of the multisync extension. */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}
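
/*
 * Illustrative userspace sketch (an assumption for documentation, not
 * driver code): waiting for completion through the out_sync handle that
 * the function above signals. This uses only the generic DRM syncobj
 * uAPI; error handling is omitted.
 *
 *	struct drm_syncobj_create create = { 0 };
 *	ioctl(fd, DRM_IOCTL_SYNCOBJ_CREATE, &create);
 *
 *	args.out_sync = create.handle;
 *	ioctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, &args);
 *
 *	struct drm_syncobj_wait wait = {
 *		.handles = (uintptr_t)&create.handle,
 *		.count_handles = 1,
 *		.timeout_nsec = INT64_MAX,
 *	};
 *	ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
 */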

static int
v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
			   struct v3d_dev *v3d,
			   struct drm_v3d_submit_csd *args,
			   struct v3d_csd_job **job,
			   struct v3d_job **clean_job,
			   struct v3d_submit_ext *se,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int ret;

	ret = v3d_job_allocate((void *)job, sizeof(**job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
			   v3d_job_free, args->in_sync, se, V3D_CSD);
	if (ret) {
		v3d_job_deallocate((void *)job);
		return ret;
	}

	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, *clean_job,
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret) {
		v3d_job_deallocate((void *)clean_job);
		return ret;
	}

	(*job)->args = *args;

	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		return ret;

	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
}

static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Get data for multiple binary semaphore synchronization. Parse the
 * syncobjs to be signaled when the job completes (out_sync).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      struct v3d_submit_ext *se)
{
	struct drm_v3d_multi_sync multisync;
	int ret;

	if (se->in_sync_count || se->out_sync_count) {
		DRM_DEBUG("Two multisync extensions were added to the same job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}
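
/*
 * Illustrative userspace sketch (an assumption for documentation, not
 * driver code): attaching a multisync extension to a submission. The
 * field names follow struct drm_v3d_multi_sync as consumed by the
 * parser above; in_syncs/out_syncs point at arrays of struct
 * drm_v3d_sem, and wait_stage names the queue that waits on in_syncs.
 * The syncobj handle variables are hypothetical.
 *
 *	struct drm_v3d_sem in = { .handle = wait_syncobj };
 *	struct drm_v3d_sem out = { .handle = signal_syncobj };
 *	struct drm_v3d_multi_sync ms = {
 *		.base = { .id = DRM_V3D_EXT_ID_MULTI_SYNC },
 *		.in_syncs = (uintptr_t)&in,
 *		.out_syncs = (uintptr_t)&out,
 *		.in_sync_count = 1,
 *		.out_sync_count = 1,
 *		.wait_stage = V3D_RENDER,
 *	};
 *	args.flags |= DRM_V3D_SUBMIT_EXTENSION;
 *	args.extensions = (uintptr_t)&ms;
 */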

/* Get data for the indirect CSD job submission. */
static int
v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
				struct drm_v3d_extension __user *ext,
				struct v3d_cpu_job *job)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_indirect_csd indirect_csd;
	struct v3d_indirect_csd_info *info = &job->indirect_csd;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
		return -EFAULT;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
		return -EINVAL;
	}

	job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
	info->offset = indirect_csd.offset;
	info->wg_size = indirect_csd.wg_size;
	memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
	       sizeof(indirect_csd.wg_uniform_offsets));

	info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);

	return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
					  &info->job, &info->clean_job,
					  NULL, &info->acquire_ctx);
}

/* Get data for the query timestamp job submission. */
static int
v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_timestamp_query timestamp;
	struct v3d_timestamp_query_info *query_info = &job->timestamp_query;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&timestamp, ext, sizeof(timestamp)))
		return -EFAULT;

	if (timestamp.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY;

	query_info->queries = kvmalloc_array(timestamp.count,
					     sizeof(struct v3d_timestamp_query),
					     GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(timestamp.offsets);
	syncs = u64_to_user_ptr(timestamp.syncs);

	for (i = 0; i < timestamp.count; i++) {
		u32 offset, sync;

		if (get_user(offset, offsets++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].offset = offset;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].syncobj = drm_syncobj_find(file_priv,
								  sync);
		if (!query_info->queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	query_info->count = timestamp.count;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

static int
v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	struct drm_v3d_reset_timestamp_query reset;
	struct v3d_timestamp_query_info *query_info = &job->timestamp_query;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY;

	query_info->queries = kvmalloc_array(reset.count,
					     sizeof(struct v3d_timestamp_query),
					     GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);

	for (i = 0; i < reset.count; i++) {
		u32 sync;

		/* Queries are stored as 64-bit timestamps, 8 bytes
		 * apart from the base offset.
		 */
		query_info->queries[i].offset = reset.offset + 8 * i;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].syncobj = drm_syncobj_find(file_priv,
								  sync);
		if (!query_info->queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	query_info->count = reset.count;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}
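
/*
 * Illustrative userspace sketch (an assumption for documentation, not
 * driver code): a timestamp-query CPU job records one timestamp per
 * query into the single BO the job carries, at the offsets collected
 * above (evidently 64-bit values, given the 8-byte stride used by the
 * reset path). Field names follow what the parser above reads from
 * struct drm_v3d_timestamp_query; the syncobj handles are hypothetical.
 *
 *	u32 offsets[2] = { 0, 8 };
 *	u32 syncs[2] = { syncobj_a, syncobj_b };
 *	struct drm_v3d_timestamp_query ts = {
 *		.base = { .id = DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY },
 *		.offsets = (uintptr_t)offsets,
 *		.syncs = (uintptr_t)syncs,
 *		.count = 2,
 *	};
 */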

/* Get data for the copy timestamp query results job submission. */
static int
v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
				      struct drm_v3d_extension __user *ext,
				      struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_copy_timestamp_query copy;
	struct v3d_timestamp_query_info *query_info = &job->timestamp_query;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;

	query_info->queries = kvmalloc_array(copy.count,
					     sizeof(struct v3d_timestamp_query),
					     GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(copy.offsets);
	syncs = u64_to_user_ptr(copy.syncs);

	for (i = 0; i < copy.count; i++) {
		u32 offset, sync;

		if (get_user(offset, offsets++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].offset = offset;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		query_info->queries[i].syncobj = drm_syncobj_find(file_priv,
								  sync);
		if (!query_info->queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	query_info->count = copy.count;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

static int
v3d_copy_query_info(struct v3d_performance_query_info *query_info,
		    unsigned int count,
		    unsigned int nperfmons,
		    u32 __user *syncs,
		    u64 __user *kperfmon_ids,
		    struct drm_file *file_priv)
{
	unsigned int i, j;
	int err;

	for (i = 0; i < count; i++) {
		struct v3d_performance_query *query = &query_info->queries[i];
		u32 __user *ids_pointer;
		u32 sync, id;
		u64 ids;

		if (get_user(sync, syncs++)) {
			err = -EFAULT;
			goto error;
		}

		if (get_user(ids, kperfmon_ids++)) {
			err = -EFAULT;
			goto error;
		}

		query->kperfmon_ids =
			kvmalloc_array(nperfmons,
				       sizeof(*query->kperfmon_ids),
				       GFP_KERNEL);
		if (!query->kperfmon_ids) {
			err = -ENOMEM;
			goto error;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (j = 0; j < nperfmons; j++) {
			if (get_user(id, ids_pointer++)) {
				kvfree(query->kperfmon_ids);
				err = -EFAULT;
				goto error;
			}

			query->kperfmon_ids[j] = id;
		}

		query->syncobj = drm_syncobj_find(file_priv, sync);
		if (!query->syncobj) {
			kvfree(query->kperfmon_ids);
			err = -ENOENT;
			goto error;
		}
	}

	return 0;

error:
	v3d_performance_query_info_free(query_info, i);
	return err;
}

static int
v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
				     struct drm_v3d_extension __user *ext,
				     struct v3d_cpu_job *job)
{
	struct v3d_performance_query_info *query_info = &job->performance_query;
	struct drm_v3d_reset_performance_query reset;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;

	query_info->queries =
		kvmalloc_array(reset.count,
			       sizeof(struct v3d_performance_query),
			       GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	err = v3d_copy_query_info(query_info,
				  reset.count,
				  reset.nperfmons,
				  u64_to_user_ptr(reset.syncs),
				  u64_to_user_ptr(reset.kperfmon_ids),
				  file_priv);
	if (err)
		return err;

	query_info->count = reset.count;
	query_info->nperfmons = reset.nperfmons;

	return 0;
}
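
/*
 * User-memory layout consumed by v3d_copy_query_info() above:
 * kperfmon_ids is an array of `count` u64-encoded user pointers, each
 * of which points at `nperfmons` u32 perfmon IDs, i.e. one ID list per
 * performance query. syncs is a parallel array of `count` syncobj
 * handles indicating each query's availability.
 */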

static int
v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
					  struct drm_v3d_extension __user *ext,
					  struct v3d_cpu_job *job)
{
	struct v3d_performance_query_info *query_info = &job->performance_query;
	struct drm_v3d_copy_performance_query copy;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;

	query_info->queries =
		kvmalloc_array(copy.count,
			       sizeof(struct v3d_performance_query),
			       GFP_KERNEL);
	if (!query_info->queries)
		return -ENOMEM;

	err = v3d_copy_query_info(query_info,
				  copy.count,
				  copy.nperfmons,
				  u64_to_user_ptr(copy.syncs),
				  u64_to_user_ptr(copy.kperfmon_ids),
				  file_priv);
	if (err)
		return err;

	query_info->count = copy.count;
	query_info->nperfmons = copy.nperfmons;
	query_info->ncounters = copy.ncounters;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}
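
/*
 * The copy parameters stored above appear to mirror the semantics of
 * Vulkan's vkCmdCopyQueryPoolResults() (an assumption based on the
 * field names; the uAPI documentation is authoritative): results are
 * written starting at copy.offset, one record every copy.stride bytes,
 * 32- or 64-bit wide depending on do_64bit, optionally followed by an
 * availability word when availability_bit is set, with do_partial
 * allowing results from unavailable queries. The actual copy is
 * performed later by the CPU job handler.
 */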

/* When userspace attaches ioctl extensions, v3d_get_extensions() walks
 * the extension chain and parses each one according to its id.
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   struct v3d_submit_ext *se,
		   struct v3d_cpu_job *job)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
			break;
		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		if (ret)
			return ret;

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}
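
/*
 * Illustrative userspace sketch (an assumption for documentation, not
 * driver code): two extensions chained through drm_v3d_extension.next,
 * which the loop above follows until it reaches a zero pointer. The
 * omitted fields are as in the earlier sketches.
 *
 *	struct drm_v3d_multi_sync ms = {
 *		.base = { .id = DRM_V3D_EXT_ID_MULTI_SYNC },
 *		...
 *	};
 *	struct drm_v3d_timestamp_query ts = {
 *		.base = {
 *			.id = DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY,
 *			.next = (uintptr_t)&ms,
 *		},
 *		...
 *	};
 *	args.flags |= DRM_V3D_SUBMIT_EXTENSION;
 *	args.extensions = (uintptr_t)&ts;
 */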

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU. Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&render, sizeof(*render));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &render->base,
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret) {
		v3d_job_deallocate((void *)&render);
		goto fail;
	}

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_allocate((void *)&bin, sizeof(*bin));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, &bin->base,
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret) {
			v3d_job_deallocate((void *)&bin);
			goto fail;
		}

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, clean_job,
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret) {
			v3d_job_deallocate((void *)&clean_job);
			goto fail;
		}

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		if (v3d->global_perfmon) {
			ret = -EAGAIN;
			goto fail_perfmon;
		}

		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
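
/*
 * Dependency chain constructed by v3d_submit_cl_ioctl():
 *
 *	bin (V3D_BIN) --done_fence--> render (V3D_RENDER)
 *	render --done_fence--> clean_job (V3D_CACHE_CLEAN, if requested)
 *
 * Each stage waits on the previous stage's done_fence, so the jobs
 * retire in order; only last_job's completion is reported back to
 * userspace through out_sync (or the multisync out_syncs).
 */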

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&job, sizeof(*job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &job->base,
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret) {
		v3d_job_deallocate((void *)&job);
		goto fail;
	}

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	/* bo_handles is a fixed-size array; the first zero handle
	 * terminates the list.
	 */
	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
					 &job, &clean_job, &se,
					 &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		if (v3d->global_perfmon) {
			ret = -EAGAIN;
			goto fail_perfmon;
		}

		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};
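
/*
 * v3d_submit_cpu_ioctl() validates args->bo_handle_count against this
 * table, so each CPU job type is submitted with exactly the number of
 * BOs its handler expects (e.g. two BOs for a timestamp-query copy).
 */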

/**
 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace specifies the CPU job type and data required to perform its
 * operations through the drm_v3d_extension struct.
 */
int
v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_cpu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_submit_ext *out_se = NULL;
	struct v3d_cpu_job *cpu_job = NULL;
	struct v3d_csd_job *csd_job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("Invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job));
	if (ret)
		return ret;

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			goto fail;
		}
	}

	/* Every CPU job must have a CPU job user extension */
	if (!cpu_job->job_type) {
		DRM_DEBUG("CPU job must have a CPU job user extension.\n");
		ret = -EINVAL;
		goto fail;
	}

	if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) {
		DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n");
		ret = -EINVAL;
		goto fail;
	}

	trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);

	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
			   v3d_job_free, 0, &se, V3D_CPU);
	if (ret) {
		v3d_job_deallocate((void *)&cpu_job);
		goto fail;
	}

	clean_job = cpu_job->indirect_csd.clean_job;
	csd_job = cpu_job->indirect_csd.job;

	if (args->bo_handle_count) {
		ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
				     args->bo_handles, args->bo_handle_count);
		if (ret)
			goto fail;

		ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx);
		if (ret)
			goto fail;
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&cpu_job->base);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		ret = drm_sched_job_add_dependency(&csd_job->base.base,
						   dma_fence_get(cpu_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(&csd_job->base);

		ret = drm_sched_job_add_dependency(&clean_job->base,
						   dma_fence_get(csd_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(clean_job);

		break;
	default:
		break;
	}
	mutex_unlock(&v3d->sched_lock);
	out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ?
		 NULL : &se;

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &cpu_job->base,
						 &acquire_ctx, 0,
						 out_se, cpu_job->base.done_fence);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		v3d_attach_fences_and_unlock_reservation(file_priv,
							 clean_job,
							 &cpu_job->indirect_csd.acquire_ctx,
							 0, &se, clean_job->done_fence);
		break;
	default:
		break;
	}

	v3d_job_put(&cpu_job->base);
	v3d_job_put(&csd_job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);

	drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count,
				    &acquire_ctx);

	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &cpu_job->indirect_csd.acquire_ctx);

fail:
	/* Free the query arrays first: the job puts below may drop the
	 * last reference to (and free) cpu_job, and cpu_job may already
	 * be NULL if v3d_job_init() failed.
	 */
	if (cpu_job) {
		kvfree(cpu_job->timestamp_query.queries);
		kvfree(cpu_job->performance_query.queries);
	}
	v3d_job_cleanup((void *)cpu_job);
	v3d_job_cleanup((void *)csd_job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}