1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2014-2018 Broadcom 4 * Copyright (C) 2023 Raspberry Pi 5 */ 6 7 #include <drm/drm_syncobj.h> 8 9 #include "v3d_drv.h" 10 #include "v3d_regs.h" 11 #include "v3d_trace.h" 12 13 /* Takes the reservation lock on all the BOs being referenced, so that 14 * at queue submit time we can update the reservations. 15 * 16 * We don't lock the RCL the tile alloc/state BOs, or overflow memory 17 * (all of which are on exec->unref_list). They're entirely private 18 * to v3d, so we don't attach dma-buf fences to them. 19 */ 20 static int 21 v3d_lock_bo_reservations(struct v3d_job *job, 22 struct ww_acquire_ctx *acquire_ctx) 23 { 24 int i, ret; 25 26 ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx); 27 if (ret) 28 return ret; 29 30 for (i = 0; i < job->bo_count; i++) { 31 ret = dma_resv_reserve_fences(job->bo[i]->resv, 1); 32 if (ret) 33 goto fail; 34 35 ret = drm_sched_job_add_implicit_dependencies(&job->base, 36 job->bo[i], true); 37 if (ret) 38 goto fail; 39 } 40 41 return 0; 42 43 fail: 44 drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); 45 return ret; 46 } 47 48 /** 49 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects 50 * referenced by the job. 51 * @dev: DRM device 52 * @file_priv: DRM file for this fd 53 * @job: V3D job being set up 54 * @bo_handles: GEM handles 55 * @bo_count: Number of GEM handles passed in 56 * 57 * The command validator needs to reference BOs by their index within 58 * the submitted job's BO list. This does the validation of the job's 59 * BO list and reference counting for the lifetime of the job. 60 * 61 * Note that this function doesn't need to unreference the BOs on 62 * failure, because that will happen at v3d_exec_cleanup() time. 
63 */ 64 static int 65 v3d_lookup_bos(struct drm_device *dev, 66 struct drm_file *file_priv, 67 struct v3d_job *job, 68 u64 bo_handles, 69 u32 bo_count) 70 { 71 job->bo_count = bo_count; 72 73 if (!job->bo_count) { 74 /* See comment on bo_index for why we have to check 75 * this. 76 */ 77 DRM_DEBUG("Rendering requires BOs\n"); 78 return -EINVAL; 79 } 80 81 return drm_gem_objects_lookup(file_priv, 82 (void __user *)(uintptr_t)bo_handles, 83 job->bo_count, &job->bo); 84 } 85 86 static void 87 v3d_job_free(struct kref *ref) 88 { 89 struct v3d_job *job = container_of(ref, struct v3d_job, refcount); 90 int i; 91 92 if (job->bo) { 93 for (i = 0; i < job->bo_count; i++) 94 drm_gem_object_put(job->bo[i]); 95 kvfree(job->bo); 96 } 97 98 dma_fence_put(job->irq_fence); 99 dma_fence_put(job->done_fence); 100 101 if (job->perfmon) 102 v3d_perfmon_put(job->perfmon); 103 104 kfree(job); 105 } 106 107 static void 108 v3d_render_job_free(struct kref *ref) 109 { 110 struct v3d_render_job *job = container_of(ref, struct v3d_render_job, 111 base.refcount); 112 struct v3d_bo *bo, *save; 113 114 list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) { 115 drm_gem_object_put(&bo->base.base); 116 } 117 118 v3d_job_free(ref); 119 } 120 121 void v3d_job_cleanup(struct v3d_job *job) 122 { 123 if (!job) 124 return; 125 126 drm_sched_job_cleanup(&job->base); 127 v3d_job_put(job); 128 } 129 130 void v3d_job_put(struct v3d_job *job) 131 { 132 if (!job) 133 return; 134 135 kref_put(&job->refcount, job->free); 136 } 137 138 static int 139 v3d_job_allocate(void **container, size_t size) 140 { 141 *container = kcalloc(1, size, GFP_KERNEL); 142 if (!*container) { 143 DRM_ERROR("Cannot allocate memory for V3D job.\n"); 144 return -ENOMEM; 145 } 146 147 return 0; 148 } 149 150 static void 151 v3d_job_deallocate(void **container) 152 { 153 kfree(*container); 154 *container = NULL; 155 } 156 157 static int 158 v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv, 159 struct v3d_job 
*job, void (*free)(struct kref *ref), 160 u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue) 161 { 162 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 163 bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); 164 int ret, i; 165 166 job->v3d = v3d; 167 job->free = free; 168 job->file = file_priv; 169 170 ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue], 171 1, v3d_priv); 172 if (ret) 173 return ret; 174 175 if (has_multisync) { 176 if (se->in_sync_count && se->wait_stage == queue) { 177 struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs); 178 179 for (i = 0; i < se->in_sync_count; i++) { 180 struct drm_v3d_sem in; 181 182 if (copy_from_user(&in, handle++, sizeof(in))) { 183 ret = -EFAULT; 184 DRM_DEBUG("Failed to copy wait dep handle.\n"); 185 goto fail_deps; 186 } 187 ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0); 188 189 // TODO: Investigate why this was filtered out for the IOCTL. 190 if (ret && ret != -ENOENT) 191 goto fail_deps; 192 } 193 } 194 } else { 195 ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0); 196 197 // TODO: Investigate why this was filtered out for the IOCTL. 
198 if (ret && ret != -ENOENT) 199 goto fail_deps; 200 } 201 202 kref_init(&job->refcount); 203 204 return 0; 205 206 fail_deps: 207 drm_sched_job_cleanup(&job->base); 208 return ret; 209 } 210 211 static void 212 v3d_push_job(struct v3d_job *job) 213 { 214 drm_sched_job_arm(&job->base); 215 216 job->done_fence = dma_fence_get(&job->base.s_fence->finished); 217 218 /* put by scheduler job completion */ 219 kref_get(&job->refcount); 220 221 drm_sched_entity_push_job(&job->base); 222 } 223 224 static void 225 v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv, 226 struct v3d_job *job, 227 struct ww_acquire_ctx *acquire_ctx, 228 u32 out_sync, 229 struct v3d_submit_ext *se, 230 struct dma_fence *done_fence) 231 { 232 struct drm_syncobj *sync_out; 233 bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC); 234 int i; 235 236 for (i = 0; i < job->bo_count; i++) { 237 /* XXX: Use shared fences for read-only objects. */ 238 dma_resv_add_fence(job->bo[i]->resv, job->done_fence, 239 DMA_RESV_USAGE_WRITE); 240 } 241 242 drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx); 243 244 /* Update the return sync object for the job */ 245 /* If it only supports a single signal semaphore*/ 246 if (!has_multisync) { 247 sync_out = drm_syncobj_find(file_priv, out_sync); 248 if (sync_out) { 249 drm_syncobj_replace_fence(sync_out, done_fence); 250 drm_syncobj_put(sync_out); 251 } 252 return; 253 } 254 255 /* If multiple semaphores extension is supported */ 256 if (se->out_sync_count) { 257 for (i = 0; i < se->out_sync_count; i++) { 258 drm_syncobj_replace_fence(se->out_syncs[i].syncobj, 259 done_fence); 260 drm_syncobj_put(se->out_syncs[i].syncobj); 261 } 262 kvfree(se->out_syncs); 263 } 264 } 265 266 static int 267 v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv, 268 struct v3d_dev *v3d, 269 struct drm_v3d_submit_csd *args, 270 struct v3d_csd_job **job, 271 struct v3d_job **clean_job, 272 struct v3d_submit_ext *se, 273 struct ww_acquire_ctx 
*acquire_ctx) 274 { 275 int ret; 276 277 ret = v3d_job_allocate((void *)job, sizeof(**job)); 278 if (ret) 279 return ret; 280 281 ret = v3d_job_init(v3d, file_priv, &(*job)->base, 282 v3d_job_free, args->in_sync, se, V3D_CSD); 283 if (ret) { 284 v3d_job_deallocate((void *)job); 285 return ret; 286 } 287 288 ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job)); 289 if (ret) 290 return ret; 291 292 ret = v3d_job_init(v3d, file_priv, *clean_job, 293 v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); 294 if (ret) { 295 v3d_job_deallocate((void *)clean_job); 296 return ret; 297 } 298 299 (*job)->args = *args; 300 301 ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job, 302 args->bo_handles, args->bo_handle_count); 303 if (ret) 304 return ret; 305 306 return v3d_lock_bo_reservations(*clean_job, acquire_ctx); 307 } 308 309 static void 310 v3d_put_multisync_post_deps(struct v3d_submit_ext *se) 311 { 312 unsigned int i; 313 314 if (!(se && se->out_sync_count)) 315 return; 316 317 for (i = 0; i < se->out_sync_count; i++) 318 drm_syncobj_put(se->out_syncs[i].syncobj); 319 kvfree(se->out_syncs); 320 } 321 322 static int 323 v3d_get_multisync_post_deps(struct drm_file *file_priv, 324 struct v3d_submit_ext *se, 325 u32 count, u64 handles) 326 { 327 struct drm_v3d_sem __user *post_deps; 328 int i, ret; 329 330 if (!count) 331 return 0; 332 333 se->out_syncs = (struct v3d_submit_outsync *) 334 kvmalloc_array(count, 335 sizeof(struct v3d_submit_outsync), 336 GFP_KERNEL); 337 if (!se->out_syncs) 338 return -ENOMEM; 339 340 post_deps = u64_to_user_ptr(handles); 341 342 for (i = 0; i < count; i++) { 343 struct drm_v3d_sem out; 344 345 if (copy_from_user(&out, post_deps++, sizeof(out))) { 346 ret = -EFAULT; 347 DRM_DEBUG("Failed to copy post dep handles\n"); 348 goto fail; 349 } 350 351 se->out_syncs[i].syncobj = drm_syncobj_find(file_priv, 352 out.handle); 353 if (!se->out_syncs[i].syncobj) { 354 ret = -EINVAL; 355 goto fail; 356 } 357 } 358 se->out_sync_count = count; 359 360 
return 0; 361 362 fail: 363 for (i--; i >= 0; i--) 364 drm_syncobj_put(se->out_syncs[i].syncobj); 365 kvfree(se->out_syncs); 366 367 return ret; 368 } 369 370 /* Get data for multiple binary semaphores synchronization. Parse syncobj 371 * to be signaled when job completes (out_sync). 372 */ 373 static int 374 v3d_get_multisync_submit_deps(struct drm_file *file_priv, 375 struct drm_v3d_extension __user *ext, 376 struct v3d_submit_ext *se) 377 { 378 struct drm_v3d_multi_sync multisync; 379 int ret; 380 381 if (se->in_sync_count || se->out_sync_count) { 382 DRM_DEBUG("Two multisync extensions were added to the same job."); 383 return -EINVAL; 384 } 385 386 if (copy_from_user(&multisync, ext, sizeof(multisync))) 387 return -EFAULT; 388 389 if (multisync.pad) 390 return -EINVAL; 391 392 ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count, 393 multisync.out_syncs); 394 if (ret) 395 return ret; 396 397 se->in_sync_count = multisync.in_sync_count; 398 se->in_syncs = multisync.in_syncs; 399 se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC; 400 se->wait_stage = multisync.wait_stage; 401 402 return 0; 403 } 404 405 /* Get data for the indirect CSD job submission. 
*/
/* Copies the indirect CSD extension from userspace into @job, looks up
 * the indirect BO and sets up the CSD job and its cache-clean job.
 *
 * @job is NULL when the extension is attached to a GPU (non-CPU)
 * submission, which is rejected with -EINVAL.  A second CPU job
 * extension on the same job is rejected as well.
 */
static int
v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
				struct drm_v3d_extension __user *ext,
				struct v3d_cpu_job *job)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_indirect_csd indirect_csd;
	struct v3d_indirect_csd_info *info;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	/* Only derive member addresses once job is known to be valid. */
	info = &job->indirect_csd;

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
		return -EFAULT;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
		return -EINVAL;
	}

	job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
	info->offset = indirect_csd.offset;
	info->wg_size = indirect_csd.wg_size;
	memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
	       sizeof(indirect_csd.wg_uniform_offsets));

	info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);
	if (!info->indirect) {
		/* NOTE(review): the consumer of info->indirect is outside this
		 * file; presumably it does not expect a NULL BO - confirm.
		 */
		DRM_DEBUG("Failed to look up indirect GEM BO %d\n",
			  indirect_csd.indirect);
		return -ENOENT;
	}

	return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
					  &info->job, &info->clean_job,
					  NULL, &info->acquire_ctx);
}

/* Get data for the query timestamp job submission.
*/ 448 static int 449 v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, 450 struct drm_v3d_extension __user *ext, 451 struct v3d_cpu_job *job) 452 { 453 u32 __user *offsets, *syncs; 454 struct drm_v3d_timestamp_query timestamp; 455 struct v3d_timestamp_query_info *query_info = &job->timestamp_query; 456 unsigned int i; 457 int err; 458 459 if (!job) { 460 DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 461 return -EINVAL; 462 } 463 464 if (job->job_type) { 465 DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 466 return -EINVAL; 467 } 468 469 if (copy_from_user(×tamp, ext, sizeof(timestamp))) 470 return -EFAULT; 471 472 if (timestamp.pad) 473 return -EINVAL; 474 475 job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY; 476 477 query_info->queries = kvmalloc_array(timestamp.count, 478 sizeof(struct v3d_timestamp_query), 479 GFP_KERNEL); 480 if (!query_info->queries) 481 return -ENOMEM; 482 483 offsets = u64_to_user_ptr(timestamp.offsets); 484 syncs = u64_to_user_ptr(timestamp.syncs); 485 486 for (i = 0; i < timestamp.count; i++) { 487 u32 offset, sync; 488 489 if (get_user(offset, offsets++)) { 490 err = -EFAULT; 491 goto error; 492 } 493 494 query_info->queries[i].offset = offset; 495 496 if (get_user(sync, syncs++)) { 497 err = -EFAULT; 498 goto error; 499 } 500 501 query_info->queries[i].syncobj = drm_syncobj_find(file_priv, 502 sync); 503 if (!query_info->queries[i].syncobj) { 504 err = -ENOENT; 505 goto error; 506 } 507 } 508 query_info->count = timestamp.count; 509 510 return 0; 511 512 error: 513 v3d_timestamp_query_info_free(&job->timestamp_query, i); 514 return err; 515 } 516 517 static int 518 v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, 519 struct drm_v3d_extension __user *ext, 520 struct v3d_cpu_job *job) 521 { 522 u32 __user *syncs; 523 struct drm_v3d_reset_timestamp_query reset; 524 struct v3d_timestamp_query_info *query_info = &job->timestamp_query; 525 unsigned int i; 526 int err; 527 528 
if (!job) { 529 DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 530 return -EINVAL; 531 } 532 533 if (job->job_type) { 534 DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 535 return -EINVAL; 536 } 537 538 if (copy_from_user(&reset, ext, sizeof(reset))) 539 return -EFAULT; 540 541 job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY; 542 543 query_info->queries = kvmalloc_array(reset.count, 544 sizeof(struct v3d_timestamp_query), 545 GFP_KERNEL); 546 if (!query_info->queries) 547 return -ENOMEM; 548 549 syncs = u64_to_user_ptr(reset.syncs); 550 551 for (i = 0; i < reset.count; i++) { 552 u32 sync; 553 554 query_info->queries[i].offset = reset.offset + 8 * i; 555 556 if (get_user(sync, syncs++)) { 557 err = -EFAULT; 558 goto error; 559 } 560 561 query_info->queries[i].syncobj = drm_syncobj_find(file_priv, 562 sync); 563 if (!query_info->queries[i].syncobj) { 564 err = -ENOENT; 565 goto error; 566 } 567 } 568 query_info->count = reset.count; 569 570 return 0; 571 572 error: 573 v3d_timestamp_query_info_free(&job->timestamp_query, i); 574 return err; 575 } 576 577 /* Get data for the copy timestamp query results job submission. 
*/ 578 static int 579 v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, 580 struct drm_v3d_extension __user *ext, 581 struct v3d_cpu_job *job) 582 { 583 u32 __user *offsets, *syncs; 584 struct drm_v3d_copy_timestamp_query copy; 585 struct v3d_timestamp_query_info *query_info = &job->timestamp_query; 586 unsigned int i; 587 int err; 588 589 if (!job) { 590 DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 591 return -EINVAL; 592 } 593 594 if (job->job_type) { 595 DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 596 return -EINVAL; 597 } 598 599 if (copy_from_user(©, ext, sizeof(copy))) 600 return -EFAULT; 601 602 if (copy.pad) 603 return -EINVAL; 604 605 job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY; 606 607 query_info->queries = kvmalloc_array(copy.count, 608 sizeof(struct v3d_timestamp_query), 609 GFP_KERNEL); 610 if (!query_info->queries) 611 return -ENOMEM; 612 613 offsets = u64_to_user_ptr(copy.offsets); 614 syncs = u64_to_user_ptr(copy.syncs); 615 616 for (i = 0; i < copy.count; i++) { 617 u32 offset, sync; 618 619 if (get_user(offset, offsets++)) { 620 err = -EFAULT; 621 goto error; 622 } 623 624 query_info->queries[i].offset = offset; 625 626 if (get_user(sync, syncs++)) { 627 err = -EFAULT; 628 goto error; 629 } 630 631 query_info->queries[i].syncobj = drm_syncobj_find(file_priv, 632 sync); 633 if (!query_info->queries[i].syncobj) { 634 err = -ENOENT; 635 goto error; 636 } 637 } 638 query_info->count = copy.count; 639 640 job->copy.do_64bit = copy.do_64bit; 641 job->copy.do_partial = copy.do_partial; 642 job->copy.availability_bit = copy.availability_bit; 643 job->copy.offset = copy.offset; 644 job->copy.stride = copy.stride; 645 646 return 0; 647 648 error: 649 v3d_timestamp_query_info_free(&job->timestamp_query, i); 650 return err; 651 } 652 653 static int 654 v3d_copy_query_info(struct v3d_performance_query_info *query_info, 655 unsigned int count, 656 unsigned int nperfmons, 657 u32 __user *syncs, 
658 u64 __user *kperfmon_ids, 659 struct drm_file *file_priv) 660 { 661 unsigned int i, j; 662 int err; 663 664 for (i = 0; i < count; i++) { 665 struct v3d_performance_query *query = &query_info->queries[i]; 666 u32 __user *ids_pointer; 667 u32 sync, id; 668 u64 ids; 669 670 if (get_user(sync, syncs++)) { 671 err = -EFAULT; 672 goto error; 673 } 674 675 if (get_user(ids, kperfmon_ids++)) { 676 err = -EFAULT; 677 goto error; 678 } 679 680 query->kperfmon_ids = 681 kvmalloc_array(nperfmons, 682 sizeof(struct v3d_performance_query *), 683 GFP_KERNEL); 684 if (!query->kperfmon_ids) { 685 err = -ENOMEM; 686 goto error; 687 } 688 689 ids_pointer = u64_to_user_ptr(ids); 690 691 for (j = 0; j < nperfmons; j++) { 692 if (get_user(id, ids_pointer++)) { 693 kvfree(query->kperfmon_ids); 694 err = -EFAULT; 695 goto error; 696 } 697 698 query->kperfmon_ids[j] = id; 699 } 700 701 query->syncobj = drm_syncobj_find(file_priv, sync); 702 if (!query->syncobj) { 703 kvfree(query->kperfmon_ids); 704 err = -ENOENT; 705 goto error; 706 } 707 } 708 709 return 0; 710 711 error: 712 v3d_performance_query_info_free(query_info, i); 713 return err; 714 } 715 716 static int 717 v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, 718 struct drm_v3d_extension __user *ext, 719 struct v3d_cpu_job *job) 720 { 721 struct v3d_performance_query_info *query_info = &job->performance_query; 722 struct drm_v3d_reset_performance_query reset; 723 int err; 724 725 if (!job) { 726 DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 727 return -EINVAL; 728 } 729 730 if (job->job_type) { 731 DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 732 return -EINVAL; 733 } 734 735 if (copy_from_user(&reset, ext, sizeof(reset))) 736 return -EFAULT; 737 738 job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; 739 740 query_info->queries = 741 kvmalloc_array(reset.count, 742 sizeof(struct v3d_performance_query), 743 GFP_KERNEL); 744 if (!query_info->queries) 745 return 
-ENOMEM; 746 747 err = v3d_copy_query_info(query_info, 748 reset.count, 749 reset.nperfmons, 750 u64_to_user_ptr(reset.syncs), 751 u64_to_user_ptr(reset.kperfmon_ids), 752 file_priv); 753 if (err) 754 return err; 755 756 query_info->count = reset.count; 757 query_info->nperfmons = reset.nperfmons; 758 759 return 0; 760 } 761 762 static int 763 v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, 764 struct drm_v3d_extension __user *ext, 765 struct v3d_cpu_job *job) 766 { 767 struct v3d_performance_query_info *query_info = &job->performance_query; 768 struct drm_v3d_copy_performance_query copy; 769 int err; 770 771 if (!job) { 772 DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); 773 return -EINVAL; 774 } 775 776 if (job->job_type) { 777 DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n"); 778 return -EINVAL; 779 } 780 781 if (copy_from_user(©, ext, sizeof(copy))) 782 return -EFAULT; 783 784 if (copy.pad) 785 return -EINVAL; 786 787 job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; 788 789 query_info->queries = 790 kvmalloc_array(copy.count, 791 sizeof(struct v3d_performance_query), 792 GFP_KERNEL); 793 if (!query_info->queries) 794 return -ENOMEM; 795 796 err = v3d_copy_query_info(query_info, 797 copy.count, 798 copy.nperfmons, 799 u64_to_user_ptr(copy.syncs), 800 u64_to_user_ptr(copy.kperfmon_ids), 801 file_priv); 802 if (err) 803 return err; 804 805 query_info->count = copy.count; 806 query_info->nperfmons = copy.nperfmons; 807 query_info->ncounters = copy.ncounters; 808 809 job->copy.do_64bit = copy.do_64bit; 810 job->copy.do_partial = copy.do_partial; 811 job->copy.availability_bit = copy.availability_bit; 812 job->copy.offset = copy.offset; 813 job->copy.stride = copy.stride; 814 815 return 0; 816 } 817 818 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data 819 * according to the extension id (name). 
820 */ 821 static int 822 v3d_get_extensions(struct drm_file *file_priv, 823 u64 ext_handles, 824 struct v3d_submit_ext *se, 825 struct v3d_cpu_job *job) 826 { 827 struct drm_v3d_extension __user *user_ext; 828 int ret; 829 830 user_ext = u64_to_user_ptr(ext_handles); 831 while (user_ext) { 832 struct drm_v3d_extension ext; 833 834 if (copy_from_user(&ext, user_ext, sizeof(ext))) { 835 DRM_DEBUG("Failed to copy submit extension\n"); 836 return -EFAULT; 837 } 838 839 switch (ext.id) { 840 case DRM_V3D_EXT_ID_MULTI_SYNC: 841 ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se); 842 break; 843 case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD: 844 ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job); 845 break; 846 case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY: 847 ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job); 848 break; 849 case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY: 850 ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job); 851 break; 852 case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY: 853 ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job); 854 break; 855 case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: 856 ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); 857 break; 858 case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY: 859 ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job); 860 break; 861 default: 862 DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id); 863 return -EINVAL; 864 } 865 866 if (ret) 867 return ret; 868 869 user_ext = u64_to_user_ptr(ext.next); 870 } 871 872 return 0; 873 } 874 875 /** 876 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. 877 * @dev: DRM device 878 * @data: ioctl argument 879 * @file_priv: DRM file for this fd 880 * 881 * This is the main entrypoint for userspace to submit a 3D frame to 882 * the GPU. 
Userspace provides the binner command list (if 883 * applicable), and the kernel sets up the render command list to draw 884 * to the framebuffer described in the ioctl, using the command lists 885 * that the 3D engine's binner will produce. 886 */ 887 int 888 v3d_submit_cl_ioctl(struct drm_device *dev, void *data, 889 struct drm_file *file_priv) 890 { 891 struct v3d_dev *v3d = to_v3d_dev(dev); 892 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 893 struct drm_v3d_submit_cl *args = data; 894 struct v3d_submit_ext se = {0}; 895 struct v3d_bin_job *bin = NULL; 896 struct v3d_render_job *render = NULL; 897 struct v3d_job *clean_job = NULL; 898 struct v3d_job *last_job; 899 struct ww_acquire_ctx acquire_ctx; 900 int ret = 0; 901 902 trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end); 903 904 if (args->pad) 905 return -EINVAL; 906 907 if (args->flags && 908 args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE | 909 DRM_V3D_SUBMIT_EXTENSION)) { 910 DRM_INFO("invalid flags: %d\n", args->flags); 911 return -EINVAL; 912 } 913 914 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 915 ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL); 916 if (ret) { 917 DRM_DEBUG("Failed to get extensions.\n"); 918 return ret; 919 } 920 } 921 922 ret = v3d_job_allocate((void *)&render, sizeof(*render)); 923 if (ret) 924 return ret; 925 926 ret = v3d_job_init(v3d, file_priv, &render->base, 927 v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER); 928 if (ret) { 929 v3d_job_deallocate((void *)&render); 930 goto fail; 931 } 932 933 render->start = args->rcl_start; 934 render->end = args->rcl_end; 935 INIT_LIST_HEAD(&render->unref_list); 936 937 if (args->bcl_start != args->bcl_end) { 938 ret = v3d_job_allocate((void *)&bin, sizeof(*bin)); 939 if (ret) 940 goto fail; 941 942 ret = v3d_job_init(v3d, file_priv, &bin->base, 943 v3d_job_free, args->in_sync_bcl, &se, V3D_BIN); 944 if (ret) { 945 v3d_job_deallocate((void *)&bin); 946 goto fail; 947 } 948 949 
bin->start = args->bcl_start; 950 bin->end = args->bcl_end; 951 bin->qma = args->qma; 952 bin->qms = args->qms; 953 bin->qts = args->qts; 954 bin->render = render; 955 } 956 957 if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) { 958 ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job)); 959 if (ret) 960 goto fail; 961 962 ret = v3d_job_init(v3d, file_priv, clean_job, 963 v3d_job_free, 0, NULL, V3D_CACHE_CLEAN); 964 if (ret) { 965 v3d_job_deallocate((void *)&clean_job); 966 goto fail; 967 } 968 969 last_job = clean_job; 970 } else { 971 last_job = &render->base; 972 } 973 974 ret = v3d_lookup_bos(dev, file_priv, last_job, 975 args->bo_handles, args->bo_handle_count); 976 if (ret) 977 goto fail; 978 979 ret = v3d_lock_bo_reservations(last_job, &acquire_ctx); 980 if (ret) 981 goto fail; 982 983 if (args->perfmon_id) { 984 render->base.perfmon = v3d_perfmon_find(v3d_priv, 985 args->perfmon_id); 986 987 if (!render->base.perfmon) { 988 ret = -ENOENT; 989 goto fail_perfmon; 990 } 991 } 992 993 mutex_lock(&v3d->sched_lock); 994 if (bin) { 995 bin->base.perfmon = render->base.perfmon; 996 v3d_perfmon_get(bin->base.perfmon); 997 v3d_push_job(&bin->base); 998 999 ret = drm_sched_job_add_dependency(&render->base.base, 1000 dma_fence_get(bin->base.done_fence)); 1001 if (ret) 1002 goto fail_unreserve; 1003 } 1004 1005 v3d_push_job(&render->base); 1006 1007 if (clean_job) { 1008 struct dma_fence *render_fence = 1009 dma_fence_get(render->base.done_fence); 1010 ret = drm_sched_job_add_dependency(&clean_job->base, 1011 render_fence); 1012 if (ret) 1013 goto fail_unreserve; 1014 clean_job->perfmon = render->base.perfmon; 1015 v3d_perfmon_get(clean_job->perfmon); 1016 v3d_push_job(clean_job); 1017 } 1018 1019 mutex_unlock(&v3d->sched_lock); 1020 1021 v3d_attach_fences_and_unlock_reservation(file_priv, 1022 last_job, 1023 &acquire_ctx, 1024 args->out_sync, 1025 &se, 1026 last_job->done_fence); 1027 1028 v3d_job_put(&bin->base); 1029 v3d_job_put(&render->base); 1030 
v3d_job_put(clean_job); 1031 1032 return 0; 1033 1034 fail_unreserve: 1035 mutex_unlock(&v3d->sched_lock); 1036 fail_perfmon: 1037 drm_gem_unlock_reservations(last_job->bo, 1038 last_job->bo_count, &acquire_ctx); 1039 fail: 1040 v3d_job_cleanup((void *)bin); 1041 v3d_job_cleanup((void *)render); 1042 v3d_job_cleanup(clean_job); 1043 v3d_put_multisync_post_deps(&se); 1044 1045 return ret; 1046 } 1047 1048 /** 1049 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D. 1050 * @dev: DRM device 1051 * @data: ioctl argument 1052 * @file_priv: DRM file for this fd 1053 * 1054 * Userspace provides the register setup for the TFU, which we don't 1055 * need to validate since the TFU is behind the MMU. 1056 */ 1057 int 1058 v3d_submit_tfu_ioctl(struct drm_device *dev, void *data, 1059 struct drm_file *file_priv) 1060 { 1061 struct v3d_dev *v3d = to_v3d_dev(dev); 1062 struct drm_v3d_submit_tfu *args = data; 1063 struct v3d_submit_ext se = {0}; 1064 struct v3d_tfu_job *job = NULL; 1065 struct ww_acquire_ctx acquire_ctx; 1066 int ret = 0; 1067 1068 trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia); 1069 1070 if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { 1071 DRM_DEBUG("invalid flags: %d\n", args->flags); 1072 return -EINVAL; 1073 } 1074 1075 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 1076 ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL); 1077 if (ret) { 1078 DRM_DEBUG("Failed to get extensions.\n"); 1079 return ret; 1080 } 1081 } 1082 1083 ret = v3d_job_allocate((void *)&job, sizeof(*job)); 1084 if (ret) 1085 return ret; 1086 1087 ret = v3d_job_init(v3d, file_priv, &job->base, 1088 v3d_job_free, args->in_sync, &se, V3D_TFU); 1089 if (ret) { 1090 v3d_job_deallocate((void *)&job); 1091 goto fail; 1092 } 1093 1094 job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles), 1095 sizeof(*job->base.bo), GFP_KERNEL); 1096 if (!job->base.bo) { 1097 ret = -ENOMEM; 1098 goto fail; 1099 } 1100 1101 job->args = *args; 1102 1103 for 
(job->base.bo_count = 0; 1104 job->base.bo_count < ARRAY_SIZE(args->bo_handles); 1105 job->base.bo_count++) { 1106 struct drm_gem_object *bo; 1107 1108 if (!args->bo_handles[job->base.bo_count]) 1109 break; 1110 1111 bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]); 1112 if (!bo) { 1113 DRM_DEBUG("Failed to look up GEM BO %d: %d\n", 1114 job->base.bo_count, 1115 args->bo_handles[job->base.bo_count]); 1116 ret = -ENOENT; 1117 goto fail; 1118 } 1119 job->base.bo[job->base.bo_count] = bo; 1120 } 1121 1122 ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx); 1123 if (ret) 1124 goto fail; 1125 1126 mutex_lock(&v3d->sched_lock); 1127 v3d_push_job(&job->base); 1128 mutex_unlock(&v3d->sched_lock); 1129 1130 v3d_attach_fences_and_unlock_reservation(file_priv, 1131 &job->base, &acquire_ctx, 1132 args->out_sync, 1133 &se, 1134 job->base.done_fence); 1135 1136 v3d_job_put(&job->base); 1137 1138 return 0; 1139 1140 fail: 1141 v3d_job_cleanup((void *)job); 1142 v3d_put_multisync_post_deps(&se); 1143 1144 return ret; 1145 } 1146 1147 /** 1148 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D. 1149 * @dev: DRM device 1150 * @data: ioctl argument 1151 * @file_priv: DRM file for this fd 1152 * 1153 * Userspace provides the register setup for the CSD, which we don't 1154 * need to validate since the CSD is behind the MMU. 
1155 */ 1156 int 1157 v3d_submit_csd_ioctl(struct drm_device *dev, void *data, 1158 struct drm_file *file_priv) 1159 { 1160 struct v3d_dev *v3d = to_v3d_dev(dev); 1161 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 1162 struct drm_v3d_submit_csd *args = data; 1163 struct v3d_submit_ext se = {0}; 1164 struct v3d_csd_job *job = NULL; 1165 struct v3d_job *clean_job = NULL; 1166 struct ww_acquire_ctx acquire_ctx; 1167 int ret; 1168 1169 trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]); 1170 1171 if (args->pad) 1172 return -EINVAL; 1173 1174 if (!v3d_has_csd(v3d)) { 1175 DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n"); 1176 return -EINVAL; 1177 } 1178 1179 if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { 1180 DRM_INFO("invalid flags: %d\n", args->flags); 1181 return -EINVAL; 1182 } 1183 1184 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 1185 ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL); 1186 if (ret) { 1187 DRM_DEBUG("Failed to get extensions.\n"); 1188 return ret; 1189 } 1190 } 1191 1192 ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args, 1193 &job, &clean_job, &se, 1194 &acquire_ctx); 1195 if (ret) 1196 goto fail; 1197 1198 if (args->perfmon_id) { 1199 job->base.perfmon = v3d_perfmon_find(v3d_priv, 1200 args->perfmon_id); 1201 if (!job->base.perfmon) { 1202 ret = -ENOENT; 1203 goto fail_perfmon; 1204 } 1205 } 1206 1207 mutex_lock(&v3d->sched_lock); 1208 v3d_push_job(&job->base); 1209 1210 ret = drm_sched_job_add_dependency(&clean_job->base, 1211 dma_fence_get(job->base.done_fence)); 1212 if (ret) 1213 goto fail_unreserve; 1214 1215 v3d_push_job(clean_job); 1216 mutex_unlock(&v3d->sched_lock); 1217 1218 v3d_attach_fences_and_unlock_reservation(file_priv, 1219 clean_job, 1220 &acquire_ctx, 1221 args->out_sync, 1222 &se, 1223 clean_job->done_fence); 1224 1225 v3d_job_put(&job->base); 1226 v3d_job_put(clean_job); 1227 1228 return 0; 1229 1230 fail_unreserve: 1231 mutex_unlock(&v3d->sched_lock); 1232 
fail_perfmon: 1233 drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, 1234 &acquire_ctx); 1235 fail: 1236 v3d_job_cleanup((void *)job); 1237 v3d_job_cleanup(clean_job); 1238 v3d_put_multisync_post_deps(&se); 1239 1240 return ret; 1241 } 1242 1243 static const unsigned int cpu_job_bo_handle_count[] = { 1244 [V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1, 1245 [V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1, 1246 [V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1, 1247 [V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2, 1248 [V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0, 1249 [V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1, 1250 }; 1251 1252 /** 1253 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D. 1254 * @dev: DRM device 1255 * @data: ioctl argument 1256 * @file_priv: DRM file for this fd 1257 * 1258 * Userspace specifies the CPU job type and data required to perform its 1259 * operations through the drm_v3d_extension struct. 1260 */ 1261 int 1262 v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, 1263 struct drm_file *file_priv) 1264 { 1265 struct v3d_dev *v3d = to_v3d_dev(dev); 1266 struct drm_v3d_submit_cpu *args = data; 1267 struct v3d_submit_ext se = {0}; 1268 struct v3d_submit_ext *out_se = NULL; 1269 struct v3d_cpu_job *cpu_job = NULL; 1270 struct v3d_csd_job *csd_job = NULL; 1271 struct v3d_job *clean_job = NULL; 1272 struct ww_acquire_ctx acquire_ctx; 1273 int ret; 1274 1275 if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { 1276 DRM_INFO("Invalid flags: %d\n", args->flags); 1277 return -EINVAL; 1278 } 1279 1280 ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job)); 1281 if (ret) 1282 return ret; 1283 1284 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 1285 ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job); 1286 if (ret) { 1287 DRM_DEBUG("Failed to get extensions.\n"); 1288 goto fail; 1289 } 1290 } 1291 1292 /* Every CPU job must have a CPU job user extension */ 1293 if (!cpu_job->job_type) { 1294 DRM_DEBUG("CPU job must have a 
CPU job user extension.\n"); 1295 ret = -EINVAL; 1296 goto fail; 1297 } 1298 1299 if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) { 1300 DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n"); 1301 ret = -EINVAL; 1302 goto fail; 1303 } 1304 1305 trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); 1306 1307 ret = v3d_job_init(v3d, file_priv, &cpu_job->base, 1308 v3d_job_free, 0, &se, V3D_CPU); 1309 if (ret) { 1310 v3d_job_deallocate((void *)&cpu_job); 1311 goto fail; 1312 } 1313 1314 clean_job = cpu_job->indirect_csd.clean_job; 1315 csd_job = cpu_job->indirect_csd.job; 1316 1317 if (args->bo_handle_count) { 1318 ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base, 1319 args->bo_handles, args->bo_handle_count); 1320 if (ret) 1321 goto fail; 1322 1323 ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx); 1324 if (ret) 1325 goto fail; 1326 } 1327 1328 mutex_lock(&v3d->sched_lock); 1329 v3d_push_job(&cpu_job->base); 1330 1331 switch (cpu_job->job_type) { 1332 case V3D_CPU_JOB_TYPE_INDIRECT_CSD: 1333 ret = drm_sched_job_add_dependency(&csd_job->base.base, 1334 dma_fence_get(cpu_job->base.done_fence)); 1335 if (ret) 1336 goto fail_unreserve; 1337 1338 v3d_push_job(&csd_job->base); 1339 1340 ret = drm_sched_job_add_dependency(&clean_job->base, 1341 dma_fence_get(csd_job->base.done_fence)); 1342 if (ret) 1343 goto fail_unreserve; 1344 1345 v3d_push_job(clean_job); 1346 1347 break; 1348 default: 1349 break; 1350 } 1351 mutex_unlock(&v3d->sched_lock); 1352 1353 out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? 
NULL : &se; 1354 1355 v3d_attach_fences_and_unlock_reservation(file_priv, 1356 &cpu_job->base, 1357 &acquire_ctx, 0, 1358 out_se, cpu_job->base.done_fence); 1359 1360 switch (cpu_job->job_type) { 1361 case V3D_CPU_JOB_TYPE_INDIRECT_CSD: 1362 v3d_attach_fences_and_unlock_reservation(file_priv, 1363 clean_job, 1364 &cpu_job->indirect_csd.acquire_ctx, 1365 0, &se, clean_job->done_fence); 1366 break; 1367 default: 1368 break; 1369 } 1370 1371 v3d_job_put(&cpu_job->base); 1372 v3d_job_put(&csd_job->base); 1373 v3d_job_put(clean_job); 1374 1375 return 0; 1376 1377 fail_unreserve: 1378 mutex_unlock(&v3d->sched_lock); 1379 1380 drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count, 1381 &acquire_ctx); 1382 1383 drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, 1384 &cpu_job->indirect_csd.acquire_ctx); 1385 1386 fail: 1387 v3d_job_cleanup((void *)cpu_job); 1388 v3d_job_cleanup((void *)csd_job); 1389 v3d_job_cleanup(clean_job); 1390 v3d_put_multisync_post_deps(&se); 1391 kvfree(cpu_job->timestamp_query.queries); 1392 kvfree(cpu_job->performance_query.queries); 1393 1394 return ret; 1395 } 1396