// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2014-2018 Broadcom
 * Copyright (C) 2023 Raspberry Pi
 */

#include <drm/drm_syncobj.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list). They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}
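/*
 * Summary of the reservation/fence protocol used by the submit paths
 * below: v3d_lock_bo_reservations() takes every BO's reservation lock
 * and records implicit (dma-resv) dependencies on the scheduler job;
 * after the job has been pushed, v3d_attach_fences_and_unlock_reservation()
 * publishes the job's done fence on each BO and drops the locks again.
 */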
/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list. This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_exec_cleanup() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}

static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	if (!job)
		return;

	kref_put(&job->refcount, job->free);
}

static int
v3d_job_allocate(void **container, size_t size)
{
	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for V3D job.\n");
		return -ENOMEM;
	}

	return 0;
}

static void
v3d_job_deallocate(void **container)
{
	kfree(*container);
	*container = NULL;
}
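/*
 * A note on job lifetimes, summarizing the helpers above and
 * v3d_push_job() below: a job is created with v3d_job_allocate() +
 * v3d_job_init(), which leaves it with a single reference held by the
 * submitter. v3d_push_job() hands an extra reference to the scheduler,
 * and the submitter then drops its own with v3d_job_put(). On error
 * paths before the job has been pushed, v3d_job_cleanup() detaches the
 * job from the scheduler and drops the submission reference in one go.
 */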
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     struct v3d_job *job, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	job->v3d = v3d;
	job->free = free;
	job->file = file_priv;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 1, v3d_priv);
	if (ret)
		return ret;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
	return ret;
}

static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job. */
	/* If it only supports a single signal semaphore. */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* If multiple semaphores extension is supported */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

static int
v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
			   struct v3d_dev *v3d,
			   struct drm_v3d_submit_csd *args,
			   struct v3d_csd_job **job,
			   struct v3d_job **clean_job,
			   struct v3d_submit_ext *se,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int ret;

	ret = v3d_job_allocate((void *)job, sizeof(**job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
			   v3d_job_free, args->in_sync, se, V3D_CSD);
	if (ret) {
		v3d_job_deallocate((void *)job);
		return ret;
	}

	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, *clean_job,
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret) {
		v3d_job_deallocate((void *)clean_job);
		return ret;
	}

	(*job)->args = *args;

	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		return ret;

	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
}
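/*
 * Note that v3d_setup_csd_jobs_and_bos() pairs every CSD job with a
 * V3D_CACHE_CLEAN job, and that the BO list and reservation locks are
 * attached to the clean job: it is the last job in the chain, so its
 * done fence is the one published on the BOs and handed back to
 * userspace.
 */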
static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Get data for multiple binary semaphores synchronization. Parse syncobj
 * to be signaled when job completes (out_sync).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      struct v3d_submit_ext *se)
{
	struct drm_v3d_multi_sync multisync;
	int ret;

	if (se->in_sync_count || se->out_sync_count) {
		DRM_DEBUG("Two multisync extensions were added to the same job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* Get data for the indirect CSD job submission. */
static int
v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
				struct drm_v3d_extension __user *ext,
				struct v3d_cpu_job *job)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_indirect_csd indirect_csd;
	struct v3d_indirect_csd_info *info = &job->indirect_csd;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
		return -EFAULT;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
		return -EINVAL;
	}

	job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
	info->offset = indirect_csd.offset;
	info->wg_size = indirect_csd.wg_size;
	memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
	       sizeof(indirect_csd.wg_uniform_offsets));

	info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);

	return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
					  &info->job, &info->clean_job,
					  NULL, &info->acquire_ctx);
}
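/*
 * The indirect CSD CPU job wraps a regular CSD submission: the wrapped
 * job is built up front by v3d_setup_csd_jobs_and_bos() above, while
 * the actual dispatch size is only read out of the indirect BO (at
 * info->offset) when the CPU job runs. That run path lives in the
 * scheduler code rather than here, and is expected to patch the CSD
 * job's configuration and workgroup uniforms before it is pushed.
 */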
/* Get data for the query timestamp job submission. */
static int
v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_timestamp_query timestamp;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&timestamp, ext, sizeof(timestamp)))
		return -EFAULT;

	if (timestamp.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(timestamp.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(timestamp.offsets);
	syncs = u64_to_user_ptr(timestamp.syncs);

	for (int i = 0; i < timestamp.count; i++) {
		u32 offset, sync;

		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].offset = offset;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
	}
	job->timestamp_query.count = timestamp.count;

	return 0;
}

static int
v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	struct drm_v3d_reset_timestamp_query reset;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(reset.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);

	for (int i = 0; i < reset.count; i++) {
		u32 sync;

		job->timestamp_query.queries[i].offset = reset.offset + 8 * i;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
	}
	job->timestamp_query.count = reset.count;

	return 0;
}
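/*
 * Each timestamp query occupies a single 64-bit slot in the timestamp
 * BO, which is where the hard-coded 8-byte stride above comes from.
 * Also note that drm_syncobj_find() returns NULL for a bad handle and
 * the result is stored here unvalidated.
 */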
/* Get data for the copy timestamp query results job submission. */
static int
v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
				      struct drm_v3d_extension __user *ext,
				      struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_copy_timestamp_query copy;
	int i;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(copy.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(copy.offsets);
	syncs = u64_to_user_ptr(copy.syncs);

	for (i = 0; i < copy.count; i++) {
		u32 offset, sync;

		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].offset = offset;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
	}
	job->timestamp_query.count = copy.count;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}

static int
v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
				     struct drm_v3d_extension __user *ext,
				     struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_reset_performance_query reset;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(reset.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);
	kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids);

	for (int i = 0; i < reset.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (int j = 0; j < reset.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				kvfree(job->performance_query.queries);
				return -EFAULT;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}
	}
	job->performance_query.count = reset.count;
	job->performance_query.nperfmons = reset.nperfmons;

	return 0;
}
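/*
 * The nested copy loops above and below trust the user-supplied
 * nperfmons count: kperfmon_ids is a fixed-size array inside
 * struct v3d_performance_query, so the count is assumed not to exceed
 * its capacity. Nothing in this file enforces that bound.
 */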
static int
v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
					  struct drm_v3d_extension __user *ext,
					  struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_copy_performance_query copy;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(copy.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(copy.syncs);
	kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);

	for (int i = 0; i < copy.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (int j = 0; j < copy.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				kvfree(job->performance_query.queries);
				return -EFAULT;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}
	}
	job->performance_query.count = copy.count;
	job->performance_query.nperfmons = copy.nperfmons;
	job->performance_query.ncounters = copy.ncounters;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}
/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
 * according to the extension id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   struct v3d_submit_ext *se,
		   struct v3d_cpu_job *job)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
			break;
		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		if (ret)
			return ret;

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}
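/*
 * Illustrative userspace-side use of the extension mechanism (a
 * sketch, not code shipped with this driver): every extension struct
 * embeds struct drm_v3d_extension as its first member, and the chain
 * is terminated by a zero ->next pointer:
 *
 *	struct drm_v3d_multi_sync ms = {
 *		.base = { .id = DRM_V3D_EXT_ID_MULTI_SYNC },
 *		.in_syncs = (uintptr_t)in_syncs,
 *		.in_sync_count = n_in,
 *		.out_syncs = (uintptr_t)out_syncs,
 *		.out_sync_count = n_out,
 *		.wait_stage = V3D_RENDER,
 *	};
 *
 *	args.flags |= DRM_V3D_SUBMIT_EXTENSION;
 *	args.extensions = (uintptr_t)&ms;
 *
 * Here in_syncs/out_syncs, n_in/n_out and args stand in for the
 * caller's own data.
 */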
/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU. Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&render, sizeof(*render));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &render->base,
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret) {
		v3d_job_deallocate((void *)&render);
		goto fail;
	}

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_allocate((void *)&bin, sizeof(*bin));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, &bin->base,
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret) {
			v3d_job_deallocate((void *)&bin);
			goto fail;
		}

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, clean_job,
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret) {
			v3d_job_deallocate((void *)&clean_job);
			goto fail;
		}

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
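/*
 * The fence chain built above is bin (optional) -> render -> cache
 * clean (optional): each stage is added as a scheduler dependency of
 * the next, and last_job's done fence is the one attached to the BO
 * reservations and to the user's out sync(s).
 */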
/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&job, sizeof(*job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &job->base,
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret) {
		v3d_job_deallocate((void *)&job);
		goto fail;
	}

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
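/*
 * Unlike the other submit paths, the TFU ioctl passes its BO handles
 * in a fixed-size array rather than through a user pointer; the lookup
 * loop above stops at the first zero handle, so userspace must zero
 * any unused slots.
 */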
/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
					 &job, &clean_job, &se,
					 &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};
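/* The table above is indexed by v3d_cpu_job_type; v3d_submit_cpu_ioctl()
 * below rejects submissions whose args->bo_handle_count doesn't match.
 */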
/**
 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace specifies the CPU job type and data required to perform its
 * operations through the drm_v3d_extension struct.
 */
int
v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_cpu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_submit_ext *out_se = NULL;
	struct v3d_cpu_job *cpu_job = NULL;
	struct v3d_csd_job *csd_job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("Invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job));
	if (ret)
		return ret;

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			goto fail;
		}
	}

	/* Every CPU job must have a CPU job user extension */
	if (!cpu_job->job_type) {
		DRM_DEBUG("CPU job must have a CPU job user extension.\n");
		ret = -EINVAL;
		goto fail;
	}

	if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) {
		DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n");
		ret = -EINVAL;
		goto fail;
	}

	trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);

	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
			   v3d_job_free, 0, &se, V3D_CPU);
	if (ret) {
		v3d_job_deallocate((void *)&cpu_job);
		goto fail;
	}

	clean_job = cpu_job->indirect_csd.clean_job;
	csd_job = cpu_job->indirect_csd.job;

	if (args->bo_handle_count) {
		ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
				     args->bo_handles, args->bo_handle_count);
		if (ret)
			goto fail;

		ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx);
		if (ret)
			goto fail;
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&cpu_job->base);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		ret = drm_sched_job_add_dependency(&csd_job->base.base,
						   dma_fence_get(cpu_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(&csd_job->base);

		ret = drm_sched_job_add_dependency(&clean_job->base,
						   dma_fence_get(csd_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(clean_job);

		break;
	default:
		break;
	}
	mutex_unlock(&v3d->sched_lock);

	out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? NULL : &se;
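	/* For an indirect CSD job the user's out syncs are signaled by the
	 * trailing clean job below, not by the CPU job itself.
	 */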
	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &cpu_job->base,
						 &acquire_ctx, 0,
						 out_se, cpu_job->base.done_fence);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		v3d_attach_fences_and_unlock_reservation(file_priv,
							 clean_job,
							 &cpu_job->indirect_csd.acquire_ctx,
							 0, &se, clean_job->done_fence);
		break;
	default:
		break;
	}

	v3d_job_put(&cpu_job->base);
	v3d_job_put(&csd_job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);

	drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count,
				    &acquire_ctx);

	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &cpu_job->indirect_csd.acquire_ctx);

fail:
	/* Free the query arrays before dropping the job references that
	 * own the containing v3d_cpu_job.
	 */
	if (cpu_job) {
		kvfree(cpu_job->timestamp_query.queries);
		kvfree(cpu_job->performance_query.queries);
	}

	v3d_job_cleanup((void *)cpu_job);
	v3d_job_cleanup((void *)csd_job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}