// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2014-2018 Broadcom
 * Copyright (C) 2023 Raspberry Pi
 */

#include <drm/drm_syncobj.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on exec->unref_list). They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}
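
/*
 * A condensed sketch (using only helpers from this file) of the
 * reservation lifecycle around a submit; the unlock/attach half lives
 * in v3d_attach_fences_and_unlock_reservation() further down:
 *
 *	struct ww_acquire_ctx ctx;
 *
 *	ret = v3d_lock_bo_reservations(job, &ctx); // lock + reserve fence slots
 *	...                                        // push the job, arm done_fence
 *	dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
 *			   DMA_RESV_USAGE_WRITE);  // publish the fence
 *	drm_gem_unlock_reservations(job->bo, job->bo_count, &ctx);
 */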

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list. This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_exec_cleanup() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}

static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	if (!job)
		return;

	kref_put(&job->refcount, job->free);
}

static int
v3d_job_allocate(void **container, size_t size)
{
	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for V3D job.\n");
		return -ENOMEM;
	}

	return 0;
}

static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     struct v3d_job *job, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	job->v3d = v3d;
	job->free = free;
	job->file = file_priv;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 1, v3d_priv);
	if (ret)
		return ret;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
	return ret;
}
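
/*
 * Illustrative userspace sketch of the multisync wait list consumed by
 * v3d_job_init() above. Struct layouts are assumed to follow the v3d
 * uAPI header; only drm_v3d_sem::handle is read here, and the
 * wait_stage values mirror enum v3d_queue:
 *
 *	struct drm_v3d_sem in_syncs[2] = {
 *		{ .handle = syncobj_a },	// syncobj_a/b: caller's handles
 *		{ .handle = syncobj_b },
 *	};
 *	struct drm_v3d_multi_sync ms = {
 *		.base.id = DRM_V3D_EXT_ID_MULTI_SYNC,
 *		.in_syncs = (uintptr_t)in_syncs,
 *		.in_sync_count = 2,
 *		.wait_stage = V3D_RENDER,	// queue these deps apply to
 *	};
 */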

static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job. */
	/* If the job only supports a single signal semaphore */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* If the multiple semaphores extension is supported */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

static int
v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
			   struct v3d_dev *v3d,
			   struct drm_v3d_submit_csd *args,
			   struct v3d_csd_job **job,
			   struct v3d_job **clean_job,
			   struct v3d_submit_ext *se,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int ret;

	ret = v3d_job_allocate((void *)job, sizeof(**job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
			   v3d_job_free, args->in_sync, se, V3D_CSD);
	if (ret)
		return ret;

	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, *clean_job,
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret)
		return ret;

	(*job)->args = *args;

	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		return ret;

	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
}

static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Get data for multiple binary semaphores synchronization. Parse the
 * syncobjs to be signaled when the job completes (out_sync).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      struct v3d_submit_ext *se)
{
	struct drm_v3d_multi_sync multisync;
	int ret;

	if (se->in_sync_count || se->out_sync_count) {
		DRM_DEBUG("Two multisync extensions were added to the same job.");
		return -EINVAL;
	}

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* Get data for the indirect CSD job submission. */
static int
v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
				struct drm_v3d_extension __user *ext,
				struct v3d_cpu_job *job)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_indirect_csd indirect_csd;
	struct v3d_indirect_csd_info *info = &job->indirect_csd;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
		return -EFAULT;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
		return -EINVAL;
	}

	job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
	info->offset = indirect_csd.offset;
	info->wg_size = indirect_csd.wg_size;
	memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
	       sizeof(indirect_csd.wg_uniform_offsets));

	info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);

	return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
					  &info->job, &info->clean_job,
					  NULL, &info->acquire_ctx);
}
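
/*
 * Hedged summary of the consumer side (the CPU job handler in
 * v3d_sched.c): at run time the CPU job is expected to read the three
 * workgroup counts from info->indirect at info->offset and patch them
 * into the wrapped CSD job's configuration (and, via
 * wg_uniform_offsets, into its uniform stream) before the CSD and
 * clean jobs pushed in v3d_submit_cpu_ioctl() execute.
 */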

/* Get data for the query timestamp job submission. */
static int
v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_timestamp_query timestamp;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&timestamp, ext, sizeof(timestamp)))
		return -EFAULT;

	if (timestamp.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(timestamp.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(timestamp.offsets);
	syncs = u64_to_user_ptr(timestamp.syncs);

	for (int i = 0; i < timestamp.count; i++) {
		u32 offset, sync;

		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].offset = offset;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
	}
	job->timestamp_query.count = timestamp.count;

	return 0;
}

static int
v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	struct drm_v3d_reset_timestamp_query reset;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(reset.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);

	for (int i = 0; i < reset.count; i++) {
		u32 sync;

		job->timestamp_query.queries[i].offset = reset.offset + 8 * i;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
	}
	job->timestamp_query.count = reset.count;

	return 0;
}
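
/*
 * Note the layout assumption baked into the reset path above: each
 * timestamp query result occupies 8 bytes (a u64) in the timestamp BO,
 * so query i lives at reset.offset + 8 * i.
 */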

/* Get data for the copy timestamp query results job submission. */
static int
v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
				      struct drm_v3d_extension __user *ext,
				      struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_copy_timestamp_query copy;
	int i;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(copy.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(copy.offsets);
	syncs = u64_to_user_ptr(copy.syncs);

	for (i = 0; i < copy.count; i++) {
		u32 offset, sync;

		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].offset = offset;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->timestamp_query.queries);
			return -EFAULT;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
	}
	job->timestamp_query.count = copy.count;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}

static int
v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
				     struct drm_v3d_extension __user *ext,
				     struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_reset_performance_query reset;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(reset.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);
	kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids);

	for (int i = 0; i < reset.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (int j = 0; j < reset.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				kvfree(job->performance_query.queries);
				return -EFAULT;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}
	}
	job->performance_query.count = reset.count;
	job->performance_query.nperfmons = reset.nperfmons;

	return 0;
}
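
/*
 * Userspace layout sketch for the double indirection parsed above:
 * kperfmon_ids is an array of u64 user pointers, one per query, each
 * pointing at an array of nperfmons u32 kperfmon IDs (all names below
 * are illustrative):
 *
 *	u32 q0_ids[NPERFMONS], q1_ids[NPERFMONS];
 *	u64 kperfmon_ids[] = { (uintptr_t)q0_ids, (uintptr_t)q1_ids };
 *
 *	reset.count = 2;
 *	reset.nperfmons = NPERFMONS;
 *	reset.kperfmon_ids = (uintptr_t)kperfmon_ids;
 */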

static int
v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
					  struct drm_v3d_extension __user *ext,
					  struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_copy_performance_query copy;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(copy.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(copy.syncs);
	kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);

	for (int i = 0; i < copy.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			kvfree(job->performance_query.queries);
			return -EFAULT;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (int j = 0; j < copy.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				kvfree(job->performance_query.queries);
				return -EFAULT;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}
	}
	job->performance_query.count = copy.count;
	job->performance_query.nperfmons = copy.nperfmons;
	job->performance_query.ncounters = copy.ncounters;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;
}
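
/*
 * The do_64bit/do_partial/availability_bit/stride fields mirror the
 * semantics of Vulkan's vkGetQueryPoolResults() result flags as used
 * by the Mesa v3dv driver; that mapping is an assumption about the
 * primary userspace, not something enforced here.
 */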

/* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data
 * according to the extension id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   struct v3d_submit_ext *se,
		   struct v3d_cpu_job *job)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
			break;
		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		if (ret)
			return ret;

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}
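
/*
 * Illustrative userspace sketch of the extension chain walked above
 * (struct layouts assumed per the v3d uAPI header; a zero ->next
 * terminates the chain):
 *
 *	struct drm_v3d_multi_sync ms = {
 *		.base.id = DRM_V3D_EXT_ID_MULTI_SYNC,
 *		.base.next = 0,		// end of chain
 *		// sync arrays as in the sketch after v3d_job_init()
 *	};
 *
 *	args.flags |= DRM_V3D_SUBMIT_EXTENSION;
 *	args.extensions = (uintptr_t)&ms;
 */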

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU. Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&render, sizeof(*render));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &render->base,
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret)
		goto fail;

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_allocate((void *)&bin, sizeof(*bin));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, &bin->base,
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret)
			goto fail;

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, clean_job,
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret)
			goto fail;

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
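
/*
 * Minimal userspace sketch of a CL submit (field names from
 * struct drm_v3d_submit_cl in the v3d uAPI; all values and error
 * handling are the caller's, and are illustrative here):
 *
 *	struct drm_v3d_submit_cl submit = {
 *		.bcl_start = bcl_start, .bcl_end = bcl_end,
 *		.rcl_start = rcl_start, .rcl_end = rcl_end,
 *		.qma = tile_alloc_addr, .qms = tile_alloc_size,
 *		.qts = tile_state_addr,
 *		.bo_handles = (uintptr_t)handles,
 *		.bo_handle_count = handle_count,
 *		.out_sync = out_syncobj,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
 */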

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&job, sizeof(*job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &job->base,
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret)
		goto fail;

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
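
/*
 * Illustrative userspace sketch of the BO list convention above:
 * unused slots in bo_handles[] stay zero, which terminates the lookup
 * loop (src_bo/dst_bo are the caller's handles):
 *
 *	struct drm_v3d_submit_tfu tfu = {
 *		// register setup (iia, ...) omitted
 *	};
 *	tfu.bo_handles[0] = src_bo;
 *	tfu.bo_handles[1] = dst_bo;	// remaining slots stay 0
 *
 *	drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
 */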

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
					 &job, &clean_job, &se,
					 &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};
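
/*
 * Rationale for the table above (hedged; see the per-type parsers
 * earlier in this file): indirect CSD takes the BO holding the
 * dispatch arguments, timestamp-query jobs take the timestamp BO, the
 * copy variants take a destination BO (plus the timestamp BO for
 * timestamp copies), and resetting performance queries touches only
 * perfmons, so it needs no BO at all.
 */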

/**
 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace specifies the CPU job type and data required to perform its
 * operations through the drm_v3d_extension struct.
 */
int
v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_cpu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_submit_ext *out_se = NULL;
	struct v3d_cpu_job *cpu_job = NULL;
	struct v3d_csd_job *csd_job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("Invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job));
	if (ret)
		return ret;

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			goto fail;
		}
	}

	/* Every CPU job must have a CPU job user extension */
	if (!cpu_job->job_type) {
		DRM_DEBUG("CPU job must have a CPU job user extension.\n");
		ret = -EINVAL;
		goto fail;
	}

	if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) {
		DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n");
		ret = -EINVAL;
		goto fail;
	}

	trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);

	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
			   v3d_job_free, 0, &se, V3D_CPU);
	if (ret)
		goto fail;

	clean_job = cpu_job->indirect_csd.clean_job;
	csd_job = cpu_job->indirect_csd.job;

	if (args->bo_handle_count) {
		ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
				     args->bo_handles, args->bo_handle_count);
		if (ret)
			goto fail;

		ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx);
		if (ret)
			goto fail;
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&cpu_job->base);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		ret = drm_sched_job_add_dependency(&csd_job->base.base,
						   dma_fence_get(cpu_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(&csd_job->base);

		ret = drm_sched_job_add_dependency(&clean_job->base,
						   dma_fence_get(csd_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(clean_job);

		break;
	default:
		break;
	}
	mutex_unlock(&v3d->sched_lock);

	out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ?
		 NULL : &se;

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &cpu_job->base,
						 &acquire_ctx, 0,
						 out_se, cpu_job->base.done_fence);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		v3d_attach_fences_and_unlock_reservation(file_priv,
							 clean_job,
							 &cpu_job->indirect_csd.acquire_ctx,
							 0, &se, clean_job->done_fence);
		break;
	default:
		break;
	}

	v3d_job_put(&cpu_job->base);
	v3d_job_put(&csd_job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);

	drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count,
				    &acquire_ctx);

	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &cpu_job->indirect_csd.acquire_ctx);

fail:
	/* Free the query arrays before v3d_job_cleanup() can drop the
	 * last reference to cpu_job, which owns them.
	 */
	kvfree(cpu_job->timestamp_query.queries);
	kvfree(cpu_job->performance_query.queries);

	v3d_job_cleanup((void *)cpu_job);
	v3d_job_cleanup((void *)csd_job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
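
/*
 * Minimal userspace sketch of a CPU job submission (a timestamp-query
 * job; names are illustrative and struct layouts are assumed per the
 * v3d uAPI header):
 *
 *	struct drm_v3d_timestamp_query query = {
 *		.base.id = DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY,
 *		.offsets = (uintptr_t)offsets,	// u32 offsets into the BO
 *		.syncs = (uintptr_t)syncobjs,	// u32 syncobj handles
 *		.count = query_count,
 *	};
 *	struct drm_v3d_submit_cpu submit = {
 *		.bo_handles = (uintptr_t)&timestamp_bo,
 *		.bo_handle_count = 1,
 *		.flags = DRM_V3D_SUBMIT_EXTENSION,
 *		.extensions = (uintptr_t)&query,
 *	};
 *
 *	drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_CPU, &submit);
 */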