// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2014-2018 Broadcom
 * Copyright (C) 2023 Raspberry Pi
 */

#include <drm/drm_syncobj.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

/* Takes the reservation lock on all the BOs being referenced, so that
 * at queue submit time we can update the reservations.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on the job's unref_list). They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}
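
/*
 * The reservation lock taken above is paired with
 * v3d_attach_fences_and_unlock_reservation() in the submit paths below.
 * Roughly, every submit path follows the same sequence (an illustrative
 * sketch, not a callable snippet):
 *
 *	v3d_lookup_bos()                resolve GEM handles into job->bo[]
 *	v3d_lock_bo_reservations()      lock BOs, reserve fence slots, add
 *	                                implicit dependencies
 *	v3d_push_job()                  hand the job to the DRM scheduler
 *	v3d_attach_fences_and_unlock_reservation()
 *	                                publish the done fence, unlock BOs
 */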

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: User pointer to an array of GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list. This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at v3d_job_free() time.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* A job submitted with no BOs has nothing to render
		 * into, so reject it here.
		 */
		DRM_DEBUG("Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}
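
/*
 * On the userspace side, @bo_handles is just an array of GEM handles cast
 * to a u64 pointer. A minimal, hypothetical sketch (the handle names are
 * made up; any handles returned by a GEM-create ioctl on this fd would do):
 *
 *	__u32 handles[] = { tile_state_handle, framebuffer_handle };
 *
 *	args.bo_handles = (uintptr_t)handles;
 *	args.bo_handle_count = 2;
 */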

static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	kfree(job);
}

static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

void v3d_job_put(struct v3d_job *job)
{
	if (!job)
		return;

	kref_put(&job->refcount, job->free);
}

static int
v3d_job_allocate(void **container, size_t size)
{
	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		DRM_ERROR("Cannot allocate memory for V3D job.\n");
		return -ENOMEM;
	}

	return 0;
}

static void
v3d_job_deallocate(void **container)
{
	kfree(*container);
	*container = NULL;
}
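
/*
 * Lifetime contract used by all the submit paths below: if v3d_job_init()
 * fails, the job's refcount and free callback have not been set up yet, so
 * the caller releases the memory with v3d_job_deallocate(). Once
 * v3d_job_init() has succeeded, the job may only be released through
 * v3d_job_put()/v3d_job_cleanup(), which go through the kref.
 */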

static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     struct v3d_job *job, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	job->v3d = v3d;
	job->free = free;
	job->file = file_priv;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 1, v3d_priv);
	if (ret)
		return ret;

	if (has_multisync) {
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					DRM_DEBUG("Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
	return ret;
}

static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}
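
/*
 * Note the ordering above: the job is armed, the done fence and the
 * scheduler's reference are taken, and only then is the job pushed. Once
 * drm_sched_entity_push_job() is called the scheduler may run and complete
 * the job at any point, so callers must already hold their own reference
 * for anything they do with the job afterwards.
 */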

static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job, for the case where
	 * the submission only carries a single signal semaphore.
	 */
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* If the multiple semaphores extension is used, signal all of the
	 * job's out sync objects.
	 */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}
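
/*
 * After the fences are attached, userspace can wait for completion through
 * the out sync object it passed in. A hypothetical libdrm-style sketch of
 * the consumer side (flag and timeout choices are up to the caller):
 *
 *	drmSyncobjWait(fd, &out_sync_handle, 1, INT64_MAX,
 *		       DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
 */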

static int
v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
			   struct v3d_dev *v3d,
			   struct drm_v3d_submit_csd *args,
			   struct v3d_csd_job **job,
			   struct v3d_job **clean_job,
			   struct v3d_submit_ext *se,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int ret;

	ret = v3d_job_allocate((void *)job, sizeof(**job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
			   v3d_job_free, args->in_sync, se, V3D_CSD);
	if (ret) {
		v3d_job_deallocate((void *)job);
		return ret;
	}

	ret = v3d_job_allocate((void *)clean_job, sizeof(**clean_job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, *clean_job,
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret) {
		v3d_job_deallocate((void *)clean_job);
		return ret;
	}

	(*job)->args = *args;

	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		return ret;

	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
}

static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_array(count,
				       sizeof(struct v3d_submit_outsync),
				       GFP_KERNEL);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			DRM_DEBUG("Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Get data for multiple binary semaphore synchronization. Parse the
 * syncobjs to be signaled when the job completes (out_syncs).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      struct v3d_submit_ext *se)
{
	struct drm_v3d_multi_sync multisync;
	int ret;

	if (se->in_sync_count || se->out_sync_count) {
		DRM_DEBUG("Two multisync extensions were added to the same job.");
		return -EINVAL;
	}

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}
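
/*
 * A hypothetical userspace sketch of attaching the multisync extension to
 * a submit; the field names follow the drm_v3d uapi header, and in_syncs/
 * out_syncs point to arrays of struct drm_v3d_sem:
 *
 *	struct drm_v3d_multi_sync ms = {
 *		.base.id = DRM_V3D_EXT_ID_MULTI_SYNC,
 *		.in_syncs = (uintptr_t)in_syncs,
 *		.out_syncs = (uintptr_t)out_syncs,
 *		.in_sync_count = n_in,
 *		.out_sync_count = n_out,
 *		.wait_stage = V3D_RENDER,
 *	};
 *
 *	args.flags |= DRM_V3D_SUBMIT_EXTENSION;
 *	args.extensions = (uintptr_t)&ms;
 */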

/* Get data for the indirect CSD job submission. */
static int
v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv,
				struct drm_v3d_extension __user *ext,
				struct v3d_cpu_job *job)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_indirect_csd indirect_csd;
	struct v3d_indirect_csd_info *info;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd)))
		return -EFAULT;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware.\n");
		return -EINVAL;
	}

	info = &job->indirect_csd;

	job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD;
	info->offset = indirect_csd.offset;
	info->wg_size = indirect_csd.wg_size;
	memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets,
	       sizeof(indirect_csd.wg_uniform_offsets));

	info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect);
	if (!info->indirect) {
		DRM_DEBUG("Failed to look up the indirect dispatch BO.\n");
		return -ENOENT;
	}

	return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit,
					  &info->job, &info->clean_job,
					  NULL, &info->acquire_ctx);
}

/* Get data for the query timestamp job submission. */
static int
v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_timestamp_query timestamp;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&timestamp, ext, sizeof(timestamp)))
		return -EFAULT;

	if (timestamp.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(timestamp.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(timestamp.offsets);
	syncs = u64_to_user_ptr(timestamp.syncs);

	for (i = 0; i < timestamp.count; i++) {
		u32 offset, sync;

		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
			err = -EFAULT;
			goto error;
		}

		job->timestamp_query.queries[i].offset = offset;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			err = -EFAULT;
			goto error;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
		if (!job->timestamp_query.queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	job->timestamp_query.count = timestamp.count;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

static int
v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv,
				   struct drm_v3d_extension __user *ext,
				   struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	struct drm_v3d_reset_timestamp_query reset;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(reset.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);

	for (i = 0; i < reset.count; i++) {
		u32 sync;

		job->timestamp_query.queries[i].offset = reset.offset + 8 * i;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			err = -EFAULT;
			goto error;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
		if (!job->timestamp_query.queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	job->timestamp_query.count = reset.count;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}
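
/*
 * The "reset.offset + 8 * i" stride above assumes each timestamp query
 * slot in the BO holds a 64-bit timestamp value, so consecutive queries
 * live 8 bytes apart starting at reset.offset.
 */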

/* Get data for the copy timestamp query results job submission. */
static int
v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv,
				      struct drm_v3d_extension __user *ext,
				      struct v3d_cpu_job *job)
{
	u32 __user *offsets, *syncs;
	struct drm_v3d_copy_timestamp_query copy;
	unsigned int i;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY;

	job->timestamp_query.queries = kvmalloc_array(copy.count,
						      sizeof(struct v3d_timestamp_query),
						      GFP_KERNEL);
	if (!job->timestamp_query.queries)
		return -ENOMEM;

	offsets = u64_to_user_ptr(copy.offsets);
	syncs = u64_to_user_ptr(copy.syncs);

	for (i = 0; i < copy.count; i++) {
		u32 offset, sync;

		if (copy_from_user(&offset, offsets++, sizeof(offset))) {
			err = -EFAULT;
			goto error;
		}

		job->timestamp_query.queries[i].offset = offset;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			err = -EFAULT;
			goto error;
		}

		job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
		if (!job->timestamp_query.queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	job->timestamp_query.count = copy.count;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;

error:
	v3d_timestamp_query_info_free(&job->timestamp_query, i);
	return err;
}

static int
v3d_get_cpu_reset_performance_params(struct drm_file *file_priv,
				     struct drm_v3d_extension __user *ext,
				     struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_reset_performance_query reset;
	unsigned int i, j;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&reset, ext, sizeof(reset)))
		return -EFAULT;

	if (reset.nperfmons > V3D_MAX_PERFMONS)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(reset.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(reset.syncs);
	kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids);

	for (i = 0; i < reset.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			err = -EFAULT;
			goto error;
		}

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			err = -EFAULT;
			goto error;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (j = 0; j < reset.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				err = -EFAULT;
				goto error;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}

		job->performance_query.queries[i].syncobj =
			drm_syncobj_find(file_priv, sync);
		if (!job->performance_query.queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	job->performance_query.count = reset.count;
	job->performance_query.nperfmons = reset.nperfmons;

	return 0;

error:
	v3d_performance_query_info_free(&job->performance_query, i);
	return err;
}

static int
v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv,
					  struct drm_v3d_extension __user *ext,
					  struct v3d_cpu_job *job)
{
	u32 __user *syncs;
	u64 __user *kperfmon_ids;
	struct drm_v3d_copy_performance_query copy;
	unsigned int i, j;
	int err;

	if (!job) {
		DRM_DEBUG("CPU job extension was attached to a GPU job.\n");
		return -EINVAL;
	}

	if (job->job_type) {
		DRM_DEBUG("Two CPU job extensions were added to the same CPU job.\n");
		return -EINVAL;
	}

	if (copy_from_user(&copy, ext, sizeof(copy)))
		return -EFAULT;

	if (copy.pad)
		return -EINVAL;

	if (copy.nperfmons > V3D_MAX_PERFMONS)
		return -EINVAL;

	job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY;

	job->performance_query.queries = kvmalloc_array(copy.count,
							sizeof(struct v3d_performance_query),
							GFP_KERNEL);
	if (!job->performance_query.queries)
		return -ENOMEM;

	syncs = u64_to_user_ptr(copy.syncs);
	kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids);

	for (i = 0; i < copy.count; i++) {
		u32 sync;
		u64 ids;
		u32 __user *ids_pointer;
		u32 id;

		if (copy_from_user(&sync, syncs++, sizeof(sync))) {
			err = -EFAULT;
			goto error;
		}

		if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) {
			err = -EFAULT;
			goto error;
		}

		ids_pointer = u64_to_user_ptr(ids);

		for (j = 0; j < copy.nperfmons; j++) {
			if (copy_from_user(&id, ids_pointer++, sizeof(id))) {
				err = -EFAULT;
				goto error;
			}

			job->performance_query.queries[i].kperfmon_ids[j] = id;
		}

		job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync);
		if (!job->performance_query.queries[i].syncobj) {
			err = -ENOENT;
			goto error;
		}
	}
	job->performance_query.count = copy.count;
	job->performance_query.nperfmons = copy.nperfmons;
	job->performance_query.ncounters = copy.ncounters;

	job->copy.do_64bit = copy.do_64bit;
	job->copy.do_partial = copy.do_partial;
	job->copy.availability_bit = copy.availability_bit;
	job->copy.offset = copy.offset;
	job->copy.stride = copy.stride;

	return 0;

error:
	v3d_performance_query_info_free(&job->performance_query, i);
	return err;
}

/* Whenever userspace sets ioctl extensions, v3d_get_extensions() parses the
 * extension data according to the extension id (name).
 */
static int
v3d_get_extensions(struct drm_file *file_priv,
		   u64 ext_handles,
		   struct v3d_submit_ext *se,
		   struct v3d_cpu_job *job)
{
	struct drm_v3d_extension __user *user_ext;
	int ret;

	user_ext = u64_to_user_ptr(ext_handles);
	while (user_ext) {
		struct drm_v3d_extension ext;

		if (copy_from_user(&ext, user_ext, sizeof(ext))) {
			DRM_DEBUG("Failed to copy submit extension\n");
			return -EFAULT;
		}

		switch (ext.id) {
		case DRM_V3D_EXT_ID_MULTI_SYNC:
			ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se);
			break;
		case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD:
			ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY:
			ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job);
			break;
		case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY:
			ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job);
			break;
		default:
			DRM_DEBUG_DRIVER("Unknown extension id: %d\n", ext.id);
			return -EINVAL;
		}

		if (ret)
			return ret;

		user_ext = u64_to_user_ptr(ext.next);
	}

	return 0;
}
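
/*
 * Extensions form a singly-linked chain in userspace memory: each
 * drm_v3d_extension.next holds a user pointer to the next extension, and a
 * zero next terminates the walk above. A hypothetical two-extension chain
 * (remaining fields elided):
 *
 *	struct drm_v3d_multi_sync ms = { .base.id = DRM_V3D_EXT_ID_MULTI_SYNC };
 *	struct drm_v3d_indirect_csd icsd = {
 *		.base.id = DRM_V3D_EXT_ID_CPU_INDIRECT_CSD,
 *		.base.next = (uintptr_t)&ms,
 *	};
 *
 *	args.extensions = (uintptr_t)&icsd;
 */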

/**
 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU. Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&render, sizeof(*render));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &render->base,
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret) {
		v3d_job_deallocate((void *)&render);
		goto fail;
	}

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_allocate((void *)&bin, sizeof(*bin));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, &bin->base,
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret) {
			v3d_job_deallocate((void *)&bin);
			goto fail;
		}

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_allocate((void *)&clean_job, sizeof(*clean_job));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, clean_job,
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret) {
			v3d_job_deallocate((void *)&clean_job);
			goto fail;
		}

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
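
/*
 * To summarize the fence chain built above: the render job waits on the
 * bin job's done fence, and the optional cache-clean job waits on the
 * render job's done fence, so the jobs execute strictly in
 * bin -> render -> clean order even though they sit on different
 * scheduler queues.
 */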

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_DEBUG("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate((void *)&job, sizeof(*job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &job->base,
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret) {
		v3d_job_deallocate((void *)&job);
		goto fail;
	}

	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
			       sizeof(*job->base.bo), GFP_KERNEL);
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
				  job->base.bo_count,
				  args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
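
/*
 * Unlike the CL and CSD paths, the TFU ioctl carries a small fixed-size
 * array of BO handles directly in the args struct. The lookup loop above
 * stops at the first zero handle, so userspace zero-terminates the array
 * when it needs fewer than ARRAY_SIZE(args->bo_handles) BOs.
 */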

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
					 &job, &clean_job, &se,
					 &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
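
/*
 * Each CPU job type must be submitted with an exact number of BOs; the
 * table below encodes that requirement, and v3d_submit_cpu_ioctl()
 * rejects any submission that doesn't match it.
 */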

static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};

/**
 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace specifies the CPU job type and the data required to perform its
 * operations through the drm_v3d_extension struct.
 */
int
v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_cpu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_submit_ext *out_se = NULL;
	struct v3d_cpu_job *cpu_job = NULL;
	struct v3d_csd_job *csd_job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		DRM_INFO("Invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	ret = v3d_job_allocate((void *)&cpu_job, sizeof(*cpu_job));
	if (ret)
		return ret;

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job);
		if (ret) {
			DRM_DEBUG("Failed to get extensions.\n");
			goto fail;
		}
	}

	/* Every CPU job must have a CPU job user extension */
	if (!cpu_job->job_type) {
		DRM_DEBUG("CPU job must have a CPU job user extension.\n");
		ret = -EINVAL;
		goto fail;
	}

	if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) {
		DRM_DEBUG("This CPU job was not submitted with the proper number of BOs.\n");
		ret = -EINVAL;
		goto fail;
	}

	trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);

	ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
			   v3d_job_free, 0, &se, V3D_CPU);
	if (ret) {
		v3d_job_deallocate((void *)&cpu_job);
		goto fail;
	}

	clean_job = cpu_job->indirect_csd.clean_job;
	csd_job = cpu_job->indirect_csd.job;

	if (args->bo_handle_count) {
		ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base,
				     args->bo_handles, args->bo_handle_count);
		if (ret)
			goto fail;

		ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx);
		if (ret)
			goto fail;
	}

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&cpu_job->base);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		ret = drm_sched_job_add_dependency(&csd_job->base.base,
						   dma_fence_get(cpu_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(&csd_job->base);

		ret = drm_sched_job_add_dependency(&clean_job->base,
						   dma_fence_get(csd_job->base.done_fence));
		if (ret)
			goto fail_unreserve;

		v3d_push_job(clean_job);

		break;
	default:
		break;
	}
	mutex_unlock(&v3d->sched_lock);

	out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ?
		 NULL : &se;

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &cpu_job->base,
						 &acquire_ctx, 0,
						 out_se, cpu_job->base.done_fence);

	switch (cpu_job->job_type) {
	case V3D_CPU_JOB_TYPE_INDIRECT_CSD:
		v3d_attach_fences_and_unlock_reservation(file_priv,
							 clean_job,
							 &cpu_job->indirect_csd.acquire_ctx,
							 0, &se, clean_job->done_fence);
		break;
	default:
		break;
	}

	v3d_job_put(&cpu_job->base);
	v3d_job_put(&csd_job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);

	drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count,
				    &acquire_ctx);

	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &cpu_job->indirect_csd.acquire_ctx);

fail:
	/* Free the query arrays before dropping the job reference: the job
	 * may be released (and cpu_job may be NULL after a failed
	 * v3d_job_init()), so they must not be reached through cpu_job
	 * afterwards.
	 */
	if (cpu_job) {
		kvfree(cpu_job->timestamp_query.queries);
		kvfree(cpu_job->performance_query.queries);
	}
	v3d_job_cleanup((void *)cpu_job);
	v3d_job_cleanup((void *)csd_job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}
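
/*
 * For V3D_CPU_JOB_TYPE_INDIRECT_CSD, the ioctl above ends up queueing
 * three chained jobs: the CPU job (which reads the dispatch parameters
 * from the indirect BO; the actual patching lives in the scheduler code,
 * not in this file), the CSD job that consumes them, and a final
 * cache-clean job, each waiting on the previous job's done fence.
 */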