// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2014-2018 Broadcom
 * Copyright (C) 2023 Raspberry Pi
 */

#include <drm/drm_print.h>
#include <drm/drm_syncobj.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

/* Takes the reservation lock on all the BOs being referenced, so that
 * we can attach fences and update the reservations after pushing the job
 * to the queue.
 *
 * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
 * (all of which are on render->unref_list). They're entirely private
 * to v3d, so we don't attach dma-buf fences to them.
 */
static int
v3d_lock_bo_reservations(struct v3d_job *job,
			 struct ww_acquire_ctx *acquire_ctx)
{
	int i, ret;

	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
	if (ret)
		return ret;

	for (i = 0; i < job->bo_count; i++) {
		/* Pre-reserve a fence slot so attaching the done fence
		 * after the job is pushed cannot fail.
		 */
		ret = dma_resv_reserve_fences(job->bo[i]->resv, 1);
		if (ret)
			goto fail;

		ret = drm_sched_job_add_implicit_dependencies(&job->base,
							      job->bo[i], true);
		if (ret)
			goto fail;
	}

	return 0;

fail:
	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
	return ret;
}

/**
 * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
 * referenced by the job.
 * @dev: DRM device
 * @file_priv: DRM file for this fd
 * @job: V3D job being set up
 * @bo_handles: GEM handles
 * @bo_count: Number of GEM handles passed in
 *
 * The command validator needs to reference BOs by their index within
 * the submitted job's BO list. This does the validation of the job's
 * BO list and reference counting for the lifetime of the job.
 *
 * Note that this function doesn't need to unreference the BOs on
 * failure, because that will happen at `v3d_job_free()`.
 */
static int
v3d_lookup_bos(struct drm_device *dev,
	       struct drm_file *file_priv,
	       struct v3d_job *job,
	       u64 bo_handles,
	       u32 bo_count)
{
	job->bo_count = bo_count;

	if (!job->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		drm_warn(dev, "Rendering requires BOs\n");
		return -EINVAL;
	}

	return drm_gem_objects_lookup(file_priv,
				      (void __user *)(uintptr_t)bo_handles,
				      job->bo_count, &job->bo);
}

/* kref release callback for the base job: drops every BO reference
 * held by the job, the fences, the perfmon and the stats references,
 * then frees the job itself.
 */
static void
v3d_job_free(struct kref *ref)
{
	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
	int i;

	if (job->bo) {
		for (i = 0; i < job->bo_count; i++)
			drm_gem_object_put(job->bo[i]);
		kvfree(job->bo);
	}

	dma_fence_put(job->irq_fence);
	dma_fence_put(job->done_fence);

	if (job->perfmon)
		v3d_perfmon_put(job->perfmon);

	v3d_stats_put(job->client_stats);
	v3d_stats_put(job->global_stats);

	kfree(job);
}

/* kref release callback for render jobs: additionally releases the
 * v3d-private BOs (tile alloc/state, overflow) on the unref list
 * before the common teardown.
 */
static void
v3d_render_job_free(struct kref *ref)
{
	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
						  base.refcount);
	struct v3d_bo *bo, *save;

	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
		drm_gem_object_put(&bo->base.base);
	}

	v3d_job_free(ref);
}

/* Tears down the scheduler side of a job that was initialized with
 * v3d_job_init() but failed before (or instead of) being pushed, then
 * drops the submitter's reference. NULL is a no-op.
 */
void v3d_job_cleanup(struct v3d_job *job)
{
	if (!job)
		return;

	drm_sched_job_cleanup(&job->base);
	v3d_job_put(job);
}

/* Drops one reference to the job; the job->free callback runs when the
 * last reference goes away. NULL is a no-op.
 */
void v3d_job_put(struct v3d_job *job)
{
	if (!job)
		return;

	kref_put(&job->refcount, job->free);
}

/* Zero-allocates a job container of the given size, logging on failure.
 * Returns 0 or -ENOMEM.
 */
static int
v3d_job_allocate(struct v3d_dev *v3d, void **container, size_t size)
{
	*container = kcalloc(1, size, GFP_KERNEL);
	if (!*container) {
		drm_err(&v3d->drm, "Cannot allocate memory for V3D job.\n");
		return -ENOMEM;
	}

	return 0;
}

/* Frees a job container allocated by v3d_job_allocate() and clears the
 * caller's pointer so later cleanup paths see NULL.
 */
static void
v3d_job_deallocate(void **container)
{
	kfree(*container);
	*container = NULL;
}

/* Common job setup: binds the job to the device and file, initializes
 * the scheduler job on the per-file entity for @queue, and records the
 * wait dependencies (either the multisync extension's in_syncs for this
 * queue, or the single legacy @in_sync syncobj).
 *
 * On success the job holds one reference (the submitter's). On failure
 * the scheduler job is cleaned up and an errno is returned; the caller
 * still owns the container memory.
 */
static int
v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
	     struct v3d_job *job, void (*free)(struct kref *ref),
	     u32 in_sync, struct v3d_submit_ext *se, enum v3d_queue queue)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int ret, i;

	job->v3d = v3d;
	job->free = free;
	job->file_priv = v3d_priv;

	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
				 1, v3d_priv, file_priv->client_id);
	if (ret)
		return ret;

	if (has_multisync) {
		/* Only the waits targeted at this job's queue apply. */
		if (se->in_sync_count && se->wait_stage == queue) {
			struct drm_v3d_sem __user *handle = u64_to_user_ptr(se->in_syncs);

			for (i = 0; i < se->in_sync_count; i++) {
				struct drm_v3d_sem in;

				if (copy_from_user(&in, handle++, sizeof(in))) {
					ret = -EFAULT;
					drm_dbg(&v3d->drm, "Failed to copy wait dep handle.\n");
					goto fail_deps;
				}
				ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in.handle, 0);

				// TODO: Investigate why this was filtered out for the IOCTL.
				if (ret && ret != -ENOENT)
					goto fail_deps;
			}
		}
	} else {
		ret = drm_sched_job_add_syncobj_dependency(&job->base, file_priv, in_sync, 0);

		// TODO: Investigate why this was filtered out for the IOCTL.
		if (ret && ret != -ENOENT)
			goto fail_deps;
	}

	kref_init(&job->refcount);

	job->client_stats = v3d_stats_get(v3d_priv->stats[queue]);
	job->global_stats = v3d_stats_get(v3d->queue[queue].stats);

	return 0;

fail_deps:
	drm_sched_job_cleanup(&job->base);
	return ret;
}

/* Arms the scheduler job, caches its finished fence as done_fence, and
 * queues it. Every caller in this file holds v3d->sched_lock around
 * this so related jobs are pushed back-to-back.
 */
static void
v3d_push_job(struct v3d_job *job)
{
	drm_sched_job_arm(&job->base);

	job->done_fence = dma_fence_get(&job->base.s_fence->finished);

	/* put by scheduler job completion */
	kref_get(&job->refcount);

	drm_sched_entity_push_job(&job->base);
}

/* After the job has been pushed: attaches done_fence to every BO,
 * drops the reservation locks, and signals the userspace-visible
 * out-syncobjs (single legacy out_sync, or the multisync extension's
 * list, whose syncobj references and array are consumed here).
 */
static void
v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
					 struct v3d_job *job,
					 struct ww_acquire_ctx *acquire_ctx,
					 u32 out_sync,
					 struct v3d_submit_ext *se,
					 struct dma_fence *done_fence)
{
	struct drm_syncobj *sync_out;
	bool has_multisync = se && (se->flags & DRM_V3D_EXT_ID_MULTI_SYNC);
	int i;

	for (i = 0; i < job->bo_count; i++) {
		/* XXX: Use shared fences for read-only objects. */
		dma_resv_add_fence(job->bo[i]->resv, job->done_fence,
				   DMA_RESV_USAGE_WRITE);
	}

	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);

	/* Update the return sync object for the job */
	/* If it only supports a single signal semaphore*/
	if (!has_multisync) {
		sync_out = drm_syncobj_find(file_priv, out_sync);
		if (sync_out) {
			drm_syncobj_replace_fence(sync_out, done_fence);
			drm_syncobj_put(sync_out);
		}
		return;
	}

	/* If multiple semaphores extension is supported */
	if (se->out_sync_count) {
		for (i = 0; i < se->out_sync_count; i++) {
			drm_syncobj_replace_fence(se->out_syncs[i].syncobj,
						  done_fence);
			drm_syncobj_put(se->out_syncs[i].syncobj);
		}
		kvfree(se->out_syncs);
	}
}

/* Allocates and initializes the CSD job plus its trailing cache-clean
 * job, copies @args into the CSD job, looks up the BO list on the
 * clean job (the last job in the chain) and takes the reservation
 * locks. On error, jobs already initialized are left for the caller's
 * fail path (v3d_job_cleanup()) to release.
 */
static int
v3d_setup_csd_jobs_and_bos(struct drm_file *file_priv,
			   struct v3d_dev *v3d,
			   struct drm_v3d_submit_csd *args,
			   struct v3d_csd_job **job,
			   struct v3d_job **clean_job,
			   struct v3d_submit_ext *se,
			   struct ww_acquire_ctx *acquire_ctx)
{
	int ret;

	ret = v3d_job_allocate(v3d, (void *)job, sizeof(**job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &(*job)->base,
			   v3d_job_free, args->in_sync, se, V3D_CSD);
	if (ret) {
		v3d_job_deallocate((void *)job);
		return ret;
	}

	ret = v3d_job_allocate(v3d, (void *)clean_job, sizeof(**clean_job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, *clean_job,
			   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
	if (ret) {
		v3d_job_deallocate((void *)clean_job);
		return ret;
	}

	(*job)->args = *args;

	ret = v3d_lookup_bos(&v3d->drm, file_priv, *clean_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		return ret;

	return v3d_lock_bo_reservations(*clean_job, acquire_ctx);
}

/* Releases the out-syncobj references and array gathered by
 * v3d_get_multisync_post_deps(), for submit paths that failed before
 * v3d_attach_fences_and_unlock_reservation() could consume them.
 */
static void
v3d_put_multisync_post_deps(struct v3d_submit_ext *se)
{
	unsigned int i;

	if (!(se && se->out_sync_count))
		return;

	for (i = 0; i < se->out_sync_count; i++)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);
}

/* Copies the multisync extension's out-sync handle array from
 * userspace and resolves each handle to a syncobj reference, stored in
 * se->out_syncs. Unwinds all lookups on any failure.
 */
static int
v3d_get_multisync_post_deps(struct drm_file *file_priv,
			    struct v3d_submit_ext *se,
			    u32 count, u64 handles)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_sem __user *post_deps;
	int i, ret;

	if (!count)
		return 0;

	se->out_syncs = (struct v3d_submit_outsync *)
			kvmalloc_objs(struct v3d_submit_outsync, count);
	if (!se->out_syncs)
		return -ENOMEM;

	post_deps = u64_to_user_ptr(handles);

	for (i = 0; i < count; i++) {
		struct drm_v3d_sem out;

		if (copy_from_user(&out, post_deps++, sizeof(out))) {
			ret = -EFAULT;
			drm_dbg(&v3d->drm, "Failed to copy post dep handles\n");
			goto fail;
		}

		se->out_syncs[i].syncobj = drm_syncobj_find(file_priv,
							    out.handle);
		if (!se->out_syncs[i].syncobj) {
			ret = -EINVAL;
			goto fail;
		}
	}
	se->out_sync_count = count;

	return 0;

fail:
	for (i--; i >= 0; i--)
		drm_syncobj_put(se->out_syncs[i].syncobj);
	kvfree(se->out_syncs);

	return ret;
}

/* Get data for multiple binary semaphores synchronization. Parse syncobj
 * to be signaled when job completes (out_sync).
 */
static int
v3d_get_multisync_submit_deps(struct drm_file *file_priv,
			      struct drm_v3d_extension __user *ext,
			      struct v3d_submit_ext *se)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;
	struct drm_v3d_multi_sync multisync;
	int ret;

	/* A non-zero count means a multisync extension already filled
	 * in this submit_ext; only one is allowed per job.
	 */
	if (se->in_sync_count || se->out_sync_count) {
		drm_dbg(&v3d->drm, "Two multisync extensions were added to the same job.");
		return -EINVAL;
	}

	if (copy_from_user(&multisync, ext, sizeof(multisync)))
		return -EFAULT;

	if (multisync.pad)
		return -EINVAL;

	if (!multisync.in_sync_count && !multisync.out_sync_count) {
		drm_dbg(&v3d->drm, "Empty multisync extension\n");
		return -EINVAL;
	}

	/* Resolve the post-dep (signal) syncobjs now; the in-syncs are
	 * only recorded here and parsed later by v3d_job_init() for the
	 * queue they wait on.
	 */
	ret = v3d_get_multisync_post_deps(file_priv, se, multisync.out_sync_count,
					  multisync.out_syncs);
	if (ret)
		return ret;

	se->in_sync_count = multisync.in_sync_count;
	se->in_syncs = multisync.in_syncs;
	se->flags |= DRM_V3D_EXT_ID_MULTI_SYNC;
	se->wait_stage = multisync.wait_stage;

	return 0;
}

/* Returns false if the CPU job has an invalid configuration. */
static bool
v3d_validate_cpu_job(struct drm_file *file_priv, struct v3d_cpu_job *job)
{
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct v3d_dev *v3d = v3d_priv->v3d;

	/* CPU-job extensions are only meaningful when the ioctl passed
	 * a CPU job container (v3d_submit_cpu_ioctl does, others pass
	 * NULL).
	 */
	if (!job) {
		drm_dbg(&v3d->drm, "CPU job extension was attached to a GPU job.\n");
		return false;
	}

	/* job_type is still zero until the first CPU-job extension is
	 * parsed; a second one would overwrite the first.
	 */
	if (job->job_type) {
		drm_dbg(&v3d->drm, "Two CPU job extensions were added to the same CPU job.\n");
		return false;
	}

	return true;
}

/* Get data for the indirect CSD job submission. */
*/ 441 static int 442 v3d_get_cpu_indirect_csd_params(struct drm_file *file_priv, 443 struct drm_v3d_extension __user *ext, 444 struct v3d_cpu_job *job) 445 { 446 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 447 struct v3d_dev *v3d = v3d_priv->v3d; 448 struct drm_v3d_indirect_csd indirect_csd; 449 struct v3d_indirect_csd_info *info = &job->indirect_csd; 450 451 if (!v3d_validate_cpu_job(file_priv, job)) 452 return -EINVAL; 453 454 if (copy_from_user(&indirect_csd, ext, sizeof(indirect_csd))) 455 return -EFAULT; 456 457 if (!v3d_has_csd(v3d)) { 458 drm_warn(&v3d->drm, "Attempting CSD submit on non-CSD hardware.\n"); 459 return -EINVAL; 460 } 461 462 job->job_type = V3D_CPU_JOB_TYPE_INDIRECT_CSD; 463 info->offset = indirect_csd.offset; 464 info->wg_size = indirect_csd.wg_size; 465 memcpy(&info->wg_uniform_offsets, &indirect_csd.wg_uniform_offsets, 466 sizeof(indirect_csd.wg_uniform_offsets)); 467 468 info->indirect = drm_gem_object_lookup(file_priv, indirect_csd.indirect); 469 470 return v3d_setup_csd_jobs_and_bos(file_priv, v3d, &indirect_csd.submit, 471 &info->job, &info->clean_job, 472 NULL, &info->acquire_ctx); 473 } 474 475 /* Get data for the query timestamp job submission. 
*/ 476 static int 477 v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, 478 struct drm_v3d_extension __user *ext, 479 struct v3d_cpu_job *job) 480 { 481 u32 __user *offsets, *syncs; 482 struct drm_v3d_timestamp_query timestamp; 483 struct v3d_timestamp_query_info *query_info = &job->timestamp_query; 484 unsigned int i; 485 int err; 486 487 if (!v3d_validate_cpu_job(file_priv, job)) 488 return -EINVAL; 489 490 if (copy_from_user(×tamp, ext, sizeof(timestamp))) 491 return -EFAULT; 492 493 if (timestamp.pad) 494 return -EINVAL; 495 496 job->job_type = V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY; 497 498 query_info->queries = kvmalloc_objs(struct v3d_timestamp_query, 499 timestamp.count); 500 if (!query_info->queries) 501 return -ENOMEM; 502 503 offsets = u64_to_user_ptr(timestamp.offsets); 504 syncs = u64_to_user_ptr(timestamp.syncs); 505 506 for (i = 0; i < timestamp.count; i++) { 507 u32 offset, sync; 508 509 if (get_user(offset, offsets++)) { 510 err = -EFAULT; 511 goto error; 512 } 513 514 query_info->queries[i].offset = offset; 515 516 if (get_user(sync, syncs++)) { 517 err = -EFAULT; 518 goto error; 519 } 520 521 query_info->queries[i].syncobj = drm_syncobj_find(file_priv, 522 sync); 523 if (!query_info->queries[i].syncobj) { 524 err = -ENOENT; 525 goto error; 526 } 527 } 528 query_info->count = timestamp.count; 529 530 return 0; 531 532 error: 533 v3d_timestamp_query_info_free(&job->timestamp_query, i); 534 return err; 535 } 536 537 static int 538 v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, 539 struct drm_v3d_extension __user *ext, 540 struct v3d_cpu_job *job) 541 { 542 u32 __user *syncs; 543 struct drm_v3d_reset_timestamp_query reset; 544 struct v3d_timestamp_query_info *query_info = &job->timestamp_query; 545 unsigned int i; 546 int err; 547 548 if (!v3d_validate_cpu_job(file_priv, job)) 549 return -EINVAL; 550 551 if (copy_from_user(&reset, ext, sizeof(reset))) 552 return -EFAULT; 553 554 job->job_type = 
V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY; 555 556 query_info->queries = kvmalloc_objs(struct v3d_timestamp_query, 557 reset.count); 558 if (!query_info->queries) 559 return -ENOMEM; 560 561 syncs = u64_to_user_ptr(reset.syncs); 562 563 for (i = 0; i < reset.count; i++) { 564 u32 sync; 565 566 query_info->queries[i].offset = reset.offset + 8 * i; 567 568 if (get_user(sync, syncs++)) { 569 err = -EFAULT; 570 goto error; 571 } 572 573 query_info->queries[i].syncobj = drm_syncobj_find(file_priv, 574 sync); 575 if (!query_info->queries[i].syncobj) { 576 err = -ENOENT; 577 goto error; 578 } 579 } 580 query_info->count = reset.count; 581 582 return 0; 583 584 error: 585 v3d_timestamp_query_info_free(&job->timestamp_query, i); 586 return err; 587 } 588 589 /* Get data for the copy timestamp query results job submission. */ 590 static int 591 v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, 592 struct drm_v3d_extension __user *ext, 593 struct v3d_cpu_job *job) 594 { 595 u32 __user *offsets, *syncs; 596 struct drm_v3d_copy_timestamp_query copy; 597 struct v3d_timestamp_query_info *query_info = &job->timestamp_query; 598 unsigned int i; 599 int err; 600 601 if (!v3d_validate_cpu_job(file_priv, job)) 602 return -EINVAL; 603 604 if (copy_from_user(©, ext, sizeof(copy))) 605 return -EFAULT; 606 607 if (copy.pad) 608 return -EINVAL; 609 610 job->job_type = V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY; 611 612 query_info->queries = kvmalloc_objs(struct v3d_timestamp_query, 613 copy.count); 614 if (!query_info->queries) 615 return -ENOMEM; 616 617 offsets = u64_to_user_ptr(copy.offsets); 618 syncs = u64_to_user_ptr(copy.syncs); 619 620 for (i = 0; i < copy.count; i++) { 621 u32 offset, sync; 622 623 if (get_user(offset, offsets++)) { 624 err = -EFAULT; 625 goto error; 626 } 627 628 query_info->queries[i].offset = offset; 629 630 if (get_user(sync, syncs++)) { 631 err = -EFAULT; 632 goto error; 633 } 634 635 query_info->queries[i].syncobj = drm_syncobj_find(file_priv, 636 
sync); 637 if (!query_info->queries[i].syncobj) { 638 err = -ENOENT; 639 goto error; 640 } 641 } 642 query_info->count = copy.count; 643 644 job->copy.do_64bit = copy.do_64bit; 645 job->copy.do_partial = copy.do_partial; 646 job->copy.availability_bit = copy.availability_bit; 647 job->copy.offset = copy.offset; 648 job->copy.stride = copy.stride; 649 650 return 0; 651 652 error: 653 v3d_timestamp_query_info_free(&job->timestamp_query, i); 654 return err; 655 } 656 657 static int 658 v3d_copy_query_info(struct v3d_performance_query_info *query_info, 659 unsigned int count, 660 unsigned int nperfmons, 661 u32 __user *syncs, 662 u64 __user *kperfmon_ids, 663 struct drm_file *file_priv) 664 { 665 unsigned int i, j; 666 int err; 667 668 for (i = 0; i < count; i++) { 669 struct v3d_performance_query *query = &query_info->queries[i]; 670 u32 __user *ids_pointer; 671 u32 sync, id; 672 u64 ids; 673 674 if (get_user(sync, syncs++)) { 675 err = -EFAULT; 676 goto error; 677 } 678 679 if (get_user(ids, kperfmon_ids++)) { 680 err = -EFAULT; 681 goto error; 682 } 683 684 query->kperfmon_ids = 685 kvmalloc_array(nperfmons, 686 sizeof(struct v3d_performance_query *), 687 GFP_KERNEL); 688 if (!query->kperfmon_ids) { 689 err = -ENOMEM; 690 goto error; 691 } 692 693 ids_pointer = u64_to_user_ptr(ids); 694 695 for (j = 0; j < nperfmons; j++) { 696 if (get_user(id, ids_pointer++)) { 697 kvfree(query->kperfmon_ids); 698 err = -EFAULT; 699 goto error; 700 } 701 702 query->kperfmon_ids[j] = id; 703 } 704 705 query->syncobj = drm_syncobj_find(file_priv, sync); 706 if (!query->syncobj) { 707 kvfree(query->kperfmon_ids); 708 err = -ENOENT; 709 goto error; 710 } 711 } 712 713 return 0; 714 715 error: 716 v3d_performance_query_info_free(query_info, i); 717 return err; 718 } 719 720 static int 721 v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, 722 struct drm_v3d_extension __user *ext, 723 struct v3d_cpu_job *job) 724 { 725 struct v3d_performance_query_info *query_info = 
&job->performance_query; 726 struct drm_v3d_reset_performance_query reset; 727 int err; 728 729 if (!v3d_validate_cpu_job(file_priv, job)) 730 return -EINVAL; 731 732 if (copy_from_user(&reset, ext, sizeof(reset))) 733 return -EFAULT; 734 735 job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; 736 737 query_info->queries = 738 kvmalloc_objs(struct v3d_performance_query, reset.count); 739 if (!query_info->queries) 740 return -ENOMEM; 741 742 err = v3d_copy_query_info(query_info, 743 reset.count, 744 reset.nperfmons, 745 u64_to_user_ptr(reset.syncs), 746 u64_to_user_ptr(reset.kperfmon_ids), 747 file_priv); 748 if (err) 749 return err; 750 751 query_info->count = reset.count; 752 query_info->nperfmons = reset.nperfmons; 753 754 return 0; 755 } 756 757 static int 758 v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, 759 struct drm_v3d_extension __user *ext, 760 struct v3d_cpu_job *job) 761 { 762 struct v3d_performance_query_info *query_info = &job->performance_query; 763 struct drm_v3d_copy_performance_query copy; 764 int err; 765 766 if (!v3d_validate_cpu_job(file_priv, job)) 767 return -EINVAL; 768 769 if (copy_from_user(©, ext, sizeof(copy))) 770 return -EFAULT; 771 772 if (copy.pad) 773 return -EINVAL; 774 775 job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; 776 777 query_info->queries = 778 kvmalloc_objs(struct v3d_performance_query, copy.count); 779 if (!query_info->queries) 780 return -ENOMEM; 781 782 err = v3d_copy_query_info(query_info, 783 copy.count, 784 copy.nperfmons, 785 u64_to_user_ptr(copy.syncs), 786 u64_to_user_ptr(copy.kperfmon_ids), 787 file_priv); 788 if (err) 789 return err; 790 791 query_info->count = copy.count; 792 query_info->nperfmons = copy.nperfmons; 793 query_info->ncounters = copy.ncounters; 794 795 job->copy.do_64bit = copy.do_64bit; 796 job->copy.do_partial = copy.do_partial; 797 job->copy.availability_bit = copy.availability_bit; 798 job->copy.offset = copy.offset; 799 job->copy.stride = copy.stride; 
800 801 return 0; 802 } 803 804 /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data 805 * according to the extension id (name). 806 */ 807 static int 808 v3d_get_extensions(struct drm_file *file_priv, 809 u64 ext_handles, 810 struct v3d_submit_ext *se, 811 struct v3d_cpu_job *job) 812 { 813 struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 814 struct v3d_dev *v3d = v3d_priv->v3d; 815 struct drm_v3d_extension __user *user_ext; 816 int ret; 817 818 user_ext = u64_to_user_ptr(ext_handles); 819 while (user_ext) { 820 struct drm_v3d_extension ext; 821 822 if (copy_from_user(&ext, user_ext, sizeof(ext))) { 823 drm_dbg(&v3d->drm, "Failed to copy submit extension\n"); 824 return -EFAULT; 825 } 826 827 switch (ext.id) { 828 case DRM_V3D_EXT_ID_MULTI_SYNC: 829 ret = v3d_get_multisync_submit_deps(file_priv, user_ext, se); 830 break; 831 case DRM_V3D_EXT_ID_CPU_INDIRECT_CSD: 832 ret = v3d_get_cpu_indirect_csd_params(file_priv, user_ext, job); 833 break; 834 case DRM_V3D_EXT_ID_CPU_TIMESTAMP_QUERY: 835 ret = v3d_get_cpu_timestamp_query_params(file_priv, user_ext, job); 836 break; 837 case DRM_V3D_EXT_ID_CPU_RESET_TIMESTAMP_QUERY: 838 ret = v3d_get_cpu_reset_timestamp_params(file_priv, user_ext, job); 839 break; 840 case DRM_V3D_EXT_ID_CPU_COPY_TIMESTAMP_QUERY: 841 ret = v3d_get_cpu_copy_query_results_params(file_priv, user_ext, job); 842 break; 843 case DRM_V3D_EXT_ID_CPU_RESET_PERFORMANCE_QUERY: 844 ret = v3d_get_cpu_reset_performance_params(file_priv, user_ext, job); 845 break; 846 case DRM_V3D_EXT_ID_CPU_COPY_PERFORMANCE_QUERY: 847 ret = v3d_get_cpu_copy_performance_query_params(file_priv, user_ext, job); 848 break; 849 default: 850 drm_dbg(&v3d->drm, "Unknown V3D extension ID: %d\n", ext.id); 851 return -EINVAL; 852 } 853 854 if (ret) 855 return ret; 856 857 user_ext = u64_to_user_ptr(ext.next); 858 } 859 860 return 0; 861 } 862 863 /** 864 * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. 
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * This is the main entrypoint for userspace to submit a 3D frame to
 * the GPU. Userspace provides the binner command list (if
 * applicable), and the kernel sets up the render command list to draw
 * to the framebuffer described in the ioctl, using the command lists
 * that the 3D engine's binner will produce.
 */
int
v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_cl *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_bin_job *bin = NULL;
	struct v3d_render_job *render = NULL;
	struct v3d_job *clean_job = NULL;
	struct v3d_job *last_job;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);

	if (args->pad)
		return -EINVAL;

	if (args->flags &&
	    args->flags & ~(DRM_V3D_SUBMIT_CL_FLUSH_CACHE |
			    DRM_V3D_SUBMIT_EXTENSION)) {
		drm_dbg(dev, "invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			drm_dbg(dev, "Failed to get extensions.\n");
			return ret;
		}
	}

	/* The render job always exists; bin and cache-clean jobs are
	 * optional, and whichever job runs last owns the BO list.
	 */
	ret = v3d_job_allocate(v3d, (void *)&render, sizeof(*render));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &render->base,
			   v3d_render_job_free, args->in_sync_rcl, &se, V3D_RENDER);
	if (ret) {
		v3d_job_deallocate((void *)&render);
		goto fail;
	}

	render->start = args->rcl_start;
	render->end = args->rcl_end;
	INIT_LIST_HEAD(&render->unref_list);

	/* A zero-length binner CL means a render-only submit. */
	if (args->bcl_start != args->bcl_end) {
		ret = v3d_job_allocate(v3d, (void *)&bin, sizeof(*bin));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, &bin->base,
				   v3d_job_free, args->in_sync_bcl, &se, V3D_BIN);
		if (ret) {
			v3d_job_deallocate((void *)&bin);
			goto fail;
		}

		bin->start = args->bcl_start;
		bin->end = args->bcl_end;
		bin->qma = args->qma;
		bin->qms = args->qms;
		bin->qts = args->qts;
		bin->render = render;
	}

	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE) {
		ret = v3d_job_allocate(v3d, (void *)&clean_job, sizeof(*clean_job));
		if (ret)
			goto fail;

		ret = v3d_job_init(v3d, file_priv, clean_job,
				   v3d_job_free, 0, NULL, V3D_CACHE_CLEAN);
		if (ret) {
			v3d_job_deallocate((void *)&clean_job);
			goto fail;
		}

		last_job = clean_job;
	} else {
		last_job = &render->base;
	}

	ret = v3d_lookup_bos(dev, file_priv, last_job,
			     args->bo_handles, args->bo_handle_count);
	if (ret)
		goto fail;

	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		/* Per-fd perfmons and the global perfmon are mutually
		 * exclusive.
		 */
		if (v3d->global_perfmon) {
			ret = -EAGAIN;
			goto fail_perfmon;
		}

		render->base.perfmon = v3d_perfmon_find(v3d_priv,
							args->perfmon_id);

		if (!render->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	/* sched_lock keeps the bin -> render -> clean chain queued
	 * back-to-back; each later job waits on the previous one's
	 * done_fence.
	 */
	mutex_lock(&v3d->sched_lock);
	if (bin) {
		bin->base.perfmon = render->base.perfmon;
		v3d_perfmon_get(bin->base.perfmon);
		v3d_push_job(&bin->base);

		ret = drm_sched_job_add_dependency(&render->base.base,
						   dma_fence_get(bin->base.done_fence));
		if (ret)
			goto fail_unreserve;
	}

	v3d_push_job(&render->base);

	if (clean_job) {
		struct dma_fence *render_fence =
			dma_fence_get(render->base.done_fence);
		ret = drm_sched_job_add_dependency(&clean_job->base,
						   render_fence);
		if (ret)
			goto fail_unreserve;
		clean_job->perfmon = render->base.perfmon;
		v3d_perfmon_get(clean_job->perfmon);
		v3d_push_job(clean_job);
	}

	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 last_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 last_job->done_fence);

	/* NOTE(review): bin may be NULL on render-only submits; this
	 * appears to rely on base being at offset 0 so &bin->base is
	 * NULL too (v3d_job_put ignores NULL) — confirm against
	 * v3d_drv.h.
	 */
	v3d_job_put(&bin->base);
	v3d_job_put(&render->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(last_job->bo,
				    last_job->bo_count, &acquire_ctx);
fail:
	v3d_job_cleanup((void *)bin);
	v3d_job_cleanup((void *)render);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_tfu_ioctl() - Submits a TFU (texture formatting) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the TFU, which we don't
 * need to validate since the TFU is behind the MMU.
 */
int
v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct drm_v3d_submit_tfu *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_tfu_job *job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret = 0;

	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		drm_dbg(dev, "invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			drm_dbg(dev, "Failed to get extensions.\n");
			return ret;
		}
	}

	ret = v3d_job_allocate(v3d, (void *)&job, sizeof(*job));
	if (ret)
		return ret;

	ret = v3d_job_init(v3d, file_priv, &job->base,
			   v3d_job_free, args->in_sync, &se, V3D_TFU);
	if (ret) {
		v3d_job_deallocate((void *)&job);
		goto fail;
	}

	job->base.bo = kzalloc_objs(*job->base.bo, ARRAY_SIZE(args->bo_handles));
	if (!job->base.bo) {
		ret = -ENOMEM;
		goto fail;
	}

	job->args = *args;

	/* The fixed-size handle array is terminated by the first zero
	 * handle; bo_count ends up as the number of valid handles.
	 */
	for (job->base.bo_count = 0;
	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
	     job->base.bo_count++) {
		struct drm_gem_object *bo;

		if (!args->bo_handles[job->base.bo_count])
			break;

		bo = drm_gem_object_lookup(file_priv, args->bo_handles[job->base.bo_count]);
		if (!bo) {
			drm_dbg(dev, "Failed to look up GEM BO %d: %d\n",
				job->base.bo_count,
				args->bo_handles[job->base.bo_count]);
			ret = -ENOENT;
			goto fail;
		}
		job->base.bo[job->base.bo_count] = bo;
	}

	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
	if (ret)
		goto fail;

	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 &job->base, &acquire_ctx,
						 args->out_sync,
						 &se,
						 job->base.done_fence);

	v3d_job_put(&job->base);

	return 0;

fail:
	v3d_job_cleanup((void *)job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/**
 * v3d_submit_csd_ioctl() - Submits a CSD (compute shader) job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace provides the register setup for the CSD, which we don't
 * need to validate since the CSD is behind the MMU.
 */
int
v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
	struct drm_v3d_submit_csd *args = data;
	struct v3d_submit_ext se = {0};
	struct v3d_csd_job *job = NULL;
	struct v3d_job *clean_job = NULL;
	struct ww_acquire_ctx acquire_ctx;
	int ret;

	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);

	if (args->pad)
		return -EINVAL;

	if (!v3d_has_csd(v3d)) {
		drm_warn(dev, "Attempting CSD submit on non-CSD hardware\n");
		return -EINVAL;
	}

	if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) {
		drm_dbg(dev, "invalid flags: %d\n", args->flags);
		return -EINVAL;
	}

	if (args->flags & DRM_V3D_SUBMIT_EXTENSION) {
		ret = v3d_get_extensions(file_priv, args->extensions, &se, NULL);
		if (ret) {
			drm_dbg(dev, "Failed to get extensions.\n");
			return ret;
		}
	}

	/* Allocates the CSD job plus a trailing cache-clean job; the
	 * clean job owns the BO list and reservation locks.
	 */
	ret = v3d_setup_csd_jobs_and_bos(file_priv, v3d, args,
					 &job, &clean_job, &se,
					 &acquire_ctx);
	if (ret)
		goto fail;

	if (args->perfmon_id) {
		/* Per-fd perfmons and the global perfmon are mutually
		 * exclusive.
		 */
		if (v3d->global_perfmon) {
			ret = -EAGAIN;
			goto fail_perfmon;
		}

		job->base.perfmon = v3d_perfmon_find(v3d_priv,
						     args->perfmon_id);
		if (!job->base.perfmon) {
			ret = -ENOENT;
			goto fail_perfmon;
		}
	}

	/* sched_lock keeps the CSD job and its cache-clean follower
	 * queued back-to-back; the clean job waits on the CSD fence.
	 */
	mutex_lock(&v3d->sched_lock);
	v3d_push_job(&job->base);

	ret = drm_sched_job_add_dependency(&clean_job->base,
					   dma_fence_get(job->base.done_fence));
	if (ret)
		goto fail_unreserve;

	v3d_push_job(clean_job);
	mutex_unlock(&v3d->sched_lock);

	v3d_attach_fences_and_unlock_reservation(file_priv,
						 clean_job,
						 &acquire_ctx,
						 args->out_sync,
						 &se,
						 clean_job->done_fence);

	v3d_job_put(&job->base);
	v3d_job_put(clean_job);

	return 0;

fail_unreserve:
	mutex_unlock(&v3d->sched_lock);
fail_perfmon:
	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
				    &acquire_ctx);
fail:
	v3d_job_cleanup((void *)job);
	v3d_job_cleanup(clean_job);
	v3d_put_multisync_post_deps(&se);

	return ret;
}

/* Number of BOs the submit must pass for each CPU job type; checked
 * against args->bo_handle_count in v3d_submit_cpu_ioctl().
 */
static const unsigned int cpu_job_bo_handle_count[] = {
	[V3D_CPU_JOB_TYPE_INDIRECT_CSD] = 1,
	[V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY] = 1,
	[V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY] = 2,
	[V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY] = 0,
	[V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY] = 1,
};

/**
 * v3d_submit_cpu_ioctl() - Submits a CPU job to the V3D.
 * @dev: DRM device
 * @data: ioctl argument
 * @file_priv: DRM file for this fd
 *
 * Userspace specifies the CPU job type and data required to perform its
 * operations through the drm_v3d_extension struct.
1257 */ 1258 int 1259 v3d_submit_cpu_ioctl(struct drm_device *dev, void *data, 1260 struct drm_file *file_priv) 1261 { 1262 struct v3d_dev *v3d = to_v3d_dev(dev); 1263 struct drm_v3d_submit_cpu *args = data; 1264 struct v3d_submit_ext se = {0}; 1265 struct v3d_submit_ext *out_se = NULL; 1266 struct v3d_cpu_job *cpu_job = NULL; 1267 struct v3d_csd_job *csd_job = NULL; 1268 struct v3d_job *clean_job = NULL; 1269 struct ww_acquire_ctx acquire_ctx; 1270 int ret; 1271 1272 if (args->flags && !(args->flags & DRM_V3D_SUBMIT_EXTENSION)) { 1273 drm_dbg(dev, "Invalid flags: %d\n", args->flags); 1274 return -EINVAL; 1275 } 1276 1277 ret = v3d_job_allocate(v3d, (void *)&cpu_job, sizeof(*cpu_job)); 1278 if (ret) 1279 return ret; 1280 1281 if (args->flags & DRM_V3D_SUBMIT_EXTENSION) { 1282 ret = v3d_get_extensions(file_priv, args->extensions, &se, cpu_job); 1283 if (ret) { 1284 drm_dbg(dev, "Failed to get extensions.\n"); 1285 goto fail; 1286 } 1287 } 1288 1289 /* Every CPU job must have a CPU job user extension */ 1290 if (!cpu_job->job_type) { 1291 drm_dbg(dev, "CPU job must have a CPU job user extension.\n"); 1292 ret = -EINVAL; 1293 goto fail; 1294 } 1295 1296 if (args->bo_handle_count != cpu_job_bo_handle_count[cpu_job->job_type]) { 1297 drm_dbg(dev, "This CPU job was not submitted with the proper number of BOs.\n"); 1298 ret = -EINVAL; 1299 goto fail; 1300 } 1301 1302 trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type); 1303 1304 ret = v3d_job_init(v3d, file_priv, &cpu_job->base, 1305 v3d_job_free, 0, &se, V3D_CPU); 1306 if (ret) { 1307 v3d_job_deallocate((void *)&cpu_job); 1308 goto fail; 1309 } 1310 1311 clean_job = cpu_job->indirect_csd.clean_job; 1312 csd_job = cpu_job->indirect_csd.job; 1313 1314 if (args->bo_handle_count) { 1315 ret = v3d_lookup_bos(dev, file_priv, &cpu_job->base, 1316 args->bo_handles, args->bo_handle_count); 1317 if (ret) 1318 goto fail; 1319 1320 ret = v3d_lock_bo_reservations(&cpu_job->base, &acquire_ctx); 1321 if (ret) 1322 goto fail; 1323 
} 1324 1325 mutex_lock(&v3d->sched_lock); 1326 v3d_push_job(&cpu_job->base); 1327 1328 switch (cpu_job->job_type) { 1329 case V3D_CPU_JOB_TYPE_INDIRECT_CSD: 1330 ret = drm_sched_job_add_dependency(&csd_job->base.base, 1331 dma_fence_get(cpu_job->base.done_fence)); 1332 if (ret) 1333 goto fail_unreserve; 1334 1335 v3d_push_job(&csd_job->base); 1336 1337 ret = drm_sched_job_add_dependency(&clean_job->base, 1338 dma_fence_get(csd_job->base.done_fence)); 1339 if (ret) 1340 goto fail_unreserve; 1341 1342 v3d_push_job(clean_job); 1343 1344 break; 1345 default: 1346 break; 1347 } 1348 mutex_unlock(&v3d->sched_lock); 1349 1350 out_se = (cpu_job->job_type == V3D_CPU_JOB_TYPE_INDIRECT_CSD) ? NULL : &se; 1351 1352 v3d_attach_fences_and_unlock_reservation(file_priv, 1353 &cpu_job->base, 1354 &acquire_ctx, 0, 1355 out_se, cpu_job->base.done_fence); 1356 1357 switch (cpu_job->job_type) { 1358 case V3D_CPU_JOB_TYPE_INDIRECT_CSD: 1359 v3d_attach_fences_and_unlock_reservation(file_priv, 1360 clean_job, 1361 &cpu_job->indirect_csd.acquire_ctx, 1362 0, &se, clean_job->done_fence); 1363 break; 1364 default: 1365 break; 1366 } 1367 1368 v3d_job_put(&cpu_job->base); 1369 v3d_job_put(&csd_job->base); 1370 v3d_job_put(clean_job); 1371 1372 return 0; 1373 1374 fail_unreserve: 1375 mutex_unlock(&v3d->sched_lock); 1376 1377 drm_gem_unlock_reservations(cpu_job->base.bo, cpu_job->base.bo_count, 1378 &acquire_ctx); 1379 1380 drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count, 1381 &cpu_job->indirect_csd.acquire_ctx); 1382 1383 fail: 1384 v3d_job_cleanup((void *)cpu_job); 1385 v3d_job_cleanup((void *)csd_job); 1386 v3d_job_cleanup(clean_job); 1387 v3d_put_multisync_post_deps(&se); 1388 kvfree(cpu_job->timestamp_query.queries); 1389 kvfree(cpu_job->performance_query.queries); 1390 1391 return ret; 1392 } 1393