/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference_unlocked(state->bo[i]);

	kfree(state);
}
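/**
 * Returns the hang state captured at the last GPU reset to userspace
 * for debugging.
 *
 * If the caller's BO array is too small, only the required bo_count is
 * returned. Otherwise the saved register state is copied out and a GEM
 * handle is created for each BO referenced by the hung jobs.
 */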
int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			/* Only i handles were created before the failure. */
			state->bo_count = i;
			goto err;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err:
	kfree(bo_state);

err_free:
	vc4_free_hang_state(dev, kernel_state);

	return ret;
}
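/* Captures the register state and the BOs of the hung binner and
 * render jobs so they can be handed to userspace later through
 * vc4_get_hang_state_ioctl().
 */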
static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		/* j is now the number of entries saved for this job. */
		prev_idx += j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset. This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}
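/* Kicks off a control list on one of V3D's two control list threads.
 * Thread 0 runs the binning control list, thread 1 the rendering
 * control list.
 */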
static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}
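/* Flushes the GPU's caches before a job is kicked off, so that it reads
 * coherent data: the L2C cache is cleared, along with each slice's
 * texture, uniform, and instruction caches.
 */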
static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches. These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}
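/* Stamps every BO referenced by the job with the job's seqno, so that
 * seqno waiters (such as vc4_wait_bo_ioctl()) know when the hardware is
 * done with it. BOs written by the render job also record the seqno as
 * their write_seqno for tracking write dependencies.
 */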
static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}

	for (i = 0; i < exec->rcl_write_bo_count; i++) {
		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
		bo->write_seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution. If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time. To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address. That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off. Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = drm_calloc_large(exec->bo_count,
				    sizeof(struct drm_gem_cma_object *));
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	drm_free_large(handles);
	return ret;
}
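/* Copies in the user's binner command list, shader records, and
 * uniforms, validates them, and relocates them into a freshly
 * allocated BO that the hardware will actually execute from.
 */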
static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (shader_rec_offset < args->bin_cl_size ||
	    uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = drm_malloc_ab(temp_size, 1);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);
	if (ret)
		goto fail;

	/* Block waiting on any previous rendering into the CS's VBO,
	 * IB, or textures, so that pixels are actually written by the
	 * time we try to read them.
	 */
	ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true);

fail:
	drm_free_large(temp);
	return ret;
}
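/* Frees the resources held by a finished (or failed) job: drops the
 * references on its BOs, releases the V3D power reference it was
 * holding, and frees the exec struct itself.
 */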
static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
		drm_free_large(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference_unlocked(&bo->base.base);
	}

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0) {
		pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
		pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}
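/* Registers a callback to be run (from the shared workqueue) once the
 * given seqno has been signalled. If the seqno has already passed, the
 * callback is scheduled immediately.
 */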
int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}
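/**
 * Blocks until any rendering using the given BO has completed, or
 * until the caller's timeout expires.
 */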
int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job. */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers. Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);

	vc4_bo_cache_destroy(dev);
}