/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/io.h>

#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
#include "vc4_regs.h"
#include "vc4_trace.h"

static void
vc4_queue_hangcheck(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	mod_timer(&vc4->hangcheck.timer,
		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
}

struct vc4_hang_state {
	struct drm_vc4_get_hang_state user_state;

	u32 bo_count;
	struct drm_gem_object **bo;
};

static void
vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
{
	unsigned int i;

	mutex_lock(&dev->struct_mutex);
	for (i = 0; i < state->user_state.bo_count; i++)
		drm_gem_object_unreference(state->bo[i]);
	mutex_unlock(&dev->struct_mutex);

	kfree(state);
}

int
vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_vc4_get_hang_state *get_state = data;
	struct drm_vc4_get_hang_state_bo *bo_state;
	struct vc4_hang_state *kernel_state;
	struct drm_vc4_get_hang_state *state;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned long irqflags;
	u32 i;
	int ret = 0;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	kernel_state = vc4->hang_state;
	if (!kernel_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return -ENOENT;
	}
	state = &kernel_state->user_state;

	/* If the user's array isn't big enough, just return the
	 * required array size.
	 */
	if (get_state->bo_count < state->bo_count) {
		get_state->bo_count = state->bo_count;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return 0;
	}

	vc4->hang_state = NULL;
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* Save the user's BO pointer, so we don't stomp it with the memcpy.
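	 * get_state->bo is the user-space pointer to the array of
	 * struct drm_vc4_get_hang_state_bo entries filled in below, while
	 * the rest of *get_state is overwritten with the register snapshot
	 * and bo_count captured when the hang was saved.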
	 */
	state->bo = get_state->bo;
	memcpy(get_state, state, sizeof(*state));

	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
	if (!bo_state) {
		ret = -ENOMEM;
		goto err_free;
	}

	for (i = 0; i < state->bo_count; i++) {
		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
		u32 handle;

		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
					    &handle);

		if (ret) {
			/* Drop the handles we already created so they
			 * don't leak into the caller's handle table,
			 * then free everything below.
			 */
			while (i--)
				drm_gem_handle_delete(file_priv,
						      bo_state[i].handle);
			goto err_free;
		}
		bo_state[i].handle = handle;
		bo_state[i].paddr = vc4_bo->base.paddr;
		bo_state[i].size = vc4_bo->base.base.size;
	}

	if (copy_to_user((void __user *)(uintptr_t)get_state->bo,
			 bo_state,
			 state->bo_count * sizeof(*bo_state)))
		ret = -EFAULT;

err_free:
	kfree(bo_state);
	vc4_free_hang_state(dev, kernel_state);

	return ret;
}

static void
vc4_save_hang_state(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_get_hang_state *state;
	struct vc4_hang_state *kernel_state;
	struct vc4_exec_info *exec[2];
	struct vc4_bo *bo;
	unsigned long irqflags;
	unsigned int i, j, unref_list_count, prev_idx;

	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
	if (!kernel_state)
		return;

	state = &kernel_state->user_state;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	exec[0] = vc4_first_bin_job(vc4);
	exec[1] = vc4_first_render_job(vc4);
	if (!exec[0] && !exec[1]) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	/* Get the bos from both binner and renderer into hang state. */
	state->bo_count = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		unref_list_count = 0;
		list_for_each_entry(bo, &exec[i]->unref_list, unref_head)
			unref_list_count++;
		state->bo_count += exec[i]->bo_count + unref_list_count;
	}

	kernel_state->bo = kcalloc(state->bo_count,
				   sizeof(*kernel_state->bo), GFP_ATOMIC);

	if (!kernel_state->bo) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		kfree(kernel_state);
		return;
	}

	prev_idx = 0;
	for (i = 0; i < 2; i++) {
		if (!exec[i])
			continue;

		for (j = 0; j < exec[i]->bo_count; j++) {
			drm_gem_object_reference(&exec[i]->bo[j]->base);
			kernel_state->bo[j + prev_idx] = &exec[i]->bo[j]->base;
		}

		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
			drm_gem_object_reference(&bo->base.base);
			kernel_state->bo[j + prev_idx] = &bo->base.base;
			j++;
		}
		/* Start the next job's BOs right after this one's
		 * (j is the number of BOs we just stored).
		 */
		prev_idx += j;
	}

	if (exec[0])
		state->start_bin = exec[0]->ct0ca;
	if (exec[1])
		state->start_render = exec[1]->ct1ca;

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	state->ct0ca = V3D_READ(V3D_CTNCA(0));
	state->ct0ea = V3D_READ(V3D_CTNEA(0));

	state->ct1ca = V3D_READ(V3D_CTNCA(1));
	state->ct1ea = V3D_READ(V3D_CTNEA(1));

	state->ct0cs = V3D_READ(V3D_CTNCS(0));
	state->ct1cs = V3D_READ(V3D_CTNCS(1));

	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
	state->ct1ra0 = V3D_READ(V3D_CT01RA0);

	state->bpca = V3D_READ(V3D_BPCA);
	state->bpcs = V3D_READ(V3D_BPCS);
	state->bpoa = V3D_READ(V3D_BPOA);
	state->bpos = V3D_READ(V3D_BPOS);

	state->vpmbase = V3D_READ(V3D_VPMBASE);

	state->dbge = V3D_READ(V3D_DBGE);
	state->fdbgo = V3D_READ(V3D_FDBGO);
	state->fdbgb = V3D_READ(V3D_FDBGB);
	state->fdbgr = V3D_READ(V3D_FDBGR);
	state->fdbgs = V3D_READ(V3D_FDBGS);
	state->errstat = V3D_READ(V3D_ERRSTAT);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (vc4->hang_state) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_free_hang_state(dev, kernel_state);
	} else {
		vc4->hang_state = kernel_state;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
	}
}

static void
vc4_reset(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	DRM_INFO("Resetting GPU.\n");

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount) {
		/* Power the device off and back on by dropping the
		 * reference on runtime PM.
		 */
		pm_runtime_put_sync_suspend(&vc4->v3d->pdev->dev);
		pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	}
	mutex_unlock(&vc4->power_lock);

	vc4_irq_reset(dev);

	/* Rearm the hangcheck -- another job might have been waiting
	 * for our hung one to get kicked off, and vc4_irq_reset()
	 * would have started it.
	 */
	vc4_queue_hangcheck(dev);
}

static void
vc4_reset_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, hangcheck.reset_work);

	vc4_save_hang_state(vc4->dev);

	vc4_reset(vc4->dev);
}

static void
vc4_hangcheck_elapsed(unsigned long data)
{
	struct drm_device *dev = (struct drm_device *)data;
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint32_t ct0ca, ct1ca;
	unsigned long irqflags;
	struct vc4_exec_info *bin_exec, *render_exec;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	bin_exec = vc4_first_bin_job(vc4);
	render_exec = vc4_first_render_job(vc4);

	/* If idle, we can stop watching for hangs. */
	if (!bin_exec && !render_exec) {
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		return;
	}

	ct0ca = V3D_READ(V3D_CTNCA(0));
	ct1ca = V3D_READ(V3D_CTNCA(1));

	/* If we've made any progress in execution, rearm the timer
	 * and wait.
	 */
	if ((bin_exec && ct0ca != bin_exec->last_ct0ca) ||
	    (render_exec && ct1ca != render_exec->last_ct1ca)) {
		if (bin_exec)
			bin_exec->last_ct0ca = ct0ca;
		if (render_exec)
			render_exec->last_ct1ca = ct1ca;
		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_queue_hangcheck(dev);
		return;
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	/* We've gone too long with no progress, reset.  This has to
	 * be done from a work struct, since resetting can sleep and
	 * this timer hook isn't allowed to.
	 */
	schedule_work(&vc4->hangcheck.reset_work);
}

static void
submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Set the current and end address of the control list.
	 * Writing the end register is what starts the job.
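	 *
	 * Thread 0 is the binner's control list (CT0), thread 1 the
	 * renderer's (CT1), matching the callers below.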
	 */
	V3D_WRITE(V3D_CTNCA(thread), start);
	V3D_WRITE(V3D_CTNEA(thread), end);
}

int
vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
		   bool interruptible)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long timeout_expire;
	DEFINE_WAIT(wait);

	if (vc4->finished_seqno >= seqno)
		return 0;

	if (timeout_ns == 0)
		return -ETIME;

	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);

	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
	for (;;) {
		prepare_to_wait(&vc4->job_wait_queue, &wait,
				interruptible ? TASK_INTERRUPTIBLE :
				TASK_UNINTERRUPTIBLE);

		if (interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		if (vc4->finished_seqno >= seqno)
			break;

		if (timeout_ns != ~0ull) {
			if (time_after_eq(jiffies, timeout_expire)) {
				ret = -ETIME;
				break;
			}
			schedule_timeout(timeout_expire - jiffies);
		} else {
			schedule();
		}
	}

	finish_wait(&vc4->job_wait_queue, &wait);
	trace_vc4_wait_for_seqno_end(dev, seqno);

	return ret;
}

static void
vc4_flush_caches(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Flush the GPU L2 caches.  These caches sit on top of system
	 * L3 (the 128kb or so shared with the CPU), and are
	 * non-allocating in the L3.
	 */
	V3D_WRITE(V3D_L2CACTL,
		  V3D_L2CACTL_L2CCLR);

	V3D_WRITE(V3D_SLCACTL,
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		  VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

/* Sets the registers for the next job to actually be executed in the
 * hardware.
 *
 * The job_lock should be held during this.
 */
void
vc4_submit_next_bin_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec;

again:
	exec = vc4_first_bin_job(vc4);
	if (!exec)
		return;

	vc4_flush_caches(dev);

	/* Disable the binner's pre-loaded overflow memory address */
	V3D_WRITE(V3D_BPOA, 0);
	V3D_WRITE(V3D_BPOS, 0);

	/* Either put the job in the binner if it uses the binner, or
	 * immediately move it to the to-be-rendered queue.
	 */
	if (exec->ct0ca != exec->ct0ea) {
		submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
	} else {
		vc4_move_job_to_render(dev, exec);
		goto again;
	}
}

void
vc4_submit_next_render_job(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct vc4_exec_info *exec = vc4_first_render_job(vc4);

	if (!exec)
		return;

	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
}

void
vc4_move_job_to_render(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	bool was_empty = list_empty(&vc4->render_job_list);

	list_move_tail(&exec->head, &vc4->render_job_list);
	if (was_empty)
		vc4_submit_next_render_job(dev);
}

static void
vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
{
	struct vc4_bo *bo;
	unsigned i;

	for (i = 0; i < exec->bo_count; i++) {
		bo = to_vc4_bo(&exec->bo[i]->base);
		bo->seqno = seqno;
	}

	list_for_each_entry(bo, &exec->unref_list, unref_head) {
		bo->seqno = seqno;
	}
}

/* Queues a struct vc4_exec_info for execution.  If no job is
 * currently executing, then submits it.
 *
 * Unlike most GPUs, our hardware only handles one command list at a
 * time.  To queue multiple jobs at once, we'd need to edit the
 * previous command list to have a jump to the new one at the end, and
 * then bump the end address.  That's a change for a later date,
 * though.
 */
static void
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	uint64_t seqno;
	unsigned long irqflags;

	spin_lock_irqsave(&vc4->job_lock, irqflags);

	seqno = ++vc4->emit_seqno;
	exec->seqno = seqno;
	vc4_update_bo_seqnos(exec, seqno);

	list_add_tail(&exec->head, &vc4->bin_job_list);

	/* If no job was executing, kick ours off.  Otherwise, it'll
	 * get started when the previous job's flush done interrupt
	 * occurs.
	 */
	if (vc4_first_bin_job(vc4) == exec) {
		vc4_submit_next_bin_job(dev);
		vc4_queue_hangcheck(dev);
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

/**
 * Looks up a bunch of GEM handles for BOs and stores the array for
 * use in the command validator that actually writes relocated
 * addresses pointing to them.
 */
static int
vc4_cl_lookup_bos(struct drm_device *dev,
		  struct drm_file *file_priv,
		  struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	uint32_t *handles;
	int ret = 0;
	int i;

	exec->bo_count = args->bo_handle_count;

	if (!exec->bo_count) {
		/* See comment on bo_index for why we have to check
		 * this.
		 */
		DRM_ERROR("Rendering requires BOs to validate\n");
		return -EINVAL;
	}

	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
			   GFP_KERNEL);
	if (!exec->bo) {
		DRM_ERROR("Failed to allocate validated BO pointers\n");
		return -ENOMEM;
	}

	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
	if (!handles) {
		ret = -ENOMEM;
		DRM_ERROR("Failed to allocate incoming GEM handles\n");
		goto fail;
	}

	if (copy_from_user(handles,
			   (void __user *)(uintptr_t)args->bo_handles,
			   exec->bo_count * sizeof(uint32_t))) {
		ret = -EFAULT;
		DRM_ERROR("Failed to copy in GEM handles\n");
		goto fail;
	}

	spin_lock(&file_priv->table_lock);
	for (i = 0; i < exec->bo_count; i++) {
		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
						     handles[i]);
		if (!bo) {
			DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				  i, handles[i]);
			ret = -EINVAL;
			spin_unlock(&file_priv->table_lock);
			goto fail;
		}
		drm_gem_object_reference(bo);
		exec->bo[i] = (struct drm_gem_cma_object *)bo;
	}
	spin_unlock(&file_priv->table_lock);

fail:
	/* drm_malloc_ab() may have used vmalloc, so free with its
	 * matching helper rather than kfree().
	 */
	drm_free_large(handles);
	return ret;
}

static int
vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct drm_vc4_submit_cl *args = exec->args;
	void *temp = NULL;
	void *bin;
	int ret = 0;
	uint32_t bin_offset = 0;
	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
					     16);
	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
	uint32_t exec_size = uniforms_offset + args->uniforms_size;
	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
					  args->shader_rec_count);
	struct vc4_bo *bo;

	if (uniforms_offset < shader_rec_offset ||
	    exec_size < uniforms_offset ||
	    args->shader_rec_count >= (UINT_MAX /
				       sizeof(struct vc4_shader_state)) ||
	    temp_size < exec_size) {
		DRM_ERROR("overflow in exec arguments\n");
		ret = -EINVAL;
		goto fail;
	}

	/* Allocate space where we'll store the copied in user command lists
	 * and shader records.
	 *
	 * We don't just copy directly into the BOs because we need to
	 * read the contents back for validation, and I think the
	 * bo->vaddr is uncached access.
	 */
	temp = kmalloc(temp_size, GFP_KERNEL);
	if (!temp) {
		DRM_ERROR("Failed to allocate storage for copying "
			  "in bin/render CLs.\n");
		ret = -ENOMEM;
		goto fail;
	}
	bin = temp + bin_offset;
	exec->shader_rec_u = temp + shader_rec_offset;
	exec->uniforms_u = temp + uniforms_offset;
	exec->shader_state = temp + exec_size;
	exec->shader_state_size = args->shader_rec_count;

	if (copy_from_user(bin,
			   (void __user *)(uintptr_t)args->bin_cl,
			   args->bin_cl_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->shader_rec_u,
			   (void __user *)(uintptr_t)args->shader_rec,
			   args->shader_rec_size)) {
		ret = -EFAULT;
		goto fail;
	}

	if (copy_from_user(exec->uniforms_u,
			   (void __user *)(uintptr_t)args->uniforms,
			   args->uniforms_size)) {
		ret = -EFAULT;
		goto fail;
	}

	bo = vc4_bo_create(dev, exec_size, true);
	if (IS_ERR(bo)) {
		DRM_ERROR("Couldn't allocate BO for binning\n");
		ret = PTR_ERR(bo);
		goto fail;
	}
	exec->exec_bo = &bo->base;

	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
		      &exec->unref_list);

	exec->ct0ca = exec->exec_bo->paddr + bin_offset;

	exec->bin_u = bin;

	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
	exec->shader_rec_size = args->shader_rec_size;

	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
	exec->uniforms_size = args->uniforms_size;

	ret = vc4_validate_bin_cl(dev,
				  exec->exec_bo->vaddr + bin_offset,
				  bin,
				  exec);
	if (ret)
		goto fail;

	ret = vc4_validate_shader_recs(dev, exec);

fail:
	kfree(temp);
	return ret;
}

static void
vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	unsigned i;

	/* Need the struct lock for drm_gem_object_unreference().
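	 * (In this kernel the locked drm_gem_object_unreference() must be
	 * called with dev->struct_mutex held; the _unlocked variant takes
	 * care of locking itself.)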
	 */
	mutex_lock(&dev->struct_mutex);
	if (exec->bo) {
		for (i = 0; i < exec->bo_count; i++)
			drm_gem_object_unreference(&exec->bo[i]->base);
		kfree(exec->bo);
	}

	while (!list_empty(&exec->unref_list)) {
		struct vc4_bo *bo = list_first_entry(&exec->unref_list,
						     struct vc4_bo, unref_head);
		list_del(&bo->unref_head);
		drm_gem_object_unreference(&bo->base.base);
	}
	mutex_unlock(&dev->struct_mutex);

	mutex_lock(&vc4->power_lock);
	if (--vc4->power_refcount == 0)
		pm_runtime_put(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);

	kfree(exec);
}

void
vc4_job_handle_completed(struct vc4_dev *vc4)
{
	unsigned long irqflags;
	struct vc4_seqno_cb *cb, *cb_temp;

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	while (!list_empty(&vc4->job_done_list)) {
		struct vc4_exec_info *exec =
			list_first_entry(&vc4->job_done_list,
					 struct vc4_exec_info, head);
		list_del(&exec->head);

		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
		vc4_complete_exec(vc4->dev, exec);
		spin_lock_irqsave(&vc4->job_lock, irqflags);
	}

	list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) {
		if (cb->seqno <= vc4->finished_seqno) {
			list_del_init(&cb->work.entry);
			schedule_work(&cb->work);
		}
	}

	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
}

static void vc4_seqno_cb_work(struct work_struct *work)
{
	struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);

	cb->func(cb);
}

int vc4_queue_seqno_cb(struct drm_device *dev,
		       struct vc4_seqno_cb *cb, uint64_t seqno,
		       void (*func)(struct vc4_seqno_cb *cb))
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	int ret = 0;
	unsigned long irqflags;

	cb->func = func;
	INIT_WORK(&cb->work, vc4_seqno_cb_work);

	spin_lock_irqsave(&vc4->job_lock, irqflags);
	if (seqno > vc4->finished_seqno) {
		cb->seqno = seqno;
		list_add_tail(&cb->work.entry, &vc4->seqno_cb_list);
	} else {
		schedule_work(&cb->work);
	}
	spin_unlock_irqrestore(&vc4->job_lock, irqflags);

	return ret;
}

/* Scheduled when any job has been completed, this walks the list of
 * jobs that had completed and unrefs their BOs and frees their exec
 * structs.
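 *
 * (In this driver the work item is scheduled from the V3D interrupt
 * handler when it retires a job.)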
 */
static void
vc4_job_done_work(struct work_struct *work)
{
	struct vc4_dev *vc4 =
		container_of(work, struct vc4_dev, job_done_work);

	vc4_job_handle_completed(vc4);
}

static int
vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				uint64_t seqno,
				uint64_t *timeout_ns)
{
	unsigned long start = jiffies;
	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);

	if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
		uint64_t delta = jiffies_to_nsecs(jiffies - start);

		if (*timeout_ns >= delta)
			*timeout_ns -= delta;
	}

	return ret;
}

int
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_vc4_wait_seqno *args = data;

	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
					       &args->timeout_ns);
}

int
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	int ret;
	struct drm_vc4_wait_bo *args = data;
	struct drm_gem_object *gem_obj;
	struct vc4_bo *bo;

	if (args->pad != 0)
		return -EINVAL;

	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
	if (!gem_obj) {
		DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
		return -EINVAL;
	}
	bo = to_vc4_bo(gem_obj);

	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
					      &args->timeout_ns);

	drm_gem_object_unreference_unlocked(gem_obj);
	return ret;
}

/**
 * Submits a command list to the VC4.
 *
 * This is what is called batchbuffer emitting on other hardware.
 */
int
vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);
	struct drm_vc4_submit_cl *args = data;
	struct vc4_exec_info *exec;
	int ret = 0;

	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
		DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
		return -EINVAL;
	}

	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
	if (!exec) {
		DRM_ERROR("malloc failure on exec struct\n");
		return -ENOMEM;
	}

	mutex_lock(&vc4->power_lock);
	if (vc4->power_refcount++ == 0)
		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
	mutex_unlock(&vc4->power_lock);
	if (ret < 0) {
		kfree(exec);
		return ret;
	}

	exec->args = args;
	INIT_LIST_HEAD(&exec->unref_list);

	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
	if (ret)
		goto fail;

	if (exec->args->bin_cl_size != 0) {
		ret = vc4_get_bcl(dev, exec);
		if (ret)
			goto fail;
	} else {
		exec->ct0ca = 0;
		exec->ct0ea = 0;
	}

	ret = vc4_get_rcl(dev, exec);
	if (ret)
		goto fail;

	/* Clear this out of the struct we'll be putting in the queue,
	 * since it's part of our stack.
	 */
	exec->args = NULL;

	vc4_queue_submit(dev, exec);

	/* Return the seqno for our job.
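	 * vc4_queue_submit() bumped vc4->emit_seqno and stored it in
	 * exec->seqno just above.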
	 */
	args->seqno = vc4->emit_seqno;

	return 0;

fail:
	vc4_complete_exec(vc4->dev, exec);

	return ret;
}

void
vc4_gem_init(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	INIT_LIST_HEAD(&vc4->bin_job_list);
	INIT_LIST_HEAD(&vc4->render_job_list);
	INIT_LIST_HEAD(&vc4->job_done_list);
	INIT_LIST_HEAD(&vc4->seqno_cb_list);
	spin_lock_init(&vc4->job_lock);

	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
	setup_timer(&vc4->hangcheck.timer,
		    vc4_hangcheck_elapsed,
		    (unsigned long)dev);

	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);

	mutex_init(&vc4->power_lock);
}

void
vc4_gem_destroy(struct drm_device *dev)
{
	struct vc4_dev *vc4 = to_vc4_dev(dev);

	/* Waiting for exec to finish would need to be done before
	 * unregistering V3D.
	 */
	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);

	/* V3D should already have disabled its interrupt and cleared
	 * the overflow allocation registers.  Now free the object.
	 */
	if (vc4->overflow_mem) {
		drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
		vc4->overflow_mem = NULL;
	}

	vc4_bo_cache_destroy(dev);

	if (vc4->hang_state)
		vc4_free_hang_state(dev, vc4->hang_state);
}