1 /* 2 * Copyright (C) 2013 Red Hat 3 * Author: Rob Clark <robdclark@gmail.com> 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 as published by 7 * the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 * 14 * You should have received a copy of the GNU General Public License along with 15 * this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 #include "msm_gpu.h" 19 #include "msm_gem.h" 20 #include "msm_mmu.h" 21 #include "msm_fence.h" 22 23 24 /* 25 * Power Management: 26 */ 27 28 #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING 29 #include <mach/board.h> 30 static void bs_init(struct msm_gpu *gpu) 31 { 32 if (gpu->bus_scale_table) { 33 gpu->bsc = msm_bus_scale_register_client(gpu->bus_scale_table); 34 DBG("bus scale client: %08x", gpu->bsc); 35 } 36 } 37 38 static void bs_fini(struct msm_gpu *gpu) 39 { 40 if (gpu->bsc) { 41 msm_bus_scale_unregister_client(gpu->bsc); 42 gpu->bsc = 0; 43 } 44 } 45 46 static void bs_set(struct msm_gpu *gpu, int idx) 47 { 48 if (gpu->bsc) { 49 DBG("set bus scaling: %d", idx); 50 msm_bus_scale_client_update_request(gpu->bsc, idx); 51 } 52 } 53 #else 54 static void bs_init(struct msm_gpu *gpu) {} 55 static void bs_fini(struct msm_gpu *gpu) {} 56 static void bs_set(struct msm_gpu *gpu, int idx) {} 57 #endif 58 59 static int enable_pwrrail(struct msm_gpu *gpu) 60 { 61 struct drm_device *dev = gpu->dev; 62 int ret = 0; 63 64 if (gpu->gpu_reg) { 65 ret = regulator_enable(gpu->gpu_reg); 66 if (ret) { 67 dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret); 68 return ret; 69 } 70 } 71 72 if (gpu->gpu_cx) { 73 ret = regulator_enable(gpu->gpu_cx); 74 if (ret) { 75 dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret); 76 return ret; 77 } 78 } 79 80 return 0; 81 } 82 83 static int disable_pwrrail(struct msm_gpu *gpu) 84 { 85 if (gpu->gpu_cx) 86 regulator_disable(gpu->gpu_cx); 87 if (gpu->gpu_reg) 88 regulator_disable(gpu->gpu_reg); 89 return 0; 90 } 91 92 static int enable_clk(struct msm_gpu *gpu) 93 { 94 int i; 95 96 if (gpu->core_clk && gpu->fast_rate) 97 clk_set_rate(gpu->core_clk, gpu->fast_rate); 98 99 /* Set the RBBM timer rate to 19.2Mhz */ 100 if (gpu->rbbmtimer_clk) 101 clk_set_rate(gpu->rbbmtimer_clk, 19200000); 102 103 for (i = gpu->nr_clocks - 1; i >= 0; i--) 104 if (gpu->grp_clks[i]) 105 clk_prepare(gpu->grp_clks[i]); 106 107 for (i = gpu->nr_clocks - 1; i >= 0; i--) 108 if (gpu->grp_clks[i]) 109 clk_enable(gpu->grp_clks[i]); 110 111 return 0; 112 } 113 114 static int disable_clk(struct msm_gpu *gpu) 115 { 116 int i; 117 118 for (i = gpu->nr_clocks - 1; i >= 0; i--) 119 if (gpu->grp_clks[i]) 120 clk_disable(gpu->grp_clks[i]); 121 122 for (i = gpu->nr_clocks - 1; i >= 0; i--) 123 if (gpu->grp_clks[i]) 124 clk_unprepare(gpu->grp_clks[i]); 125 126 /* 127 * Set the clock to a deliberately low rate. On older targets the clock 128 * speed had to be non zero to avoid problems. On newer targets this 129 * will be rounded down to zero anyway so it all works out. 130 */ 131 if (gpu->core_clk) 132 clk_set_rate(gpu->core_clk, 27000000); 133 134 if (gpu->rbbmtimer_clk) 135 clk_set_rate(gpu->rbbmtimer_clk, 0); 136 137 return 0; 138 } 139 140 static int enable_axi(struct msm_gpu *gpu) 141 { 142 if (gpu->ebi1_clk) 143 clk_prepare_enable(gpu->ebi1_clk); 144 if (gpu->bus_freq) 145 bs_set(gpu, gpu->bus_freq); 146 return 0; 147 } 148 149 static int disable_axi(struct msm_gpu *gpu) 150 { 151 if (gpu->ebi1_clk) 152 clk_disable_unprepare(gpu->ebi1_clk); 153 if (gpu->bus_freq) 154 bs_set(gpu, 0); 155 return 0; 156 } 157 158 int msm_gpu_pm_resume(struct msm_gpu *gpu) 159 { 160 int ret; 161 162 DBG("%s", gpu->name); 163 164 ret = enable_pwrrail(gpu); 165 if (ret) 166 return ret; 167 168 ret = enable_clk(gpu); 169 if (ret) 170 return ret; 171 172 ret = enable_axi(gpu); 173 if (ret) 174 return ret; 175 176 gpu->needs_hw_init = true; 177 178 return 0; 179 } 180 181 int msm_gpu_pm_suspend(struct msm_gpu *gpu) 182 { 183 int ret; 184 185 DBG("%s", gpu->name); 186 187 ret = disable_axi(gpu); 188 if (ret) 189 return ret; 190 191 ret = disable_clk(gpu); 192 if (ret) 193 return ret; 194 195 ret = disable_pwrrail(gpu); 196 if (ret) 197 return ret; 198 199 return 0; 200 } 201 202 int msm_gpu_hw_init(struct msm_gpu *gpu) 203 { 204 int ret; 205 206 if (!gpu->needs_hw_init) 207 return 0; 208 209 disable_irq(gpu->irq); 210 ret = gpu->funcs->hw_init(gpu); 211 if (!ret) 212 gpu->needs_hw_init = false; 213 enable_irq(gpu->irq); 214 215 return ret; 216 } 217 218 /* 219 * Hangcheck detection for locked gpu: 220 */ 221 222 static void retire_submits(struct msm_gpu *gpu); 223 224 static void recover_worker(struct work_struct *work) 225 { 226 struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); 227 struct drm_device *dev = gpu->dev; 228 struct msm_gem_submit *submit; 229 uint32_t fence = gpu->funcs->last_fence(gpu); 230 231 msm_update_fence(gpu->fctx, fence + 1); 232 233 mutex_lock(&dev->struct_mutex); 234 235 dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); 236 list_for_each_entry(submit, &gpu->submit_list, node) { 237 if (submit->fence->seqno == (fence + 1)) { 238 struct task_struct *task; 239 240 rcu_read_lock(); 241 task = pid_task(submit->pid, PIDTYPE_PID); 242 if (task) { 243 dev_err(dev->dev, "%s: offending task: %s\n", 244 gpu->name, task->comm); 245 } 246 rcu_read_unlock(); 247 break; 248 } 249 } 250 251 if (msm_gpu_active(gpu)) { 252 /* retire completed submits, plus the one that hung: */ 253 retire_submits(gpu); 254 255 pm_runtime_get_sync(&gpu->pdev->dev); 256 gpu->funcs->recover(gpu); 257 pm_runtime_put_sync(&gpu->pdev->dev); 258 259 /* replay the remaining submits after the one that hung: */ 260 list_for_each_entry(submit, &gpu->submit_list, node) { 261 gpu->funcs->submit(gpu, submit, NULL); 262 } 263 } 264 265 mutex_unlock(&dev->struct_mutex); 266 267 msm_gpu_retire(gpu); 268 } 269 270 static void hangcheck_timer_reset(struct msm_gpu *gpu) 271 { 272 DBG("%s", gpu->name); 273 mod_timer(&gpu->hangcheck_timer, 274 round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES)); 275 } 276 277 static void hangcheck_handler(unsigned long data) 278 { 279 struct msm_gpu *gpu = (struct msm_gpu *)data; 280 struct drm_device *dev = gpu->dev; 281 struct msm_drm_private *priv = dev->dev_private; 282 uint32_t fence = gpu->funcs->last_fence(gpu); 283 284 if (fence != gpu->hangcheck_fence) { 285 /* some progress has been made.. ya! */ 286 gpu->hangcheck_fence = fence; 287 } else if (fence < gpu->fctx->last_fence) { 288 /* no progress and not done.. hung! */ 289 gpu->hangcheck_fence = fence; 290 dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", 291 gpu->name); 292 dev_err(dev->dev, "%s: completed fence: %u\n", 293 gpu->name, fence); 294 dev_err(dev->dev, "%s: submitted fence: %u\n", 295 gpu->name, gpu->fctx->last_fence); 296 queue_work(priv->wq, &gpu->recover_work); 297 } 298 299 /* if still more pending work, reset the hangcheck timer: */ 300 if (gpu->fctx->last_fence > gpu->hangcheck_fence) 301 hangcheck_timer_reset(gpu); 302 303 /* workaround for missing irq: */ 304 queue_work(priv->wq, &gpu->retire_work); 305 } 306 307 /* 308 * Performance Counters: 309 */ 310 311 /* called under perf_lock */ 312 static int update_hw_cntrs(struct msm_gpu *gpu, uint32_t ncntrs, uint32_t *cntrs) 313 { 314 uint32_t current_cntrs[ARRAY_SIZE(gpu->last_cntrs)]; 315 int i, n = min(ncntrs, gpu->num_perfcntrs); 316 317 /* read current values: */ 318 for (i = 0; i < gpu->num_perfcntrs; i++) 319 current_cntrs[i] = gpu_read(gpu, gpu->perfcntrs[i].sample_reg); 320 321 /* update cntrs: */ 322 for (i = 0; i < n; i++) 323 cntrs[i] = current_cntrs[i] - gpu->last_cntrs[i]; 324 325 /* save current values: */ 326 for (i = 0; i < gpu->num_perfcntrs; i++) 327 gpu->last_cntrs[i] = current_cntrs[i]; 328 329 return n; 330 } 331 332 static void update_sw_cntrs(struct msm_gpu *gpu) 333 { 334 ktime_t time; 335 uint32_t elapsed; 336 unsigned long flags; 337 338 spin_lock_irqsave(&gpu->perf_lock, flags); 339 if (!gpu->perfcntr_active) 340 goto out; 341 342 time = ktime_get(); 343 elapsed = ktime_to_us(ktime_sub(time, gpu->last_sample.time)); 344 345 gpu->totaltime += elapsed; 346 if (gpu->last_sample.active) 347 gpu->activetime += elapsed; 348 349 gpu->last_sample.active = msm_gpu_active(gpu); 350 gpu->last_sample.time = time; 351 352 out: 353 spin_unlock_irqrestore(&gpu->perf_lock, flags); 354 } 355 356 void msm_gpu_perfcntr_start(struct msm_gpu *gpu) 357 { 358 unsigned long flags; 359 360 pm_runtime_get_sync(&gpu->pdev->dev); 361 362 spin_lock_irqsave(&gpu->perf_lock, flags); 363 /* we could dynamically enable/disable perfcntr registers too.. */ 364 gpu->last_sample.active = msm_gpu_active(gpu); 365 gpu->last_sample.time = ktime_get(); 366 gpu->activetime = gpu->totaltime = 0; 367 gpu->perfcntr_active = true; 368 update_hw_cntrs(gpu, 0, NULL); 369 spin_unlock_irqrestore(&gpu->perf_lock, flags); 370 } 371 372 void msm_gpu_perfcntr_stop(struct msm_gpu *gpu) 373 { 374 gpu->perfcntr_active = false; 375 pm_runtime_put_sync(&gpu->pdev->dev); 376 } 377 378 /* returns -errno or # of cntrs sampled */ 379 int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime, 380 uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs) 381 { 382 unsigned long flags; 383 int ret; 384 385 spin_lock_irqsave(&gpu->perf_lock, flags); 386 387 if (!gpu->perfcntr_active) { 388 ret = -EINVAL; 389 goto out; 390 } 391 392 *activetime = gpu->activetime; 393 *totaltime = gpu->totaltime; 394 395 gpu->activetime = gpu->totaltime = 0; 396 397 ret = update_hw_cntrs(gpu, ncntrs, cntrs); 398 399 out: 400 spin_unlock_irqrestore(&gpu->perf_lock, flags); 401 402 return ret; 403 } 404 405 /* 406 * Cmdstream submission/retirement: 407 */ 408 409 static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) 410 { 411 int i; 412 413 for (i = 0; i < submit->nr_bos; i++) { 414 struct msm_gem_object *msm_obj = submit->bos[i].obj; 415 /* move to inactive: */ 416 msm_gem_move_to_inactive(&msm_obj->base); 417 msm_gem_put_iova(&msm_obj->base, gpu->id); 418 drm_gem_object_unreference(&msm_obj->base); 419 } 420 421 pm_runtime_mark_last_busy(&gpu->pdev->dev); 422 pm_runtime_put_autosuspend(&gpu->pdev->dev); 423 msm_gem_submit_free(submit); 424 } 425 426 static void retire_submits(struct msm_gpu *gpu) 427 { 428 struct drm_device *dev = gpu->dev; 429 430 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 431 432 while (!list_empty(&gpu->submit_list)) { 433 struct msm_gem_submit *submit; 434 435 submit = list_first_entry(&gpu->submit_list, 436 struct msm_gem_submit, node); 437 438 if (dma_fence_is_signaled(submit->fence)) { 439 retire_submit(gpu, submit); 440 } else { 441 break; 442 } 443 } 444 } 445 446 static void retire_worker(struct work_struct *work) 447 { 448 struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); 449 struct drm_device *dev = gpu->dev; 450 uint32_t fence = gpu->funcs->last_fence(gpu); 451 452 msm_update_fence(gpu->fctx, fence); 453 454 mutex_lock(&dev->struct_mutex); 455 retire_submits(gpu); 456 mutex_unlock(&dev->struct_mutex); 457 } 458 459 /* call from irq handler to schedule work to retire bo's */ 460 void msm_gpu_retire(struct msm_gpu *gpu) 461 { 462 struct msm_drm_private *priv = gpu->dev->dev_private; 463 queue_work(priv->wq, &gpu->retire_work); 464 update_sw_cntrs(gpu); 465 } 466 467 /* add bo's to gpu's ring, and kick gpu: */ 468 void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, 469 struct msm_file_private *ctx) 470 { 471 struct drm_device *dev = gpu->dev; 472 struct msm_drm_private *priv = dev->dev_private; 473 int i; 474 475 WARN_ON(!mutex_is_locked(&dev->struct_mutex)); 476 477 pm_runtime_get_sync(&gpu->pdev->dev); 478 479 msm_gpu_hw_init(gpu); 480 481 list_add_tail(&submit->node, &gpu->submit_list); 482 483 msm_rd_dump_submit(submit); 484 485 update_sw_cntrs(gpu); 486 487 for (i = 0; i < submit->nr_bos; i++) { 488 struct msm_gem_object *msm_obj = submit->bos[i].obj; 489 uint64_t iova; 490 491 /* can't happen yet.. but when we add 2d support we'll have 492 * to deal w/ cross-ring synchronization: 493 */ 494 WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu)); 495 496 /* submit takes a reference to the bo and iova until retired: */ 497 drm_gem_object_reference(&msm_obj->base); 498 msm_gem_get_iova_locked(&msm_obj->base, 499 submit->gpu->id, &iova); 500 501 if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) 502 msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence); 503 else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) 504 msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->fence); 505 } 506 507 gpu->funcs->submit(gpu, submit, ctx); 508 priv->lastctx = ctx; 509 510 hangcheck_timer_reset(gpu); 511 } 512 513 /* 514 * Init/Cleanup: 515 */ 516 517 static irqreturn_t irq_handler(int irq, void *data) 518 { 519 struct msm_gpu *gpu = data; 520 return gpu->funcs->irq(gpu); 521 } 522 523 static struct clk *get_clock(struct device *dev, const char *name) 524 { 525 struct clk *clk = devm_clk_get(dev, name); 526 527 return IS_ERR(clk) ? NULL : clk; 528 } 529 530 static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu) 531 { 532 struct device *dev = &pdev->dev; 533 struct property *prop; 534 const char *name; 535 int i = 0; 536 537 gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names"); 538 if (gpu->nr_clocks < 1) { 539 gpu->nr_clocks = 0; 540 return 0; 541 } 542 543 gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks, 544 GFP_KERNEL); 545 if (!gpu->grp_clks) 546 return -ENOMEM; 547 548 of_property_for_each_string(dev->of_node, "clock-names", prop, name) { 549 gpu->grp_clks[i] = get_clock(dev, name); 550 551 /* Remember the key clocks that we need to control later */ 552 if (!strcmp(name, "core")) 553 gpu->core_clk = gpu->grp_clks[i]; 554 else if (!strcmp(name, "rbbmtimer")) 555 gpu->rbbmtimer_clk = gpu->grp_clks[i]; 556 557 ++i; 558 } 559 560 return 0; 561 } 562 563 int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, 564 struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, 565 const char *name, const char *ioname, const char *irqname, int ringsz) 566 { 567 struct iommu_domain *iommu; 568 int ret; 569 570 if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) 571 gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); 572 573 gpu->dev = drm; 574 gpu->funcs = funcs; 575 gpu->name = name; 576 gpu->fctx = msm_fence_context_alloc(drm, name); 577 if (IS_ERR(gpu->fctx)) { 578 ret = PTR_ERR(gpu->fctx); 579 gpu->fctx = NULL; 580 goto fail; 581 } 582 583 INIT_LIST_HEAD(&gpu->active_list); 584 INIT_WORK(&gpu->retire_work, retire_worker); 585 INIT_WORK(&gpu->recover_work, recover_worker); 586 587 INIT_LIST_HEAD(&gpu->submit_list); 588 589 setup_timer(&gpu->hangcheck_timer, hangcheck_handler, 590 (unsigned long)gpu); 591 592 spin_lock_init(&gpu->perf_lock); 593 594 595 /* Map registers: */ 596 gpu->mmio = msm_ioremap(pdev, ioname, name); 597 if (IS_ERR(gpu->mmio)) { 598 ret = PTR_ERR(gpu->mmio); 599 goto fail; 600 } 601 602 /* Get Interrupt: */ 603 gpu->irq = platform_get_irq_byname(pdev, irqname); 604 if (gpu->irq < 0) { 605 ret = gpu->irq; 606 dev_err(drm->dev, "failed to get irq: %d\n", ret); 607 goto fail; 608 } 609 610 ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler, 611 IRQF_TRIGGER_HIGH, gpu->name, gpu); 612 if (ret) { 613 dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret); 614 goto fail; 615 } 616 617 ret = get_clocks(pdev, gpu); 618 if (ret) 619 goto fail; 620 621 gpu->ebi1_clk = msm_clk_get(pdev, "bus"); 622 DBG("ebi1_clk: %p", gpu->ebi1_clk); 623 if (IS_ERR(gpu->ebi1_clk)) 624 gpu->ebi1_clk = NULL; 625 626 /* Acquire regulators: */ 627 gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd"); 628 DBG("gpu_reg: %p", gpu->gpu_reg); 629 if (IS_ERR(gpu->gpu_reg)) 630 gpu->gpu_reg = NULL; 631 632 gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx"); 633 DBG("gpu_cx: %p", gpu->gpu_cx); 634 if (IS_ERR(gpu->gpu_cx)) 635 gpu->gpu_cx = NULL; 636 637 /* Setup IOMMU.. eventually we will (I think) do this once per context 638 * and have separate page tables per context. For now, to keep things 639 * simple and to get something working, just use a single address space: 640 */ 641 iommu = iommu_domain_alloc(&platform_bus_type); 642 if (iommu) { 643 /* TODO 32b vs 64b address space.. */ 644 iommu->geometry.aperture_start = SZ_16M; 645 iommu->geometry.aperture_end = 0xffffffff; 646 647 dev_info(drm->dev, "%s: using IOMMU\n", name); 648 gpu->aspace = msm_gem_address_space_create(&pdev->dev, 649 iommu, "gpu"); 650 if (IS_ERR(gpu->aspace)) { 651 ret = PTR_ERR(gpu->aspace); 652 dev_err(drm->dev, "failed to init iommu: %d\n", ret); 653 gpu->aspace = NULL; 654 iommu_domain_free(iommu); 655 goto fail; 656 } 657 658 } else { 659 dev_info(drm->dev, "%s: no IOMMU, fallback to VRAM carveout!\n", name); 660 } 661 gpu->id = msm_register_address_space(drm, gpu->aspace); 662 663 664 /* Create ringbuffer: */ 665 mutex_lock(&drm->struct_mutex); 666 gpu->rb = msm_ringbuffer_new(gpu, ringsz); 667 mutex_unlock(&drm->struct_mutex); 668 if (IS_ERR(gpu->rb)) { 669 ret = PTR_ERR(gpu->rb); 670 gpu->rb = NULL; 671 dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); 672 goto fail; 673 } 674 675 gpu->pdev = pdev; 676 platform_set_drvdata(pdev, gpu); 677 678 bs_init(gpu); 679 680 return 0; 681 682 fail: 683 return ret; 684 } 685 686 void msm_gpu_cleanup(struct msm_gpu *gpu) 687 { 688 DBG("%s", gpu->name); 689 690 WARN_ON(!list_empty(&gpu->active_list)); 691 692 bs_fini(gpu); 693 694 if (gpu->rb) { 695 if (gpu->rb_iova) 696 msm_gem_put_iova(gpu->rb->bo, gpu->id); 697 msm_ringbuffer_destroy(gpu->rb); 698 } 699 700 if (gpu->fctx) 701 msm_fence_context_free(gpu->fctx); 702 } 703