// SPDX-License-Identifier: GPL-2.0-only
/*
 * Tegra host1x driver
 *
 * Copyright (c) 2010-2013, NVIDIA Corporation.
 */

#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>

#include <soc/tegra/common.h>

#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
#include <asm/dma-iommu.h>
#endif

#include "bus.h"
#include "channel.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"

#include "hw/host1x01.h"
#include "hw/host1x02.h"
#include "hw/host1x04.h"
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"

void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
	writel(v, host1x->hv_regs + r);
}

u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
{
	return readl(host1x->hv_regs + r);
}

void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	writel(v, sync_regs + r);
}

u32 host1x_sync_readl(struct host1x *host1x, u32 r)
{
	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;

	return readl(sync_regs + r);
}

void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
	writel(v, ch->regs + r);
}

u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
{
	return readl(ch->regs + r);
}

static const struct host1x_info host1x01_info = {
	.nb_channels = 8,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 8,
	.init = host1x01_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x02_info = {
	.nb_channels = 9,
	.nb_pts = 32,
	.nb_mlocks = 16,
	.nb_bases = 12,
	.init = host1x02_init,
	.sync_offset = 0x3000,
	.dma_mask = DMA_BIT_MASK(32),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = true,
};

static const struct host1x_info host1x04_info = {
	.nb_channels = 12,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x04_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_info host1x05_info = {
	.nb_channels = 14,
	.nb_pts = 192,
	.nb_mlocks = 16,
	.nb_bases = 64,
	.init = host1x05_init,
	.sync_offset = 0x2100,
	.dma_mask = DMA_BIT_MASK(34),
	.has_wide_gather = false,
	.has_hypervisor = false,
	.num_sid_entries = 0,
	.sid_table = NULL,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_sid_entry tegra186_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC */
		.base = 0x1b00,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x06_info = {
	.nb_channels = 63,
	.nb_pts = 576,
	.nb_mlocks = 24,
	.nb_bases = 16,
	.init = host1x06_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
	.sid_table = tegra186_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct host1x_sid_entry tegra194_sid_table[] = {
	{
		/* VIC */
		.base = 0x1af0,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC */
		.base = 0x1b00,
		.offset = 0x30,
		.limit = 0x34
	},
	{
		/* NVDEC1 */
		.base = 0x1bc0,
		.offset = 0x30,
		.limit = 0x34
	},
};

static const struct host1x_info host1x07_info = {
	.nb_channels = 63,
	.nb_pts = 704,
	.nb_mlocks = 32,
	.nb_bases = 0,
	.init = host1x07_init,
	.sync_offset = 0x0,
	.dma_mask = DMA_BIT_MASK(40),
	.has_wide_gather = true,
	.has_hypervisor = true,
	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
	.sid_table = tegra194_sid_table,
	.reserve_vblank_syncpts = false,
};

static const struct of_device_id host1x_of_match[] = {
	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
	{ },
};
MODULE_DEVICE_TABLE(of, host1x_of_match);

static void host1x_setup_sid_table(struct host1x *host)
{
	const struct host1x_info *info = host->info;
	unsigned int i;

	if (!info->has_hypervisor)
		return;

	for (i = 0; i < info->num_sid_entries; i++) {
		const struct host1x_sid_entry *entry = &info->sid_table[i];

		host1x_hypervisor_writel(host, entry->offset, entry->base);
		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
	}
}

static bool host1x_wants_iommu(struct host1x *host1x)
{
	/*
	 * If we support addressing a maximum of 32 bits of physical memory
	 * and if the host1x firewall is enabled, there's no need to enable
	 * IOMMU support. This can happen for example on Tegra20, Tegra30
	 * and Tegra114.
	 *
	 * Tegra124 and later can address up to 34 bits of physical memory and
	 * many platforms come equipped with more than 2 GiB of system memory,
	 * which requires crossing the 4 GiB boundary. But there's a catch: on
	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
	 * only address up to 32 bits of memory in GATHER opcodes, which means
	 * that command buffers need to either be in the first 2 GiB of system
	 * memory (which could quickly lead to memory exhaustion), or command
	 * buffers need to be treated differently from other buffers (which is
	 * not possible with the current ABI).
	 *
	 * A third option is to use the IOMMU in these cases to make sure all
	 * buffers will be mapped into a 32-bit IOVA space that host1x can
	 * address. This allows all of the system memory to be used and works
	 * within the limitations of the host1x on these SoCs.
	 *
	 * In summary, default to enable IOMMU on Tegra124 and later. For any
	 * of the earlier SoCs, only use the IOMMU for additional safety when
	 * the host1x firewall is disabled.
	 */
	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
			return false;
	}

	return true;
}

static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
	int err;

#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
	if (host->dev->archdata.mapping) {
		struct dma_iommu_mapping *mapping =
			to_dma_iommu_mapping(host->dev);
		arm_iommu_detach_device(host->dev);
		arm_iommu_release_mapping(mapping);

		domain = iommu_get_domain_for_dev(host->dev);
	}
#endif

	/*
	 * We may not always want to enable IOMMU support (for example if the
	 * host1x firewall is already enabled and we don't support addressing
	 * more than 32 bits of physical memory), so check for that first.
	 *
	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
	 * API), don't try to attach again.
	 */
	if (!host1x_wants_iommu(host) || domain)
		return domain;

	host->group = iommu_group_get(host->dev);
	if (host->group) {
		struct iommu_domain_geometry *geometry;
		dma_addr_t start, end;
		unsigned long order;

		err = iova_cache_get();
		if (err < 0)
			goto put_group;

		host->domain = iommu_domain_alloc(&platform_bus_type);
		if (!host->domain) {
			err = -ENOMEM;
			goto put_cache;
		}

		err = iommu_attach_group(host->domain, host->group);
		if (err) {
			if (err == -ENODEV)
				err = 0;

			goto free_domain;
		}

		geometry = &host->domain->geometry;
		start = geometry->aperture_start & host->info->dma_mask;
		end = geometry->aperture_end & host->info->dma_mask;

		order = __ffs(host->domain->pgsize_bitmap);
		init_iova_domain(&host->iova, 1UL << order, start >> order);
		host->iova_end = end;

		domain = host->domain;
	}

	return domain;

free_domain:
	iommu_domain_free(host->domain);
	host->domain = NULL;
put_cache:
	iova_cache_put();
put_group:
	iommu_group_put(host->group);
	host->group = NULL;

	return ERR_PTR(err);
}

static int host1x_iommu_init(struct host1x *host)
{
	u64 mask = host->info->dma_mask;
	struct iommu_domain *domain;
	int err;

	domain = host1x_iommu_attach(host);
	if (IS_ERR(domain)) {
		err = PTR_ERR(domain);
		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
		return err;
	}

	/*
	 * If we're not behind an IOMMU make sure we don't get push buffers
	 * that are allocated outside of the range addressable by the GATHER
	 * opcode.
	 *
	 * Newer generations of Tegra (Tegra186 and later) support a wide
	 * variant of the GATHER opcode that allows addressing more bits.
	 */
	if (!domain && !host->info->has_wide_gather)
		mask = DMA_BIT_MASK(32);

	err = dma_coerce_mask_and_coherent(host->dev, mask);
	if (err < 0) {
		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
		return err;
	}

	return 0;
}

static void host1x_iommu_exit(struct host1x *host)
{
	if (host->domain) {
		put_iova_domain(&host->iova);
		iommu_detach_group(host->domain, host->group);

		iommu_domain_free(host->domain);
		host->domain = NULL;

		iova_cache_put();

		iommu_group_put(host->group);
		host->group = NULL;
	}
}

static int host1x_get_resets(struct host1x *host)
{
	int err;

	host->resets[0].id = "mc";
	host->resets[1].id = "host1x";
	host->nresets = ARRAY_SIZE(host->resets);

	err = devm_reset_control_bulk_get_optional_exclusive_released(
				host->dev, host->nresets, host->resets);
	if (err) {
		dev_err(host->dev, "failed to get reset: %d\n", err);
		return err;
	}

	if (WARN_ON(!host->resets[1].rstc))
		return -ENOENT;

	return 0;
}

static int host1x_probe(struct platform_device *pdev)
{
	struct host1x *host;
	struct resource *regs, *hv_regs = NULL;
	int syncpt_irq;
	int err;

	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
	if (!host)
		return -ENOMEM;

	host->info = of_device_get_match_data(&pdev->dev);

	if (host->info->has_hypervisor) {
		regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
		if (!regs) {
			dev_err(&pdev->dev, "failed to get vm registers\n");
			return -ENXIO;
		}

		hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
						       "hypervisor");
		if (!hv_regs) {
			dev_err(&pdev->dev,
				"failed to get hypervisor registers\n");
			return -ENXIO;
		}
	} else {
		regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
		if (!regs) {
			dev_err(&pdev->dev, "failed to get registers\n");
			return -ENXIO;
		}
	}

	syncpt_irq = platform_get_irq(pdev, 0);
	if (syncpt_irq < 0)
		return syncpt_irq;

	host1x_bo_cache_init(&host->cache);
	mutex_init(&host->devices_lock);
	INIT_LIST_HEAD(&host->devices);
	INIT_LIST_HEAD(&host->list);
	host->dev = &pdev->dev;

	/* set common host1x device data */
	platform_set_drvdata(pdev, host);

	host->regs = devm_ioremap_resource(&pdev->dev, regs);
	if (IS_ERR(host->regs))
		return PTR_ERR(host->regs);

	if (host->info->has_hypervisor) {
		host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
		if (IS_ERR(host->hv_regs))
			return PTR_ERR(host->hv_regs);
	}

	host->dev->dma_parms = &host->dma_parms;
	dma_set_max_seg_size(host->dev, UINT_MAX);

	if (host->info->init) {
		err = host->info->init(host);
		if (err)
			return err;
	}

	host->clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(host->clk)) {
		err = PTR_ERR(host->clk);

		if (err != -EPROBE_DEFER)
			dev_err(&pdev->dev, "failed to get clock: %d\n", err);

		return err;
	}

	err = host1x_get_resets(host);
	if (err)
		return err;

	err = host1x_iommu_init(host);
	if (err < 0) {
		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
		return err;
	}

	err = host1x_channel_list_init(&host->channel_list,
				       host->info->nb_channels);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize channel list\n");
		goto iommu_exit;
	}

	err = host1x_syncpt_init(host);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize syncpts\n");
		goto free_channels;
	}

	err = host1x_intr_init(host, syncpt_irq);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize interrupts\n");
		goto deinit_syncpt;
	}

	pm_runtime_enable(&pdev->dev);

	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
	if (err)
		goto pm_disable;

	/* the driver's code isn't ready yet for the dynamic RPM */
	err = pm_runtime_resume_and_get(&pdev->dev);
	if (err)
		goto pm_disable;

	host1x_debug_init(host);

	err = host1x_register(host);
	if (err < 0)
		goto deinit_debugfs;

	err = devm_of_platform_populate(&pdev->dev);
	if (err < 0)
		goto unregister;

	return 0;

unregister:
	host1x_unregister(host);
deinit_debugfs:
	host1x_debug_deinit(host);

	pm_runtime_put_sync_suspend(&pdev->dev);
pm_disable:
	pm_runtime_disable(&pdev->dev);

	host1x_intr_deinit(host);
deinit_syncpt:
	host1x_syncpt_deinit(host);
free_channels:
	host1x_channel_list_free(&host->channel_list);
iommu_exit:
	host1x_iommu_exit(host);

	return err;
}

static int host1x_remove(struct platform_device *pdev)
{
	struct host1x *host = platform_get_drvdata(pdev);

	host1x_unregister(host);
	host1x_debug_deinit(host);

	pm_runtime_force_suspend(&pdev->dev);

	host1x_intr_deinit(host);
	host1x_syncpt_deinit(host);
	host1x_iommu_exit(host);
	host1x_bo_cache_destroy(&host->cache);

	return 0;
}

static int __maybe_unused host1x_runtime_suspend(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	host1x_intr_stop(host);
	host1x_syncpt_save(host);

	err = reset_control_bulk_assert(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to assert reset: %d\n", err);
		goto resume_host1x;
	}

	usleep_range(1000, 2000);

	clk_disable_unprepare(host->clk);
	reset_control_bulk_release(host->nresets, host->resets);

	return 0;

resume_host1x:
	host1x_setup_sid_table(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return err;
}

static int __maybe_unused host1x_runtime_resume(struct device *dev)
{
	struct host1x *host = dev_get_drvdata(dev);
	int err;

	err = reset_control_bulk_acquire(host->nresets, host->resets);
	if (err) {
		dev_err(dev, "failed to acquire reset: %d\n", err);
		return err;
	}

	err = clk_prepare_enable(host->clk);
	if (err) {
		dev_err(dev, "failed to enable clock: %d\n", err);
		goto release_reset;
	}

	err = reset_control_bulk_deassert(host->nresets, host->resets);
	if (err < 0) {
		dev_err(dev, "failed to deassert reset: %d\n", err);
		goto disable_clk;
	}

	host1x_setup_sid_table(host);
	host1x_syncpt_restore(host);
	host1x_intr_start(host);

	return 0;

disable_clk:
	clk_disable_unprepare(host->clk);
release_reset:
	reset_control_bulk_release(host->nresets, host->resets);

	return err;
}

static const struct dev_pm_ops host1x_pm_ops = {
	SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
			   NULL)
	/* TODO: add system suspend-resume once driver will be ready for that */
};

static struct platform_driver tegra_host1x_driver = {
	.driver = {
		.name = "tegra-host1x",
		.of_match_table = host1x_of_match,
		.pm = &host1x_pm_ops,
	},
	.probe = host1x_probe,
	.remove = host1x_remove,
};

static struct platform_driver * const drivers[] = {
	&tegra_host1x_driver,
	&tegra_mipi_driver,
};

static int __init tegra_host1x_init(void)
{
	int err;

	err = bus_register(&host1x_bus_type);
	if (err < 0)
		return err;

	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
	if (err < 0)
		bus_unregister(&host1x_bus_type);

	return err;
}
module_init(tegra_host1x_init);

static void __exit tegra_host1x_exit(void)
{
	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
	bus_unregister(&host1x_bus_type);
}
module_exit(tegra_host1x_exit);

/**
 * host1x_get_dma_mask() - query the supported DMA mask for host1x
 * @host1x: host1x instance
 *
 * Note that this returns the supported DMA mask for host1x, which can be
 * different from the applicable DMA mask under certain circumstances.
 */
u64 host1x_get_dma_mask(struct host1x *host1x)
{
	return host1x->info->dma_mask;
}
EXPORT_SYMBOL(host1x_get_dma_mask);

MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
MODULE_LICENSE("GPL");