// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
/* Copyright 2019 Collabora ltd. */

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>
#include <drm/drm_managed.h>

#include "panthor_device.h"
#include "panthor_gpu.h"
#include "panthor_regs.h"

/**
 * struct panthor_gpu - GPU block management data.
 */
struct panthor_gpu {
	/** @irq: GPU irq. */
	struct panthor_irq irq;

	/** @reqs_lock: Lock protecting access to pending_reqs. */
	spinlock_t reqs_lock;

	/** @pending_reqs: Pending GPU requests. */
	u32 pending_reqs;

	/** @reqs_acked: GPU request wait queue. */
	wait_queue_head_t reqs_acked;
};

/**
 * struct panthor_model - GPU model description
 */
struct panthor_model {
	/** @name: Model name. */
	const char *name;

	/** @arch_major: Major version number of architecture. */
	u8 arch_major;

	/** @product_major: Major version number of product. */
	u8 product_major;
};

/**
 * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified
 * by a combination of the major architecture version and the major product
 * version.
 * @_name: Name for the GPU model.
 * @_arch_major: Architecture major.
 * @_product_major: Product major.
 */
#define GPU_MODEL(_name, _arch_major, _product_major) \
{ \
	.name = __stringify(_name), \
	.arch_major = _arch_major, \
	.product_major = _product_major, \
}

static const struct panthor_model gpu_models[] = {
	GPU_MODEL(g610, 10, 7),
	{},
};

#define GPU_INTERRUPTS_MASK	\
	(GPU_IRQ_FAULT | \
	 GPU_IRQ_PROTM_FAULT | \
	 GPU_IRQ_RESET_COMPLETED | \
	 GPU_IRQ_CLEAN_CACHES_COMPLETED)

static void panthor_gpu_coherency_set(struct panthor_device *ptdev)
{
	gpu_write(ptdev, GPU_COHERENCY_PROTOCOL,
		  ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE);
}
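/*
 * Note (assumption, not spelled out in this file): ptdev->coherent is set
 * at probe time from the device's DMA attributes (typically a
 * "dma-coherent" DT property), so ACE_LITE is only selected when the
 * interconnect actually provides I/O coherency for the GPU.
 */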
static void panthor_gpu_init_info(struct panthor_device *ptdev)
{
	const struct panthor_model *model;
	u32 arch_major, product_major;
	u32 major, minor, status;
	unsigned int i;

	ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID);
	ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID);
	ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID);
	ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES);
	ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES);
	ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES);
	ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES);
	ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES);
	ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES);
	ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS);
	ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
	ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE);
	ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES);
	for (i = 0; i < 4; i++)
		ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i));

	ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT);

	ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO);
	ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32;

	ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO);
	ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32;

	ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO);
	ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32;

	arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id);
	product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id);
	major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id);
	minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id);
	status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id);

	for (model = gpu_models; model->name; model++) {
		if (model->arch_major == arch_major &&
		    model->product_major == product_major)
			break;
	}

	drm_info(&ptdev->base,
		 "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x",
		 model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16,
		 major, minor, status);

	drm_info(&ptdev->base,
		 "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x",
		 ptdev->gpu_info.l2_features,
		 ptdev->gpu_info.tiler_features,
		 ptdev->gpu_info.mem_features,
		 ptdev->gpu_info.mmu_features,
		 ptdev->gpu_info.as_present);

	drm_info(&ptdev->base,
		 "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx",
		 ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present,
		 ptdev->gpu_info.tiler_present);
}

static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
{
	if (status & GPU_IRQ_FAULT) {
		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
			      gpu_read(ptdev, GPU_FAULT_ADDR_LO);

		drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
			 fault_status, panthor_exception_name(ptdev, fault_status & 0xFF),
			 address);
	}
	if (status & GPU_IRQ_PROTM_FAULT)
		drm_warn(&ptdev->base, "GPU Fault in protected mode\n");

	spin_lock(&ptdev->gpu->reqs_lock);
	if (status & ptdev->gpu->pending_reqs) {
		ptdev->gpu->pending_reqs &= ~status;
		wake_up_all(&ptdev->gpu->reqs_acked);
	}
	spin_unlock(&ptdev->gpu->reqs_lock);
}
PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
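/*
 * PANTHOR_IRQ_HANDLER() is a helper macro from panthor_device.h: it
 * generates the panthor_request_gpu_irq(), panthor_gpu_irq_suspend() and
 * panthor_gpu_irq_resume() entry points used in this file, and arranges
 * for panthor_gpu_irq_handler() above to be called with the pending
 * interrupt status whenever the "gpu" interrupt fires.
 */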
/**
 * panthor_gpu_unplug() - Called when the GPU is unplugged.
 * @ptdev: Device to unplug.
 */
void panthor_gpu_unplug(struct panthor_device *ptdev)
{
	unsigned long flags;

	/* Make sure the IRQ handler is not running after that point. */
	if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
		panthor_gpu_irq_suspend(&ptdev->gpu->irq);

	/* Wake-up all waiters. */
	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
	ptdev->gpu->pending_reqs = 0;
	wake_up_all(&ptdev->gpu->reqs_acked);
	spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
}

/**
 * panthor_gpu_init() - Initialize the GPU block
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_gpu_init(struct panthor_device *ptdev)
{
	struct panthor_gpu *gpu;
	u32 pa_bits;
	int ret, irq;

	gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL);
	if (!gpu)
		return -ENOMEM;

	spin_lock_init(&gpu->reqs_lock);
	init_waitqueue_head(&gpu->reqs_acked);
	ptdev->gpu = gpu;
	panthor_gpu_init_info(ptdev);

	dma_set_max_seg_size(ptdev->base.dev, UINT_MAX);
	pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features);
	ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits));
	if (ret)
		return ret;

	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu");
	if (irq < 0)
		return irq;

	ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK);
	if (ret)
		return ret;

	return 0;
}

/**
 * panthor_gpu_block_power_off() - Power-off a specific block of the GPU
 * @ptdev: Device.
 * @blk_name: Block name.
 * @pwroff_reg: Power-off register for this block.
 * @pwrtrans_reg: Power transition register for this block.
 * @mask: Sub-elements to power-off.
 * @timeout_us: Timeout in microseconds.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_gpu_block_power_off(struct panthor_device *ptdev,
				const char *blk_name,
				u32 pwroff_reg, u32 pwrtrans_reg,
				u64 mask, u32 timeout_us)
{
	u32 val, i;
	int ret;

	/* Wait for any in-flight power transition to settle before requesting
	 * the power-off.
	 */
	for (i = 0; i < 2; i++) {
		u32 mask32 = mask >> (i * 32);

		if (!mask32)
			continue;

		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
						 val, !(mask32 & val),
						 100, timeout_us);
		if (ret) {
			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
				blk_name, mask);
			return ret;
		}
	}

	if (mask & GENMASK(31, 0))
		gpu_write(ptdev, pwroff_reg, mask);

	if (mask >> 32)
		gpu_write(ptdev, pwroff_reg + 4, mask >> 32);

	/* Now wait for the power-off transition we just requested to complete. */
	for (i = 0; i < 2; i++) {
		u32 mask32 = mask >> (i * 32);

		if (!mask32)
			continue;

		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
						 val, !(mask32 & val),
						 100, timeout_us);
		if (ret) {
			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
				blk_name, mask);
			return ret;
		}
	}

	return 0;
}
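/*
 * Illustrative usage note: callers are expected to go through the
 * panthor_gpu_power_off() convenience macro from panthor_gpu.h, which
 * token-pastes the per-block register names, roughly:
 *
 *	panthor_gpu_power_off(ptdev, L2, 1, 20000)
 *		-> panthor_gpu_block_power_off(ptdev, "L2",
 *					       L2_PWROFF_LO, L2_PWRTRANS_LO,
 *					       1, 20000)
 *
 * (exact expansion per panthor_gpu.h; shown here as an assumption).
 */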
/**
 * panthor_gpu_block_power_on() - Power-on a specific block of the GPU
 * @ptdev: Device.
 * @blk_name: Block name.
 * @pwron_reg: Power-on register for this block.
 * @pwrtrans_reg: Power transition register for this block.
 * @rdy_reg: Power transition ready register.
 * @mask: Sub-elements to power-on.
 * @timeout_us: Timeout in microseconds.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_gpu_block_power_on(struct panthor_device *ptdev,
			       const char *blk_name,
			       u32 pwron_reg, u32 pwrtrans_reg,
			       u32 rdy_reg, u64 mask, u32 timeout_us)
{
	u32 val, i;
	int ret;

	for (i = 0; i < 2; i++) {
		u32 mask32 = mask >> (i * 32);

		if (!mask32)
			continue;

		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
						 val, !(mask32 & val),
						 100, timeout_us);
		if (ret) {
			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
				blk_name, mask);
			return ret;
		}
	}

	if (mask & GENMASK(31, 0))
		gpu_write(ptdev, pwron_reg, mask);

	if (mask >> 32)
		gpu_write(ptdev, pwron_reg + 4, mask >> 32);

	for (i = 0; i < 2; i++) {
		u32 mask32 = mask >> (i * 32);

		if (!mask32)
			continue;

		ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4),
						 val, (mask32 & val) == mask32,
						 100, timeout_us);
		if (ret) {
			drm_err(&ptdev->base, "timeout waiting on %s:%llx readiness",
				blk_name, mask);
			return ret;
		}
	}

	return 0;
}

/**
 * panthor_gpu_l2_power_on() - Power-on the L2-cache
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
{
	if (ptdev->gpu_info.l2_present != 1) {
		/*
		 * Only support one core group now.
		 * ~(l2_present - 1) unsets all bits in l2_present except
		 * the bottom bit. (l2_present - 2) has all the bits in
		 * the first core group set. AND them together to generate
		 * a mask of cores in the first core group.
		 */
		u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) &
				(ptdev->gpu_info.l2_present - 2);

		drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n",
			      hweight64(core_mask),
			      hweight64(ptdev->gpu_info.shader_present));
	}

	/* Set the desired coherency mode before the power up of L2 */
	panthor_gpu_coherency_set(ptdev);

	return panthor_gpu_power_on(ptdev, L2, 1, 20000);
}
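/*
 * Worked example for the core_mask computation above: with
 * l2_present == 0x11 (L2 slices 0 and 4 present), ~(0x11 - 1) == ~0x10
 * clears bit 4 and keeps everything below it, while (0x11 - 2) == 0x0f
 * sets bits 0-3; ANDing the two yields 0x0f, i.e. only the cores behind
 * the first L2 slice.
 */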
/**
 * panthor_gpu_flush_caches() - Flush caches
 * @ptdev: Device.
 * @l2: L2 flush type.
 * @lsc: LSC flush type.
 * @other: Other flush type.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_gpu_flush_caches(struct panthor_device *ptdev,
			     u32 l2, u32 lsc, u32 other)
{
	bool timedout = false;
	unsigned long flags;

	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
	if (!drm_WARN_ON(&ptdev->base,
			 ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
		ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED;
		gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other));
	}
	spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);

	if (!wait_event_timeout(ptdev->gpu->reqs_acked,
				!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
				msecs_to_jiffies(100))) {
		/* The wait timed out, but the IRQ may have landed without being
		 * delivered yet: only report a timeout if the raw status bit is
		 * not set either.
		 */
		spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
		if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
		    !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED))
			timedout = true;
		else
			ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED;
		spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
	}

	if (timedout) {
		drm_err(&ptdev->base, "Flush caches timeout");
		return -ETIMEDOUT;
	}

	return 0;
}

/**
 * panthor_gpu_soft_reset() - Issue a soft-reset
 * @ptdev: Device.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_gpu_soft_reset(struct panthor_device *ptdev)
{
	bool timedout = false;
	unsigned long flags;

	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
	if (!drm_WARN_ON(&ptdev->base,
			 ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) {
		ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED;
		gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);
		gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET);
	}
	spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);

	if (!wait_event_timeout(ptdev->gpu->reqs_acked,
				!(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED),
				msecs_to_jiffies(100))) {
		spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
		if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 &&
		    !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED))
			timedout = true;
		else
			ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED;
		spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
	}

	if (timedout) {
		drm_err(&ptdev->base, "Soft reset timeout");
		return -ETIMEDOUT;
	}

	return 0;
}

/**
 * panthor_gpu_suspend() - Suspend the GPU block.
 * @ptdev: Device.
 *
 * Suspend the GPU irq. This should be called last in the suspend procedure,
 * after all other blocks have been suspended.
 */
void panthor_gpu_suspend(struct panthor_device *ptdev)
{
	/* On a fast reset, simply power down the L2. */
	if (!ptdev->reset.fast)
		panthor_gpu_soft_reset(ptdev);
	else
		panthor_gpu_power_off(ptdev, L2, 1, 20000);

	panthor_gpu_irq_suspend(&ptdev->gpu->irq);
}

/**
 * panthor_gpu_resume() - Resume the GPU block.
 * @ptdev: Device.
 *
 * Resume the IRQ handler and power-on the L2-cache.
 * The FW takes care of powering the other blocks.
 */
void panthor_gpu_resume(struct panthor_device *ptdev)
{
	panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
	panthor_gpu_l2_power_on(ptdev);
}
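/*
 * The GPU exposes its 64-bit counters as a LO/HI register pair that can't
 * be read atomically. The helper below reads HI, then LO, then re-reads HI
 * and retries until both HI reads match, which guards against the LO word
 * rolling over (and carrying into HI) between the two accesses.
 */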
/**
 * panthor_gpu_read_64bit_counter() - Read a 64-bit counter at a given offset.
 * @ptdev: Device.
 * @reg: The offset of the register to read.
 *
 * Return: The counter value.
 */
static u64
panthor_gpu_read_64bit_counter(struct panthor_device *ptdev, u32 reg)
{
	u32 hi, lo;

	do {
		hi = gpu_read(ptdev, reg + 0x4);
		lo = gpu_read(ptdev, reg);
	} while (hi != gpu_read(ptdev, reg + 0x4));

	return ((u64)hi << 32) | lo;
}

/**
 * panthor_gpu_read_timestamp() - Read the timestamp register.
 * @ptdev: Device.
 *
 * Return: The GPU timestamp value.
 */
u64 panthor_gpu_read_timestamp(struct panthor_device *ptdev)
{
	return panthor_gpu_read_64bit_counter(ptdev, GPU_TIMESTAMP_LO);
}

/**
 * panthor_gpu_read_timestamp_offset() - Read the timestamp offset register.
 * @ptdev: Device.
 *
 * Return: The GPU timestamp offset value.
 */
u64 panthor_gpu_read_timestamp_offset(struct panthor_device *ptdev)
{
	u32 hi, lo;

	hi = gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_HI);
	lo = gpu_read(ptdev, GPU_TIMESTAMP_OFFSET_LO);

	return ((u64)hi << 32) | lo;
}
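/*
 * Note (assumption): unlike the free-running TIMESTAMP counter,
 * TIMESTAMP_OFFSET is presumably static between explicit reprogrammings,
 * so the plain non-atomic HI/LO read pair above is sufficient and the
 * retry loop from panthor_gpu_read_64bit_counter() is not needed.
 */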